# Import Packages

In [None]:
library(opploansanalytics)
load.packages()

# Import Data

In [None]:
##  getAdmethodList
##  Sends one fixed query through queryReporting() and returns whatever that
##  call returns. The query counts rows of public.all_allapps per admethod for
##  refi = 'N', left(denygrp,1) > '2' and appldate from 2019-10-01 up to (but
##  not including) 2020-01-01, ordered by volume descending.
##  Takes no arguments.
getAdmethodList = function () {

    admethod.volume.sql =
    "
    select
        admethod
        , count(*) as volume
    from
        public.all_allapps
    where
        refi = 'N'
        and left(denygrp,1) > '2'
        and appldate >= '2019-10-01'::date
        and appldate < '2020-01-01'::date
    group by
        1
    order by
        2 desc
    "

    queryReporting(admethod.volume.sql)
}

In [None]:
##  getTestPayloads
##  Builds a query for a random sample of raw leads of one advertising method
##  and returns the result of queryReporting() for it.
##
##  admethod    advertising method name, compared against c_am.name
##  start.date  inclusive lower bound on lead_time (America/Chicago), cast to date
##  end.date    exclusive upper bound on lead_time (America/Chicago), cast to date
##  limit       maximum number of rows requested
##
##  The text arguments are embedded in the SQL as single-quoted literals, so any
##  single quote inside them is doubled first; otherwise a name containing an
##  apostrophe would terminate the literal early and break (or alter) the query.
getTestPayloads = function (admethod, start.date = '2019-12-01', end.date = '2020-01-01', limit = 100) {

    ##  Escape a value for use inside a single-quoted SQL literal.  ##
    asSqlText = function (x) {
        gsub("'", "''", as.character(x), fixed = TRUE)
    }

    ##  Render numbers without scientific notation (1e5 -> 100000); anything
    ##  that is already text is passed through as written.  ##
    limit.text = format(limit, scientific = FALSE, trim = TRUE)

    queryReporting(paste0(
    "
    select
        lde.lead_id
        , lde.lead_time at time zone 'America/Chicago' as lead_time
        , c_app.name
        , c_app.createddate at time zone 'America/Chicago' as appldate
        , c_am.name as admethod
        , lde.raw_lead
    from
        lde4.leads as lde
        inner join
            cloudlending.advertising_method as c_am
            on lde.partnerid = c_am.external_id
        left join
            cloudlending.applications as c_app
            on lde.lead_id = c_app.lde4_lead_id
    where
        c_am.name = '", asSqlText(admethod), "'
        and lde.lead_time at time zone 'America/Chicago' >= '", asSqlText(start.date), "'::date
        and lde.lead_time at time zone 'America/Chicago' < '", asSqlText(end.date), "'::date
    order by
        random()
    limit ", limit.text
    ))

}

# Process Data

In [None]:
##  setEvaluationVariables
##  Publishes, via `<<-`, every constant used to evaluate a lead payload:
##    * expected.keys        - leaf keys a payload is expected to contain
##    * keys / values        - regexes that pull keys and leaf values out of the
##                             raw payload text
##    * quoted.* / unquoted.* / optional.* - regexes describing value formats
##    * expect.*             - which keys are held to which format
##  Takes no arguments; its result is not meant to be used.
##
##  The format regexes are applied to ONE isolated value at a time (the capture
##  group of `values`), i.e. text that no longer carries the leading ': ' or the
##  trailing ',' / '}'. Patterns must therefore describe the whole value.
setEvaluationVariables = function () {

    
    ##  Does the proposed payload AT LEAST contain the objects expected?  ##
#     contains.all.fields <<- paste0(
#         '^\\{',
#         '(?=.*\\\"isProduction\\\":)',
#         '(?=.*\\\"language\\\":)',
#         '(?=.*\\\"currency\\\":)',
#         '(?=.*\\\"socialSecurityNumber\\\":)',
#         '(?=.*\\\"leadOfferId\\\":)',
#         '(?=.*\\\"email\\\":)',
#         '(?=.*\\\"stateCode\\\":)',
#         '(?=.*\\\"grossMonthlyIncome\\\":)',
#         '(?=.*\\\"personalInfo\\\":)',
#         '(?=.*\\\"firstName\\\":)',
#         '(?=.*\\\"lastName\\\":)',
#         '(?=.*\\\"dateOfBirth\\\":)',
#         '(?=.*\\\"address\\\":)',
#         '(?=.*\\\"streetAddress\\\":)',
#         '(?=.*\\\"city\\\":)',
#         '(?=.*\\\"zip\\\":)',
#         '(?=.*\\\"countryCode\\\":)',
#         '(?=.*\\\"mobilePhone\\\":)',
#         '(?=.*\\\"homePhone\\\":)',
#         '(?=.*\\\"bankInfo\\\":)',
#         '(?=.*\\\"bankName\\\":)',
#         '(?=.*\\\"abaRoutingNumber\\\":)',
#         '(?=.*\\\"accountNumber\\\":)',
#         '(?=.*\\\"accountType\\\":)',
#         '(?=.*\\\"accountLength\\\":)',
#         '(?=.*\\\"incomeInfo\\\":)',
#         '(?=.*\\\"incomeType\\\":)',
#         '(?=.*\\\"payrollType\\\":)',
#         '(?=.*\\\"payrollFrequency\\\":)',
#         '(?=.*\\\"lastPayrollDate\\\":)',
#         '(?=.*\\\"employmentInfo\\\":)',
#         '(?=.*\\\"hireDate\\\":)',
#         '(?=.*\\\"requestedLoanAmount\\\":)',
#         '.*\\}$'
#     )

    
    ##  Does the proposed payload contain any objects NOT expected?  ##
    ##  Only leaf keys are listed. Container keys (personalInfo, address,
    ##  bankInfo, incomeInfo, employmentInfo) are removed from the keys found in
    ##  a payload before the comparison, so listing one here would report it as
    ##  missing for every payload.
    expected.keys <<- c(
        'isProduction',
        'language',
        'currency',
        'socialSecurityNumber',
        'leadOfferId',
        'email',
        'stateCode',
        'grossMonthlyIncome',
    #     'personalInfo',
        'firstName',
        'lastName',
        'dateOfBirth',
    #     'address',
        'streetAddress',
        'city',
        'zip',
        'countryCode',
        'mobilePhone',
        'homePhone',
    #     'bankInfo',
        'bankName',
        'abaRoutingNumber',
        'accountNumber',
        'accountType',
        'accountLength',
    #     'incomeInfo',
        'incomeType',
        'payrollType',
        'payrollFrequency',
        'lastPayrollDate',
        'nextPayrollDate',
    #     'employmentInfo',
        'employerName',
        'hireDate',
        'requestedLoanAmount'
    )

    
    ##  What are the key value pairs?  ##
    ##  `values` captures the text between '": ' and the next ',' or '}', and
    ##  refuses to start a value at '{' so that container objects are skipped.
    ##  `keys` captures the text between a '"' and the following '":'.
    values <<- '\\{?\\\".*?\\":\\s(?!\\{)([^\\{]*?)[,\\}]'
    keys <<- '\\\"([^,]*?)\\\":'

    
    ##  What dependencies exist between key/value pairs?  ##

    
    ##  What data types do we expect?  ##
    
    #   Always Bad   #
    quoted.null <<- '^\\\"null\\\"$'
    quoted.empty <<- '^\\\"(?:\\s+)?\\\"$'
    quoted.boolean <<- '^\\\"(?:true|false)\\\"$'
    
    #   Whole-value patterns for quoted values   #
    quoted.string <<- '^\\\".*?\\\"$'
    quoted.numeric <<- '^\\\"\\d+\\\"$'
    quoted.zip <<- '^\\\"\\d{5}\\\"$'
    quoted.date <<- '^\\\"\\d{8}\\\"$'
    quoted.ssn.aba <<- '^\\\"\\d{9}\\\"$'
    quoted.phone <<- '^\\\"(?:\\+\\d)?\\d{10}\\\"$'
    quoted.name <<- '^\\\"[a-zA-Z]+\\\"$'
    quoted.email <<- '^\\\".+@.+\\.\\w+\\\"$'
    quoted.state <<- '^\\\"(?:AL|AK|AZ|AR|CA|CO|CT|DE|FL|GA|HI|ID|IL|IN|IA|KS|KY|LA|ME|MD|MA|MI|MN|MS|MO|MT|NE|NV|NH|NJ|NM|NY|NC|ND|OH|OK|OR|PA|RI|SC|SD|TN|TX|UT|VT|VA|WA|WV|WI|WY|DC)\\\"$'
    quoted.currency <<- '^\\\"USD\\\"$'
    
    #   Patterns for unquoted values   #
    #   NOTE(review): unquoted.numeric and unquoted.decimal are left exactly as
    #   they were; nothing visible alongside this function reads them - confirm
    #   before relying on them for isolated values.
    unquoted.numeric <<- '[\\d\\.]+(?![\\\"@\\w\\s\\-])'
    #   The next four are anchored to the whole value. The earlier forms leaned
    #   on lookarounds for ': ' / ',' / '}', none of which is part of an
    #   isolated value, so they could not match one.
    unquoted.float.positive <<- '^\\d+\\.?\\d*$'
    unquoted.decimal <<- '(?:\\d+\\.\\d+)+(?![\\\"@\\w\\s\\-])'
    unquoted.integer <<- '^\\d+$'
    unquoted.boolean <<- '^(?:true|false)$'
    unquoted.null <<- '^null$'
    
    #   Positive number, with or without surrounding quotes   #
    optional.quoted.float.positive <<- '^\\\"?\\d+\\.?\\d*\\\"?$'

    ##  Which key is held to which format?  ##
    expect.quoted.string <<- c(
        'campaignId',
        'leadOfferId',
        'streetAddress',
        'bankName'
    )
    
    expect.quoted.employerName <<- c(
        'employerName'
    )
    
    expect.quoted.numeric <<- c(
        'accountNumber'
    )
    
    expect.quoted.name <<- c(
        'firstName',
        'lastName',
        'city',
        'language',
        'countryCode',
        'incomeType',
        'payrollType'
    )
    
    expect.quoted.currency <<- c(
        'currency'
    )

    expect.quoted.zip <<- c(
        'zip'
    )

    expect.quoted.date.past <<- c(
        'dateOfBirth',
        'lastPayrollDate',
        'hireDate'
    )

    expect.quoted.date.future <<- c(
        'nextPayrollDate'
    )

    expect.quoted.ssn.aba <<- c(
        'socialSecurityNumber',
        'abaRoutingNumber'
    )

    expect.quoted.phone <<- c(
        'mobilePhone',
        'homePhone'
    )

    expect.quoted.email <<- c(
        'email'
    )

    expect.quoted.state <<- c(
        'stateCode'
    )

    expect.unquoted.boolean <<- c(
        'isProduction'
    )

    expect.unquoted.float.positive <<- c(
        'grossMonthlyIncome'
    )

    expect.unquoted.integer <<- c(
        'accountType',
        'accountLength',
        'payrollFrequency'
    )
    
    expect.optional.quoted.float.positive <<- c(
        'requestedLoanAmount'
    )
    
}

In [None]:
##  validateValues
##  Content check for a single key/value pair, applied after the format check.
##
##  key           name of the payload key
##  value         value text exactly as extracted (quotes included)
##  valid.format  result of the format check for this pair (TRUE / FALSE / NA)
##  ...           named extras; the ones read here are
##                  present.time  reference date, compared as an integer
##                                (presumably 8 digits, yyyymmdd - confirm
##                                against the caller)
##                  bankName      bankName value(s) found in the same payload
##
##  Returns
##    * key in expect.quoted.date.past    TRUE when the date is <= present.time
##    * key in expect.quoted.date.future  TRUE when the date is >= present.time
##    * key in expect.quoted.employerName TRUE when the value is not one of the
##                                        bankName values (or none was given)
##    * any other key                     NA (no content rule)
##  For the three ruled cases a FALSE valid.format yields FALSE and an NA
##  valid.format yields NA; a date rule without present.time yields NA.
##
##  NOTE(review): dates are compared as integers built from the 8 digits, which
##  orders correctly only if payload dates are written yyyymmdd - confirm.
validateValues = function (key, value, valid.format, ...) {

    args = list(...)

    ##  Integer form of a quoted 8-digit date, NA when the text is anything else.  ##
    quotedDateAsInteger = function (x) {
        parsed = rep(NA_integer_, length(x))
        is.date = grepl('^"[0-9]{8}"$', x)
        parsed[is.date] = as.integer(substr(x[is.date], 2, 9))
        parsed
    }

    ##  Shared date rule: the format must be valid AND the date must parse
    ##  before the comparison against present.time is consulted.  ##
    checkDate = function (compare) {
        date.value = quotedDateAsInteger(value)
        present = suppressWarnings(as.integer(args$present.time))
        ifelse(
            valid.format & !is.na(date.value),
            compare(date.value, present),
            FALSE
        )
    }

    ###  Quoted Date Validation  ###
    if (key %in% expect.quoted.date.past) {
        checkDate(`<=`)

    } else if (key %in% expect.quoted.date.future) {
        checkDate(`>=`)

    ###  Employer must differ from the bank named in the same payload  ###
    } else if (key %in% expect.quoted.employerName) {
        ##  %in% is TRUE/FALSE for any number of bankName values, including
        ##  none, so no length guard is needed.
        ifelse(
            valid.format,
            !(value %in% args$bankName),
            FALSE
        )

    } else {
        NA
    }

}

In [None]:
##  getEvaluationDF
##  Evaluates every raw lead in `test.payloads` and returns the same rows with
##  these list-columns added:
##    keys          leaf keys found in raw_lead (container keys removed)
##    values        leaf values found in raw_lead, as text
##    json.df       one data.frame per lead with columns key, value,
##                  valid.format (format check) and valid.value (content check,
##                  a list-column holding validateValues() results)
##    keys.missing  expected.keys that were not found in the lead
##    keys.extra    found keys that are not in expected.keys
##
##  test.payloads  data frame with at least raw_lead and lead_time columns
##
##  Calls setEvaluationVariables() first, so the global patterns and expect.*
##  vectors are (re)assigned on every call.
getEvaluationDF = function (test.payloads) {

    setEvaluationVariables()

    ##  Snapshot the global extraction patterns under names that cannot be
    ##  shadowed by the `keys` / `values` columns created inside mutate().  ##
    key.pattern = regex(keys)
    value.pattern = regex(values)

    ##  Keys whose value is a nested object; the value pattern skips those
    ##  objects, so the keys are dropped to keep keys and values aligned.  ##
    container.keys = c('personalInfo', 'address', 'bankInfo', 'incomeInfo', 'employmentInfo')


    test.payloads %>%
        mutate(

            ###  DF Components  ###
            keys = raw_lead %>%
                str_match_all(
                    pattern = key.pattern
                ) %>%
                map(
                    .f = function (x) {
                        found = x[ ,2]
                        found[ !found %in% container.keys ]
                    }
                ),

            values = raw_lead %>%
                str_match_all(
                    pattern = value.pattern
                ) %>%
                map(
                    .f = function (x) {
                        x[ ,2]
                    }
                ),


            ###  Create the DF  ###
            json.df =
                map2(

                    ##  Combine Key Value Pairs  ##
                    .x = keys,
                    .y = values,
                    .f = function (x, y) {

                        data.frame(
                            key = x,
                            value = y,
                            stringsAsFactors = FALSE
                        )
                    }

                ) %>%

                map(

                    ##  Valid Format = Value Formatting Validation  ##
                    ##  The first three rules reject a value whatever its key;
                    ##  a key with no rule ends up NA.  ##
                    .f = function (x) {

                        x %>%
                            mutate(
                                valid.format = case_when(
                                    value %>% str_detect(regex(quoted.empty)) ~ FALSE,
                                    value %>% str_detect(regex(quoted.null)) ~ FALSE,
                                    value %>% str_detect(regex(quoted.boolean)) ~ FALSE,
                                    key %in% expect.quoted.string ~ value %>% str_detect(regex(quoted.string)),
                                    key %in% expect.quoted.employerName ~ value %>% str_detect(regex(quoted.string)),
                                    key %in% expect.quoted.numeric ~ value %>% str_detect(regex(quoted.numeric)),
                                    key %in% expect.quoted.name ~ value %>% str_detect(regex(quoted.name)),
                                    key %in% expect.quoted.zip ~ value %>% str_detect(regex(quoted.zip)),
                                    key %in% expect.quoted.date.past ~ value %>% str_detect(regex(quoted.date)),
                                    key %in% expect.quoted.date.future ~ value %>% str_detect(regex(quoted.date)),
                                    key %in% expect.quoted.ssn.aba ~ value %>% str_detect(regex(quoted.ssn.aba)),
                                    key %in% expect.quoted.phone ~ value %>% str_detect(regex(quoted.phone)),
                                    key %in% expect.quoted.email ~ value %>% str_detect(regex(quoted.email)),
                                    key %in% expect.quoted.state ~ value %>% str_detect(regex(quoted.state)),
                                    key %in% expect.quoted.currency ~ value %>% str_detect(regex(quoted.currency)),
                                    key %in% expect.unquoted.boolean ~ value %>% str_detect(regex(unquoted.boolean)),
                                    key %in% expect.unquoted.float.positive ~ value %>% str_detect(regex(unquoted.float.positive)),
                                    key %in% expect.unquoted.integer ~ value %>% str_detect(regex(unquoted.integer)),
                                    key %in% expect.optional.quoted.float.positive ~ value %>% str_detect(regex(optional.quoted.float.positive))
                                )
                            )

                    }

                ),

            json.df = map2(

                ##  Valid Value = Value Value Validation  ##
                .x = json.df,
                .y = lead_time,
                .f = function (a, b) {

                    ##  Zero-padded yyyymmdd. Without the padding 2019-12-05
                    ##  becomes '2019125', which is not comparable with the
                    ##  8-digit dates of the payload.  ##
                    present.time = sprintf(
                        '%04d%02d%02d',
                        as.integer(year(b)), as.integer(month(b)), as.integer(day(b))
                    )

                    bankName = a %>% filter(key == 'bankName') %>% .$value

                    a %>%
                        mutate(
                            valid.value = pmap(
                                .l = list(key, value, valid.format),
                                .f = function (i, j, k) {

                                    ##  Every extra is passed for every key;
                                    ##  validateValues picks the ones its rule
                                    ##  for that key needs.  ##
                                    validateValues(
                                        key = i,
                                        value = j,
                                        valid.format = k,
                                        present.time = present.time,
                                        bankName = bankName
                                    )
                                }
                            )
                        )
                }
            ),



            ##  Keys Comparison  ##
            keys.missing = json.df %>%
                map(
                    .f = function (x) {
                        expected.keys[ !expected.keys %in% x$key ]
                    }
                ),

            keys.extra = json.df %>%
                map(
                    .f = function (x) {
                        x$key[ !x$key %in% expected.keys ]
                    }
                )
        )

}

# Execution

In [None]:
##  Pull a small random sample of raw leads for one advertising method.
##  The commented lines are alternative runs kept for reference.
# getAdmethodList() %>% head(10)
# test.payloads = 'LenderEdge 4' %>% getTestPayloads(limit = 100)
test.payloads = getTestPayloads('Monevo', limit = 10)

In [None]:
##  Evaluate the sampled payloads, then show the key/value data frame of the
##  first lead.
evaluation = getEvaluationDF(test.payloads)
# evaluation %>% slice(1)
slice(evaluation, 1)$json.df[[1]]