# Import Packages

In [None]:
library(opploansanalytics)
load.packages()

# Import Data

In [None]:
# List advertising methods ranked by application volume.
#
# Sends one query through queryReporting() (defined outside this notebook;
# presumably supplied by opploansanalytics -- confirm) that counts rows of
# public.all_allapps per `admethod`, restricted to:
#   * refi = 'N'
#   * left(denygrp, 1) > '2'
#   * appldate in [2019-10-01, 2020-01-01)
# Rows are ordered by the count, largest first.
#
# Returns whatever queryReporting() returns for that query.
getAdmethodList <- function () {

    admethod.volume.sql <- "
    select
        admethod
        , count(*) as volume
    from
        public.all_allapps
    where
        refi = 'N'
        and left(denygrp,1) > '2'
        and appldate >= '2019-10-01'::date
        and appldate < '2020-01-01'::date
    group by
        1
    order by
        2 desc
    "

    queryReporting(admethod.volume.sql)
}

In [None]:
# Sample raw lead payloads for one advertising method.
#
# Builds a query over lde4.leads joined to cloudlending.advertising_method
# (on partnerid = external_id) and left-joined to cloudlending.applications
# (on lead_id = lde4_lead_id), then hands it to queryReporting() (defined
# outside this notebook).
#
# Arguments:
#   admethod    Advertising method name, matched exactly against
#               cloudlending.advertising_method.name. Must be a single
#               non-missing value.
#   start.date  Inclusive lower bound on lead_time (America/Chicago),
#               cast to date by the database. Defaults to '2019-12-01'.
#   end.date    Exclusive upper bound on lead_time (America/Chicago),
#               cast to date by the database. Defaults to '2020-01-01'.
#
# Returns whatever queryReporting() returns: at most 100 rows in random
# order with lead_id, lead_time, name, appldate, admethod and raw_lead.
getTestPayloads <- function (admethod, start.date = '2019-12-01', end.date = '2020-01-01') {

    # Every argument lands inside a single-quoted SQL literal. paste0() is
    # vectorised, so a vector argument would silently build several queries,
    # and NA would be pasted as the text 'NA'; reject both up front.
    asSqlLiteral <- function (value, label) {
        text <- as.character(value)
        if (length(text) != 1L || is.na(text)) {
            stop("`", label, "` must be a single non-missing value", call. = FALSE)
        }
        # Double embedded single quotes (standard SQL escaping) so a name such
        # as O'Brien cannot terminate the literal early.
        gsub("'", "''", text, fixed = TRUE)
    }

    admethod.literal <- asSqlLiteral(admethod, 'admethod')
    start.literal <- asSqlLiteral(start.date, 'start.date')
    end.literal <- asSqlLiteral(end.date, 'end.date')

    queryReporting(paste0(
    "
    select
        lde.lead_id
        , lde.lead_time at time zone 'America/Chicago' as lead_time
        , c_app.name
        , c_app.createddate at time zone 'America/Chicago' as appldate
        , c_am.name as admethod
        , lde.raw_lead
    from
        lde4.leads as lde
        inner join
            cloudlending.advertising_method as c_am
            on lde.partnerid = c_am.external_id
        left join
            cloudlending.applications as c_app
            on lde.lead_id = c_app.lde4_lead_id
    where
        c_am.name = '", admethod.literal, "'
        and lde.lead_time at time zone 'America/Chicago' >= '", start.literal, "'::date
        and lde.lead_time at time zone 'America/Chicago' < '", end.literal, "'::date
    order by
        random()
    limit 100
    "
    ))

}

# Process Data

In [None]:
# Define the regex patterns and expected-key lists used to evaluate payloads.
#
# Takes no arguments and returns nothing useful; it works purely by side
# effect, assigning the following names with `<<-` (which reach the global
# environment when this function is defined at top level):
#   contains.all.fields, match.key, expected.keys, values, keys,
#   quoted.string, quoted.numeric, quoted.boolean, quoted.null, quoted.empty,
#   quoted.date, unquoted.numeric, unquoted.decimal, unquoted.integer,
#   unquoted.boolean, unquoted.null,
#   expect.quoted.string, expect.quoted.date, expect.unquoted.boolean,
#   expect.unquoted.numeric, expect.unquoted.integer
# getEvaluationDF() reads several of these as free variables, so this must be
# called before it.
setEvaluationVariables <- function () {


    ##  Does the proposed payload AT LEAST contain the objects expected?  ##
    # One lookahead per field, so the fields may appear in any order between
    # the opening and closing braces.
    required.fields <- c(
        'isProduction',
        'language',
        'currency',
        'socialSecurityNumber',
        'leadOfferId',
        'email',
        'stateCode',
        'grossMonthlyIncome',
        'personalInfo',
        'firstName',
        'lastName',
        'dateOfBirth',
        'address',
        'streetAddress',
        'city',
        'zip',
        'countryCode',
        'mobilePhone',
        'homePhone',
        'bankInfo',
        'bankName',
        'abaRoutingNumber',
        'accountNumber',
        'accountType',
        'accountLength',
        'incomeInfo',
        'incomeType',
        'payrollType',
        'payrollFrequency',
        'lastPayrollDate',
        'employmentInfo',
        'hireDate',
        'requestedLoanAmount'
    )
    contains.all.fields <<- paste0(
        '^\\{',
        paste0('(?=.*\\\"', required.fields, '\\\":)', collapse = ''),
        '.*\\}$'
    )


    ##  Does the proposed payload contain any objects NOT expected?  ##
    match.key <<- '\\\"([a-zA-Z]+)\\\":'
    # Assigned with `<<-` like every other setting here: getEvaluationDF()
    # reads `expected.keys` as a free variable, so a function-local
    # assignment would never reach it.
    #
    # Container objects (personalInfo, address, bankInfo, incomeInfo,
    # employmentInfo) are deliberately left out because getEvaluationDF()
    # strips exactly those names from the extracted keys; listing one here
    # would make it show up as "missing" for every payload.
    # NOTE(review): 'address' used to be listed here -- confirm it is always a
    # nested object in the payloads.
    expected.keys <<- c(
        'isProduction',
        'language',
        'currency',
        'socialSecurityNumber',
        'leadOfferId',
        'email',
        'stateCode',
        'grossMonthlyIncome',
        'firstName',
        'lastName',
        'dateOfBirth',
        'streetAddress',
        'city',
        'zip',
        'countryCode',
        'mobilePhone',
        'homePhone',
        'bankName',
        'abaRoutingNumber',
        'accountNumber',
        'accountType',
        'accountLength',
        'incomeType',
        'payrollType',
        'payrollFrequency',
        'lastPayrollDate',
        'employerName',
        'hireDate',
        'requestedLoanAmount'
    )


    ##  What are the key value pairs?  ##
    # Group 1 is the raw value text after `": `; a value that opens a nested
    # object (`{`) is skipped. The lookahead was written `(?!=\\{)`, which
    # tests for a literal "={"; `(?!\\{)` is the intended form.
    values <<- '\\{?\\\".*?\\":\\s(?!\\{)([^\\{]*?)[,\\}]'
    keys <<- '\\\"([^,]*?)\\\":'


    ##  What data types do we expect?  ##
    quoted.string <<- '\\\".*?\\\"'
    quoted.numeric <<- '\\\"\\d+\\\"'
    quoted.boolean <<- '\\\"(?:true|false)\\\"'
    quoted.null <<- '\\\"null\\\"'
    quoted.empty <<- '\\\"(?:\\s+)?\\\"'
    quoted.date <<- '\\\"\\d{8}\\\"'
    unquoted.numeric <<- '[\\d\\.]+(?![\\\"@\\w\\s\\-])'
    unquoted.decimal <<- '(?:\\d+\\.\\d+)+(?![\\\"@\\w\\s\\-])'
    # The digits must start either at the beginning of the string (an already
    # extracted value such as `1500`) or right after `: ` (raw payload text).
    # Requiring only the `: ` prefix made every extracted integer fail.
    unquoted.integer <<- '(?:^|(?<=(?:\\:\\s)))\\d+(?![\\\"@\\w\\s\\-\\.])'
    # Negative lookahead is `(?!...)`; the earlier `(?!=\\\")` only rejected a
    # following `="` and so let `true"` / `null"` through.
    unquoted.boolean <<- '(?<!\\\")(?:true|false)(?!\\\")'
    unquoted.null <<- '(?<!\\\")null(?!\\\")'

    expect.quoted.string <<- c(
        'language',
        'currency',
        'campaignId',
        'socialSecurityNumber',
        'leadOfferId',
        'email',
        'stateCode',
        'firstName',
        'lastName',
        'streetAddress',
        'city',
        'zip',
        'countryCode',
        'mobilePhone',
        'homePhone',
        'bankName',
        'abaRoutingNumber',
        'accountNumber',
        'incomeType',
        'payrollType',
        'employerName'
    )

    expect.quoted.date <<- c(
        'dateOfBirth',
        'lastPayrollDate',
        'hireDate'
    )

    expect.unquoted.boolean <<- c(
        'isProduction'
    )

    expect.unquoted.numeric <<- c(
        'grossMonthlyIncome'
    )

    expect.unquoted.integer <<- c(
        'accountType',
        'accountLength',
        'payrollFrequency',
        'requestedLoanAmount'
    )

}

In [None]:
# Evaluate each raw lead payload against the expected keys and value formats.
#
# Arguments:
#   test.payloads  Data frame with a `raw_lead` column holding the payload
#                  text (e.g. the result of getTestPayloads()).
#
# Returns `test.payloads` with these list-columns added (one element per row):
#   keys            keys extracted from raw_lead, container objects removed
#   values          raw value text extracted from raw_lead
#   json.df         data.frame(key, value) pairing the two
#   keys.missing    entries of `expected.keys` absent from the payload
#   keys.extra      payload keys not present in `expected.keys`
#   format.correct  data.frame(key, correct.format); correct.format is NA for
#                   keys that belong to none of the expect.* lists
#
# Relies on the patterns and lists that setEvaluationVariables() defines.
getEvaluationDF <- function (test.payloads) {

    # Fail early, with one readable message, if the settings are not defined.
    lookup.env <- environment()
    required.settings <- c(
        'keys', 'values', 'expected.keys',
        'quoted.string', 'quoted.date',
        'unquoted.boolean', 'unquoted.numeric', 'unquoted.integer',
        'expect.quoted.string', 'expect.quoted.date',
        'expect.unquoted.boolean', 'expect.unquoted.numeric', 'expect.unquoted.integer'
    )
    is.defined <- vapply(
        required.settings,
        function (name) exists(name, envir = lookup.env),
        logical(1)
    )
    if (!all(is.defined)) {
        stop(
            'getEvaluationDF() needs these variables to be defined first ',
            '(see setEvaluationVariables()): ',
            paste(required.settings[!is.defined], collapse = ', '),
            call. = FALSE
        )
    }

    # Resolve the extraction patterns BEFORE entering mutate(): inside it the
    # names `keys` and `values` are also the columns being created, and data
    # masking would prefer a same-named column already present on the input
    # (e.g. when re-evaluating a previously evaluated frame).
    key.pattern <- regex(keys)
    value.pattern <- regex(values)

    # Keys whose value is a nested object; the value pattern skips them, so
    # they are dropped here to keep keys and values aligned one-to-one.
    container.keys <- c('personalInfo', 'address', 'bankInfo', 'incomeInfo', 'employmentInfo')

    test.payloads %>%
        mutate(

            ##  Transform to DF  ##
            keys = raw_lead %>%
                str_match_all(pattern = key.pattern) %>%
                map(
                    .f = function (matches) {
                        found <- matches[ ,2]
                        found[ !found %in% container.keys ]
                    }
                ),

            values = raw_lead %>%
                str_match_all(pattern = value.pattern) %>%
                map(
                    .f = function (matches) {
                        matches[ ,2]
                    }
                ),

            # NOTE: data.frame() needs compatible lengths, so a payload whose
            # key and value counts disagree is not handled gracefully here.
            json.df = map2(
                .x = keys,
                .y = values,
                .f = function (payload.keys, payload.values) {
                    data.frame(
                        key = payload.keys,
                        value = payload.values,
                        stringsAsFactors = FALSE
                    )
                }
            ),

    #         Unused alternative that parses the payload instead of regex-matching it:
    #         json.df2 = raw_lead %>%
    #             map(
    #                 .f = function (x) {
    #                     df = x %>%
    #                         fromJSON() %>%
    #                         unlist() %>%
    #                         as.data.frame(stringsAsFactors = FALSE) %>%
    #                         rownames_to_column(
    #                             var = "key"
    #                         ) %>%
    #                         select(
    #                             key,
    #                             value = "."
    #                         ) %>%
    #                         mutate(
    #                             key = key %>%
    #                                 str_match(
    #                                     pattern = regex(".*\\.(.*?)$")
    #                                 ) %>% .[ ,2] %>%
    #                                 coalesce(key)
    #                         )
    #                 }
    #             ),



            ##  Keys Comparison  ##
            keys.missing = json.df %>%
                map(
                    .f = function (pairs) {
                        expected.keys[ !expected.keys %in% pairs$key ]
                    }
                ),

            keys.extra = json.df %>%
                map(
                    .f = function (pairs) {
                        pairs$key[ !pairs$key %in% expected.keys ]
                    }
                ),



            ##  Values Validation - Formatting  ## Which can be null?
            format.correct = json.df %>%
    #         format.correct = json.df2 %>%
                map(
                    .f = function (pairs) {
                        pairs %>% transmute(
                            key,
                            # No fallback branch: keys outside every expect.*
                            # list come back as NA rather than TRUE/FALSE.
                            correct.format = case_when(
                                key %in% expect.quoted.string ~ value %>% str_detect(regex(quoted.string)),
                                key %in% expect.quoted.date ~ value %>% str_detect(regex(quoted.date)),
                                key %in% expect.unquoted.boolean ~ value %>% str_detect(regex(unquoted.boolean)),
                                key %in% expect.unquoted.numeric ~ value %>% str_detect(regex(unquoted.numeric)),
                                key %in% expect.unquoted.integer ~ value %>% str_detect(regex(unquoted.integer))
                            )
                        )
                    }
                )

        )

}

# Execution

In [None]:
# Fetch advertising methods (ordered by volume, largest first) and preview
# the first ten rows.
admethods <- getAdmethodList()
head(admethods, 10)

In [None]:
# Sample payloads for the 'LenderEdge 4' advertising method, then inspect the
# first one three ways: through prettify() (not defined in this notebook;
# presumably jsonlite -- confirm), as raw text, and as the full row.
test.payloads <- getTestPayloads('LenderEdge 4')
prettify(slice(test.payloads, 1)$raw_lead)
cat(slice(test.payloads, 1)$raw_lead)
slice(test.payloads, 1)

In [None]:
# Define the regex patterns and expected-key lists (assigned with `<<-`) that
# getEvaluationDF() reads; run this cell before evaluating any payloads.
setEvaluationVariables()

In [None]:
# test.evaluated[ ,c(1,2,5)]
# test.evaluated$json.df %>% .[[1]]
# print(paste("Missing Key:", test.evaluated$keys.missing %>% .[[1]]))
# print(paste("Extra Key:", test.evaluated$keys.extra %>% .[[1]]))
# test.evaluated$format.correct %>% .[[1]]