# Import Packages

In [None]:
library(opploansanalytics)
load.packages()

In [None]:
source('lde-raw-lead-validation.r')

# Get Data

In [None]:
### Look at Accepted Lead --> App: What are the ranges in values accepted?

In [None]:
# Fetch LDE leads (with their raw payload) joined to the advertising method and,
# where present, the matching application and contact email.
#
# Args:
#   start.date  Inclusive lower bound on lead_time (America/Chicago), as 'YYYY-MM-DD'.
#   end.date    Exclusive upper bound on lead_time (America/Chicago), as 'YYYY-MM-DD'.
#   accepted    Value spliced into "and accepted = ..."; NA omits the condition.
#   partner_id  Advertising-method external id to restrict to; NA keeps all partners.
#   limit       Row cap appended as "limit ..."; NA omits it.
#
# Returns:
#   Whatever queryReporting() returns for the assembled SQL text.
getPayloadsAll = function (
    start.date = '2020-01-01',
    end.date = '2020-02-01',
    accepted = 'true',
    partner_id = NA,
    limit = NA
) {

    # Optional SQL fragments: each one collapses to "" when its argument is NA.
    partner.clause = ifelse(is.na(partner_id), "", paste0("and c_am.external_id = ", partner_id))
    accepted.clause = ifelse(is.na(accepted), "", paste0("and accepted = ", accepted))
    limit.clause = ifelse(is.na(limit), "", paste0("limit ", limit))

    sql = paste0(
    "
    select
        lde.lead_id
        , lde.lead_time at time zone 'America/Chicago' as lead_time
        , c_app.name
        , c_app.contact
        , c_ofl.email
        , c_app.createddate at time zone 'America/Chicago' as appldate
        , c_am.name as admethod
        , c_am.external_id as partnerid
        , lde.raw_lead
    from
        lde4.leads as lde
        inner join
            cloudlending.advertising_method as c_am
            on lde.partnerid = c_am.external_id
            ", partner.clause, "
        left join
            cloudlending.applications as c_app
            on lde.lead_id = c_app.lde4_lead_id
        left join
            cloudlending.contact as c_ofl
            on c_app.contact = c_ofl.id
    where
        lde.lead_time at time zone 'America/Chicago' >= '", start.date, "'::date
        and lde.lead_time at time zone 'America/Chicago' < '", end.date, "'::date
        ", accepted.clause, "
    ", limit.clause
    )

    queryReporting(sql)

}

In [None]:
# Fetch Heap "viewed lead registration" events and list, per event, the
# registration field names found in its `neededinputs` text.
#
# Args:
#   start.date  Inclusive lower bound on the event "time", as 'YYYY-MM-DD'.
#   end.date    Exclusive upper bound on the event "time", as 'YYYY-MM-DD'.
#   partner_id  Advertising-method external id to restrict to; NA keeps all partners.
#   limit       Row cap appended as "limit ..."; NA omits it.
#
# Returns:
#   The query result with lower-cased column names plus a list column `fields`
#   holding every capture of `registration[<field>]` found in `neededinputs`.
getRegInputsAll = function (
    start.date = '2020-01-01',
    end.date = '2020-02-01',
    partner_id = NA,
    limit = NA
) {

    # Optional SQL fragments: each one collapses to "" when its argument is NA.
    partner.clause = ifelse(is.na(partner_id), "", paste0("and c_am.external_id__c = ", partner_id))
    limit.clause = ifelse(is.na(limit), "", paste0("limit ", limit))

    raw.query = querySnowflake(paste0(
        "
        select
            reg.event_id
            , reg.session_id
            , reg.user_id
            , users_.\"identity\" as contact
            , reg.\"time\"
            , reg.email
            , reg.partner_id
            , reg.neededinputs
        from
            heap_prod.heap._viewed_lead_registration_ as reg
            inner join
                ods_prod.cloudlending.advertising_method__c as c_am
                on reg.partner_id = c_am.external_id__c
                ", partner.clause, "
            left join
                heap_prod.heap.users as users_
                on reg.user_id = users_.user_id
        where
            reg.\"time\" >= '", start.date, "'::date
            and reg.\"time\" < '", end.date, "'::date
        ", limit.clause
    ))

    # Normalise column names so the code below can refer to them in lower case.
    colnames(raw.query) = str_to_lower(colnames(raw.query))

    # For one `neededinputs` value, return the text captured inside each
    # `registration[...]` occurrence (second column of the match matrix).
    extract.fields = function (inputs) {
        matches = str_match_all(inputs, pattern = regex('registration\\[(.*?)\\]'))[[1]]
        matches[ , 2]
    }

    raw.query %>%
        mutate(fields = map(neededinputs, .f = extract.fields))

}

# Evaluate

In [None]:
# Registration events for partner 319 over the default date window, with no row cap
# (limit is left at its NA default).
sample.reg = getRegInputsAll(partner_id = 319)

In [None]:
# Accepted lead payloads for partner 319 over the default date window, with no row cap
# (limit is left at its NA default).
sample.payloads = getPayloadsAll(accepted = TRUE, partner_id = 319)

In [None]:
# Run the payload evaluation over the sampled leads; warnings raised during the
# evaluation are deliberately silenced.
# NOTE(review): getEvaluationDF is not defined in this notebook - presumably it comes
# from the sourced 'lde-raw-lead-validation.r'; confirm.
sample.evaluation = suppressWarnings(getEvaluationDF(sample.payloads))

In [None]:
# Stack the per-lead `validate.values` tables into one long table, skipping entries
# with a single column, and collapse each row's `value.codes` to its maximum.
sample.values =
    mutate(
        do.call(
            rbind,
            keep(
                sample.evaluation$validate.values,
                .p = function (frame) ncol(frame) > 1
            )
        ),
        value.codes = unlist(map(value.codes, .f = max))
    )

### Int - Looks fine

In [None]:
# Frequency of each (key, value code, value) combination for the integer-coded keys,
# listed per key with the highest value code first and, within a code, the most
# frequent value first.
sample.values.summary.int =
    sample.values %>%
        group_by(key, value.codes, value) %>%
        summarize(count = n()) %>%
        ungroup() %>%
        filter(
            key %in% c(
                'accountType',
#                 'accountLength',
                'payrollFrequency'
            )
        ) %>%
        arrange(key, desc(value.codes), desc(count))

# Display the summary.
sample.values.summary.int

### String

In [None]:
# Frequency of each (key, value) combination for the string-valued keys, listed per
# key with the most frequent value first.
sample.values.summary.str =
    sample.values %>%
        group_by(
            key,
            value
        ) %>% 
        summarize(
            count = n()
        ) %>% 
        ungroup() %>% 
        filter(
            key %in% c(
                # Previously misspelled as 'coutnryCode', which can never match a
                # camelCase payload key, so country-code rows were silently dropped.
                'countryCode',
                'incomeType',
                'payrollType'
            )
        ) %>% 
        arrange(
            key,
            count %>% desc()
        )

# Display the summary.
sample.values.summary.str

##### Income Type - Reprompted if not in CL: Looks fine

In [None]:
# Income types recorded in cloudlending.employment_information since 2020-01-01,
# ordered from most to least frequent. Only the income_type column is kept.
income.types.cl = local({
    income.type.counts = queryReporting(
        "
        select
            income_type
            , count(*)
        from
            cloudlending.employment_information
        where
            createddate >= '2020-01-01'::date
        group by
            1
        order by
            2 desc
        "
    )
    income.type.counts$income_type
})

In [None]:
# Leads whose payload incomeType, once its surrounding double quotes are stripped,
# is not one of the income types seen in CloudLending (income.types.cl).
sample.income.types =
    sample.evaluation %>%
        # Keep only leads whose validate.values table has more than one column.
        filter(unlist(map(validate.values, ncol)) > 1) %>%
        mutate(
            # Raw 'incomeType' value(s) taken from each lead's validate.values table.
            income.type = map(
                validate.values,
                .f = function (frame) {
                    income.rows = filter(frame, key == 'incomeType')
                    income.rows$value
                }
            ),
            # Text between the leading and trailing double quote; NA when the value
            # is not wrapped in quotes.
            income.type.in.cl = str_match(income.type, regex('^\\\"(.*)\\\"$'))[ , 2]
        ) %>%
        filter(!(income.type.in.cl %in% income.types.cl))

In [None]:
# Match the leads with an unrecognised income type to their registration event by
# email, using only emails that occur exactly once in sample.reg, and flag whether
# the registration's field list included 'income_type'.
sample.joined =
    inner_join(
        sample.income.types,
        sample.reg %>%
            group_by(email) %>%
            mutate(occurrences = n()) %>%
            ungroup() %>%
            filter(occurrences == 1),
        by = 'email'
    ) %>%
    mutate(
        input.income.type = unlist(map(fields, ~ 'income_type' %in% .x))
    )

In [None]:
# Number of joined leads with and without 'income_type' among the registration fields.
summarize(
    group_by(sample.joined, input.income.type),
    n()
)

##### Payroll Type - Reprompted if not in CL: Looks fine

In [None]:
# Non-missing payroll types recorded in cloudlending.employment_information since
# 2020-01-01, ordered from most to least frequent.
payroll.types.cl = local({
    payroll.type.counts = queryReporting(
        "
        select
            payroll_type
            , count(*)
        from
            cloudlending.employment_information
        where
            createddate >= '2020-01-01'::date
        group by
            1
        order by
            2 desc
        "
    )
    known.types = payroll.type.counts$payroll_type
    # Drop the group of rows that have no payroll type.
    known.types[!is.na(known.types)]
})

In [None]:
# Leads whose payload payrollType, once its surrounding double quotes are stripped,
# is neither one of the payroll types seen in CloudLending (payroll.types.cl) nor
# 'DirectDeposit'.
sample.payroll.types =
    sample.evaluation %>%
        # Keep only leads whose validate.values table has more than one column.
        filter(unlist(map(validate.values, ncol)) > 1) %>%
        mutate(
            # Raw 'payrollType' value(s) taken from each lead's validate.values table.
            payroll.type = map(
                validate.values,
                .f = function (frame) {
                    payroll.rows = filter(frame, key == 'payrollType')
                    payroll.rows$value
                }
            ),
            # Text between the leading and trailing double quote; NA when the value
            # is not wrapped in quotes.
            payroll.type.in.cl = str_match(payroll.type, regex('^\\\"(.*)\\\"$'))[ , 2]
        ) %>%
        filter(!(payroll.type.in.cl %in% c(payroll.types.cl, 'DirectDeposit')))

In [None]:
# Match the leads with an unrecognised payroll type to their registration event by
# email, using only emails that occur exactly once in sample.reg, and flag whether
# the registration's field list included 'payroll_type'.
# Note: this reassigns sample.joined, replacing the income-type version built earlier.
sample.joined =
    inner_join(
        sample.payroll.types,
        sample.reg %>%
            group_by(email) %>%
            mutate(occurrences = n()) %>%
            ungroup() %>%
            filter(occurrences == 1),
        by = 'email'
    ) %>%
    mutate(
        input.payroll.type = unlist(map(fields, ~ 'payroll_type' %in% .x))
    )

In [None]:
# Number of joined leads with and without 'payroll_type' among the registration fields.
summarize(
    group_by(sample.joined, input.payroll.type),
    n()
)