## Notes

Questions:
    * Are we reading the raw lead correctly?
    * Do certain field values correlate with higher conversion?
        * Within Partner i.
        * Across Partners.

Additional Data:
    * URL
    * Click ID
    * Funnel performance: denial reason, conversion boolean



LenderEdge
    * A. Raw DOB is standard date format, without dashes. A couple of blanks.
    * B. When raw_incometype != 'Employment', incometype defaults to 'OtherTaxableIncome'. ~30k rows; check time series.
    * C. Often writes NA accountnumber when raw_accountnumber contains a value. ~500k, check time series.
    * D. Blank values, values that are not 9 digits, and mail-merge fields are passed into Routing Number.

1B) 
df.lenderedge.comparison %>%
    group_by(
        raw_incometype,
        incometype
    ) %>% 
    summarize(
        n()
    )

# Import Libraries

In [None]:
library(opploansanalytics)
load.packages()

# Import and Clean Data

### Leads

In [None]:
# Run the advertising-method volume query through queryReporting() and return
# whatever queryReporting() returns.
#
# The query counts applications created in September 2019 per advertising
# method, restricted to methods whose name ends in '4', and keeps the ten
# methods with the highest counts.
getAdmethod <- function () {

    admethod.sql <- "
select
    c_adm.name
    , count(*)
from
    cloudlending.applications as c_app
    inner join
        cloudlending.advertising_method as c_adm
        on c_app.advertising_method = c_adm.id
        and right(c_adm.name,1) = '4'
where
    date_trunc('month', c_app.createddate) = '2019-09-01'::date
group by
    1
order by
    2 desc
limit 10    
"

    queryReporting(admethod.sql)
}

In [None]:
# Pull the extracted lead fields alongside the same fields read straight from
# the raw lead JSON, for one advertising method and one lead_time window.
#
# Args:
#   admethod.name: advertising method name, matched exactly against
#                  cloudlending.advertising_method.name.
#   timestart:     lower bound for lde.lead_time (inclusive), cast to date in SQL.
#   timeend:       upper bound for lde.lead_time (inclusive), cast to date in SQL.
#                  NOTE(review): if lead_time is a timestamp, '<= <date>' probably
#                  only admits rows up to the start of that day -- confirm.
#
# Returns:
#   Whatever queryReporting() returns for the generated SQL.
getLeadsDF <- function (admethod.name, timestart, timeend) {

    # Double any embedded single quote so an interpolated value cannot end the
    # SQL string literal it is placed in.
    sqlEscape <- function (value) {
        gsub("'", "''", as.character(value), fixed = TRUE)
    }

    queryReporting(
        paste0(
"
select

    --  Identifiers  --
    lde.lead_id
    , lde.partnerid
    , lde.leadofferid
    , lde.lead_time
    , c_am.name
    , lde.accepted
    , lde.reason
    , lde.offer_amount
    --  Banking  --
    , lde.abaroutingnumber
    , lde.accountnumber
    , lde.bankname
    --  Employment  --
    , lde.grossmonthlyincome
    , lde.incometype
    , lde.lastpayrolldate
    , lde.payrollfrequency
    , lde.payrolltype
    , lde.work_hiredate
    --  Contact  --
    , lde.dateofbirth
    , lde.statecode


    --  Raw Lead  --
    , lde.raw_lead
        --  ID  --
    , lde.raw_lead ->> 'leadOfferId' as raw_leadofferid
    , lde.raw_lead ->> 'requestedLoanAmount' as raw_loan_request
        --  Banking  --
    , lde.raw_lead -> 'bankInfo' ->> 'abaRoutingNumber' as raw_abaroutingnumber
    , lde.raw_lead -> 'bankInfo' ->> 'accountNumber' as raw_accountnumber
    , lde.raw_lead -> 'bankInfo' ->> 'bankName' as raw_bankname
    , lde.raw_lead -> 'bankInfo' ->> 'accountType' as raw_accounttype
    , lde.raw_lead -> 'bankInfo' ->> 'accountLength' as raw_accountlength
        --  Employment  --
    , lde.raw_lead ->> 'grossMonthlyIncome' as raw_grossmonthlyincome
    , lde.raw_lead -> 'incomeInfo' ->> 'incomeType' as raw_incometype
    , lde.raw_lead -> 'incomeInfo' ->> 'lastPayrollDate' as raw_lastpayrolldate
    , lde.raw_lead -> 'incomeInfo' ->> 'payrollFrequency' as raw_payrollfrequency
    , lde.raw_lead -> 'incomeInfo' ->> 'payrollType' as raw_payrolltype
    , lde.raw_lead -> 'employmentInfo' ->> 'hireDate' as raw_work_hiredate
        --  Contact  --
    , lde.raw_lead -> 'personalInfo' ->> 'dateOfBirth' as raw_dateofbirth
    , lde.raw_lead ->> 'stateCode' as raw_statecode
    from
        lde4.leads as lde
    inner join
        cloudlending.advertising_method as c_am
        on lde.partnerid = c_am.external_id
        and c_am.name = '", sqlEscape(admethod.name), "'
    where
        lde.lead_time >= '", sqlEscape(timestart), "'::date
        and lde.lead_time <= '", sqlEscape(timeend), "'::date
"
        )
    )
}

In [None]:
# Normalise column types on the leads data frame and add validity flags for
# the banking fields.
#
# Args:
#   df: data frame with (at least) lead_time, dateofbirth, zip, raw_zip,
#       abaroutingnumber, raw_abaroutingnumber, accountnumber and
#       raw_accountnumber.
#
# Returns:
#   df with
#     * factor columns converted to character,
#     * dob: whole years between dateofbirth and lead_time (an age, despite
#       the column name),
#     * zip / raw_zip as character, with leading zeros restored on 3- and
#       4-digit values,
#     * abaroutingnumber / raw_abaroutingnumber left-padded with '0' to 9
#       characters,
#     * <field>.is.valid logical flags for both routing and both account
#       columns (never NA; missing or malformed values are FALSE).
#
# NOTE(review): raw_dateofbirth is left as received. The notes at the top of
# this notebook say it arrives as a date without dashes, so it will not compare
# equal to dateofbirth until it is reformatted.
formatDataTypes <- function (df) {

    # Restore leading zeros on short all-digit zips; blanks, NA and anything
    # that is not 3-4 digits are returned unchanged (as character).
    padZip <- function (zip) {
        zip <- as.character(zip)
        is.short <- !is.na(zip) & grepl("^[0-9]{3,4}$", zip)
        zip[is.short] <- str_pad(zip[is.short], width = 5, pad = '0')
        zip
    }

    # TRUE when the value is exactly nine digits, does not start with eight
    # zeros (which is what a blank or near-blank value becomes after padding),
    # and the 3-7-1 weighted digit sum is a multiple of 10.
    routingIsValid <- function (routing) {
        well.formed <- grepl("^[0-9]{9}$", routing) & !grepl("^0{8}", routing)
        digits <- routing[well.formed]
        weights <- c(3L, 7L, 1L, 3L, 7L, 1L, 3L, 7L, 1L)
        checksum <- integer(length(digits))
        for (position in seq_along(weights)) {
            checksum <- checksum +
                as.integer(substr(digits, position, position)) * weights[position]
        }
        is.valid <- rep(FALSE, length(routing))
        is.valid[well.formed] <- checksum %% 10L == 0L
        is.valid
    }

    # TRUE when the value is 6-17 digits and is not one digit repeated for the
    # whole string. perl = TRUE is used for the backreference.
    accountIsValid <- function (account) {
        grepl("^[0-9]{6,17}$", account) &
            !grepl("^([0-9])\\1+$", account, perl = TRUE)
    }

    df %>%
        mutate_if(
            is.factor,
            as.character
        ) %>%
        mutate(
            # Explicit units so the result does not depend on which unit the
            # subtraction would have picked for the two column classes.
            dob = floor(
                as.numeric(difftime(lead_time, dateofbirth, units = "days")) / 365
            ),

            zip = padZip(zip),
            raw_zip = padZip(raw_zip),

            abaroutingnumber = str_pad(abaroutingnumber, width = 9, pad = '0'),
            raw_abaroutingnumber = str_pad(raw_abaroutingnumber, width = 9, pad = '0'),

            # These read the padded routing numbers assigned just above.
            abaroutingnumber.is.valid = routingIsValid(abaroutingnumber),
            raw_abaroutingnumber.is.valid = routingIsValid(raw_abaroutingnumber),

            accountnumber.is.valid = accountIsValid(accountnumber),
            raw_accountnumber.is.valid = accountIsValid(raw_accountnumber)
        )
}

In [None]:
# Add one logical match.<field> column per extracted field, saying whether the
# extracted value agrees with its raw_<field> counterpart.
#
# A pair counts as a match when
#   * the extracted value is NA and the raw value is NA or '', or
#   * both are non-NA and compare equal with ==.
# Every other combination (including a non-NA extracted value with an NA raw
# value) is FALSE, so the result never contains NA.
#
# Args:
#   df: data frame holding each field listed below together with its raw_
#       counterpart.
#
# Returns:
#   df with the twelve match.* columns appended.
compareExtractionToRaw <- function (df) {

    fieldsAgree <- function (extracted, raw) {
        both.blank <- is.na(extracted) & (is.na(raw) | raw == '')
        both.equal <- !is.na(extracted) & !is.na(raw) & extracted == raw
        both.blank | both.equal
    }

    mutate(
        df,
        match.leadofferid = fieldsAgree(leadofferid, raw_leadofferid),
        match.abaroutingnumber = fieldsAgree(abaroutingnumber, raw_abaroutingnumber),
        match.accountnumber = fieldsAgree(accountnumber, raw_accountnumber),
        match.bankname = fieldsAgree(bankname, raw_bankname),
        match.grossmonthlyincome = fieldsAgree(grossmonthlyincome, raw_grossmonthlyincome),
        match.incometype = fieldsAgree(incometype, raw_incometype),
        match.lastpayrolldate = fieldsAgree(lastpayrolldate, raw_lastpayrolldate),
        match.payrollfrequency = fieldsAgree(payrollfrequency, raw_payrollfrequency),
        match.payrolltype = fieldsAgree(payrolltype, raw_payrolltype),
        match.work_hiredate = fieldsAgree(work_hiredate, raw_work_hiredate),
        match.dateofbirth = fieldsAgree(dateofbirth, raw_dateofbirth),
        match.statecode = fieldsAgree(statecode, raw_statecode)
    )
}

In [None]:
# Count, per match.* column, how many rows matched, and report the total row
# count next to those counts for comparison.
#
# Args:
#   df: data frame containing logical columns whose names start with 'match.'.
#
# Returns:
#   Named numeric vector: the per-column sums sorted ascending, followed by an
#   element named 'nrow' holding nrow(df).
examineMatches <- function (df) {

    match.counts <- df %>%
        select(
            starts_with('match.')
        ) %>%
        vapply(
            FUN = sum,
            FUN.VALUE = numeric(1)
        ) %>%
        sort()

    # The row count comes from the argument itself, not from a data frame that
    # happens to exist in the global environment.
    c(
        match.counts,
        nrow = nrow(df)
    )
}

### Funnel

In [None]:
# Add click-id and lead-id uniqueness features to the funnel data frame.
#
# Args:
#   df: data frame with partner_event_data (text that may contain a
#       'click_id=<digits>' / 'clickid=<digits>' parameter) and lead_id.
#
# Returns:
#   df with three added columns:
#     click_id_pos       position of '=' inside the matched click-id parameter
#                        (NA when there is no match),
#     click_id           the text after that '=' (may start with '%'), or
#                        'None' when there is no match,
#     has.unique.lead.id FALSE for every row whose lead_id occurs more than
#                        once in df, TRUE otherwise.
addFeatures <- function (df) {

    clickParam <- function (event.data) {
        str_extract(event.data, "click_?id=%?[0-9]+")
    }

    # Every lead_id seen at least twice anywhere in df.
    repeated.lead.ids <- df$lead_id[duplicated(df$lead_id)]

    mutate(
        df,
        click_id_pos = str_locate(clickParam(partner_event_data), "=")[, 1],
        # The matched parameter contains exactly one '=', so dropping everything
        # up to it leaves the id.
        click_id = replace_na(
            sub("^[^=]*=", "", clickParam(partner_event_data)),
            'None'
        ),
        has.unique.lead.id = !(lead_id %in% repeated.lead.ids)
    )
}

### Map

In [None]:
# https://austinwehrwein.com/digital-humanities/creating-a-density-map-in-r-with-zipcodes/
library(zipcode)
library(tidyverse)
library(maps)
library(viridis)
library(ggthemes)
library(albersusa)

In [None]:
# Boundary polygons used as outlines by the zip-level map plots below:
# `us` holds state outlines, `us2` county outlines.
us <- ggplot2::map_data(map = 'state')
us2 <- ggplot2::map_data(map = 'county')

In [None]:
# Zip code reference table, read from a path relative to the notebook.
zip.codes <- read_csv(file = "../data/zip_code_database.csv")

In [None]:
# Zip codes treated as valid: not decommissioned, five characters long, and
# not in one of the excluded state codes (these look like military and
# territory codes). Only the zip, its state and its coordinates are kept.
valid.zip <- select(
    filter(
        zip.codes,
        !state %in% c('AA', 'AE', 'AP', 'AS', 'FM', 'GU', 'MH', 'MP', 'PR', 'PW', 'VI'),
        decommissioned == 0,
        nchar(zip) == 5
    ),
    zip,
    state,
    latitude,
    longitude
)

# Summarize Functions

### Time Series

In [None]:
# Plot smoothed daily funnel conversion rates.
#
# Args:
#   df:           data frame with appldate, funded, qualified, bankverified,
#                 passscorecardratecard, contractsigned, cs_decisioned and
#                 admethod (its first value is used in the title).
#   aspect.ratio: passed to coord_fixed(ratio = ).
#
# Returns:
#   A ggplot with one geom_smooth() layer per rate. Only the smoothed trend is
#   drawn; add geom_point() / geom_line() layers with the same aesthetics to
#   see the raw daily values.
getTimeSeriesFunnel <- function (df, aspect.ratio) {

    # One row per application date, each rate being a ratio of daily sums.
    daily.rates <- df %>%
        group_by(appldate_ = as.Date(appldate)) %>%
        summarize(
            app.to.funded = mean(funded),
            funding.rate = sum(funded)/sum(qualified),
            bv.q = sum(bankverified)/sum(qualified),
            sc.bv = sum(passscorecardratecard)/sum(bankverified),
            cs.sc = sum(contractsigned)/sum(passscorecardratecard),
            dec.cs = sum(cs_decisioned)/sum(contractsigned),
            f.dec = sum(funded)/sum(cs_decisioned)
        )

    # The subtitle doubles as the colour legend.
    colour.key <- "Black: FR From Qualified  //  Gray: From App  //  Red: BV/Q\n
Blue: SC/BV  //  Green: CS/SC  //  Orange: Dec/CS  //  Purple: F/Dec"

    ggplot(daily.rates, mapping = aes(x = appldate_)) +
        geom_smooth(aes(y = app.to.funded), color = 'gray') +
        geom_smooth(aes(y = funding.rate), color = 'black') +
        geom_smooth(aes(y = bv.q), color = 'red') +
        geom_smooth(aes(y = sc.bv), color = 'blue') +
        geom_smooth(aes(y = cs.sc), color = 'darkgreen') +
        geom_smooth(aes(y = dec.cs), color = 'darkorange') +
        geom_smooth(aes(y = f.dec), color = 'purple') +
        scale_y_continuous(labels = scales::percent) +
        labs(
            title = paste(df$admethod[1], 'Funding Time Series'),
            subtitle = colour.key,
            y = 'Conversion',
            x = 'Appldate'
        ) +
        theme(
            plot.title = element_text(hjust = 0.5),
            plot.subtitle = element_text(hjust = 0.5)
        ) +
        coord_fixed(ratio = aspect.ratio)
}

In [None]:
# Plot the daily mean and median gross monthly income.
#
# Rows with a missing income, or an income of 50000 or more, are dropped
# before aggregating.
#
# Args:
#   df:           data frame with appldate, grossmonthlyincome and admethod
#                 (its first value is used in the title).
#   aspect.ratio: passed to coord_fixed(ratio = ).
#
# Returns:
#   A ggplot showing points, a line and a smoother for the mean (black) and
#   for the median (red). gmi.max is computed but not drawn.
getTimeSeriesGMI <- function (df, aspect.ratio) {

    daily.income <- df %>%
        filter(
            !is.na(grossmonthlyincome),
            grossmonthlyincome < 50000
        ) %>%
        group_by(appldate_ = as.Date(appldate)) %>%
        summarize(
            gmi.median = median(grossmonthlyincome),
            gmi.mean = mean(grossmonthlyincome),
            gmi.max = max(grossmonthlyincome)
        )

    ggplot(daily.income, mapping = aes(x = appldate_)) +
        geom_point(aes(y = gmi.mean), color = 'black') +
        geom_line(aes(y = gmi.mean), color = 'black') +
        geom_smooth(aes(y = gmi.mean), color = 'black') +
        geom_point(aes(y = gmi.median), color = 'red') +
        geom_line(aes(y = gmi.median), color = 'red') +
        geom_smooth(aes(y = gmi.median), color = 'red') +
        scale_y_continuous(labels = scales::dollar) +
        labs(
            title = paste(df$admethod[1], 'Income Time Series'),
            subtitle = "Black: Mean\nRed: Median",
            y = 'Gross Monthly Income',
            x = 'Appldate'
        ) +
        theme(
            plot.title = element_text(hjust = 0.5),
            plot.subtitle = element_text(hjust = 0.5)
        ) +
        coord_fixed(ratio = aspect.ratio)
}

In [None]:
# Plot the daily mean and median offer amount.
#
# Rows with a missing offer_amount are dropped before aggregating.
#
# Args:
#   df:           data frame with appldate, offer_amount and admethod (its
#                 first value is used in the title).
#   aspect.ratio: passed to coord_fixed(ratio = ).
#
# Returns:
#   A ggplot showing points, a line and a smoother for the mean (black) and
#   for the median (red).
getTimeSeriesOfferAmount <- function (df, aspect.ratio) {

    daily.offer <- df %>%
        filter(!is.na(offer_amount)) %>%
        group_by(appldate_ = as.Date(appldate)) %>%
        summarize(
            offer.median = median(offer_amount),
            offer.mean = mean(offer_amount)
        )

    ggplot(daily.offer, mapping = aes(x = appldate_)) +
        geom_point(aes(y = offer.mean), color = 'black') +
        geom_line(aes(y = offer.mean), color = 'black') +
        geom_smooth(aes(y = offer.mean), color = 'black') +
        geom_point(aes(y = offer.median), color = 'red') +
        geom_line(aes(y = offer.median), color = 'red') +
        geom_smooth(aes(y = offer.median), color = 'red') +
        scale_y_continuous(labels = scales::dollar) +
        labs(
            title = paste(df$admethod[1], 'Offer Amount Time Series'),
            subtitle = "Black: Mean\nRed: Median",
            y = 'Offer Amount',
            x = 'Appldate'
        ) +
        theme(
            plot.title = element_text(hjust = 0.5),
            plot.subtitle = element_text(hjust = 0.5)
        ) +
        coord_fixed(ratio = aspect.ratio)
}

In [None]:
# Plot the daily share of leads whose raw routing / account number is flagged
# valid.
#
# Rows where either validity flag is NA are dropped before aggregating.
#
# Args:
#   df:           data frame with appldate, raw_abaroutingnumber.is.valid,
#                 raw_accountnumber.is.valid and admethod (its first value is
#                 used in the title).
#   aspect.ratio: passed to coord_fixed(ratio = ).
#
# Returns:
#   A ggplot showing points, a line and a smoother for the routing share (red)
#   and for the account share (black).
getTimeSeriesValidBank <- function (df, aspect.ratio) {

    daily.valid <- df %>%
        filter(
            !is.na(raw_abaroutingnumber.is.valid),
            !is.na(raw_accountnumber.is.valid)
        ) %>%
        group_by(appldate_ = as.Date(appldate)) %>%
        summarize(
            routing = mean(raw_abaroutingnumber.is.valid),
            account = mean(raw_accountnumber.is.valid)
        )

    ggplot(daily.valid, mapping = aes(x = appldate_)) +
        geom_point(aes(y = routing), color = 'red') +
        geom_line(aes(y = routing), color = 'red') +
        geom_smooth(aes(y = routing), color = 'red') +
        geom_point(aes(y = account), color = 'black') +
        geom_line(aes(y = account), color = 'black') +
        geom_smooth(aes(y = account), color = 'black') +
        scale_y_continuous(labels = scales::percent) +
        labs(
            title = paste(df$admethod[1], 'Valid Bank Time Series'),
            subtitle = "Black: Account\nRed: Routing",
            y = '%Valid',
            x = 'Appldate'
        ) +
        theme(
            plot.title = element_text(hjust = 0.5),
            plot.subtitle = element_text(hjust = 0.5)
        ) +
        coord_fixed(ratio = aspect.ratio)
}

In [None]:
# Plot, per application date, the share of decisioned applications that fall
# into each tracked denial-reason bucket.
#
# Only rows with cs_decisioned == 1 are used. denial_reason is bucketed by the
# first rule that applies: NA -> 'Loan Approved', then a text match on each
# tracked reason, otherwise 'Other'. 'Loan Approved' and 'Other' count towards
# the denominator but are not plotted.
#
# Args:
#   df:           data frame with appldate, cs_decisioned, denial_reason and
#                 admethod (its first value is used in the title).
#   aspect.ratio: passed to coord_fixed(ratio = ).
#
# Returns:
#   A ggplot with one geom_smooth() layer per tracked reason. Only the
#   smoothed trend is drawn; add geom_point() / geom_line() layers with the
#   same aesthetics to see the raw daily values.
getTimeSeriesDenialReason <- function (df, aspect.ratio) {

    decisioned <- df %>%
        filter(cs_decisioned == 1) %>%
        mutate(
            denial_reason_agg = case_when(
                is.na(denial_reason)  ~  'Loan Approved',
                str_detect(denial_reason, pattern = '.*Insufficient Income.*')  ~  'Insufficient Income',
                str_detect(denial_reason, pattern = '.*No Direct Deposit.*')  ~  'No Direct Deposit',
                str_detect(denial_reason, pattern = '.*Incompatible Bank.*')  ~  'Incompatible Bank',
                str_detect(denial_reason, pattern = '.*Negative Bank Account Balance.*')  ~  'Negative Bank Account Balance',
                str_detect(denial_reason, pattern = '.*Self-Employed.*')  ~  'Self Employed',
                TRUE  ~  'Other'
            )
        )

    daily.share <- decisioned %>%
        group_by(appldate_ = as.Date(appldate)) %>%
        summarize(
            insufficient.income = mean(denial_reason_agg == 'Insufficient Income'),
            no.direct.deposit = mean(denial_reason_agg == 'No Direct Deposit'),
            incompatible.bank = mean(denial_reason_agg == 'Incompatible Bank'),
            negative.balance = mean(denial_reason_agg == 'Negative Bank Account Balance'),
            self.employed = mean(denial_reason_agg == 'Self Employed')
        )

    # The subtitle doubles as the colour legend.
    colour.key <- "Black: No Direct Deposit  //  Red: Insufficient Income\n
Green: Negative Balance  //  Blue: Incompatible Bank  //  Purple: Self Employed"

    ggplot(daily.share, mapping = aes(x = appldate_)) +
        geom_smooth(aes(y = insufficient.income), color = 'red') +
        geom_smooth(aes(y = no.direct.deposit), color = 'black') +
        geom_smooth(aes(y = incompatible.bank), color = 'blue') +
        geom_smooth(aes(y = negative.balance), color = 'darkgreen') +
        geom_smooth(aes(y = self.employed), color = 'purple') +
        scale_y_continuous(labels = scales::percent) +
        labs(
            title = paste(df$admethod[1], 'Denial Reason Time Series'),
            subtitle = colour.key,
            y = '%CS Decisioned',
            x = 'Appldate'
        ) +
        theme(
            plot.title = element_text(hjust = 0.5),
            plot.subtitle = element_text(hjust = 0.5)
        ) +
        coord_fixed(ratio = aspect.ratio)
}

### Geography

In [None]:
# Map qualified volume by zip code.
#
# Args:
#   df: data frame with zip, longitude, latitude, qualified and admethod (its
#       first value is used in the title).
#
# Returns:
#   A ggplot with one point per (zip, longitude, latitude) coloured by
#   sum(qualified), drawn over the county (`us2`, gray) and state (`us`,
#   black) outlines. Both outline data frames are read from the enclosing
#   environment.
getZipPlotVolume <- function (df) {

    zip.volume <- summarize(
        group_by(df, zip, longitude, latitude),
        metric = sum(qualified)
    )

    # Unfilled boundary polygons in a single line colour.
    outlineLayer <- function (map.df, line.colour) {
        geom_polygon(
            data = map.df,
            mapping = aes(x = long, y = lat, group = group),
            color = line.colour,
            fill = NA,
            alpha = 0.35
        )
    }

    # Coordinates are plotted as plain cartesian x / y; the axis limits are
    # fixed rather than derived from the data.
    ggplot(zip.volume, mapping = aes(x = longitude, y = latitude)) +
        outlineLayer(us2, 'gray') +
        outlineLayer(us, 'black') +
        geom_point(
            mapping = aes(color = metric),
            size = 0.8,
            alpha = 0.25
        ) +
        xlim(-125, -65) +
        ylim(25, 50) +
        labs(
            title = paste(df$admethod[1], "New Qualified Volume by Geography")
        ) +
        coord_fixed(ratio = 16/9)
}

In [None]:
# Map a funnel ratio by zip code for qualified applications.
#
# Args:
#   df:              data frame with zip, longitude, latitude, qualified and
#                    admethod (its first value is used in the title).
#   numerator.quo:   quoted expression, unquoted with !! and summed per zip.
#   denominator.quo: quoted expression, unquoted with !! and summed per zip.
#
# Returns:
#   A ggplot with one point per zip, coloured by the ratio and sized by the
#   number of qualified rows, drawn over the county (`us2`, gray) and state
#   (`us`, black) outlines read from the enclosing environment. Zips whose
#   ratio is 0.33 or higher (or NA) are dropped.
#
# NOTE(review): the title still reads "New Qualified Volume by Geography"
# although the plotted metric is a ratio -- confirm the intended wording.
getZipPlotFunnel <- function (df, numerator.quo, denominator.quo) {

    zip.rates <- df %>%
        filter(qualified == 1) %>%
        group_by(zip, longitude, latitude) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo)/sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        filter(
            n >= 1,
            metric < 0.33
        )

    # Unfilled boundary polygons in a single line colour.
    outlineLayer <- function (map.df, line.colour) {
        geom_polygon(
            data = map.df,
            mapping = aes(x = long, y = lat, group = group),
            color = line.colour,
            fill = NA,
            alpha = 0.35
        )
    }

    ggplot(zip.rates, mapping = aes(x = longitude, y = latitude)) +
        outlineLayer(us2, 'gray') +
        outlineLayer(us, 'black') +
        geom_point(
            mapping = aes(color = metric, size = n),
            alpha = 0.6
        ) +
        xlim(-125, -65) +
        ylim(25, 50) +
        labs(
            title = paste(df$admethod[1], "New Qualified Volume by Geography")
        ) +
        coord_fixed(ratio = 16/9) +
        scale_colour_gradientn(colours = c("brown3", "gold", "springgreen4"))
}

In [None]:
# Map median gross monthly income by zip code for qualified applications.
#
# Args:
#   df: data frame with zip, longitude, latitude, qualified and
#       grossmonthlyincome.
#
# Returns:
#   A ggplot with one point per zip, coloured and faded by the median income
#   and sized by the number of qualified rows, drawn over the county (`us2`,
#   gray) and state (`us`, black) outlines read from the enclosing
#   environment. Zips with a median above 6000 are dropped. A zip containing
#   any NA income gets an NA median and is dropped by the same filter.
getZipPlotIncome <- function (df) {

    zip.income <- df %>%
        filter(qualified == 1) %>%
        group_by(zip, longitude, latitude) %>%
        summarize(
            n = n(),
            metric = median(grossmonthlyincome)
        ) %>%
        ungroup() %>%
        filter(
            n >= 1,
            metric <= 6000
        )

    # Unfilled boundary polygons in a single line colour.
    outlineLayer <- function (map.df, line.colour) {
        geom_polygon(
            data = map.df,
            mapping = aes(x = long, y = lat, group = group),
            color = line.colour,
            fill = NA,
            alpha = 0.35
        )
    }

    ggplot(zip.income, mapping = aes(x = longitude, y = latitude)) +
        outlineLayer(us2, 'gray') +
        outlineLayer(us, 'black') +
        geom_point(
            mapping = aes(color = metric, size = n, alpha = metric)
        ) +
        xlim(-125, -65) +
        ylim(25, 50) +
        coord_fixed(ratio = 16/9) +
        scale_colour_gradientn(colours = c("brown3", "gold", "springgreen4"))
}

In [None]:
# Map a funnel ratio by zip code, with point size showing median income.
#
# Args:
#   df:              data frame with zip, longitude, latitude, qualified and
#                    grossmonthlyincome.
#   numerator.quo:   quoted expression, unquoted with !! and summed per zip.
#   denominator.quo: quoted expression, unquoted with !! and summed per zip.
#
# Returns:
#   A ggplot with one point per zip, coloured by the ratio and sized by the
#   zip's median grossmonthlyincome, drawn over the county (`us2`, gray) and
#   state (`us`, black) outlines read from the enclosing environment. Only
#   zips with a ratio strictly between 0 and 0.4 are kept.
getZipGMIPlotFunnel <- function (df, numerator.quo, denominator.quo) {

    zip.rates <- df %>%
        filter(qualified == 1) %>%
        group_by(zip, longitude, latitude) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo)/sum(!!denominator.quo),
            variable = median(grossmonthlyincome)
        ) %>%
        ungroup() %>%
        filter(
            n >= 1,
            metric < 0.4,
            metric > 0
        )

    # Unfilled boundary polygons in a single line colour.
    outlineLayer <- function (map.df, line.colour) {
        geom_polygon(
            data = map.df,
            mapping = aes(x = long, y = lat, group = group),
            color = line.colour,
            fill = NA,
            alpha = 0.35
        )
    }

    ggplot(zip.rates, mapping = aes(x = longitude, y = latitude)) +
        outlineLayer(us2, 'gray') +
        outlineLayer(us, 'black') +
        geom_point(
            mapping = aes(color = metric, size = variable),
            alpha = 0.7
        ) +
        xlim(-125, -65) +
        ylim(25, 50) +
        coord_fixed(ratio = 16/9) +
        scale_colour_gradientn(colours = c("brown3", "gold", "springgreen4"))
}

In [None]:
# Map a funnel ratio by zip code, with point size showing the mean of `dob`.
#
# Args:
#   df:              data frame with zip, longitude, latitude, qualified,
#                    raw_dateofbirth and dob.
#   numerator.quo:   quoted expression, unquoted with !! and summed per zip.
#   denominator.quo: quoted expression, unquoted with !! and summed per zip.
#
# Returns:
#   A ggplot with one point per zip, coloured by the ratio and sized by
#   mean(dob), drawn over the county (`us2`, gray) and state (`us`, black)
#   outlines read from the enclosing environment. Only qualified rows with a
#   non-NA raw_dateofbirth are used, and only zips with a ratio strictly
#   between 0 and 1 are kept.
#
# NOTE(review): rows are filtered on raw_dateofbirth but the size uses dob;
# an NA dob would make that zip's mean NA -- confirm this is intended.
getZipDOBPlotFunnel <- function (df, numerator.quo, denominator.quo) {

    zip.rates <- df %>%
        filter(
            qualified == 1,
            !is.na(raw_dateofbirth)
        ) %>%
        group_by(zip, longitude, latitude) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo)/sum(!!denominator.quo),
            variable = mean(dob)
        ) %>%
        ungroup() %>%
        filter(
            n >= 1,
            metric < 1,
            metric > 0
        )

    # Unfilled boundary polygons in a single line colour.
    outlineLayer <- function (map.df, line.colour) {
        geom_polygon(
            data = map.df,
            mapping = aes(x = long, y = lat, group = group),
            color = line.colour,
            fill = NA,
            alpha = 0.35
        )
    }

    ggplot(zip.rates, mapping = aes(x = longitude, y = latitude)) +
        outlineLayer(us2, 'gray') +
        outlineLayer(us, 'black') +
        geom_point(
            mapping = aes(color = metric, size = variable),
            alpha = 0.7
        ) +
        xlim(-125, -65) +
        ylim(25, 50) +
        coord_fixed(ratio = 16/9) +
        scale_colour_gradientn(colours = c("brown3", "gold", "springgreen4"))
}

In [None]:
getValidZipPlotFunnel = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of a funnel ratio for leads whose zip is / is not listed in valid.zip$zip.
    ##    df              : lead-level data; reads qualified and zip.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Uses `valid.zip` from the calling environment. Returns a ggplot object.

    ##  Qualified leads, split on membership of zip in valid.zip$zip.
    by.validity = df %>%
        filter(qualified == 1) %>%
        mutate(is.valid = zip %in% valid.zip$zip) %>%
        group_by(is.valid) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 1)

    ##  Bar height = ratio, bar shade = volume.
    ggplot(by.validity, mapping = aes(x = is.valid, y = metric)) +
        geom_col(mapping = aes(fill = n)) +
        coord_fixed(ratio = aspect.ratio) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")
}

In [None]:
getStatePlotFunnel = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of a funnel ratio by statecode, bars ordered by descending volume.
    ##    df              : lead-level data; reads qualified and statecode.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Qualified leads summarised per state; ratios >= 1 (or NA) are dropped.
    by.state = df %>%
        filter(qualified == 1) %>%
        group_by(statecode) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 1)

    ##  Factor levels follow descending n so the busiest state is plotted first.
    volume.order = order(-by.state$n)
    by.state$statecode = factor(
        by.state$statecode,
        levels = by.state$statecode[volume.order]
    )

    ##  Bar height = ratio, bar shade = volume.
    ggplot(by.state, mapping = aes(x = statecode, y = metric)) +
        geom_col(mapping = aes(fill = n)) +
        coord_fixed(ratio = aspect.ratio) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")
}

### Income

In [None]:
getIncomePlotFunnel = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of a funnel ratio by gross monthly income, in $100 buckets.
    ##    df              : lead-level data; reads qualified, grossmonthlyincome and
    ##                      admethod (first value is used in the title).
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Qualified leads summarised per income bucket.
    ##  The bucket is the income rounded down to the nearest 100, written as a single
    ##  expression rather than two same-named grouping arguments.
    summary.df = df %>%
        filter(
            qualified == 1
        ) %>%
        group_by(
            grossmonthlyincome = floor(grossmonthlyincome / 100) * 100
        ) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo)/sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        ##  Buckets whose ratio is >= 1 (or NA) are dropped.
        filter(
            n >= 1 &
            metric < 1
        )

    ##  Bar height = ratio (shown as a percentage), bar shade = volume.
    summary.df %>%
        ggplot(
            mapping = aes(
                x = grossmonthlyincome,
                y = metric
            )
        ) +
        geom_col(
            mapping = aes(
                fill = n
            )
        ) +
        scale_y_continuous(
            labels = scales::percent
        ) +
        ##  Only incomes between 0 and 8000 are drawn.
        xlim(0,8000) +
        ##  Adjust sizing  ##
        coord_fixed(
            ratio = aspect.ratio
        ) +
        labs(
            title = paste(df$admethod[1], 'New FR by GMI')
        ) +
        ##  Adjust coloring  ##
        scale_fill_gradient(
            low = "lightgray",
            high = "steelblue"
        )

}

In [None]:
getGMIPlotDOB = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of mean gross monthly income by age decade.
    ##    df              : lead-level data; reads dateofbirth, grossmonthlyincome, appldate.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  The ratio is only used to drop age groups where it is >= 1 (or NA); it is not plotted.
    ##  Returns a ggplot object.

    ##  Leads with a date of birth and an income below 50000.
    usable.df = df %>%
        filter(
            !is.na(dateofbirth),
            !is.na(grossmonthlyincome),
            grossmonthlyincome < 50000
        )

    ##  Age in years (appldate - dateofbirth, 365-day years) rounded down to the decade.
    by.decade = usable.df %>%
        group_by(
            dob = floor(as.integer(appldate - dateofbirth) / 365 / 10) * 10
        ) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo),
            gmi = mean(grossmonthlyincome)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 1)

    ##  Bar height = mean income (dollar axis), bar shade and label = volume.
    ggplot(by.decade, mapping = aes(x = dob, y = gmi)) +
        geom_col(mapping = aes(fill = n)) +
        geom_text(mapping = aes(label = n), size = 3, angle = -45) +
        scale_fill_gradient(low = "lightgray", high = "steelblue") +
        scale_y_continuous(labels = scales::dollar) +
        coord_fixed(ratio = aspect.ratio) +
        theme(axis.text.x = element_text(angle = -45))
}

In [None]:
getDOBGMITileFunnel = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Heat map of a funnel ratio over income ($1000 buckets, x) and age decade (y).
    ##    df              : lead-level data; reads dateofbirth, appldate, grossmonthlyincome.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Cells are (age decade, income bucket); cells with ratio >= 0.3 (or NA) are dropped.
    cells = df %>%
        filter(!is.na(dateofbirth)) %>%
        group_by(
            dob = floor(as.integer(appldate - dateofbirth) / 365 / 10) * 10,
            gmi = floor(grossmonthlyincome / 1000) * 1000
        ) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 0.3)

    ##  Tile shade = ratio; the printed number (and its size) is the cell volume.
    ggplot(cells, mapping = aes(x = gmi, y = dob)) +
        geom_tile(mapping = aes(fill = metric)) +
        geom_text(mapping = aes(label = n, size = n)) +
        xlim(0, 10000) +
        scale_fill_gradient(low = "lightgray", high = "steelblue") +
        coord_fixed(ratio = aspect.ratio)
}

In [None]:
getClickIDGMITileFunnel = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Heat map of a funnel ratio over income ($1000 buckets, x) and click_id (y).
    ##    df              : lead-level data; reads dateofbirth, click_id, grossmonthlyincome.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Cells are (click_id, income bucket); cells with ratio >= 0.3 (or NA) are dropped.
    cells = df %>%
        filter(!is.na(dateofbirth)) %>%
        group_by(
            click_id,
            gmi = floor(grossmonthlyincome / 1000) * 1000
        ) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 0.3)

    ##  Tile shade = ratio; the printed number (and its size) is the cell volume.
    ggplot(cells, mapping = aes(x = gmi, y = click_id)) +
        geom_tile(mapping = aes(fill = metric)) +
        geom_text(mapping = aes(label = n, size = n)) +
        xlim(0, 10000) +
        scale_fill_gradient(low = "lightgray", high = "steelblue") +
        coord_fixed(ratio = aspect.ratio)
}

In [None]:
getClickIDGMITileDOB = function (df, aspect.ratio) {

    ##  Heat map of the share of leads older than 40 over income ($1000 buckets, x)
    ##  and click_id (y).
    ##    df           : lead-level data; reads dateofbirth, click_id,
    ##                   grossmonthlyincome and appldate.
    ##    aspect.ratio : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Only leads with all three fields present.
    complete.df = df %>%
        filter(
            !is.na(dateofbirth),
            !is.na(click_id),
            !is.na(grossmonthlyincome)
        )

    ##  dob = fraction of the cell whose age in whole (365-day) years exceeds 40.
    cells = complete.df %>%
        group_by(
            click_id,
            gmi = floor(grossmonthlyincome / 1000) * 1000
        ) %>%
        summarize(
            n = n(),
            dob = mean(floor(as.integer(appldate - dateofbirth) / 365) > 40)
        ) %>%
        ungroup() %>%
        filter(n >= 1)

    ##  Tile shade = share over 40; the printed number (and its size) is the cell volume.
    ggplot(cells, mapping = aes(x = gmi, y = click_id)) +
        geom_tile(mapping = aes(fill = dob)) +
        geom_text(mapping = aes(label = n, size = n)) +
        xlim(0, 10000) +
        scale_fill_gradient(low = "lightgray", high = "steelblue") +
        coord_fixed(ratio = aspect.ratio)
}

### Click ID

In [None]:
getClickIDPlotFunnel = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of a funnel ratio by click_id, bars ordered by descending ratio.
    ##    df              : lead-level data; reads qualified and click_id.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Qualified leads with a click_id; ratios >= 1 (or NA) are dropped.
    by.click = df %>%
        filter(qualified == 1, !is.na(click_id)) %>%
        group_by(click_id) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 1)

    ##  Factor levels follow descending ratio so the best click_id is plotted first.
    best.first = order(-by.click$metric)
    by.click$click_id = factor(
        by.click$click_id,
        levels = by.click$click_id[best.first]
    )

    ##  The literal 'None' click_id is left off the chart.
    plot.df = by.click %>% filter(click_id != 'None')

    ##  Bar height = ratio (percent axis), bar shade and label = volume.
    ggplot(plot.df, mapping = aes(x = click_id, y = metric)) +
        geom_col(mapping = aes(fill = n)) +
        geom_text(mapping = aes(label = n), size = 3, angle = -45) +
        scale_fill_gradient(low = "lightgray", high = "steelblue") +
        scale_y_continuous(labels = scales::percent) +
        coord_fixed(ratio = aspect.ratio) +
        theme(axis.text.x = element_text(angle = -45))
}

In [None]:
getClickIDPlotGMI = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of mean gross monthly income by click_id, bars ordered by
    ##  descending funnel ratio.
    ##    df              : lead-level data; reads click_id and grossmonthlyincome.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Leads with both a click_id and an income; ratios >= 1 (or NA) are dropped.
    by.click = df %>%
        filter(!is.na(click_id), !is.na(grossmonthlyincome)) %>%
        group_by(click_id) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo),
            gmi = mean(grossmonthlyincome)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 1)

    ##  Factor levels follow descending ratio, not income.
    best.first = order(-by.click$metric)
    by.click$click_id = factor(
        by.click$click_id,
        levels = by.click$click_id[best.first]
    )

    ##  The literal 'None' click_id is left off the chart.
    plot.df = by.click %>% filter(click_id != 'None')

    ##  Bar height = mean income (dollar axis), bar shade and label = volume.
    ggplot(plot.df, mapping = aes(x = click_id, y = gmi)) +
        geom_col(mapping = aes(fill = n)) +
        geom_text(mapping = aes(label = n), size = 3, angle = -45) +
        scale_fill_gradient(low = "lightgray", high = "steelblue") +
        scale_y_continuous(labels = scales::dollar) +
        coord_fixed(ratio = aspect.ratio) +
        theme(axis.text.x = element_text(angle = -45))
}

In [None]:
getClickIDPlotDOB = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of the share of leads aged 40 or under by click_id, bars ordered
    ##  by descending funnel ratio.
    ##    df              : lead-level data; reads click_id, dateofbirth and appldate.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  dob = fraction of the click_id whose age in whole (365-day) years is <= 40.
    ##  Ratios >= 1 (or NA) are dropped.
    by.click = df %>%
        filter(!is.na(click_id), !is.na(dateofbirth)) %>%
        group_by(click_id) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo),
            dob = mean(floor(as.integer(appldate - dateofbirth) / 365) <= 40)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 1)

    ##  Factor levels follow descending ratio.
    best.first = order(-by.click$metric)
    by.click$click_id = factor(
        by.click$click_id,
        levels = by.click$click_id[best.first]
    )

    ##  The literal 'None' click_id is left off the chart.
    plot.df = by.click %>% filter(click_id != 'None')

    ##  Bar height = share aged <= 40 (default y scale), bar shade and label = volume.
    ggplot(plot.df, mapping = aes(x = click_id, y = dob)) +
        geom_col(mapping = aes(fill = n)) +
        geom_text(mapping = aes(label = n), size = 3, angle = -45) +
        scale_fill_gradient(low = "lightgray", high = "steelblue") +
        scale_y_continuous() +
        coord_fixed(ratio = aspect.ratio) +
        theme(axis.text.x = element_text(angle = -45))
}

In [None]:
getClickIDTimeSeries = function (df, click.id.vector, aspect.ratio) {

    ##  Daily lead counts for a chosen set of click IDs, one colored line per ID.
    ##    df              : lead-level data; reads click_id and appldate.
    ##    click.id.vector : click_id values to keep.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Rows per (application date, click_id) for the requested IDs only.
    daily.counts = df %>%
        filter(click_id %in% click.id.vector) %>%
        group_by(
            appldate_ = as.Date(appldate),
            click_id
        ) %>%
        summarize(n = n())

    ##  Line plus markers, both colored by click_id.
    ggplot(daily.counts, mapping = aes(x = appldate_, y = n)) +
        geom_line(mapping = aes(color = click_id), size = 1) +
        geom_point(mapping = aes(color = click_id), size = 2) +
        coord_fixed(ratio = aspect.ratio)
}

### DOB

In [None]:
getDOBPlotFunnel = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of a funnel ratio by age in whole years.
    ##    df              : lead-level data; reads qualified, lead_time and dateofbirth.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.
    ##  NOTE(review): age is measured from lead_time here, while the other age-based
    ##  helpers in this notebook measure from appldate -- confirm this is intended.

    ##  Qualified leads summarised per age (365-day years); ratios >= 1 (or NA) are dropped.
    by.age = df %>%
        filter(qualified == 1) %>%
        group_by(
            dob = floor(as.integer(lead_time - dateofbirth) / 365)
        ) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 1)

    ##  Bar height = ratio, bar shade = volume.
    ggplot(by.age, mapping = aes(x = dob, y = metric)) +
        geom_col(mapping = aes(fill = n)) +
        coord_fixed(ratio = aspect.ratio) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")
}

### Bank

In [None]:
getABAPlotFunnel = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of a funnel ratio split by abaroutingnumber.is.valid.
    ##    df              : lead-level data; reads qualified, abaroutingnumber and
    ##                      abaroutingnumber.is.valid.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Keep qualified leads with a known validity flag whose routing number is
    ##  either valid or missing; invalid-but-present routing numbers are excluded.
    kept.df = df %>%
        mutate(na.aba = is.na(abaroutingnumber)) %>%
        filter(
            qualified == 1,
            !is.na(abaroutingnumber.is.valid),
            abaroutingnumber.is.valid | na.aba
        )

    ##  One row per validity flag; ratios >= 1 (or NA) are dropped.
    by.flag = kept.df %>%
        group_by(enter.aba = abaroutingnumber.is.valid) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 1)

    ##  Bar height = ratio, bar shade = volume.
    ggplot(by.flag, mapping = aes(x = enter.aba, y = metric)) +
        geom_col(mapping = aes(fill = n)) +
        coord_fixed(ratio = aspect.ratio) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")
}

In [None]:
getAccountNumPlotFunnel = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of a funnel ratio split by accountnumber.is.valid.
    ##    df              : lead-level data; reads qualified, accountnumber and
    ##                      accountnumber.is.valid.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Keep qualified leads with a known validity flag whose account number is
    ##  either valid or present; invalid-and-missing account numbers are excluded.
    ##  NOTE(review): getABAPlotFunnel excludes invalid-but-PRESENT values instead --
    ##  confirm the two helpers are meant to differ.
    kept.df = df %>%
        filter(
            qualified == 1,
            !is.na(accountnumber.is.valid),
            accountnumber.is.valid | !is.na(accountnumber)
        )

    ##  One row per validity flag; ratios >= 1 (or NA) are dropped.
    by.flag = kept.df %>%
        group_by(accountnumber.is.valid) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 1)

    ##  Bar height = ratio, bar shade = volume.
    ggplot(by.flag, mapping = aes(x = accountnumber.is.valid, y = metric)) +
        geom_col(mapping = aes(fill = n)) +
        coord_fixed(ratio = aspect.ratio) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")
}

In [None]:
getAccountTypePlotFunnel = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of a funnel ratio by raw_accounttype.
    ##    df              : lead-level data; reads qualified and raw_accounttype.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Qualified leads summarised per account type; ratios >= 1 (or NA) are dropped.
    by.type = df %>%
        filter(qualified == 1) %>%
        group_by(raw_accounttype) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 1)

    ##  Bar height = ratio, bar shade = volume.
    ggplot(by.type, mapping = aes(x = raw_accounttype, y = metric)) +
        geom_col(mapping = aes(fill = n)) +
        coord_fixed(ratio = aspect.ratio) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")
}

In [None]:
getAccountLengthPlotFunnel = function (df, numerator.quo, denominator.quo, aspect.ratio) {

    ##  Bar chart of a funnel ratio by raw_accountlength.
    ##    df              : lead-level data; reads qualified and raw_accountlength.
    ##    numerator.quo   : quosure summed for the top of the ratio.
    ##    denominator.quo : quosure summed for the bottom of the ratio.
    ##    aspect.ratio    : passed to coord_fixed(ratio = ).
    ##  Returns a ggplot object.

    ##  Qualified leads summarised per account length; ratios >= 1 (or NA) are dropped.
    by.length = df %>%
        filter(qualified == 1) %>%
        group_by(raw_accountlength) %>%
        summarize(
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        ) %>%
        ungroup() %>%
        filter(n >= 1, metric < 1)

    ##  Bar height = ratio, bar shade = volume.
    ggplot(by.length, mapping = aes(x = raw_accountlength, y = metric)) +
        geom_col(mapping = aes(fill = n)) +
        coord_fixed(ratio = aspect.ratio) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")
}

# LenderEdge 4

Proposal:
- Income:
    - <b>Ignore the value passed in the payload.</b>
    - Higher income customer to reduce Insufficient Income denials.
- DOB
    - Senior applicants have more issues through the funnel
- <b>Click ID!</b>
- <b>Wait for more data</b>, to create a more robust decision tree/random forest model.
    - Initial model suggests to accept the following income ranges & click IDs for each state:

In [None]:
writeLenderEdgeDF = function () {

    ##  Builds the combined LenderEdge 4 data set and writes it to ../data/lenderedge4_df.csv.
    ##  Steps: read the lead extract, run formatDataTypes() and compareExtractionToRaw()
    ##  on it, read the response extract and run addFeatures() on it, then left-join
    ##  responses -> leads (on lead_id) -> valid.zip (on zip).
    ##  Uses `valid.zip` from the calling environment. Called for its side effect only.
    ##  Paths use forward slashes so they resolve on every platform, not just Windows.

    ##  NOTE(review): this reads the same file the function writes at the end, whereas
    ##  writeQuinStreetDF reads a separate *_lead_df.csv -- confirm the input file name.
    df.lenderedge.comparison = read_csv(
            "../data/lenderedge4_df.csv"
        ) %>%
        formatDataTypes() %>%
        compareExtractionToRaw()

    lenderedge.response = read_csv(
            "../data/lenderedge4_response_df.csv"
        ) %>% addFeatures()

    ##  Responses are the left side, so every response row is kept.
    lenderedge = lenderedge.response %>%
        left_join(
            df.lenderedge.comparison,
            by = c('lead_id' = 'lead_id')
        ) %>%
        left_join(
            valid.zip,
            by = c('zip' = 'zip')
        )

    lenderedge %>% write.csv("../data/lenderedge4_df.csv")
}

##  Load the combined LenderEdge 4 data set; forward slashes keep the path portable.
lenderedge = read_csv("../data/lenderedge4_df.csv")
##  Black-and-white ggplot theme for every plot that follows.
theme_set(theme_bw())

### Time Series

- In October, Funding Rate increased, driven by BV/Q and F/Decisioned.
- In October, We began to ignore the Bank Account JSON Object in the raw_lead payload.
- Mean GMI has steadily increased, reflecting offer amount increase and Insufficient Income decrease.

In [None]:
##  Time-series views of the LenderEdge data, one plot per call.
##  The helpers are defined earlier in the notebook; the number is passed positionally
##  (presumably an aspect ratio, as in the other plot helpers -- confirm).
lenderedge %>% getTimeSeriesFunnel(90)
lenderedge %>% getTimeSeriesGMI(0.03)
lenderedge %>% getTimeSeriesOfferAmount(0.1)
lenderedge %>% getTimeSeriesValidBank(15)
lenderedge %>% getTimeSeriesDenialReason(150)

### Geography

- Volume centralized in FL, Midwest, and West coast, each of which has a gradient for FR.
- There is no concentrated state that funds better than others.
- Most states/MSA show low conversion (red) on low volume (small).

In [None]:
##  Geographic views of the LenderEdge data, one plot per call.
##  quo(funded) / quo(qualified) are the numerator / denominator of the plotted ratio;
##  the last call swaps the numerator to quo(bankverified).
lenderedge %>% getZipPlotVolume()
lenderedge %>% getZipPlotFunnel(quo(funded), quo(qualified))
lenderedge %>% getZipPlotIncome()
lenderedge %>% getValidZipPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 5)
lenderedge %>% getStatePlotFunnel(quo(funded), quo(qualified), aspect.ratio = 40)
lenderedge %>% getZipGMIPlotFunnel(quo(funded), quo(qualified))
lenderedge %>% getZipDOBPlotFunnel(quo(funded), quo(qualified))
lenderedge %>% getZipDOBPlotFunnel(quo(bankverified), quo(qualified))

### Income

- Concentrated in Lower Income
- Passes in discrete values, can we ask for Income again?
- FR for higher income customers becomes more volatile.

In [None]:
##  Income views of the LenderEdge data, one plot per call.
##  Three of the five plots are limited to applications dated 2019-10-01 or later.
lenderedge %>%
    getIncomePlotFunnel(quo(funded), quo(qualified), aspect.ratio = 5000)
lenderedge %>%
    filter(appldate >= as.Date('2019-10-01')) %>%
    getGMIPlotDOB(quo(funded), quo(qualified), aspect.ratio = 0.01)
lenderedge %>%
    getDOBGMITileFunnel(quo(funded), quo(qualified), aspect.ratio = 80)
lenderedge %>%
    filter(appldate >= as.Date('2019-10-01')) %>%
    getClickIDGMITileFunnel(quo(funded), quo(qualified), aspect.ratio = 350)
lenderedge %>%
    filter(appldate >= as.Date('2019-10-01')) %>%
    getClickIDGMITileDOB(aspect.ratio = 350)

### Click ID

- Click ID mix continuously shifts.
- How are Click IDs 1716 different from 1736 & 1757?
    - 1716 // 1736 on income
    - 1757 // 1716 & 1736 on income

In [None]:
##  Click ID views of the LenderEdge data, one plot per call.
##  The three bar charts are limited to applications dated 2019-10-01 or later;
##  the time series uses the full data set.
lenderedge %>%
    filter(appldate >= as.Date('2019-10-01')) %>%
    getClickIDPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 40)
lenderedge %>%
    filter(appldate >= as.Date('2019-10-01')) %>%
    getClickIDPlotGMI(quo(funded), quo(qualified), aspect.ratio = 0.0015)
lenderedge %>%
    filter(appldate >= as.Date('2019-10-01')) %>%
    getClickIDPlotDOB(quo(funded), quo(qualified), aspect.ratio = 8)
lenderedge %>%
    getClickIDTimeSeries(click.id.vector = c('1716', '1736', '1757'), aspect.ratio = 0.1)

### DOB

- FR drops off for older customers.

In [None]:
##  Funded / qualified ratio by applicant age for LenderEdge.
lenderedge %>% getDOBPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 150)

### Bank

In [None]:
##  Funded / qualified ratio by bank-field attributes for LenderEdge, one plot per call:
##  routing-number validity, account-number validity, raw account type, raw account length.
lenderedge %>% getABAPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 10)
lenderedge %>% getAccountNumPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 10)
lenderedge %>% getAccountTypePlotFunnel(quo(funded), quo(qualified), aspect.ratio = 15)
lenderedge %>% getAccountLengthPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 5)

### Decision Tree

In [None]:
# https://www.statmethods.net/advstats/cart.html
library(rpart)
library(rattle)
library(rpart.plot)

In [None]:
##  Fits a classification tree for funded vs. denied and draws it.
##    df : lead-level data; reads funded, click_id, raw_abaroutingnumber.is.valid,
##         raw_accountnumber.is.valid, raw_grossmonthlyincome and raw_statecode.
##  Returns the fitted rpart model; the plot is a side effect.
getPopultationDecisionTree = function (df) {

    rpart(
        formula = funded ~
            click_id +
            raw_abaroutingnumber.is.valid +
            raw_accountnumber.is.valid +
            raw_grossmonthlyincome +
            raw_statecode,
    #         age,
        ##  funded is relabelled to "Funded" / "Denied" before fitting.
        data = df %>%
            mutate(
                funded = ifelse(funded, "Funded", "Denied")
            ),
        method = 'class',    ##  classification tree
        ##  Very permissive growth settings (tiny nodes, low complexity penalty).
        control = rpart.control(
            minsplit = 2,
            minbucket = 2,
            cp = 0.0002
        )
    ) %T>% 
        ##  %T>% runs the plot for its side effect and passes the model through unchanged.
#         summary() %T>%
#         plot() %T>%
#         text() %T>%
        fancyRpartPlot()
}
# lenderedge %>% getPopultationDecisionTree()

### Random Forest

In [None]:
library(randomForest)

In [None]:
reformatRandomForest = function (df) {

    ##  Prepares lead-level data for randomForest().
    ##    df : lead-level data; reads raw_statecode and funded.
    ##  Returns df without rows lacking raw_statecode, with funded relabelled to
    ##  "Funded" / "Denied" and every character column converted to a factor.

    ##  Rows without a state are removed.
    with.state = filter(df, !is.na(raw_statecode))

    ##  Outcome as a two-level label instead of a flag.
    labelled = mutate(with.state, funded = ifelse(funded, "Funded", "Denied"))

    ##  randomForest requires factors rather than character columns.
    mutate_if(labelled, .predicate = is.character, .funs = as.factor)
}

In [None]:
##  Fits an unweighted random forest for funded on click_id, raw income and raw state.
##    df.rf.reformat : data to fit on; the commented example below feeds it the
##                     output of reformatRandomForest().
##  Returns the fitted randomForest model.
getPopulationRandomForest = function (df.rf.reformat) {
    
    randomForest(
        formula = funded ~
            click_id +
            raw_grossmonthlyincome +
            raw_statecode,
        data = df.rf.reformat,
        mtry = 2,
        ##  NOTE(review): only 5 trees here vs. 500 in the weighted variant -- confirm intended.
        ntree = 5,
        ##  Each tree draws 63.2% of the rows (rounded up), with replacement.
        sampsize = ceiling(0.632*nrow(df.rf.reformat)),
        replace = TRUE,
        importance = TRUE
    )
}
# lenderedge %>% reformatRandomForest() %>% getPopulationRandomForest()

In [None]:
##  Fits a random forest for funded on click_id, raw income and raw state, drawing a
##  fixed number of rows per outcome class for each tree.
##    df.rf.reformat : data to fit on; the commented example below feeds it the
##                     output of reformatRandomForest().
##  Returns the fitted randomForest model.
getPopulationRandomForestWeighted = function (df.rf.reformat) {

    randomForest(
        formula = funded ~
            click_id +
            raw_grossmonthlyincome +
            raw_statecode,
        data = df.rf.reformat,
        mtry = 2,
        ntree = 500,
        ##  Stratify the per-tree sample on the outcome.
        strata = df.rf.reformat$funded,
        ##  Per-stratum draw sizes; the order presumably follows the factor levels of
        ##  funded ("Denied", "Funded") -- confirm.
        sampsize = c(
            200,  ##  Denied
            100   ##  Approved
        ),
        replace = TRUE,
        importance = TRUE
    )   
}
# lenderedge %>% reformatRandomForest() %>% getPopulationRandomForestWeighted()

In [None]:
tuneRandomForestWeighting = function (df.rf.reformat) {

    ##  Sweeps the per-tree sample size of the funded ("Approved") stratum and records
    ##  an error summary for each setting.
    ##    df.rf.reformat : data to fit on, e.g. the output of reformatRandomForest();
    ##                     reads funded, click_id, raw_abaroutingnumber.is.valid,
    ##                     raw_accountnumber.is.valid, raw_grossmonthlyincome, raw_statecode.
    ##  Returns a data.frame with one row per setting:
    ##    approved.n : funded-stratum sample size (100, 200, ..., 1000)
    ##    oob.error  : mean(Denied) * mean(Funded) over the columns of the model's err.rate

    approved.n = seq(
        from = 100,
        to = 1000,
        by = 100
    )

    ##  Results table, filled in row by row below.
    container = data.frame(
        approved.n = approved.n,
        oob.error = NA_real_
    )

    for (i in seq_len(nrow(container))) {

        lenderedge.rf.wt = randomForest(
            formula = funded ~
                click_id +
                raw_abaroutingnumber.is.valid +
                raw_accountnumber.is.valid +
                raw_grossmonthlyincome +
                raw_statecode,
            data = df.rf.reformat,
            mtry = 2,
            ntree = 500,
            strata = df.rf.reformat$funded,
            ##  The i-th candidate size is used, so each iteration fits a different model.
            sampsize = c(
                200,                      ##  Denied
                container$approved.n[i]   ##  Approved
            ),
            replace = TRUE,
            importance = TRUE
        )

        ##  Product of the mean per-class error rates across the forest's trees.
        container$oob.error[i] = lenderedge.rf.wt$err.rate %>% as.data.frame() %>% summarize(mu = mean(Denied)*mean(Funded)) %>% .$mu
    }

    ##  Hand the sweep results back to the caller; a bare for loop evaluates to NULL.
    container
}
##  Run the stratum sample-size sweep on the LenderEdge data (fits ten 500-tree forests).
lenderedge %>% reformatRandomForest() %>% tuneRandomForestWeighting()

# Quin Street 4

Proposal:
    - In October, CS/SC has tanked, but is slightly offset by improvements in other parts of the funnel.
    - In October, Insufficient Income Denials fell, but offset by Incompatible Bank surge.
    - Income appears discrete; ignore the value passed in the payload
    - Higher GMI correlates with higher FR, but majority of volume is lower GMI.
    - See below proposal for Click ID pruning

In [None]:
writeQuinStreetDF = function () {

    ##  Builds the combined Quin Street 4 data set and writes it to ../data/quinstreet4_df.csv.
    ##  Steps: read the lead extract, run formatDataTypes() and compareExtractionToRaw()
    ##  on it, read the response extract and run addFeatures() on it, then left-join
    ##  responses -> leads (on lead_id) -> valid.zip (on zip).
    ##  Uses `valid.zip` from the calling environment. Called for its side effect only.
    ##  Paths use forward slashes so they resolve on every platform, not just Windows.

    quinstreet.comparison = read_csv(
            "../data/quinstreet4_lead_df.csv"
        ) %>%
        formatDataTypes() %>%
        compareExtractionToRaw()

    quinstreet.response = read_csv(
            "../data/quinstreet4_response_df.csv"
        ) %>% addFeatures()

    ##  Responses are the left side, so every response row is kept.
    quinstreet = quinstreet.response %>%
        left_join(
            quinstreet.comparison,
            by = c('lead_id' = 'lead_id')
        ) %>%
        left_join(
            valid.zip,
            by = c('zip' = 'zip')
        )

    quinstreet %>% write.csv("../data/quinstreet4_df.csv")
}
##  NOTE(review): both the rebuild and the load of `quinstreet` are commented out, yet a
##  later cell in this section reads `quinstreet` -- it must already exist in the session.
# writeQuinStreetDF()
# quinstreet = read_csv("..\\data\\quinstreet4_df.csv")
##  Black-and-white ggplot theme for every plot that follows.
theme_set(theme_bw())

In [None]:
# quinstreet %>% getTimeSeriesFunnel(90)
# quinstreet %>% getTimeSeriesGMI(0.05)
# quinstreet %>% getTimeSeriesOfferAmount(0.1)
# quinstreet %>% getTimeSeriesValidBank(30)
# quinstreet %>% getTimeSeriesDenialReason(400)

In [None]:
# quinstreet %>% getZipPlotVolume()
# quinstreet %>% getZipPlotFunnel(quo(funded), quo(qualified))
# quinstreet %>% getZipPlotIncome()
# quinstreet %>% getValidZipPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 5)
# quinstreet %>% getStatePlotFunnel(quo(funded), quo(qualified), aspect.ratio = 40)
# quinstreet %>% getZipGMIPlotFunnel(quo(funded), quo(qualified))
# quinstreet %>% getZipDOBPlotFunnel(quo(funded), quo(qualified))
# quinstreet %>% getZipDOBPlotFunnel(quo(bankverified), quo(qualified))

In [None]:
# quinstreet %>% getIncomePlotFunnel(quo(funded), quo(qualified), aspect.ratio = 5000)
# quinstreet %>% filter(appldate >= '2019-10-01' %>% as.Date()) %>% getGMIPlotDOB(quo(funded), quo(qualified), aspect.ratio = 0.01)
# quinstreet %>% getDOBGMITileFunnel(quo(funded), quo(qualified), aspect.ratio = 80)
# quinstreet %>% filter(appldate >= '2019-10-01' %>% as.Date()) %>%  getClickIDGMITileFunnel(quo(funded), quo(qualified), aspect.ratio = 350)
# quinstreet %>% filter(appldate >= '2019-10-01' %>% as.Date()) %>%  getClickIDGMITileDOB(aspect.ratio = 350)

In [None]:
##  Click ID performance for Quin Street since 2019-10-01.
##  Needs `quinstreet` to exist in the session.

##  The 15 lowest funding-rate click IDs, among those with more than 10 leads and
##  a funding rate under 25%. A missing click_id is shown as 'None'.
details = quinstreet %>%
    filter(
        !is.na(accepted),
        appldate >= as.Date('2019-10-01')
    ) %>%
    mutate(
        click.id = replace_na(as.character(click_id), 'None')
    ) %>%
    group_by(click.id) %>%
    summarize(
        leads = n(),
        accepts = sum(accepted),
        qualified = sum(qualified),
        funded = sum(funded),
        ##  The two rates below use the totals just computed above.
        accept.to.fund = funded / accepts,
        funding.rate = funded / qualified
    ) %>%
    ungroup() %>%
    filter(leads > 10, funding.rate < 0.25) %>%
    arrange(funding.rate) %>%
    slice(1:15)

details

##  Baseline: the same totals and rates over every lead in the period.
quinstreet %>%
    filter(
        !is.na(accepted),
        appldate >= as.Date('2019-10-01')
    ) %>%
    summarize(
        leads = n(),
        accepts = sum(accepted),
        qualified = sum(qualified),
        funded = sum(funded),
        accept.to.fund = funded / accepts,
        funding.rate = funded / qualified
    )

##  Totals and rates for the listed low performers taken together.
details %>%
    summarize(
        leads = sum(leads),
        accepts = sum(accepts),
        qualified = sum(qualified),
        funded = sum(funded),
        accept.to.fund = funded / accepts,
        funding.rate = funded / qualified
    )

##  What the period looks like once a hand-picked set of click IDs is excluded.
##  NOTE(review): '05669144' is the only 8-character ID in this list -- confirm it
##  was not truncated.
quinstreet %>%
    filter(
        !is.na(accepted),
        appldate >= as.Date('2019-10-01'),
        !(replace_na(as.character(click_id), 'None') %in% c(
            '05669144',
            '112267144',
            '112554544',
            '110359244',
            '109852344',
            '105949144',
            '89046644',
            '111448744',
            '93684244',
            '106602444',
            '112806744',
            '112328444'
        ))
    ) %>%
    summarize(
        leads = n(),
        accepts = sum(accepted),
        qualified = sum(qualified),
        funded = sum(funded),
        accept.to.fund = funded / accepts,
        funding.rate = funded / qualified
    )

In [None]:
# quinstreet %>% filter(appldate >= '2019-10-01' %>% as.Date()) %>% getClickIDPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 40)
# quinstreet %>% filter(appldate >= '2019-10-01' %>% as.Date()) %>% getClickIDPlotGMI(quo(funded), quo(qualified), aspect.ratio = 0.005)
# quinstreet %>% filter(appldate >= '2019-10-01' %>% as.Date()) %>% getClickIDPlotDOB(quo(funded), quo(qualified), aspect.ratio = 40)
# quinstreet %>% getClickIDTimeSeries(click.id.vector = c('1716', '1736', '1757'), aspect.ratio = 0.1)

In [None]:
# quinstreet %>% getDOBPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 150)

In [None]:
# quinstreet %>% getABAPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 10)
# quinstreet %>% getAccountNumPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 10)
# quinstreet %>% getAccountTypePlotFunnel(quo(funded), quo(qualified), aspect.ratio = 15)
# quinstreet %>% getAccountLengthPlotFunnel(quo(funded), quo(qualified), aspect.ratio = 5)

In [None]:
# Excluding the above Click IDs, run a decision tree!