## Notes

Questions:
    * Are we reading the raw lead correctly?
    * Do certain field values correlate with higher conversion?
        * Within Partner i.
        * Across Partners.

Additional Data:
    * URL
    * Click ID
    * Funnel Performance. Denial Reason, conversion boolean



LenderEdge
    * A. Raw DOB is in a standard date format but without dashes (YYYYMMDD). A couple of values are blank.
    * B. When raw_incometype != 'Employment', incometype defaults to 'OtherTaxableIncome'. ~30k cases; check the time series.
    * C. accountnumber is often written as NA even when raw_accountnumber contains a value. ~500k cases; check the time series.
    * D. Blank values, values that are not 9 digits, and unfilled mail-merge fields are passed into Routing Number.

1B) 
df.lenderedge.comparison %>%
    group_by(
        raw_incometype,
        incometype
    ) %>% 
    summarize(
        n()
    )

# Import Libraries

In [None]:
library(opploansanalytics)
load.packages()

# Import and Clean Data

## Leads

In [None]:
getAdmethod = function () {

    ##  Ten advertising methods whose name ends in '4', ranked by the number  ##
    ##  of applications created in September 2019.                            ##
    admethod.sql =
"
select
    c_adm.name
    , count(*)
from
    cloudlending.applications as c_app
    inner join
        cloudlending.advertising_method as c_adm
        on c_app.advertising_method = c_adm.id
        and right(c_adm.name,1) = '4'
where
    date_trunc('month', c_app.createddate) = '2019-09-01'::date
group by
    1
order by
    2 desc
limit 10    
"

    queryReporting(admethod.sql)
}

In [None]:
##  Pull LDE4 leads for one advertising method, returning both the extracted
##  lead columns and the same fields read directly from the raw lead JSON.
##
##  admethod.name:      advertising method name, matched exactly against
##                      cloudlending.advertising_method.name.
##  timestart, timeend: bounds on lde.lead_time. Each is cast to a date in SQL
##                      and both comparisons are inclusive.
##
##  Returns whatever queryReporting() returns for the assembled query.
##
##  NOTE(review): if lead_time is a timestamp, `<= timeend::date` stops at the
##  start of that day, so later leads on timeend are excluded - confirm intent.
getLeadsDF = function (admethod.name, timestart, timeend) {

    ##  The arguments are spliced into the SQL text, so double any embedded
    ##  single quote to keep each string literal well formed.
    sqlLiteral = function (value) {
        gsub("'", "''", as.character(value), fixed = TRUE)
    }

    queryReporting(
        paste0(
"
select

    --  Identifiers  --
    lde.lead_id
    , lde.partnerid
    , lde.leadofferid
    , lde.lead_time
    , c_am.name
    , lde.accepted
    , lde.reason
    , lde.offer_amount
    --  Banking  --
    , lde.abaroutingnumber
    , lde.accountnumber
    , lde.bankname
    --  Employment  --
    , lde.grossmonthlyincome
    , lde.incometype
    , lde.lastpayrolldate
    , lde.payrollfrequency
    , lde.payrolltype
    , lde.work_hiredate
    --  Contact  --
    , lde.dateofbirth
    , lde.statecode


    --  Raw Lead  --
    , lde.raw_lead
        --  ID  --
    , lde.raw_lead ->> 'leadOfferId' as raw_leadofferid
    , lde.raw_lead ->> 'requestedLoanAmount' as raw_loan_request
        --  Banking  --
    , lde.raw_lead -> 'bankInfo' ->> 'abaRoutingNumber' as raw_abaroutingnumber
    , lde.raw_lead -> 'bankInfo' ->> 'accountNumber' as raw_accountnumber
    , lde.raw_lead -> 'bankInfo' ->> 'bankName' as raw_bankname
    , lde.raw_lead -> 'bankInfo' ->> 'accountType' as raw_accounttype
    , lde.raw_lead -> 'bankInfo' ->> 'accountLength' as raw_accountlength
        --  Employment  --
    , lde.raw_lead ->> 'grossMonthlyIncome' as raw_grossmonthlyincome
    , lde.raw_lead -> 'incomeInfo' ->> 'incomeType' as raw_incometype
    , lde.raw_lead -> 'incomeInfo' ->> 'lastPayrollDate' as raw_lastpayrolldate
    , lde.raw_lead -> 'incomeInfo' ->> 'payrollFrequency' as raw_payrollfrequency
    , lde.raw_lead -> 'incomeInfo' ->> 'payrollType' as raw_payrolltype
    , lde.raw_lead -> 'employmentInfo' ->> 'hireDate' as raw_work_hiredate
        --  Contact  --
    , lde.raw_lead -> 'personalInfo' ->> 'dateOfBirth' as raw_dateofbirth
    , lde.raw_lead ->> 'stateCode' as raw_statecode
    from
        lde4.leads as lde
    inner join
        cloudlending.advertising_method as c_am
        on lde.partnerid = c_am.external_id
        and c_am.name = '", sqlLiteral(admethod.name), "'
    where
        lde.lead_time >= '", sqlLiteral(timestart), "'::date
        and lde.lead_time <= '", sqlLiteral(timeend), "'::date
"
        )
    )
}

##  Load the saved LenderEdge lead extract. Forward slashes resolve on every
##  platform; a backslash-separated relative path only works on Windows.
df.lenderedge = read.csv("../data/lenderedge_df_new.csv")

In [None]:
##  Normalise the extracted and raw lead columns so they can be compared, and
##  add validity flags for the banking fields.
##
##  df: data frame containing dateofbirth, raw_dateofbirth, zip, raw_zip,
##      abaroutingnumber, raw_abaroutingnumber, accountnumber and
##      raw_accountnumber.
##
##  Returns df with factor columns converted to character, the date and zip
##  columns reformatted as text, and four logical *.is.valid columns added.
formatDataTypes = function (df) {

    ##  Routing number check: exactly nine digits, not eight leading zeros,
    ##  and a weighted digit sum (weights 3, 7, 1 repeating) divisible by 10.
    ##  The pattern is anchored at both ends so longer strings that merely
    ##  start with nine digits are rejected.
    isValidRoutingNumber = function (aba) {
        aba = as.character(aba)
        well.formed = grepl("^[0-9]{9}$", aba) & !grepl("^0{8}", aba)
        ##  Replace rejected values (including NA) with a dummy so the digit
        ##  extraction below never sees non-numeric text; well.formed already
        ##  forces their result to FALSE.
        aba[!well.formed] = "000000000"
        weights = c(3, 7, 1, 3, 7, 1, 3, 7, 1)
        checksum = 0
        for (i in seq_along(weights)) {
            checksum = checksum + weights[i] * as.integer(substr(aba, i, i))
        }
        well.formed & checksum %% 10 == 0
    }

    ##  Account number check: 6 to 17 digits and not a single digit repeated
    ##  for the whole value. Both patterns are anchored at both ends, so
    ##  trailing characters or extra digits make the value invalid, and a
    ##  number that only begins with a run of identical digits stays valid.
    isValidAccountNumber = function (account) {
        account = as.character(account)
        grepl("^[0-9]{6,17}$", account) &
            !grepl("^([0-9])\\1+$", account, perl = TRUE)
    }

    df.reformat = df %>%
        mutate_if(
            is.factor,
            as.character
        ) %>% 
        mutate(

            ##  Extracted DOB: parse as a timestamp, round up to the day
            ##  boundary and store as text. Blank/NA values pass through.
            ##  NOTE(review): presumably the ceiling undoes a timezone
            ##  shift in the extracted value - confirm.
            dateofbirth = if_else(
                !is.na(dateofbirth) & dateofbirth != '',
                dateofbirth %>% as.POSIXct() %>% ceiling_date(unit = 'day') %>% as.character(),
                dateofbirth
            ),
            ##  Raw DOB: insert dashes after characters 4 and 6.
            raw_dateofbirth = if_else(
                raw_dateofbirth != '',
                paste(
                    raw_dateofbirth %>% str_sub(1,4),
                    raw_dateofbirth %>% str_sub(5,6),
                    raw_dateofbirth %>% str_sub(7,8),
                    sep = '-'
                ),
                raw_dateofbirth
            ),

            ##  Four-character zips get a leading zero put back.
            zip = if_else(
                zip %>% nchar() == 4,
                paste0('0', zip),
                zip %>% as.character()
            ),
            raw_zip = if_else(
                raw_zip %>% nchar() == 4,
                paste0('0', raw_zip),
                raw_zip %>% as.character()
            ),

            abaroutingnumber.is.valid = isValidRoutingNumber(abaroutingnumber),
            raw_abaroutingnumber.is.valid = isValidRoutingNumber(raw_abaroutingnumber),

            accountnumber.is.valid = isValidAccountNumber(accountnumber),
            raw_accountnumber.is.valid = isValidAccountNumber(raw_accountnumber)

        )
    
#     df.reformat.no.na = df.reformat %>%
#         apply(
#             MARGIN = 2,
#             replace_na,
#             replace = ''
#         )
    
    return(df.reformat)
}

df.lenderedge.reformat = df.lenderedge %>% formatDataTypes()

In [None]:
compareExtractionToRaw = function (df) {

    ##  An extracted value agrees with its raw counterpart when the extracted  ##
    ##  value is missing and the raw one is missing or blank, or when both     ##
    ##  are present and equal.                                                 ##
    agreesWithRaw = function (extracted, raw) {
        (is.na(extracted) & (is.na(raw) | raw == '')) |
        (!is.na(extracted) & !is.na(raw) & extracted == raw)
    }

    ##  One logical match.<field> column per compared field.  ##
    mutate(
        df,
        match.leadofferid = agreesWithRaw(leadofferid, raw_leadofferid),
        match.abaroutingnumber = agreesWithRaw(abaroutingnumber, raw_abaroutingnumber),
        match.accountnumber = agreesWithRaw(accountnumber, raw_accountnumber),
        match.bankname = agreesWithRaw(bankname, raw_bankname),
        match.grossmonthlyincome = agreesWithRaw(grossmonthlyincome, raw_grossmonthlyincome),
        match.incometype = agreesWithRaw(incometype, raw_incometype),
        match.lastpayrolldate = agreesWithRaw(lastpayrolldate, raw_lastpayrolldate),
        match.payrollfrequency = agreesWithRaw(payrollfrequency, raw_payrollfrequency),
        match.payrolltype = agreesWithRaw(payrolltype, raw_payrolltype),
        match.work_hiredate = agreesWithRaw(work_hiredate, raw_work_hiredate),
        match.dateofbirth = agreesWithRaw(dateofbirth, raw_dateofbirth),
        match.statecode = agreesWithRaw(statecode, raw_statecode)
    )

}

df.lenderedge.comparison = compareExtractionToRaw(df.lenderedge.reformat)

In [None]:
##  Count the TRUE values in every match.* column of df.
##
##  df: data frame carrying the logical match.* columns added by
##      compareExtractionToRaw().
##
##  Returns a named numeric vector: the per-column counts in ascending order,
##  followed by `nrow`, the number of rows in df itself.
examineMatches = function (df) {

    ##  Column-name prefix test, ignoring case.  ##
    is.match.col = startsWith(tolower(names(df)), 'match.')

    ##  Sum column by column instead of coercing the frame to a matrix.  ##
    match.counts = vapply(df[is.match.col], sum, numeric(1))

    ##  Report the row count of the frame that was passed in, not of a global.  ##
    c(
        sort(match.counts),
        nrow = nrow(df)
    )
}

##  Per-field match counts for the LenderEdge comparison frame.  ##
examineMatches(df.lenderedge.comparison)

## Funnel

In [None]:
##  Load the LenderEdge response extract; it is filtered by getResponseSubset().  ##
lenderedge.response = read_csv("../data/lenderedge_response_df.csv")

In [None]:
getResponseSubset = function (df) {

    ##  lead_ids that occur more than once anywhere in the unfiltered input.  ##
    repeated.lead.ids = df$lead_id[
        duplicated(df$lead_id) | duplicated(df$lead_id, fromLast = TRUE)
    ]

    df %>%
        ##  Applications dated 2019-07-01 up to, but not including, 2019-10-01.  ##
        filter(
            appldate >= as.Date('2019-07-01'),
            appldate < as.Date('2019-10-01')
        ) %>%
        mutate(
            ##  Position of "=" inside the matched "click_id=..." token.  ##
            click_id_pos = str_locate(
                str_extract(partner_event_data, "click_id=%?[0-9]+"),
                "="
            )[, 1],
            ##  Everything after the "=" in that token.  ##
            click_id = str_sub(
                str_extract(partner_event_data, "click_id=%?[0-9]+"),
                start = click_id_pos + 1,
                end = nchar(partner_event_data)
            ),
            has.unique.lead.id = !(lead_id %in% repeated.lead.ids)
        )

}

lenderedge.response.subset = getResponseSubset(lenderedge.response)

## Map

In [None]:
# https://austinwehrwein.com/digital-humanities/creating-a-density-map-in-r-with-zipcodes/

In [None]:
library(zipcode)
library(tidyverse)
library(maps)
library(viridis)
library(ggthemes)
library(albersusa)

In [None]:
##  Zip code reference table; valid.zip is derived from it.  ##
zip.codes = read_csv("../data/zip_code_database.csv")

In [None]:
##  Active five-character zips with their coordinates, leaving out the  ##
##  armed-forces and territory postal codes listed below.               ##
valid.zip = select(
    filter(
        zip.codes,
        !(state %in% c('AA', 'AE', 'AP', 'AS', 'FM', 'GU', 'MH', 'MP', 'PR', 'PW', 'VI')),
        decommissioned == 0,
        nchar(zip) == 5
    ),
    zip,
    state,
    latitude,
    longitude
)

In [None]:
##  State outline polygons, drawn as the black boundary layer in the zip maps.  ##
us = ggplot2::map_data('state')

In [None]:
##  County outline polygons, drawn as the gray boundary layer in the zip maps.  ##
us2 = ggplot2::map_data('county')

## Combined

In [None]:
##  Attach the lead comparison columns to each response row by lead_id,  ##
##  then the zip coordinates by zip. Unmatched rows are kept.            ##
lenderedge = lenderedge.response.subset %>%
    left_join(df.lenderedge.comparison, by = 'lead_id') %>%
    left_join(valid.zip, by = 'zip')

In [None]:
##  Make the black-and-white ggplot2 theme the default for subsequent plots.  ##
theme_set(theme_bw())

### _LenderEdge_

* Questions:
    - When have these apps come in?
    - What different values do we receive?

# Univariate
### Do any variables have a direct correlation with funding?

In [None]:
##  Column names and types of the combined frame.  ##
str(lenderedge)

## Geography
- Volume is concentrated in state clusters, which show a gradient in funding rate (FR).
- Some states/MSAs show low conversion (red) on low volume (small points).
- No state with concentrated volume funds better than the others.

In [None]:
getValidZipPlotFunnel = function (df, numerator.quo, denominator.quo) {

    ##  Qualified leads only, split by whether the zip appears in valid.zip.  ##
    ##  metric is sum(numerator) / sum(denominator) within each split.        ##
    qualified.leads = filter(df, qualified == 1)
    by.validity = group_by(qualified.leads, is.valid = zip %in% valid.zip$zip)
    rates = ungroup(
        summarize(
            by.validity,
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        )
    )

    ##  Splits whose metric is not below 1 (or is undefined) are dropped.  ##
    rates = filter(rates, n >= 1, metric < 1)

    ##  Bar per split, shaded by lead count.  ##
    ggplot(rates, aes(x = is.valid, y = metric)) +
        geom_col(aes(fill = n)) +
        coord_fixed(ratio = 5) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")

}

getValidZipPlotFunnel(lenderedge, quo(funded), quo(qualified))

In [None]:
getZipPlotVolume = function (df) {

    ##  Qualified volume for each zip / longitude / latitude combination.  ##
    zip.volume = summarize(
        group_by(df, zip, longitude, latitude),
        metric = sum(qualified)
    )

    ##  Aesthetics shared by the county and state outline layers.  ##
    outline.aes = aes(x = long, y = lat, group = group)

    ggplot(zip.volume, aes(x = longitude, y = latitude)) +
        ##  Gray county outlines under black state outlines.  ##
        geom_polygon(data = us2, mapping = outline.aes, color = 'gray', fill = NA, alpha = 0.35) +
        geom_polygon(data = us, mapping = outline.aes, color = 'black', fill = NA, alpha = 0.35) +
        ##  One point per zip, coloured by qualified volume.  ##
        geom_point(aes(color = metric), size = 0.8, alpha = 0.25) +
        xlim(-125, -65) +
        ylim(25, 50) +
        labs(title = "LenderEdge 4: New Qualified Volume by Geography") +
        coord_fixed(ratio = 16/9)

}

getZipPlotVolume(lenderedge)

In [None]:
getZipPlotFunnel = function (df, numerator.quo, denominator.quo) {

    ##  Qualified leads per zip / longitude / latitude, with the ratio  ##
    ##  sum(numerator) / sum(denominator) as the metric.                ##
    zip.rates = ungroup(
        summarize(
            group_by(filter(df, qualified == 1), zip, longitude, latitude),
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        )
    )

    ##  Zips whose metric is not below 1 (or is undefined) are dropped.  ##
    zip.rates = filter(zip.rates, n >= 1, metric < 1)

    ##  Aesthetics shared by the county and state outline layers.  ##
    outline.aes = aes(x = long, y = lat, group = group)

    ggplot(zip.rates, aes(x = longitude, y = latitude)) +
        ##  Gray county outlines under black state outlines.  ##
        geom_polygon(data = us2, mapping = outline.aes, color = 'gray', fill = NA, alpha = 0.35) +
        geom_polygon(data = us, mapping = outline.aes, color = 'black', fill = NA, alpha = 0.35) +
        ##  One point per zip: colour and transparency follow the metric,  ##
        ##  size follows the lead count.                                   ##
        geom_point(aes(color = metric, size = n, alpha = metric)) +
        xlim(-125, -65) +
        ylim(25, 50) +
#         labs(
#             title = "LenderEdge 4: New FR by Geography"
#         ) +
        coord_fixed(ratio = 16/9) +
        scale_colour_gradientn(colours = c("brown3", "gold", "springgreen4"))
}

getZipPlotFunnel(lenderedge, quo(funded), quo(contractsigned))

In [None]:
getStatePlotFunnel = function (df, numerator.quo, denominator.quo) {

    ##  Qualified leads per state, with sum(numerator) / sum(denominator)  ##
    ##  as the metric.                                                     ##
    state.rates = ungroup(
        summarize(
            group_by(filter(df, qualified == 1), statecode),
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        )
    )

    ##  States whose metric is not below 1 (or is undefined) are dropped.  ##
    state.rates = filter(state.rates, n >= 1, metric < 1)

    ##  Order the bars from highest to lowest lead count.  ##
    by.volume = order(-state.rates$n)
    state.rates$statecode = factor(
        state.rates$statecode,
        levels = state.rates$statecode[by.volume]
    )

    ##  Bar per state, shaded by lead count.  ##
    ggplot(state.rates, aes(x = statecode, y = metric)) +
        geom_col(aes(fill = n)) +
        coord_fixed(ratio = 40) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")

}

getStatePlotFunnel(lenderedge, quo(funded), quo(qualified))

## DOB
- FR for older customers drops off.

In [None]:
##  Bar chart of a funnel ratio by applicant age at lead time.
##
##  df:              combined lead/response frame with qualified, lead_time and
##                   raw_dateofbirth columns.
##  numerator.quo,
##  denominator.quo: quosures (e.g. quo(funded), quo(qualified)) naming the
##                   columns summed to form the ratio.
##
##  Returns a ggplot object.
getDOBPlotFunnel = function (df, numerator.quo, denominator.quo) {

    ##  Qualified leads grouped by age in completed years. The age comes from  ##
    ##  the calendar interval between birth date and lead date, so leap days   ##
    ##  cannot push a lead into the next age bucket before the birthday.       ##
    summary.df = df %>%
        filter(
            qualified == 1
        ) %>% 
        group_by(
            age = floor(
                lubridate::time_length(
                    lubridate::interval(
                        as.Date(raw_dateofbirth),
                        as.Date(lead_time)
                    ),
                    unit = 'year'
                )
            )
        ) %>% 
        summarize(
            n = n(),
            metric = sum(!!numerator.quo)/sum(!!denominator.quo)
        ) %>% 
        ungroup() %>% 
        ##  Ages whose metric is not below 1 (or is undefined) are dropped.  ##
        filter(
            n >= 1 &
            metric < 1
        )
    
    ##  Bar per age, shaded by lead count.  ##
    summary.df %>% 
        ggplot(
            mapping = aes(
                x = age,
                y = metric
            )
        ) +
        geom_col(
            mapping = aes(
                fill = n
            )
        ) + 
        ##  Adjust sizing  ##
        coord_fixed(
            ratio = 100
        ) +
        ##  Adjust coloring  ##
        scale_fill_gradient(
            low = "lightgray",
            high = "steelblue"
        )
    
}

lenderedge %>% getDOBPlotFunnel(quo(funded), quo(qualified))

## Income
- FR for higher income customers becomes more volatile.

In [None]:
getIncomePlotFunnel = function (df, numerator.quo, denominator.quo) {

    ##  Qualified leads grouped into 500-wide gross monthly income buckets  ##
    ##  (labelled by the bucket's lower bound), with                        ##
    ##  sum(numerator) / sum(denominator) as the metric.                    ##
    qualified.leads = filter(df, qualified == 1)
    income.rates = ungroup(
        summarize(
            group_by(
                qualified.leads,
                grossmonthlyincome = floor(qualified.leads$grossmonthlyincome / 500) * 500
            ),
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        )
    )

    ##  Buckets whose metric is not below 1 (or is undefined) are dropped.  ##
    income.rates = filter(income.rates, n >= 1, metric < 1)

    ##  Bar per bucket, shaded by lead count.                              ##
    ##  NOTE(review): xlim() sets scale limits, so a bar extending past 0  ##
    ##  or 30000 is removed rather than clipped - confirm this is wanted.  ##
    ggplot(income.rates, aes(x = grossmonthlyincome, y = metric)) +
        geom_col(aes(fill = n)) +
        xlim(0,30000) +
        coord_fixed(ratio = 30000) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")

}

getIncomePlotFunnel(lenderedge, quo(funded), quo(qualified))

In [None]:
# income.type = df.lenderedge.comparison %>%
#     group_by(
#         raw_incometype  
#     ) %>% 
#     summarize(
#         n = n()
#     )

# income.type

## Bank

In [None]:
getABAPlotFunnel = function (df, numerator.quo, denominator.quo) {

    ##  Qualified leads whose routing number is either valid or missing;    ##
    ##  a routing number that is present but invalid is left out. The       ##
    ##  groups are therefore "valid routing number" vs. "none extracted".   ##
    flagged = mutate(df, na.aba = is.na(abaroutingnumber))
    kept = filter(
        flagged,
        qualified == 1,
        !is.na(abaroutingnumber.is.valid),
        abaroutingnumber.is.valid | na.aba
    )

    aba.rates = ungroup(
        summarize(
            group_by(kept, enter.aba = abaroutingnumber.is.valid),
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        )
    )

    ##  Groups whose metric is not below 1 (or is undefined) are dropped.  ##
    aba.rates = filter(aba.rates, n >= 1, metric < 1)

    ##  Bar per group, shaded by lead count.  ##
    ggplot(aba.rates, aes(x = enter.aba, y = metric)) +
        geom_col(aes(fill = n)) +
        coord_fixed(ratio = 10) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")

}

getABAPlotFunnel(lenderedge, quo(funded), quo(qualified))

In [None]:
getAccountNumPlotFunnel = function (df, numerator.quo, denominator.quo) {

    ##  Qualified leads whose account number is valid or at least present;  ##
    ##  rows that are flagged invalid because the number is missing are     ##
    ##  left out.                                                           ##
    kept = filter(
        df,
        qualified == 1,
        !is.na(accountnumber.is.valid),
        accountnumber.is.valid | !is.na(accountnumber)
    )

    account.rates = ungroup(
        summarize(
            group_by(kept, accountnumber.is.valid),
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        )
    )

    ##  Groups whose metric is not below 1 (or is undefined) are dropped.  ##
    account.rates = filter(account.rates, n >= 1, metric < 1)

    ##  Bar per validity flag, shaded by lead count.  ##
    ggplot(account.rates, aes(x = accountnumber.is.valid, y = metric)) +
        geom_col(aes(fill = n)) +
        coord_fixed(ratio = 10) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")

}

getAccountNumPlotFunnel(lenderedge, quo(funded), quo(qualified))

In [None]:
getAccountTypePlotFunnel = function (df, numerator.quo, denominator.quo) {

    ##  Qualified leads per raw account type, with                  ##
    ##  sum(numerator) / sum(denominator) as the metric.            ##
    type.rates = ungroup(
        summarize(
            group_by(filter(df, qualified == 1), raw_accounttype),
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        )
    )

    ##  Types whose metric is not below 1 (or is undefined) are dropped.  ##
    type.rates = filter(type.rates, n >= 1, metric < 1)

    ##  Bar per account type, shaded by lead count.  ##
    ggplot(type.rates, aes(x = raw_accounttype, y = metric)) +
        geom_col(aes(fill = n)) +
        coord_fixed(ratio = 15) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")

}

getAccountTypePlotFunnel(lenderedge, quo(funded), quo(qualified))

In [None]:
getAccountLengthPlotFunnel = function (df, numerator.quo, denominator.quo) {

    ##  Qualified leads per raw account length value, with          ##
    ##  sum(numerator) / sum(denominator) as the metric.            ##
    length.rates = ungroup(
        summarize(
            group_by(filter(df, qualified == 1), raw_accountlength),
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        )
    )

    ##  Values whose metric is not below 1 (or is undefined) are dropped.  ##
    length.rates = filter(length.rates, n >= 1, metric < 1)

    ##  Bar per account length value, shaded by lead count.  ##
    ggplot(length.rates, aes(x = raw_accountlength, y = metric)) +
        geom_col(aes(fill = n)) +
        coord_fixed(ratio = 5) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")

}

getAccountLengthPlotFunnel(lenderedge, quo(funded), quo(qualified))

## Offer Amount

In [None]:
getOfferAmountPlotFunnel = function (df, numerator.quo, denominator.quo) {

    ##  Qualified leads per offer amount, with                      ##
    ##  sum(numerator) / sum(denominator) as the metric.            ##
    offer.rates = ungroup(
        summarize(
            group_by(filter(df, qualified == 1), offer_amount),
            n = n(),
            metric = sum(!!numerator.quo) / sum(!!denominator.quo)
        )
    )

    ##  Amounts whose metric is not below 1 (or is undefined) are dropped.  ##
    offer.rates = filter(offer.rates, n >= 1, metric < 1)

    ##  Bar per offer amount, shaded by lead count.  ##
    ggplot(offer.rates, aes(x = offer_amount, y = metric)) +
        geom_col(aes(fill = n)) +
        coord_fixed(ratio = 5000) +
        scale_fill_gradient(low = "lightgray", high = "steelblue")

}

getOfferAmountPlotFunnel(lenderedge, quo(funded), quo(qualified))

# Bivariate
### Do any variable pairs have a direct correlation with funding?

In [None]:
# https://learnr.wordpress.com/2010/01/26/ggplot2-quick-heatmap-plotting/

#### GMI <> DOB

In [None]:
##  Column names and types of the combined frame.  ##
str(lenderedge)