# To-Dos

In [1]:
## Outlier Removal
## Timing for Heap
## Time zones and time limiting/syncing for heap
## Highlight plot green/red
## BV/Q Number

# Import Packages

In [2]:
## Install any packages in `pkg` that are not yet installed, then attach all
## of them.
##
## Input:  pkg -- character vector of package names.
## Output: named logical vector (names are the entries of `pkg`); TRUE where the
##         package could be attached. A warning lists packages that failed,
##         because require() itself only returns FALSE instead of stopping.
check.packages <- function(pkg) {
    new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
    if (length(new.pkg) > 0) {
        install.packages(new.pkg, dependencies = TRUE)
    }

    ## vapply() always yields a logical vector; sapply() returned an empty
    ## *list* when `pkg` was empty.
    loaded <- vapply(pkg, require, logical(1), character.only = TRUE)

    if (!all(loaded)) {
        warning(
            "Could not load package(s): ",
            paste(names(loaded)[!loaded], collapse = ", "),
            call. = FALSE
        )
    }

    loaded
}

In [4]:
check.packages(c('RPostgreSQL', 'RJDBC', 'ggplot2', 'dplyr', 'gridExtra', 'cowplot', 'rJava', 'ggpubr', 'lubridate', 'mailR'))

In [5]:
## Prevent scientific notation: a large `scipen` penalty makes R prefer fixed
## notation when numbers are printed or pasted into text.
options(scipen = 999)

## Turn warnings off. `warn = -1` suppresses ALL warnings for the rest of the
## session, including those raised inside the functions defined below.
options(warn = -1)

## To turn warnings back on, run the line below.
# options(warn = 0)

# Connect to Databases

In [6]:
## Close every connection currently registered with the PostgreSQL driver.
##
## The list of connections is printed before they are closed.
## Output: NULL (invisibly).
killDbConnections <- function () {
  all_cons <- dbListConnections(PostgreSQL())
  print(all_cons)

  ## The original loop body was `+  dbDisconnect(con)`: a console continuation
  ## prompt pasted into the source, which applied a unary plus to the result.
  for (con in all_cons) {
    dbDisconnect(con)
  }

#   print(paste(length(all_cons), " connections killed."))
  invisible(NULL)
}

In [7]:
## Print how many connections are currently registered with the PostgreSQL
## driver. The printed string is also returned (invisibly) by print().
checkConnections <- function() {
    active.count <- length(dbListConnections(PostgreSQL()))
    status.line <- paste(active.count, " connections active.")
    print(status.line)
}

### Reporting

In [8]:
## Open a fresh connection to the `reporting` PostgreSQL database.
##
## Every existing PostgreSQL connection is closed first (killDbConnections()),
## so a connection returned by an earlier call is no longer usable afterwards.
## Credentials are read from the REPORTING_USER / REPORTING_PASS environment
## variables.
## Output: the connection object (returned invisibly).
createReportingConnectionObject <- function() {
    killDbConnections()

    connection <- dbConnect(
      dbDriver('PostgreSQL'),
      dbname = 'reporting',
      host = 'reporting.ckpglb17yttu.us-east-1.rds.amazonaws.com',
      port = 5432,
      user = Sys.getenv('REPORTING_USER'),
      password = Sys.getenv('REPORTING_PASS')
    )

    invisible(connection)
}

### Heap

In [9]:
## Open a fresh connection to the Heap database (`oppreddb`, port 5439) through
## the PostgreSQL driver.
##
## Every existing PostgreSQL connection is closed first (killDbConnections()),
## so a connection returned by an earlier call is no longer usable afterwards.
## Credentials are read from the HEAP_USER / HEAP_PASS environment variables.
## Output: the connection object (returned invisibly).
createHeapConnectionObject <- function() {
    killDbConnections()

    heap.connection <- dbConnect(
      dbDriver('PostgreSQL'),
      dbname = 'oppreddb',
      host = 'oppred.cyt7r7wgkc6b.us-east-1.redshift.amazonaws.com',
      port = 5439,
      user = Sys.getenv('HEAP_USER'),
      password = Sys.getenv('HEAP_PASS')
    )

    invisible(heap.connection)
}

# Get Data

### Reporting -- Funnel

In [10]:
## Return the hour (America/Chicago) up to which the reporting data is loaded.
##
## The query takes the latest `createddate` of each of the three cloudlending
## tables used by the funnel, keeps the earliest of those three values and
## truncates it to the hour.
## Output: the `createddate_` column of the single-row query result.
getLatestReportingRefresh <- function() {

    refresh.sql <- "
            select
                date_trunc('hour', min(createddate_)) as createddate_
            from
                (
                select
                    max(createddate at time zone 'America/Chicago') as createddate_
                from
                    cloudlending.application_status_history
                union all
                select
                    max(createddate at time zone 'America/Chicago') as createddate_
                from
                    cloudlending.applications
                union all
                select
                    max(createddate at time zone 'America/Chicago') as createddate_
                from
                    cloudlending.microbilt_information
                ) as latest_dates
            "

    refresh.result <- dbGetQuery(
        createReportingConnectionObject(),
        refresh.sql
    )

    refresh.result$createddate_
}

In [11]:
## Pull hourly funnel counts from the reporting database.
##
## Input:  date.string -- the session date; it is pasted into the SQL text and
##         cast with ::date.
##         NOTE(review): the value is interpolated directly into the query, so
##         it must come from a trusted source.
## Output: data frame with one row per (dayofyear, dayofweek, hourofday,
##         grouping), all in America/Chicago time, holding distinct-application
##         counts per status transition: qualified, bankverified,
##         passscorecardratecard, contractsigned, cs_decisioned, funded,
##         bankverified.dl and bankverified.mb.
##         `grouping` is 'test' for rows on date.string, otherwise 'training'.
##
## The query covers the three months up to and including date.string, only
## applications with type_formula = 'New', and only rows created before the
## hour-truncated refresh time computed in the `time_limit_gmt` CTE.
getFunnel = function(date.string) {
    
    df = dbGetQuery(
        createReportingConnectionObject(),
        paste(
            "
            with time_limit_gmt as 
            (
                select
                    date_trunc('hour', min(createddate_)) as createddate_
                from
                    (
                    select
                        max(createddate) as createddate_
                    from
                        cloudlending.application_status_history
                    union all
                    select
                        max(createddate) as createddate_
                    from
                        cloudlending.applications
                    union all
                    select
                        max(createddate) as createddate_
                    from
                        cloudlending.microbilt_information
                    ) as latest_dates
            )  
            select
                (c_ash.createddate at time zone 'America/Chicago')::date as dayofyear
                , extract(dow from c_ash.createddate at time zone 'America/Chicago') as dayofweek
                , extract(hour from c_ash.createddate at time zone 'America/Chicago') as hourofday
                , count(distinct case 	when old_value = 'BUSINESS RULES PASSED' and new_value = 'BUREAU APPROVED' then c_app.id end) as qualified
                , count(distinct case 	when new_value = 'BANK VERIFICATION COMPLETED' then c_app.id end) as bankverified
                , count(distinct case 	when old_value = 'NEW - PRICING GENERATED' and new_value in ('CONTRACT SIGNED', 'WAITING ON STIPULATIONS') then c_app.id end) as passscorecardratecard
                , count(distinct case 	when old_value in ('NEW - PRICING GENERATED', 'WAITING ON STIPULATIONS') and new_value = 'CONTRACT SIGNED' then c_app.id end) as contractsigned
                , count(distinct case 	when (c_ash.old_value in ('NEW - PRICING GENERATED', 'WAITING ON STIPULATIONS') and c_ash.new_value = 'CONTRACT SIGNED' and c_app.denialreason in ('Agent', 'Denied - Bankruptcy', 'Denied - First Payment Date', 'Verify'))
                                        or (c_ash.new_value = 'LOAN APPROVED' and c_ash.old_value != c_ash.new_value)
                                        then c_app.id end) as cs_decisioned
                , count(distinct case 	when new_value = 'LOAN APPROVED' and old_value != new_value then c_app.id end) as funded
                , case 	when (c_ash.createddate at time zone 'America/Chicago')::date = '", date.string, "'::date
                        then 'test'	
                        else 'training'
                        end as grouping
                , count(distinct case 	when ibv_source = 'DecisionLogic' then c_app.id end) as ", '"bankverified.dl"', "
                , count(distinct case 	when ibv_source = 'MicroBilt' then c_app.id end) as ", '"bankverified.mb"', " 
            from
                cloudlending.applications as c_app
                inner join
                    cloudlending.application_status_history as c_ash
                    on c_app.id = c_ash.application
                left join
                    cloudlending.microbilt_information as c_mb
                    on c_ash.application = c_mb.application
                    and abs(extract(epoch from c_ash.createddate - c_mb.createddate)) <= 30
                    and c_ash.new_value = 'BANK VERIFICATION COMPLETED'
                inner join
                    time_limit_gmt
                    on TRUE
            where
                c_app.createddate < createddate_
                and c_ash.createddate < createddate_
                and (c_ash.createddate at time zone 'America/Chicago')::date >= '", date.string, "'::date - '3 months'::interval
                and (c_ash.createddate at time zone 'America/Chicago')::date <= '", date.string, "'::date
                and type_formula = 'New'
            group by
                dayofyear
                , dayofweek
                , hourofday
                , grouping
            order by
                dayofyear asc
                , hourofday asc
            ",
            sep = ''
        )
    )
    
    return(df)
}

### Heap -- IBV Attempts

In [62]:
## Return the hour (America/Chicago) up to which the Heap data is loaded.
##
## The query takes the latest event `time` of the DecisionLogic and MicroBilt
## iframe-completion tables, keeps the earlier of the two and truncates it to
## the hour.
## Output: the `time_` column of the single-row query result.
getLatestHeapRefresh <- function() {

    refresh.sql <- "
            select
                date_trunc('hour', min(time_)) as time_
            from
                (
                select
                    max(time at time zone 'GMT' at time zone 'America/Chicago') as time_
                from
                    main_production._completed_decisionlogic_iframe_
                union all
                select
                    max(time at time zone 'GMT' at time zone 'America/Chicago') as time_
                from
                    main_production._completed_microbilt_iframe_
                ) as latest_dates
            "

    refresh.result <- dbGetQuery(
        createHeapConnectionObject(),
        refresh.sql
    )

    refresh.result$time_
}

In [13]:
## Get the first DecisionLogic IBV attempt per application.
##
## Input:  date.string -- the session date; it is pasted into both SQL texts
##         and cast with ::date.
##         NOTE(review): the value is interpolated directly into the queries,
##         so it must come from a trusted source.
## Output: grouped data frame with columns name (application name), grouping
##         ('test' when the Heap event falls on date.string, otherwise
##         'training') and time_ (earliest matching event time,
##         America/Chicago).
##
## Heap events from the three months up to date.string are joined to 'New'
## applications created in the four months up to date.string on `contact`.
## An event is kept only when it happened at or after the application was
## created and no more than 15 calendar days later.
getAttemptDLRaw <- function(date.string) {

    df.heap <- dbGetQuery(
        createHeapConnectionObject(),
        paste(
            "
            select
                h_users.", '"', 'identity', '"', " as contact
                , h_dl.time at time zone 'GMT' at time zone 'America/Chicago' as time_
                , 'DecisionLogic' as ibv_source
                , case  when (h_dl.time at time zone 'GMT' at time zone 'America/Chicago')::date = '", date.string, "'::date
                        then 'test'
                        else 'training'
                        end as grouping
            from
                main_production._completed_decisionlogic_iframe_ as h_dl
                inner join
                    main_production.users as h_users
                    on h_dl.user_id = h_users.user_id
            where
                (h_dl.time at time zone 'GMT' at time zone 'America/Chicago')::date >= '", date.string, "'::date - '3 months'::interval
                and (h_dl.time at time zone 'GMT' at time zone 'America/Chicago')::date <= '", date.string, "'::date
            ",
            sep = ''
        )
    )

    ## sep = '' was missing here, so the date literal was sent padded with
    ## spaces (' 2019-01-30 ') instead of matching the other queries.
    df.reporting <- dbGetQuery(
        createReportingConnectionObject(),
        paste(
            "
            select
                name
                , createddate at time zone 'America/Chicago' as createddate_
                , contact
            from
                cloudlending.applications
            where 
              type_formula = 'New'
              and (createddate at time zone 'America/Chicago')::date >= '", date.string, "'::date - '4 months'::interval
              and (createddate at time zone 'America/Chicago')::date <= '", date.string, "'::date
            ",
            sep = ''
        )
    )

    df.merged <- merge(
        x = df.heap,
        y = df.reporting,
        by.x = 'contact',
        by.y = 'contact'
    )

    ## Keep events inside the window [application created, created + 15 days].
    df.boxed <- df.merged[
        which(
            df.merged$time_ >= df.merged$createddate_ &
            as.Date(df.merged$time_) <= as.Date(df.merged$createddate_) + 15
        ), ]

    ## One row per application and grouping: the earliest attempt.
    df.unique <- df.boxed %>%
        group_by(
            name,
            grouping
        ) %>%
        summarize(
            time_ = min(time_)
        )

    return(df.unique)

}

In [14]:
## Get the first MicroBilt IBV attempt per application.
##
## Input:  date.string -- the session date; it is pasted into both SQL texts
##         and cast with ::date.
##         NOTE(review): the value is interpolated directly into the queries,
##         so it must come from a trusted source.
## Output: grouped data frame with columns name (application name), grouping
##         ('test' when the Heap event falls on date.string, otherwise
##         'training') and time_ (earliest matching event time,
##         America/Chicago).
##
## Heap events from the three months up to date.string are joined to 'New'
## applications created in the four months up to date.string on `contact`.
## An event is kept only when it happened at or after the application was
## created and no more than 15 calendar days later.
getAttemptMBRaw <- function(date.string) {

    df.heap <- dbGetQuery(
        createHeapConnectionObject(),
        paste(
            "
            select
                h_users.", '"', 'identity', '"', " as contact
                , h_mb.time at time zone 'GMT' at time zone 'America/Chicago' as time_
                , 'MicroBilt' as ibv_source
                , case  when (h_mb.time at time zone 'GMT' at time zone 'America/Chicago')::date = '", date.string, "'::date
                        then 'test'
                        else 'training'
                        end as grouping
            from
                main_production._completed_microbilt_iframe_ as h_mb
                inner join
                    main_production.users as h_users
                    on h_mb.user_id = h_users.user_id
            where
              (h_mb.time at time zone 'GMT' at time zone 'America/Chicago')::date >= '", date.string, "'::date - '3 months'::interval
              and (h_mb.time at time zone 'GMT' at time zone 'America/Chicago')::date <= '", date.string, "'::date
            ",
            sep = ''
        )
    )

    ## sep = '' was missing here, so the date literal was sent padded with
    ## spaces (' 2019-01-30 ') instead of matching the other queries.
    df.reporting <- dbGetQuery(
        createReportingConnectionObject(),
        paste(
            "
            select
                name
                , createddate at time zone 'America/Chicago' as createddate_
                , contact
            from
                cloudlending.applications
            where 
              type_formula = 'New'
              and (createddate at time zone 'America/Chicago')::date >= '", date.string, "'::date - '4 months'::interval
              and (createddate at time zone 'America/Chicago')::date <= '", date.string, "'::date
            ",
            sep = ''
        )
    )

    df.merged <- merge(
        x = df.heap,
        y = df.reporting,
        by.x = 'contact',
        by.y = 'contact'
    )

    ## Keep events inside the window [application created, created + 15 days].
    df.boxed <- df.merged[
        which(
            df.merged$time_ >= df.merged$createddate_ &
            as.Date(df.merged$time_) <= as.Date(df.merged$createddate_) + 15
        ), ]

    ## One row per application and grouping: the earliest attempt.
    df.unique <- df.boxed %>%
        group_by(
            name,
            grouping
        ) %>%
        summarize(
            time_ = min(time_)
        )

    return(df.unique)

}

In [15]:
## Combine the DecisionLogic and MicroBilt attempt tables into one table that
## holds, per application name and grouping, the earliest attempt time across
## both providers.
##
## Input:  dl.data.raw, mb.data.raw -- tables with name, grouping and time_.
## Output: one row per (name, grouping) with time_ = min(time_).
getAttemptBothRaw <- function (dl.data.raw, mb.data.raw) {

    stacked.attempts <- rbind(dl.data.raw, mb.data.raw)

    attempts.by.app <- group_by(stacked.attempts, name, grouping)
    earliest.attempts <- summarize(attempts.by.app, time_ = min(time_))

    earliest.attempts
}

In [16]:
## Count distinct applications with an IBV attempt per day / weekday / hour.
##
## Input:  data         -- table with columns time_, name and grouping.
##         ibv.provider -- NA (default) keeps the count column named
##                         'attempt.ibv'; 'DecisionLogic' renames it to
##                         'attempt.dl'; any other value renames it to
##                         'attempt.mb'.
## Output: grouped table with dayofyear, dayofweek, hourofday, grouping and the
##         count column.
getAttemptAggregate <- function(data, ibv.provider = NA) {

    df.aggregated <- data %>%
        group_by(
            dayofyear = date(time_),
            ## week_start is fixed to 1 (Monday = 1 ... Sunday = 7) so that
            ## `%% 7` always yields Sunday = 0 ... Saturday = 6, the numbering
            ## of the `extract(dow ...)` column this table is merged with.
            ## Reading the "lubridate.week.start" option here would silently
            ## shift the weekdays whenever that option is set.
            dayofweek = wday(time_, week_start = 1) %% 7,
            hourofday = hour(time_),
            grouping
        ) %>%
        summarize(
            attempt.ibv = n_distinct(name)
        )

    if (!is.na(ibv.provider)) {
        provider.column <- if (ibv.provider == 'DecisionLogic') {
            'attempt.dl'
        } else {
            'attempt.mb'
        }
        colnames(df.aggregated)[colnames(df.aggregated) == 'attempt.ibv'] <- provider.column
    }

    return(df.aggregated)

}

### Merge Heap to Funnel

In [17]:
## Left-join the hourly IBV attempt counts onto the hourly funnel counts.
##
## Input:  reporting.data -- funnel table keyed by dayofyear, dayofweek,
##                           hourofday and grouping.
##         dl.data.raw, mb.data.raw -- raw attempt tables per provider.
## Output: reporting.data with the attempt.dl, attempt.mb and attempt.ibv
##         columns added. Hours without attempts keep NA in those columns;
##         nothing is filled in here.
getDf <- function(reporting.data, dl.data.raw, mb.data.raw) {

    join.keys <- c('dayofyear', 'dayofweek', 'hourofday', 'grouping')

    ## Aggregations in join order: DecisionLogic, MicroBilt, both combined.
    attempt.tables <- list(
        getAttemptAggregate(
            data = dl.data.raw,
            ibv.provider = 'DecisionLogic'
        ),
        getAttemptAggregate(
            data = mb.data.raw,
            ibv.provider = 'MicroBilt'
        ),
        getAttemptAggregate(
            data = getAttemptBothRaw(
                dl.data.raw = dl.data.raw,
                mb.data.raw = mb.data.raw
            )
        )
    )

    ## Fold the three tables onto the funnel data one left join at a time.
    Reduce(
        function(joined, attempts) {
            merge(
                x = joined,
                y = attempts,
                by = join.keys,
                all.x = TRUE
            )
        },
        attempt.tables,
        reporting.data
    )
}

In [18]:
## Reduce the merged funnel table to one numerator / denominator pair.
##
## Input:  data               -- table with dayofyear, dayofweek, hourofday,
##                               grouping and the two requested count columns.
##         numerator.string   -- name of the column used as numerator.
##         denominator.string -- name of the column used as denominator.
## Output: data frame with columns dayofyear, dayofweek, hourofday, grouping,
##         denominator, numerator and heap.time.tag, ordered by dayofyear and
##         hourofday. NA counts are replaced by 0. heap.time.tag is 1 on every
##         row when either column is a Heap attempt count, otherwise 0.
limitDf <- function(data, numerator.string = NA, denominator.string = NA) {

    key.columns <- c('dayofyear', 'dayofweek', 'hourofday', 'grouping')

    df.reorder <- data[ , c(key.columns, denominator.string, numerator.string)]
    colnames(df.reorder) <- c(key.columns, 'denominator', 'numerator')

    ## Replace missing counts in place; this keeps the column numeric even
    ## when the table has no rows.
    df.reorder$denominator[is.na(df.reorder$denominator)] <- 0
    df.reorder$numerator[is.na(df.reorder$numerator)] <- 0

    df.ordered <- df.reorder[order(df.reorder$dayofyear, df.reorder$hourofday), ]

    need.heap.tag.strings <- c('attempt.dl', 'attempt.mb', 'attempt.ibv')
    uses.heap <- any(c(numerator.string, denominator.string) %in% need.heap.tag.strings)

    ## The tag is built at full length. The previous scalar ifelse() returned a
    ## single value and only worked through recycling, which fails for a table
    ## with zero rows.
    df.ordered.heap.tagged <- data.frame(
        df.ordered,
        heap.time.tag = rep(if (uses.heap) 1 else 0, nrow(df.ordered))
    )

    return(df.ordered.heap.tagged)
}

In [19]:
# ## 2018-12-18 -- Wells Fargo DL Outage
# ## 2018-12-20 -- Everything OK
# ## 2010-01-01 -- Complete DL Outage
# ## 2019-01-04 -- Chase DL Errors
# ## 2019-01-05 -- Chase DL Errors
# ## 2019-01-06 -- Chase DL Errors
# ## 2019-01-07 -- Chase DL Errors
# ## 2019-01-08 -- Chase DL Errors
# ## 2019-01-13 -- NFCU DL Errors
# ## 2019-01-17 -- Everything OK
# ## 2019-01-18 -- Everything OK
# ## 2019-01-19 -- Everything OK
# ## 2019-01-20 -- Everything OK
# ## 2019-01-21 -- Everything OK
# ## 2019-01-22 -- Everything OK
# # 2019-01-23 -- Everything OK
# # 2019-01-29 -- Early DL Issues with Chase
# # 2019-01-30 -- FUNNEL



# # df$dayofyear = as.Date(df$dayofyear)
# # df$grouping = as.character(df$grouping)

# # head(df)
# # str(df)

### Reporting -- Checks and Balances: Apps in Contract Signed

In [None]:
## Single-row check used in the supporting plot text: among applications
## created in the last 15 days, `last_update` is the latest createddate
## (America/Chicago) and `count_cs` is the number whose status is
## 'CONTRACT SIGNED'. The query runs once, when this cell is executed.
checkCS = dbGetQuery(
    createReportingConnectionObject(),
    "
    select 
        max(createddate at time zone 'America/Chicago') as last_update
        , count(case when status = 'CONTRACT SIGNED' then id end) as count_cs
    from
        cloudlending.applications
    where
        createddate >= now() - '15 days'::interval
    "
)

In [116]:
## Single-row check used in the supporting plot text. For every application
## whose status is neither 'DENIED' nor 'LOAN APPROVED', the most recent
## 'Owner' history row is taken (rn = 1, newest createddate first).
## `count_uwq` counts the applications whose latest owner value is
## '00G50000002rxZ4EAI'; `last_update` is the latest such change
## (America/Chicago).
## NOTE(review): presumably that id is the underwriting queue -- confirm.
checkUWQ = dbGetQuery(
    createReportingConnectionObject(),
    "
    select
        max(createddate_) as last_update
        , count(*) as count_uwq
    from
        (
            select
                c_hist.parentid as application
                , c_hist.createddate at time zone 'America/Chicago' as createddate_
                , c_hist.newvalue
                , row_number() over (partition by c_hist.parentid order by c_hist.createddate desc) as rn
            from
                cloudlending.applications_history as c_hist
                inner join
                    cloudlending.applications as c_app
                    on c_hist.parentid = c_app.id
            where
                c_hist.field = 'Owner'
                and c_app.status not in ('DENIED', 'LOAN APPROVED')
      ) as with_rn
    where
        rn = 1
        and newvalue = '00G50000002rxZ4EAI'
    "
)

[[1]]
<PostgreSQLConnection>



# Clean the data

### Get Information on the "Session_Date"

In [20]:
## Describe the session day, taken from the LAST row of the data frame.
##
## Input:  data frame with dayofyear and dayofweek columns.
## Output: list with
##           date  -- dayofyear of the last row,
##           dow   -- dayofweek of the last row,
##           hours -- the numeric sequence 0, 1, ..., 23.
getSessionInfo <- function(data) {

    list(
        date = tail(data$dayofyear, 1),
        dow = tail(data$dayofweek, 1),
        hours = seq(from = 0, to = 23, by = 1)
    )

}

### Get Aggregated Data for the "Session_Date" (Test)

In [21]:
## Build the numerator, denominator and ratio series for the TEST rows.
##
## Input:  data       -- output of limitDf() (columns dayofyear, hourofday,
##                       grouping, denominator, numerator, heap.time.tag).
##         cumulative -- TRUE: running totals within each dayofyear, in the
##                       existing row order; FALSE: totals per dayofyear and
##                       hourofday.
## Output: table with dayofyear, hourofday, denominator.session,
##         numerator.session and ratio.session (0 where the denominator is 0).
##
## When the data is tagged as Heap-based (heap.time.tag == 1), every hour later
## than the hour of the latest Heap refresh is set to 0.
getSessionData <- function(data, cumulative) {

    df.session <- data[which(data$grouping == 'test'), ]

    if (cumulative == TRUE) {

        session.point.estimates <-
            df.session %>%
            group_by(
                dayofyear
            ) %>%
            transmute(
                hourofday = hourofday,
                denominator.session = cumsum(denominator),
                numerator.session = cumsum(numerator),
                ratio.session = ifelse(
                    denominator.session > 0,
                    numerator.session/denominator.session,
                    0
                )
            )
    } else if (cumulative == FALSE) {

        session.point.estimates <-
            df.session %>%
            group_by(
                dayofyear,
                hourofday
            ) %>%
            summarize(
                denominator.session = sum(denominator),
                numerator.session = sum(numerator),
                ratio.session = ifelse(
                    sum(denominator) > 0,
                    sum(numerator)/sum(denominator),
                    0
                )
            )
    }

    ## any() is FALSE for an empty table and ignores NA tags. The previous
    ## `if (max(tag == 1))` evaluated to -Inf (treated as TRUE) on empty input
    ## and failed on NA.
    if (any(data$heap.time.tag == 1, na.rm = TRUE)) {

        ## Query the refresh time once; each call opens a new connection.
        latest.heap.hour <- hour(getLatestHeapRefresh())
        not.loaded.yet <- which(session.point.estimates$hourofday > latest.heap.hour)

        session.point.estimates$denominator.session[not.loaded.yet] <- 0
        session.point.estimates$numerator.session[not.loaded.yet] <- 0
        session.point.estimates$ratio.session[not.loaded.yet] <- 0
    }

    return(session.point.estimates)
}

### Get Aggregated Data for History (Training)

##### Make Cumulative

In [22]:
## Turn the TRAINING rows into running totals per day.
##
## Input:  data frame with grouping, dayofyear, dayofweek, hourofday,
##         denominator and numerator.
## Output: training rows whose dayofweek equals the session weekday
##         (getSessionInfo()$dow), grouped by dayofyear, with denominator and
##         numerator replaced by their cumulative sums in the existing row
##         order.
## Reliant on getSessionInfo().
makeCumulative <- function(data) {

    session.dow <- getSessionInfo(data = data)$dow

    training.rows <- data[which(data$grouping == 'training'), ]
    same.weekday.rows <- training.rows[which(training.rows$dayofweek == session.dow), ]

    rows.by.day <- group_by(same.weekday.rows, dayofyear)

    transmute(
        rows.by.day,
        dayofweek = dayofweek,
        hourofday = hourofday,
        denominator = cumsum(denominator),
        numerator = cumsum(numerator)
    )

}

##### Aggregate

In [23]:
## Summarise the TRAINING data per hour of day for the session weekday.
##
## Input:  data       -- output of limitDf().
##         cumulative -- TRUE: summarise the running totals produced by
##                       makeCumulative(); FALSE: summarise the raw training
##                       rows of the session weekday.
## Output: one row per hourofday with denominator.hist, numerator.hist, r.hist
##         (ratio of the sums), n.hist, sd.denominator.hist, sd.numerator.hist,
##         mu.denominator.hist, corr.hist (0 when the correlation is NA) and
##         se.hist (standard error of the ratio).
## Reliant on makeCumulative() and getSessionInfo().
getHistoricalData <- function(data, cumulative) {

    ## Choose the rows to summarise; the summary itself is the same for both
    ## modes.
    if (cumulative == TRUE) {

        training.rows <- makeCumulative(data = data)

    } else if (cumulative == FALSE) {

        all.training <- data[which(data$grouping == 'training'), ]
        training.rows <- all.training[which(all.training$dayofweek == getSessionInfo(data = data)$dow), ]
    }

    rows.by.hour <- group_by(training.rows, hourofday)

    ## Later summaries reuse the earlier ones; the arithmetic order of se.hist
    ## is the same as when every term is spelled out.
    historical.point.estimates <- summarize(
        rows.by.hour,
        denominator.hist = sum(denominator),
        numerator.hist = sum(numerator),
        r.hist = sum(numerator)/sum(denominator),
        n.hist = sum(denominator),
        sd.denominator.hist = sd(denominator),
        sd.numerator.hist = sd(numerator),
        mu.denominator.hist = mean(denominator),
        corr.hist = ifelse(is.na(cor(denominator, numerator)), 0, cor(denominator, numerator)),
        se.hist = sqrt(
            (
                sd.numerator.hist^2 +
                sd.denominator.hist^2 * r.hist^2 -
                2 * numerator.hist/denominator.hist * corr.hist * sd.numerator.hist * sd.denominator.hist
            ) /
            (
                n.hist *
                mu.denominator.hist^2
            )
        )
    )

    historical.point.estimates
}

# Run the bootstrap

### Construct Data Frame to store final information for ggplot.

In [24]:
## Build the 24-row data frame that will hold the confidence intervals.
##
## Input:  data       -- output of limitDf().
##         cumulative -- passed on to getSessionData() and getHistoricalData().
## Output: data frame with date.session, hour.session, numerator.session,
##         denominator.session, r.session, r.hist, se.hist and the columns
##         z.lower, z.upper, ci.lower, ci.upper initialised to 0.
##         Session series shorter than 24 values are padded with 0 at the end.
## Reliant on getSessionData() and getHistoricalData().
getInitialCI <- function(data, cumulative) {

    session <- getSessionData(data = data, cumulative = cumulative)
    historical <- getHistoricalData(data = data, cumulative = cumulative)

    ## Extend a session series to 24 entries by appending zeros.
    padTo24 <- function(values) {
        c(values, rep(0, 24 - length(values)))
    }

    data.frame(
        date.session = rep(max(as.Date(session$dayofyear)),24),
        hour.session = historical$hourofday,
        numerator.session = padTo24(session$numerator.session),
        denominator.session = padTo24(session$denominator.session),
        r.session = padTo24(session$ratio.session),
        r.hist = historical$r.hist,
        se.hist = historical$se.hist,
        z.lower = rep(0,24),
        z.upper = rep(0,24),
        ci.lower = rep(0,24),
        ci.upper = rep(0,24)
    )
}

### Execute the Bootstrap.

In [25]:
## Compute a bootstrap-t confidence interval for the ratio, per hour of day.
##
## Input:  data       -- output of limitDf().
##         cumulative -- TRUE: resample the running totals from
##                       makeCumulative(); FALSE: resample the raw training
##                       rows of the session weekday.
##         B          -- number of bootstrap resamples per hour.
##         alpha      -- two-sided significance level (e.g. 0.05).
## Output: the data frame from getInitialCI() with z.lower, z.upper, ci.lower
##         and ci.upper filled in. NA values of ci.lower are replaced by 0.
## Reliant on getInitialCI(), makeCumulative() and getSessionInfo().
getBootstrapInterval <- function(data, cumulative, B, alpha) {

    ## Initialize final output data frame.
    ci.information <- getInitialCI(data = data, cumulative = cumulative)

    ## Initialize data for bootstrap.
    if (cumulative == TRUE) {

        df.training.dow <- makeCumulative(data = data)
    } else if (cumulative == FALSE) {

        df.training <- data[which(data$grouping == 'training'), ]
        df.training.dow <- df.training[which(df.training$dayofweek == getSessionInfo(data = data)$dow), ]
    }

    ## Positions of the percentiles in the sorted bootstrap vector. They are
    ## made whole numbers explicitly: the small epsilon guards against products
    ## such as 974.9999999 being truncated to 974, and the lower bound of 1
    ## avoids a zero index when alpha / 2 * B < 1.
    lower.index <- max(1, floor(alpha / 2 * B + 1e-8))
    upper.index <- min(B, max(1, floor((1 - alpha / 2) * B + 1e-8)))

    for (i in 1:24) {

        ## Subset the data to the current HOD (i - 1) once per hour.
        df.training.dow.hour <- df.training.dow[which(df.training.dow$hourofday == i - 1), ]
        numerator.hour <- df.training.dow.hour$numerator
        denominator.hour <- df.training.dow.hour$denominator
        n.rows <- nrow(df.training.dow.hour)

        ## Point estimate from the training data.
        mu <- ci.information$r.hist[i]

        ## Fresh container per hour; it stays NA when there is nothing to
        ## resample, so the bounds of that hour become NA.
        bootstrap_z <- rep(NA_real_, B)

        if (n.rows > 0) {

            for (j in seq_len(B)) {

                resample_index <- sample.int(n.rows, size = n.rows, replace = TRUE)
                numerator.resample <- numerator.hour[resample_index]
                denominator.resample <- denominator.hour[resample_index]

                ## Calculate statistics from the resample.
                r <- sum(numerator.resample)/sum(denominator.resample)
                n <- sum(denominator.resample)
                sx <- sd(denominator.resample)
                sy <- sd(numerator.resample)
                mx <- mean(denominator.resample)
                corr <- cor(numerator.resample, denominator.resample)
                if (is.na(corr)) {
                    corr <- 0
                }

                se <- sqrt(
                    (r^2*sx^2 + sy^2 - 2*r*corr*sx*sy)/
                    (n*mx^2)
                )

                ## Calculate the bootstrap Z.
                bootstrap_z[j] <- (r - mu)/se
            }
        }

        ## For each hour, take percentiles of the bootstrap Z vector to
        ## calculate the confidence interval for that hour. sort() drops NA
        ## values, so an index past the end yields NA.
        bootstrap_z <- sort(bootstrap_z)

        ci.information$z.lower[i] <- bootstrap_z[lower.index]
        ci.information$z.upper[i] <- bootstrap_z[upper.index]
    }

    ## The bounds are vectorised over all hours, so compute them once.
    ci.information$ci.lower <- ci.information$r.hist - ci.information$z.upper * ci.information$se.hist
    ci.information$ci.upper <- ci.information$r.hist - ci.information$z.lower * ci.information$se.hist

    ci.information$ci.lower <- ifelse(
        is.na(ci.information$ci.lower),
        0,
        ci.information$ci.lower
    )

    return(ci.information)
}

### Smooth the CI.Lower Line

In [26]:
## Smooth the ci.lower column by repeated neighbour averaging.
##
## Input:  ci.information -- data frame with hour.session and ci.lower.
##         iterations     -- number of smoothing passes; values below 1 leave
##                           the data unchanged.
## Output: copy of ci.information with ci.lower smoothed.
##
## In each pass the hours 1 to 22 are visited in order, and the value of an
## hour is replaced by the mean of the rows directly before and after it. The
## update is done in place, so the row before has already been smoothed in the
## same pass. Hours 0 and 23 are never changed.
smoothLowerBound <- function(ci.information, iterations) {

    smoothed <- ci.information
    lower <- smoothed$ci.lower

    pass <- 1
    while (pass <= iterations) {

        for (hour.value in 1:22) {

            row <- which(smoothed$hour.session == hour.value)
            lower[row] <- mean(c(lower[row - 1], lower[row + 1]))
        }

        pass <- pass + 1
    }

    smoothed$ci.lower <- lower

    return(smoothed)

}

# Create the ggplot

In [27]:
## Translate a funnel column name into the short label used in plot titles.
##
## Input:  atStatus_string -- one column name, e.g. 'qualified'.
## Output: the label, or '' when the name is not in the table below.
getFunnelName <- function(atStatus_string) {

    funnel.labels <- c(
        'qualified' = 'Q',
        'attempt.ibv' = 'Attempt IBV',
        'attempt.dl' = 'Attempt DL',
        'attempt.mb' = 'Attempt MB',
        'bankverified' = 'BV',
        'bankverified.dl' = 'Success DL',
        'bankverified.mb' = 'Success MB',
        'passscorecardratecard' = 'SC',
        'contractsigned' = 'CS',
        'cs_decisioned' = 'CS Decisioned',
        'funded' = 'F'
    )

    ## Compare against each known name in order and stop at the first match.
    for (status.key in names(funnel.labels)) {
        if (atStatus_string == status.key) {
            return(funnel.labels[[status.key]])
        }
    }

    ''
}

In [28]:
## Look up the plot color (hex string) for a funnel column name.
##
## Input:  atStatus_string -- one column name, e.g. 'bankverified'.
## Output: the hex color, or '' when the name has no entry below.
##         'qualified', 'attempt.dl' and 'attempt.mb' have no entry and
##         therefore return ''.
getFunnelColor <- function(atStatus_string) {

    funnel.colors <- c(
        'attempt.ibv' = '#E76BF3',
        'bankverified.dl' = '#00B0F6',
        'bankverified.mb' = '#E58700',
        'bankverified' = '#619CFF',
        'passscorecardratecard' = '#00C0AF',
        'contractsigned' = '#B983FF',
        'cs_decisioned' = '#C99800',
        'funded' = '#619CFF'
    )

    ## Compare against each known name in order and stop at the first match.
    for (status.key in names(funnel.colors)) {
        if (atStatus_string == status.key) {
            return(funnel.colors[[status.key]])
        }
    }

    ''
}

In [140]:
getSupportingTextPlot = function () {
    # Build an otherwise empty ggplot whose caption reports the counts held in
    # the global objects `checkCS` (contract signed) and `checkUWQ` (UW queue),
    # each with its `last_update` value.
    #
    # Returns: a ggplot object carrying only a caption.
    
    contract.signed.note = paste(
        '\n As of', checkCS$last_update,
        'there were', checkCS$count_cs,
        'apps that are in CONTRACT SIGNED. \n\n'
    )
    uw.queue.note = paste(
        'As of', checkUWQ$last_update,
        'there were', checkUWQ$count_uwq,
        'apps that are in the UW Queue.'
    )
    
    # Both notes are joined with a single space, matching one flat paste().
    caption.plot = ggplot() + labs(caption = paste(contract.signed.note, uw.queue.note))
    
    caption.plot
}

In [104]:
getBootstrapPlot = function(df.bi, numerator.string = NA, denominator.string = NA, alpha, cumulative) {
    # Draw the intraday chart for one funnel step: one bar per hour for the
    # session rate, overlaid with a line for the lower confidence bound.
    #
    # Args:
    #   df.bi: data frame with the columns date.session, hour.session,
    #       r.session, ci.lower, numerator.session and denominator.session.
    #   numerator.string, denominator.string: funnel-status keys passed to
    #       getFunnelName() / getFunnelColor().
    #   alpha: fraction shown as a percentage in the subtitle.
    #   cumulative: when TRUE the caption volumes use max() of the session
    #       columns (presumably running totals -- confirm with the caller);
    #       otherwise they use sum().
    #
    # Returns: a ggplot object.
    
    df = df.bi
    
    # `cumulative == TRUE` keeps the original truthiness rules; isTRUE() turns
    # an NA flag into FALSE instead of propagating NA into the labels.
    is.cumulative = isTRUE(cumulative == TRUE)
    
    numerator.label = getFunnelName(numerator.string)
    denominator.label = getFunnelName(denominator.string)
    
    plot.alpha = paste0(round(alpha*100,0), '%')
    plot.cumulative = if (is.cumulative) '(Cumulative)' else '(Not Cumulative)'
    
    # Funded over qualified is reported under its own name and colour.
    plot.funnel = paste0(numerator.label, '/', denominator.label)
    if (plot.funnel == 'F/Q') {
        plot.funnel = 'Funding Rate'
    }
    
    if (plot.funnel == 'Funding Rate') {
        plot.color = '#FD61D1'
    } else {
        plot.color = getFunnelColor(numerator.string)
    }
    
    # Volumes shown in the caption.
    session.total = if (is.cumulative) max else sum
    plot.volume.check = paste0(
        numerator.label, ': ', session.total(df$numerator.session), ' // ',
        denominator.label, ': ', session.total(df$denominator.session), '\n'
    )
    
    # 'Attempt ...' steps report the Heap refresh time; every other step
    # reports the reporting-database refresh time.
    if (grepl('Attempt', plot.funnel)) {
        plot.refresh.check = getLatestHeapRefresh()
    } else {
        plot.refresh.check = getLatestReportingRefresh()
    }
    
    plot.refresh.check = paste('Last Data Update:', plot.refresh.check)
    
    df.plot = ggplot(
        data = df,
        mapping = aes(
            x = hour.session, 
            y = r.session
        )        
    ) +
    geom_col(
        fill = plot.color,
        color = 'black'
#         alpha = 0.5
    ) + 
    labs(
        x ='Hour of Day', 
        y = plot.funnel, 
        title = paste(
            # date.session repeats on every row; unique() keeps the title to
            # one string per distinct date instead of one per row.
            unique(df$date.session),
            "\n", plot.funnel 
        ),
        subtitle = paste(
            plot.cumulative, 'Intraday',
            "\n", 'Line:', plot.alpha,'lower bound confidence interval of bars'
        ),
        caption = paste(plot.volume.check, plot.refresh.check)
    ) + 
    geom_line(
        aes(
            x = hour.session, 
            y = ci.lower
        ), 
        color = 'black', 
        size=1.5
    ) + 
#     geom_line(
#         aes(
#             x = hour.session, 
#             y = ci.upper
#         ), 
#         color = 'gray', 
#         size=1
#     ) + 
    scale_y_continuous(
        labels = scales::percent 
    ) + 
    scale_x_continuous(
        breaks = seq(0,23,4)
    ) +
    theme(
        plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5)
    )
    
    return(df.plot)
}

# Make all the above into a function

### Make a single plot.

In [30]:
getFinalPlot = function(
    data,
    numerator.string = NA, 
    denominator.string = NA,
    cumulative,
    B,
    alpha,
    smooth.iterations
) {
    # Run the three plot stages for one funnel step, in order:
    #   1. getBootstrapInterval() on `data`,
    #   2. smoothLowerBound() on that result,
    #   3. getBootstrapPlot() on the smoothed result.
    #
    # Args:
    #   data: input handed to getBootstrapInterval().
    #   numerator.string, denominator.string: funnel-status keys used for
    #       the plot labels.
    #   cumulative, B, alpha: forwarded to getBootstrapInterval(); cumulative
    #       and alpha are also forwarded to getBootstrapPlot().
    #   smooth.iterations: forwarded to smoothLowerBound() as `iterations`.
    #
    # Returns: whatever getBootstrapPlot() returns.
    
    bootstrap.ci = getBootstrapInterval(
        data = data,
        cumulative = cumulative,
        B = B,
        alpha = alpha
    )
    
    smoothed.ci = smoothLowerBound(
        ci.information = bootstrap.ci,
        iterations = smooth.iterations
    )
    
    getBootstrapPlot(
        df.bi = smoothed.ci,
        numerator.string = numerator.string,
        denominator.string = denominator.string,
        alpha = alpha,
        cumulative = cumulative
    )
    
}

In [137]:
getSalFinalPlots = function(data, B, alpha, smooth.iterations) {
    # Build the reduced report: for each funnel step listed below, one
    # cumulative and one non-cumulative plot, followed by the supporting-text
    # plot, all stacked in a single column.
    #
    # Args:
    #   data: data frame passed to limitDf() for every step.
    #   B, alpha, smooth.iterations: forwarded unchanged to getFinalPlot().
    #
    # Returns: the object produced by ggarrange().
    
    # Steps in display order. To add or remove a chart, edit this table.
    funnel.steps = list(
        c(numerator = 'bankverified',    denominator = 'qualified'),
        c(numerator = 'cs_decisioned',   denominator = 'contractsigned'),
        c(numerator = 'attempt.ibv',     denominator = 'qualified'),
        c(numerator = 'bankverified.dl', denominator = 'attempt.dl'),
        c(numerator = 'bankverified.mb', denominator = 'attempt.mb')
    )
    # Each step is drawn cumulative first, then non-cumulative.
    cumulative.modes = c(TRUE, FALSE)
    
    plots = vector('list', length(funnel.steps) * length(cumulative.modes))
    slot = 0
    
    for (step in funnel.steps) {
        for (cumulative in cumulative.modes) {
            slot = slot + 1
            plots[[slot]] = getFinalPlot(
                data = limitDf(
                    data = data,
                    numerator.string = step[['numerator']],
                    denominator.string = step[['denominator']]
                ),
                numerator.string = step[['numerator']],
                denominator.string = step[['denominator']],
                cumulative = cumulative,
                B = B,
                alpha = alpha,
                smooth.iterations = smooth.iterations
            )
        }
    }
    
    # The caption-only plot goes last.
    plots = c(plots, list(getSupportingTextPlot()))
    
    all.plots = ggarrange(
        plotlist = plots,
        ncol = 1
    )
 
    return(all.plots)
}

In [32]:
getAllFinalPlots = function(data, B, alpha, smooth.iterations) {
    # Build the full report: for each funnel step listed below, one cumulative
    # and one non-cumulative plot, followed by two empty placeholder plots,
    # all stacked in a single column.
    #
    # Args:
    #   data: data frame passed to limitDf() for every step.
    #   B, alpha, smooth.iterations: forwarded unchanged to getFinalPlot().
    #
    # Returns: the object produced by ggarrange().
    
    # Steps in display order. To add or remove a chart, edit this table.
    funnel.steps = list(
        c(numerator = 'bankverified.dl',       denominator = 'attempt.dl'),
        c(numerator = 'bankverified.mb',       denominator = 'attempt.mb'),
        c(numerator = 'attempt.ibv',           denominator = 'qualified'),
        c(numerator = 'bankverified',          denominator = 'attempt.ibv'),
        c(numerator = 'passscorecardratecard', denominator = 'bankverified'),
        c(numerator = 'contractsigned',        denominator = 'passscorecardratecard'),
        c(numerator = 'cs_decisioned',         denominator = 'contractsigned'),
        c(numerator = 'funded',                denominator = 'cs_decisioned'),
        c(numerator = 'funded',                denominator = 'qualified')
    )
    # Each step is drawn cumulative first, then non-cumulative.
    cumulative.modes = c(TRUE, FALSE)
    
    plots = vector('list', length(funnel.steps) * length(cumulative.modes))
    slot = 0
    
    for (step in funnel.steps) {
        for (cumulative in cumulative.modes) {
            slot = slot + 1
            plots[[slot]] = getFinalPlot(
                data = limitDf(
                    data = data,
                    numerator.string = step[['numerator']],
                    denominator.string = step[['denominator']]
                ),
                numerator.string = step[['numerator']],
                denominator.string = step[['denominator']],
                cumulative = cumulative,
                B = B,
                alpha = alpha,
                smooth.iterations = smooth.iterations
            )
        }
    }
    
    # Two empty plots are appended after the funnel charts, as before.
    plots = c(plots, list(ggplot(), ggplot()))
    
    all.plots = ggarrange(
        plotlist = plots,
        ncol = 1
    )
 
    return(all.plots)
}

# Write all plots to a PDF/PNG.

In [33]:
exportAllFinalPlots = function (data, date.string, access.time = Sys.time(), B, alpha, smooth.iterations) {
    # Render the full set of funnel plots and write them to a PDF in the
    # results folder.
    #
    # Args:
    #   data: data frame forwarded to getAllFinalPlots().
    #   date.string: report date; first part of the file name.
    #   access.time: timestamp whose date, hour and minute complete the file
    #       name.
    #   B, alpha, smooth.iterations: forwarded to getAllFinalPlots().
    #
    # Returns: whatever ggexport() returns.
    
    # Silence warnings only for the duration of this call; the caller's
    # `warn` setting is restored on exit, including on error.
    old.options = options(warn = -1)
    on.exit(options(old.options), add = TRUE)
    
    all.plots = getAllFinalPlots(
        data = data,
        B = B, 
        alpha = alpha, 
        smooth.iterations = smooth.iterations
    )
    
    # Built after the plots so a defaulted access.time is still evaluated at
    # the same point as before (first use, after the plots exist).
    output.file = paste0(
        'C:/Users/jchang/Desktop/Projects/Funnel Anomaly Detection/results/',
        date.string, '-pulled-at-', date(access.time), '---',
        hour(access.time), '-', minute(access.time), '.pdf'
    )
    
    ggexport(
        all.plots,
        filename = output.file
    )    
}

In [34]:
exportSalFinalPlots = function (data, date.string, access.time, B, alpha, smooth.iterations) {
    # Render the reduced set of funnel plots and write them as PNG output in
    # the results folder. The file name ends in '-.png'; getAttachmentString()
    # rebuilds the same name to embed the image in the email.
    #
    # Args:
    #   data: data frame forwarded to getSalFinalPlots().
    #   date.string: report date; first part of the file name.
    #   access.time: timestamp whose date, hour and minute complete the file
    #       name.
    #   B, alpha, smooth.iterations: forwarded to getSalFinalPlots().
    #
    # Returns: whatever ggexport() returns.
    
    # Silence warnings only for the duration of this call; the caller's
    # `warn` setting is restored on exit, including on error.
    old.options = options(warn = -1)
    on.exit(options(old.options), add = TRUE)
    
    sal.plots = getSalFinalPlots(
        data = data,
        B = B, 
        alpha = alpha, 
        smooth.iterations = smooth.iterations
    )
    
    # Built after the plots so access.time is first evaluated at the same
    # point as before.
    output.file = paste0(
        'C:/Users/jchang/Desktop/Projects/Funnel Anomaly Detection/results/',
        date.string, '-pulled-at-', date(access.time), '---',
        hour(access.time), '-', minute(access.time), '-', '.png'
    )
    
    ggexport(
        sal.plots,
        filename = output.file
    )    
}

# Execute the code.

In [35]:
executeExport = function(date.string, access.time, B = 100, alpha = 0.03, smooth.iterations = 1) {
    # Pull the funnel, DL-attempt and MB-attempt data for `date.string`, hand
    # the three pulls to getDf(), and export the resulting plots.
    #
    # Args:
    #   date.string: date to pull data for; also used in the output file name.
    #   access.time: timestamp used in the output file name.
    #   B, alpha, smooth.iterations: plot settings forwarded to the exporter.
    #
    # Returns: whatever exportSalFinalPlots() returns.
    
    funnel.data = getDf(
        reporting.data = getFunnel(date.string = date.string),
        dl.data.raw    = getAttemptDLRaw(date.string = date.string),
        mb.data.raw    = getAttemptMBRaw(date.string = date.string)
    )
    
    # Swap in exportAllFinalPlots() here to export the full funnel as a PDF.
    exportSalFinalPlots(
        data = funnel.data,
        date.string = date.string, access.time = access.time,
        B = B, alpha = alpha, smooth.iterations = smooth.iterations
    )
}

# Automate an Email

In [77]:
getAttachmentString = function (date.string, access.time) {
    # Build the HTML body for the report email: one <img> tag per exported
    # PNG belonging to this report date and access time.
    #
    # Args:
    #   date.string: report date used in the exported file names.
    #   access.time: timestamp whose date, hour and minute were used in the
    #       exported file names.
    #
    # Returns:
    #   A single string of <img> tags separated by spaces, or '' when no
    #   matching file exists in the results folder.
    
    results.dir = 'C:/Users/jchang/Desktop/Projects/Funnel Anomaly Detection/results'
    
    # Common start of every PNG name for this run. The trailing '-' matters:
    # without it, minute 4 would also match minutes 40-49 and the '.pdf'
    # export that carries the same stamp.
    file.stem = paste0(
        date.string, '-pulled-at-', date(access.time), '---',
        hour(access.time), '-', minute(access.time), '-'
    )
    
    # Literal prefix match, so nothing in the name is read as a regex.
    html.length = sum(startsWith(list.files(path = results.dir), file.stem))
    
    if (html.length > 1) {
        # Several files: names carry a zero-padded index (001, 002, ...).
        image.files = paste0(file.stem, sprintf('%03d', seq_len(html.length)), '.png')
    } else if (html.length == 1) {
        # A single file has no index.
        image.files = paste0(file.stem, '.png')
    } else {
        return('')
    }
    
    html.vector = paste0('<img src = "', results.dir, '/', image.files, '">')
    
    html.string = paste(
        html.vector,
        collapse = ' '
    )
    
    return(html.string)
}

In [141]:
executeEmail = function (date.string = Sys.Date(), access.time = Sys.time()) {
    # Export the plots for `date.string`, then email them inline as an HTML
    # message through Gmail SMTP. The SMTP password is read from the
    # R_GMAIL_2FAUTH environment variable.
    #
    # Args:
    #   date.string: report date (defaults to today).
    #   access.time: timestamp used in file names and the subject line
    #       (defaults to now).
    #
    # Returns: whatever send.mail() returns.
    
    executeExport(
        date.string = date.string,
        access.time = access.time
    )

    sender = 'jchang@opploans.com'
    recipients = c('jchang@opploans.com')
    
    report.date = paste0(month(date.string), '/', day(date.string), '/', year(date.string))
    report.time = paste0(hour(access.time), ':', sprintf('%02d', minute(access.time)))
    mail.subject = paste0('Process Flow 7 IntraDay Report ---- ', report.date, ' at ', report.time)
    
    # <img> tags pointing at the files written by executeExport() above.
    mail.body = getAttachmentString(
        date.string = date.string,
        access.time = access.time
    )
    
    smtp.settings = list(
        host.name = 'smtp.gmail.com', 
        port = 465, 
        user.name = sender,            
        passwd = Sys.getenv('R_GMAIL_2FAUTH'), 
        ssl = TRUE
    )
    
    send.mail(
        from = sender,
        to = recipients,
        subject = mail.subject,
        body = mail.body,
        html = TRUE,
        inline = TRUE,
        smtp = smtp.settings,
        authenticate = TRUE,
        send = TRUE
    )
}

In [None]:
# Run the whole export-and-email pipeline with its default arguments and
# show how long it took.
start = Sys.time()

executeEmail()

# Elapsed wall-clock time of the run above.
Sys.time() - start

# Checking the plots.

In [38]:
# Manual check: pull today's funnel data into `a` and time the call.
start = Sys.time()
a = getFunnel(date.string = Sys.Date())
Sys.time() - start

# Show the last rows of the result.
tail(a)

list()


Time difference of 3.369159 mins

Unnamed: 0,dayofyear,dayofweek,hourofday,qualified,bankverified,passscorecardratecard,contractsigned,cs_decisioned,funded,grouping,bankverified.dl,bankverified.mb
2218,2019-02-07,4,9,83,55,51,52,44,36,test,31,20
2219,2019-02-07,4,10,83,61,59,56,41,35,test,39,21
2220,2019-02-07,4,11,103,68,66,66,41,35,test,58,7
2221,2019-02-07,4,12,118,69,68,63,39,32,test,49,16
2222,2019-02-07,4,13,100,67,54,58,38,36,test,44,18
2223,2019-02-07,4,14,99,70,58,49,42,42,test,47,18


In [39]:
# Manual check: pull today's raw DL-attempt data into `b` and time the call.
start = Sys.time()
b = getAttemptDLRaw(date.string = Sys.Date())
Sys.time() - start

# Show the last rows of the result.
tail(b)

[[1]]
<PostgreSQLConnection>

[[1]]
<PostgreSQLConnection>



Time difference of 36.47203 secs

name,grouping,time_
APP-0003778335,test,2019-02-07 12:47:46
APP-0003778340,test,2019-02-07 12:48:28
APP-0003778371,test,2019-02-07 12:54:42
APP-0003778383,test,2019-02-07 12:53:56
APP-0003778386,test,2019-02-07 12:58:20
APP-0003778388,test,2019-02-07 12:58:25


In [40]:
# Manual check: pull today's raw MB-attempt data and time the call.
# Note: the result is stored in a variable named `c`, the same name as the
# base function c(); calls to c() keep working, but the name is easy to misread.
start = Sys.time()
c = getAttemptMBRaw(date.string = Sys.Date())
Sys.time() - start

# Show the last rows of the result.
tail(c)

[[1]]
<PostgreSQLConnection>

[[1]]
<PostgreSQLConnection>



Time difference of 26.90328 secs

name,grouping,time_
APP-0003778224,test,2019-02-07 12:40:21
APP-0003778244,test,2019-02-07 12:41:51
APP-0003778267,test,2019-02-07 12:44:57
APP-0003778276,test,2019-02-07 12:42:56
APP-0003778328,test,2019-02-07 12:48:42
APP-0003778331,test,2019-02-07 12:49:57


In [41]:
# Manual check: pass the three pulls from the cells above (`a`, `b`, `c`) to
# getDf() and time the call.
start = Sys.time()
d = getDf(
    reporting.data = a,
    dl.data.raw = b,
    mb.data.raw = c
)
Sys.time() - start

# Show the last rows of the result.
tail(d)

Time difference of 1.30066 mins

Unnamed: 0,dayofyear,dayofweek,hourofday,grouping,qualified,bankverified,passscorecardratecard,contractsigned,cs_decisioned,funded,bankverified.dl,bankverified.mb,attempt.dl,attempt.mb,attempt.ibv
2218,2019-02-07,4,9,test,83,55,51,52,44,36,31,20,43.0,31.0,64.0
2219,2019-02-07,4,10,test,83,61,59,56,41,35,39,21,54.0,31.0,76.0
2220,2019-02-07,4,11,test,103,68,66,66,41,35,58,7,73.0,19.0,85.0
2221,2019-02-07,4,12,test,118,69,68,63,39,32,49,16,70.0,29.0,87.0
2222,2019-02-07,4,13,test,100,67,54,58,38,36,44,18,,,
2223,2019-02-07,4,14,test,99,70,58,49,42,42,47,18,,,


In [69]:
# Manual check: run limitDf() on `d` for the DL step
# (bankverified.dl over attempt.dl) and inspect the last rows.
e = limitDf(
    data = d,
    numerator.string = 'bankverified.dl', 
    denominator.string = 'attempt.dl'
)

tail(e)

Unnamed: 0,dayofyear,dayofweek,hourofday,grouping,denominator,numerator,heap.time.tag
2218,2019-02-07,4,9,test,43,31,1
2219,2019-02-07,4,10,test,54,39,1
2220,2019-02-07,4,11,test,73,58,1
2221,2019-02-07,4,12,test,70,49,1
2222,2019-02-07,4,13,test,0,44,1
2223,2019-02-07,4,14,test,0,47,1


In [70]:
# Manual check: print getInitialCI() for `e` in cumulative mode, then in
# non-cumulative mode. `f` is overwritten by the second call.
f = getInitialCI(data = e, cumulative = TRUE)
f
f = getInitialCI(data = e, cumulative = FALSE)
f

[[1]]
<PostgreSQLConnection>

[[1]]
<PostgreSQLConnection>

[[1]]
<PostgreSQLConnection>



date.session,hour.session,numerator.session,denominator.session,r.session,r.hist,se.hist,z.lower,z.upper,ci.lower,ci.upper
2019-02-07,0,8,10,0.8,0.6465517,0.0077331549,0,0,0,0
2019-02-07,1,17,20,0.85,0.6414634,0.0046901941,0,0,0,0
2019-02-07,2,19,24,0.7916667,0.6472727,0.0039101622,0,0,0,0
2019-02-07,3,26,31,0.8387097,0.6297376,0.0031658986,0,0,0,0
2019-02-07,4,35,43,0.8139535,0.6239814,0.0030253568,0,0,0,0
2019-02-07,5,43,54,0.7962963,0.6310212,0.002514717,0,0,0,0
2019-02-07,6,58,79,0.7341772,0.6506849,0.0020811632,0,0,0,0
2019-02-07,7,81,107,0.7570093,0.6879195,0.0020740208,0,0,0,0
2019-02-07,8,106,139,0.7625899,0.6964499,0.0014906564,0,0,0,0
2019-02-07,9,137,182,0.7527473,0.7064355,0.0013235784,0,0,0,0


[[1]]
<PostgreSQLConnection>

[[1]]
<PostgreSQLConnection>

[[1]]
<PostgreSQLConnection>



date.session,hour.session,numerator.session,denominator.session,r.session,r.hist,se.hist,z.lower,z.upper,ci.lower,ci.upper
2019-02-07,0,8,10,0.8,0.6465517,0.007733155,0,0,0,0
2019-02-07,1,9,10,0.9,0.6348315,0.008848579,0,0,0,0
2019-02-07,2,2,4,0.5,0.6642857,0.012017411,0,0,0,0
2019-02-07,3,7,7,1.0,0.5588235,0.010343868,0,0,0,0
2019-02-07,4,9,12,0.75,0.6011561,0.012644524,0,0,0,0
2019-02-07,5,8,11,0.7272727,0.6648045,0.011825524,0,0,0,0
2019-02-07,6,15,25,0.6,0.7246377,0.007218546,0,0,0,0
2019-02-07,7,23,28,0.8214286,0.7911392,0.007227043,0,0,0,0
2019-02-07,8,25,32,0.78125,0.7176634,0.002987344,0,0,0,0
2019-02-07,9,31,43,0.7209302,0.734375,0.003704472,0,0,0,0


In [None]:
# B = 100
# alpha = 0.03
# smooth.iterations = 2

In [None]:
# plot.1 = getFinalPlot(
#     data = e,
#     numerator.string = 'bankverified', 
#     denominator.string = 'qualified',
#     cumulative = TRUE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.1

In [None]:
# plot.1 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'bankverified.dl',
#         denominator.string = 'attempt.dl'
#     ),
#     numerator.string = 'bankverified.dl',
#     denominator.string = 'attempt.dl',
#     cumulative = TRUE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.1

In [None]:
# plot.2 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'bankverified.dl',
#         denominator.string = 'attempt.dl'
#     ),
#     numerator.string = 'bankverified.dl',
#     denominator.string = 'attempt.dl',
#     cumulative = FALSE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.2

In [None]:
# plot.3 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'bankverified.mb',
#         denominator.string = 'attempt.mb'
#     ),
#     numerator.string = 'bankverified.mb',
#     denominator.string = 'attempt.mb',
#     cumulative = TRUE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.3

In [None]:
# plot.4 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'bankverified.mb',
#         denominator.string = 'attempt.mb'
#     ),
#     numerator.string = 'bankverified.mb',
#     denominator.string = 'attempt.mb',
#     cumulative = FALSE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.4

In [None]:
# plot.5 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'attempt.ibv',
#         denominator.string = 'qualified'
#     ),
#     numerator.string = 'attempt.ibv',
#     denominator.string = 'qualified',
#     cumulative = TRUE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.5

In [None]:
# plot.6 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'attempt.ibv',
#         denominator.string = 'qualified'
#     ),
#     numerator.string = 'attempt.ibv',
#     denominator.string = 'qualified',
#     cumulative = FALSE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.6

In [None]:
# plot.7 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'bankverified',
#         denominator.string = 'attempt.ibv'
#     ),
#     numerator.string = 'bankverified',
#     denominator.string = 'attempt.ibv',
#     cumulative = TRUE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.7

In [None]:
# plot.8 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'bankverified',
#         denominator.string = 'attempt.ibv'
#     ),
#     numerator.string = 'bankverified',
#     denominator.string = 'attempt.ibv',
#     cumulative = FALSE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.8

In [None]:
# plot.9 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'passscorecardratecard',
#         denominator.string = 'bankverified'
#     ),
#     numerator.string = 'passscorecardratecard',
#     denominator.string = 'bankverified',
#     cumulative = TRUE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.9

In [None]:
# plot.10 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'passscorecardratecard',
#         denominator.string = 'bankverified'
#     ),
#     numerator.string = 'passscorecardratecard',
#     denominator.string = 'bankverified',
#     cumulative = FALSE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.10

In [None]:
# plot.11 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'contractsigned',
#         denominator.string = 'passscorecardratecard'
#     ),
#     numerator.string = 'contractsigned',
#     denominator.string = 'passscorecardratecard',
#     cumulative = TRUE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.11

In [None]:
# plot.12 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'contractsigned',
#         denominator.string = 'passscorecardratecard'
#     ),
#     numerator.string = 'contractsigned',
#     denominator.string = 'passscorecardratecard',
#     cumulative = FALSE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.12

In [None]:
# plot.13 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'cs_decisioned',
#         denominator.string = 'contractsigned'
#     ),
#     numerator.string = 'cs_decisioned',
#     denominator.string = 'contractsigned',
#     cumulative = TRUE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.13

In [None]:
# plot.14 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'cs_decisioned',
#         denominator.string = 'contractsigned'
#     ),
#     numerator.string = 'cs_decisioned',
#     denominator.string = 'contractsigned',
#     cumulative = FALSE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.14

In [None]:
# plot.15 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'funded',
#         denominator.string = 'cs_decisioned'
#     ),
#     numerator.string = 'funded',
#     denominator.string = 'cs_decisioned',
#     cumulative = TRUE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.15

In [None]:
# plot.16 = getFinalPlot(
#     data = limitDf(
#         data = d,
#         numerator.string = 'funded',
#         denominator.string = 'cs_decisioned'
#     ),
#     numerator.string = 'funded',
#     denominator.string = 'cs_decisioned',
#     cumulative = FALSE,
#     B = B,
#     alpha = alpha,
#     smooth.iterations = smooth.iterations
# )    
# plot.16

In [None]:
# start = Sys.time()
# getAllFinalPlots(
#     data = d,
#     B = 100,
#     alpha = 0.03,
#     smooth.iterations = 2    
# )
# Sys.time() - start

In [None]:
# start = Sys.time()
# x = getSalFinalPlots(
#     data = getDf(
#         reporting.data = getFunnel(date.string = Sys.Date()),
#         dl.data.raw = getAttemptDLRaw(date.string = Sys.Date()),
#         mb.data.raw = getAttemptMBRaw(date.string = Sys.Date())
#     ),
#     B = 100,
#     alpha = 0.03,
#     smooth.iterations = 1    
# )
# NOTE(review): the assignment to `x` above is commented out, so the line
# below only works if `x` already exists from an earlier run -- either
# restore the assignment or comment this line out too.
x
# Sys.time() - start

In [None]:
# df.bi = getBootstrapInterval(
#     data = df,
#     B = 5000,
#     alpha = 0.03
# )$ci.information

# ggplot() +
# geom_line(
#     data = df.bi,
#     mapping = aes(
#         x = hour.session,
#         y = r.session
#     ),
#     color = 'skyblue',
#     size = 3
# ) +
# geom_line(
#     data = df.bi,
#     mapping = aes(
#         x = hour.session,
#         y = ci.lower
#     ),
#     color = 'red',
#     size = 1,
#     linetype = 6
# ) +
# geom_point(
#     data = makeCumulative(data = df),
#     mapping = aes(
#         x = hourofday,
#         y = dl.hist.cum/q.hist.cum
#     ),
#     color = 'black',
#     size = 0.8,
#     alpha = 0.1
# ) +
# # geom_ribbon(
# #     data = makeCumulative(data = df) %>%
# #         group_by(
# #             hourofday
# #         ) %>%
# #         summarize(
# #             ribbon.lower = min(dl.hist.cum/q.hist.cum),
# #             ribbon.upper = max(dl.hist.cum/q.hist.cum)
# #         ), 
# #     mapping = aes(
# #         x = hourofday,
# #         ymin = ribbon.lower,
# #         ymax = ribbon.upper
# #     ),
# #     color = 'black',
# #     size = 0.8,
# #     alpha = 0.1
# # ) +
# scale_y_continuous(
#     labels = scales::percent,
# #     limits = c(
# #         min(dl.hist.cum/q.hist.cum),
# #         max(dl.hist.cum/q.hist.cum),
# #     ) 
#     limits = c(0.20,0.70)
# ) 


# # + 
# # labs(
# #     x ="Time of Day", 
# #     y = "Success/Q", 
# #     title = paste(
# #         "DL ", getSessionInfo(data = df)$date,
# #         "\n", "Bars: Non-Cohorted DL Success/Q (Cumulative)", 
# #         "\n", "Line: 5% lower bound confidence interval of bars")
# # ) + 
# # geom_line(
# #     aes(
# #         x = hour.session, 
# #         y = ci.lower
# #     ), 
# #     color = 'red', 
# #     size=1
# # ) + 
# # # geom_line(
# # #     aes(
# # #         x = hour.session, 
# # #         y = ci.upper
# # #     ), 
# # #     color = 'blue', 
# # #     size=1
# # # ) + 
# # scale_x_continuous(
# #     breaks = seq(0,23,4)
# # )# + geom_line(aes(x=hourofday, y=upper), color='springgreen4', size=1) + scale_y_continuous(labels=scales::percent) + scale_x_continuous(breaks=seq(0,23,4))
