# Import libraries and configure outputs.

In [None]:
# Install any packages in `pkg` that are not yet installed, then attach all
# of them.
#
# Args:
#   pkg: Character vector of package names.
#
# Returns:
#   Named logical vector with one element per entry of `pkg`: TRUE when
#   require() attached the package, FALSE otherwise.
check.packages <- function(pkg) {
    new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
    if (length(new.pkg) > 0) {
        install.packages(new.pkg, dependencies = TRUE)
    }
    # vapply() guarantees a logical vector even for empty input, where
    # sapply() would return an empty list instead.
    vapply(pkg, require, FUN.VALUE = logical(1), character.only = TRUE)
}

# Install (if missing) and attach every package used in this notebook.
check.packages(c('RPostgreSQL', 'RJDBC', 'ggplot2', 'dplyr', 'gridExtra', 'cowplot', 'rJava'))

In [None]:
# Large scipen penalty: print numbers in fixed rather than scientific notation.
options(scipen=999)

# Connect to database.

In [None]:
# Close every connection currently registered with the PostgreSQL driver.
#
# Prints the list of connections that were found before disconnecting them.
# Because the lookup is per driver, this closes ALL PostgreSQL-driver
# connections in the session, not only those to the reporting database.
#
# Returns:
#   NULL, invisibly.
killReportingConnections <- function() {
    all_cons <- dbListConnections(PostgreSQL())
    print(all_cons)
    # The loop body previously began with a stray unary `+` (a pasted console
    # continuation prompt); it is removed and the body braced.
    for (con in all_cons) {
        dbDisconnect(con)
    }
    invisible(NULL)
}

In [None]:
# Open a fresh connection to the reporting PostgreSQL database.
#
# Credentials are read from the REPORTING_USER and REPORTING_PASS environment
# variables. All existing PostgreSQL-driver connections are closed first (via
# killReportingConnections()).
#
# Args:
#   dbname: Database name.
#   host:   Database host.
#   port:   Database port.
#
# Returns:
#   The connection object returned by dbConnect().
createReportingConnectionObject <- function(
    dbname = 'reporting',
    host = 'reporting.ckpglb17yttu.us-east-1.rds.amazonaws.com',
    port = 5432
) {

    user <- Sys.getenv('REPORTING_USER')
    password <- Sys.getenv('REPORTING_PASS')

    # Sys.getenv() returns "" for unset variables; surface that before the
    # existing connections are dropped instead of failing later at login.
    if (!nzchar(user) || !nzchar(password)) {
        warning(
            "REPORTING_USER and/or REPORTING_PASS is not set; the connection attempt will use empty credentials.",
            call. = FALSE
        )
    }

    killReportingConnections()

    drv <- dbDriver('PostgreSQL')

    dbConnect(
        drv,
        dbname = dbname,
        host = host,
        port = port,
        user = user,
        password = password
    )
}

In [None]:
# Print how many PostgreSQL-driver connections are currently open.
# paste() adds its default single-space separator in front of the literal,
# which itself starts with a space.
checkReportingConnection <- function() {
    openConnections <- dbListConnections(PostgreSQL())
    statusLine <- paste(length(openConnections), " connections active.")
    print(statusLine)
}

In [None]:
# Close every connection currently registered with the PostgreSQL driver.
#
# The Heap database is reached through the PostgreSQL driver, so this uses the
# same lookup as killReportingConnections() and closes ALL PostgreSQL-driver
# connections in the session. Prints the connections found before closing them.
#
# Returns:
#   NULL, invisibly.
killHeapConnections <- function() {
    all_cons <- dbListConnections(PostgreSQL())
    print(all_cons)
    # The loop body previously began with a stray unary `+` (a pasted console
    # continuation prompt); it is removed and the body braced.
    for (con in all_cons) {
        dbDisconnect(con)
    }
    invisible(NULL)
}

In [None]:
# Open a fresh connection to the Heap database through the PostgreSQL driver.
#
# Credentials are read from the HEAP_USER and HEAP_PASS environment variables.
# All existing PostgreSQL-driver connections are closed first.
#
# Args:
#   dbname: Database name.
#   host:   Database host.
#   port:   Database port.
#
# Returns:
#   The connection object returned by dbConnect().
createHeapConnectionObject <- function(
    dbname = 'oppreddb',
    host = 'oppred.cyt7r7wgkc6b.us-east-1.redshift.amazonaws.com',
    port = 5439
) {

    user <- Sys.getenv('HEAP_USER')
    password <- Sys.getenv('HEAP_PASS')

    # Sys.getenv() returns "" for unset variables; surface that before the
    # existing connections are dropped instead of failing later at login.
    if (!nzchar(user) || !nzchar(password)) {
        warning(
            "HEAP_USER and/or HEAP_PASS is not set; the connection attempt will use empty credentials.",
            call. = FALSE
        )
    }

    # Previously called killDbConnections(), which is not defined anywhere in
    # this notebook; killHeapConnections() is the matching helper.
    killHeapConnections()

    drv <- dbDriver('PostgreSQL')

    dbConnect(
        drv,
        dbname = dbname,
        host = host,
        port = port,
        user = user,
        password = password
    )
}

In [None]:
# Print how many PostgreSQL-driver connections are currently open (the Heap
# connection is opened through that driver).
checkHeapConnection <- function() {
    openConnections <- dbListConnections(PostgreSQL())
    statusLine <- paste(length(openConnections), " connections active.")
    print(statusLine)
}

In [None]:
# Open the Heap connection. The returned handle is not stored in a variable.
createHeapConnectionObject()

In [None]:
# Print the number of open PostgreSQL-driver connections.
checkHeapConnection()

# Construct the bounds

### Inputs

In [None]:
##### Training Time frame.
##### Hour.
##### DOW.
##### Numerator/Denominator.

## Approach 1: Simple percentiles/Simple SD

In [None]:
## How to set the default value for a function?
## Default is to compare to today; Specify if want to compare to previous day. 
## This logic will have to feed into data query.
## Take the DOW of the specified/default date, and keep HOD that is <= Sys.time() if date = today.
## Take input numerator/denominator.

In [None]:
## 1. Input the date and numerator/denominator, and pull this test data.
## 2. Input the date (applicable DOW and hours), numerator/denominator, and training range, and pull this training data.
    ## <= 23 if not today; <= Sys.time() if today.
    ## Take the minimum(max(refresh time)) of the tables used in test data.
## 3. For each hour, calculate the a_th lower percentile of the training data. Alternatively, calculate SD.

In [None]:
## Create function to pull in string depending on the funnel step.

# Map a funnel-step name to the SQL aggregate expression that counts the
# distinct applications reaching that step.
#
# The expressions reference the table aliases `c_ash` and `c_app`, so the
# surrounding query must define both.
#
# Args:
#   funnelStep: One of 'qualified', 'bankverified', 'passscorecardratecard',
#               'contractsigned', 'cs_decisioned', 'funded'.
#
# Returns:
#   The SQL expression as a string. An unrecognised step returns '' (as
#   before) but now also raises a warning, because an empty expression
#   produces invalid SQL when spliced into a query.
getQueryString <- function(funnelStep) {

    if (length(funnelStep) != 1L || is.na(funnelStep)) {
        stop("funnelStep must be a single non-missing value.", call. = FALSE)
    }

    queryString <- switch(
        as.character(funnelStep),
        # Columns are now qualified with c_ash. like every other branch.
        qualified = "count(distinct case when c_ash.old_value = 'BUSINESS RULES PASSED' and c_ash.new_value = 'BUREAU APPROVED' then c_ash.application end)",
        bankverified = "count(distinct case when (c_ash.old_value = 'BANK VERIFICATION COMPLETED' and c_ash.new_value = 'NEW - SCORECARD GENERATED') or c_ash.new_value = 'BANK VERIFICATION COMPLETED' then c_ash.application end)",
        passscorecardratecard = "count(distinct case when c_ash.old_value = 'NEW - PRICING GENERATED' and c_ash.new_value in ('CONTRACT SIGNED', 'WAITING ON STIPULATIONS') then c_ash.application end)",
        contractsigned = "count(distinct case when c_ash.old_value in ('NEW - PRICING GENERATED', 'WAITING ON STIPULATIONS') and c_ash.new_value = 'CONTRACT SIGNED' then c_ash.application end)",
        cs_decisioned = "count(distinct case when c_ash.old_value in ('NEW - PRICING GENERATED', 'WAITING ON STIPULATIONS') and c_ash.new_value = 'CONTRACT SIGNED' and (c_app.denialreason not in ('Withdraw','Time In Pending') or c_app.status = 'LOAN APPROVED') then c_ash.application end)",
        funded = "count(distinct case when c_ash.new_value = 'LOAN APPROVED' then c_ash.application end)",
        ''
    )

    if (!nzchar(queryString)) {
        warning("Unknown funnelStep '", funnelStep, "'; returning an empty query string.", call. = FALSE)
    }

    queryString
}

### Step 1: Test Data

In [None]:
# ## Pull the test data, based on 
# ## DATE (string)
# ## NUMERATOR (string)
# ## DENOMINATOR (string)

# ## Makes sure that the refreshing follows that of the stalest table.
# ## Returns list of runtime, last refresh, and test data.

# ## SNAPSHOT LOGIC!

# getTestData = function(numerator, denominator, date_) {

#     runTime = 0
#     runTime_start = Sys.time()

#     checkTimezones = dbGetQuery(
#         createReportingConnectionObject(),
#         paste(
#             "
#             select
#                 max(createddate at time zone 'America/Chicago') as max_,
#                 min(createddate at time zone 'America/Chicago') as min_
#             from
#                 cloudlending.applications
#             where
#                 (createddate at time zone 'America/Chicago')::date = '", date_,"'::date
            
#             union all

            
#             select
#                 max(createddate at time zone 'America/Chicago') as max_,
#                 min(createddate at time zone 'America/Chicago') as min_
#             from
#                 cloudlending.application_status_history
#             where
#                 (createddate at time zone 'America/Chicago')::date = '", date_,"'::date
#             ",
#             sep = ''
#         )
#     )
    
    
#     dfTest = dbGetQuery(
#         createReportingConnectionObject(),
#         paste(
#             "
#             select
#                 c_app.name
#                 , c_app.createddate at time zone 'America/Chicago' as createddate_
#                 , (c_app.createddate at time zone 'America/Chicago')::date as date_
#                 , extract(hour from c_app.createddate at time zone 'America/Chicago') as hour_
#                 , ", getQueryString(numerator)," as numerator
#                 , ", getQueryString(denominator)," as denominator
#             from
#                 cloudlending.applications as c_app
#                 inner join
#                     cloudlending.application_status_history as c_ash
#                     on c_app.id = c_ash.application
#                 inner join
#                     (
#                         select
#                             max(max_) as c_max
#                         from
#                             (
#                                 select
#                                     max(createddate at time zone 'America/Chicago') as max_
#                                 from 
#                                     cloudlending.application_status_history

#                                 union all


#                                 select
#                                     max(createddate at time zone 'America/Chicago') as max_
#                                 from 
#                                     cloudlending.applications
#                             ) as c_maxes
#                     ) as c_maxes
#                     on TRUE
#             where
#                 c_app.source_type = 'Low'
#                 and (c_app.createddate at time zone 'America/Chicago')::date = '", date_, "'::date
#                 and c_app.createddate at time zone 'America/Chicago' <= c_maxes.c_max
#                 and c_ash.createddate at time zone 'America/Chicago' <= c_maxes.c_max
#                 -- SNAPSHOT LOGIC
#                 and (c_ash.createddate at time zone 'America/Chicago')::date = (c_app.createddate at time zone 'America/Chicago')::date
#             group by
#                 c_app.name
#                 , createddate_
#                 , date_
#                 , hour_
#             order by
#                 createddate_ desc
#             ",
#             sep = ''
#         )
#     )
    
#     runTime_end = Sys.time()
    
#     final = list(
#         runTime = runTime_end - runTime_start,
#         matchTimezones = checkTimezones,
#         df_test = dfTest
#     )   
    
#     return(final)
# }

In [None]:
## Pull the test data, based on 
## DATE (string)
## NUMERATOR (string)
## DENOMINATOR (string)

## Makes sure that the refreshing follows that of the stalest table.
## Returns list of runtime, last refresh, and test data.

## SNAPSHOT LOGIC!

# Pull hourly numerator/denominator counts for a single test date.
#
# Args:
#   numerator:   Funnel-step name passed to getQueryString().
#   denominator: Funnel-step name passed to getQueryString().
#   date_:       A Date, or a string as.Date() can parse.
#
# Returns:
#   A list with
#     runTime        - elapsed time of the call,
#     matchTimezones - min/max createddate (America/Chicago) on `date_` for
#                      the applications and application_status_history tables,
#     df_test        - one row per (date_, hour_) with numerator/denominator.
getTestData <- function(numerator, denominator, date_) {

    runTime_start <- Sys.time()

    # Normalise to an ISO date string so only a well-formed date literal is
    # ever spliced into the SQL text.
    dateSql <- tryCatch(
        format(as.Date(date_), '%Y-%m-%d'),
        error = function(e) NA_character_
    )
    if (length(dateSql) != 1L || is.na(dateSql)) {
        stop("date_ must be a single Date or a 'YYYY-MM-DD' string.", call. = FALSE)
    }

    numeratorSql <- getQueryString(numerator)
    denominatorSql <- getQueryString(denominator)

    # One connection serves both queries and is closed when the function
    # exits; previously a new connection was opened per query and the last
    # one was left open.
    con <- createReportingConnectionObject()
    on.exit(dbDisconnect(con), add = TRUE)

    checkTimezones <- dbGetQuery(
        con,
        paste0(
            "
            select
                max(createddate at time zone 'America/Chicago') as max_,
                min(createddate at time zone 'America/Chicago') as min_
            from
                cloudlending.applications
            where
                (createddate at time zone 'America/Chicago')::date = '", dateSql, "'::date

            union all

            select
                max(createddate at time zone 'America/Chicago') as max_,
                min(createddate at time zone 'America/Chicago') as min_
            from
                cloudlending.application_status_history
            where
                (createddate at time zone 'America/Chicago')::date = '", dateSql, "'::date
            "
        )
    )

    dfTest <- dbGetQuery(
        con,
        paste0(
            "
            select
                (c_ash.createddate at time zone 'America/Chicago')::date as date_
                , extract(hour from c_ash.createddate at time zone 'America/Chicago') as hour_
                , ", numeratorSql, " as numerator
                , ", denominatorSql, " as denominator
            from
                cloudlending.applications as c_app
                inner join
                    cloudlending.application_status_history as c_ash
                    on c_app.id = c_ash.application
                inner join
                    (
                        select
                            max(max_) as c_max
                        from
                            (
                                select
                                    max(createddate at time zone 'America/Chicago') as max_
                                from
                                    cloudlending.application_status_history

                                union all

                                select
                                    max(createddate at time zone 'America/Chicago') as max_
                                from
                                    cloudlending.applications
                            ) as c_maxes
                    ) as c_maxes
                    on TRUE
            where
                c_app.source_type = 'Low'
                and (c_ash.createddate at time zone 'America/Chicago')::date = '", dateSql, "'::date
                and c_app.createddate at time zone 'America/Chicago' <= c_maxes.c_max
                and c_ash.createddate at time zone 'America/Chicago' <= c_maxes.c_max
            group by
                date_
                , hour_
            order by
                date_ desc
            "
        )
    )

    runTime_end <- Sys.time()

    list(
        runTime = runTime_end - runTime_start,
        matchTimezones = checkTimezones,
        df_test = dfTest
    )
}

In [None]:
# Pull the test data for bankverified / qualified on a fixed date; the
# commented-out lines are alternative dates.
dfTestPull = getTestData(
    numerator = 'bankverified',
    denominator = 'qualified',
#     date_ = as.Date(Sys.time())
#     date_ = as.Date('2019-01-01')
    date_ = as.Date('2018-12-12')
)
# Display the run time, the per-table min/max timestamps, and the first and
# last rows of the pulled data.
list(
    dfTestPull$runTime,
    dfTestPull$matchTimezones,
    rbind(head(dfTestPull$df_test), tail(dfTestPull$df_test))
)

### Step 2: Training data

In [None]:
# ## Pull the training data, based on 
# ## DATE (string)
# ## DAYS BACK (integer)
# ## NUMERATOR (string)
# ## DENOMINATOR (string)
# ## HOUR OF DATE (can just use where (24 if not today or paste max(hour_)) from test data) - accounts for refresh.

# ## Makes sure that the refreshing follows that of the stalest table.
# ## Returns list of runtime, last refresh, and test data.


# getTrainingData = function(numerator, denominator, date_, daysBack) {

#     runTime = 0
#     runTime_start = Sys.time()

#     checkTimezones = dbGetQuery(
#         createReportingConnectionObject(),
#         paste(
#             "
#             select
#                 max(createddate at time zone 'America/Chicago') as max_,
#                 min(createddate at time zone 'America/Chicago') as min_
#             from
#                 cloudlending.applications
#             where
#                 (createddate at time zone 'America/Chicago')::date >= '", as.Date(date_) - daysBack,"'::date
#                 and (createddate at time zone 'America/Chicago')::date < '", date_,"'::date
            
#             union all

            
#             select
#                 max(createddate at time zone 'America/Chicago') as max_,
#                 min(createddate at time zone 'America/Chicago') as min_
#             from
#                 cloudlending.application_status_history
#             where
#                 (createddate at time zone 'America/Chicago')::date >= '", as.Date(date_) - daysBack,"'::date
#                 and (createddate at time zone 'America/Chicago')::date < '", date_,"'::date
#             ",
#             sep = ''
#         )
#     )
    
    
#     dfTraining = dbGetQuery(
#         createReportingConnectionObject(),
#         paste(
#             "
#             select
#                 c_app.name
#                 , c_app.createddate at time zone 'America/Chicago' as createddate_
#                 , (c_app.createddate at time zone 'America/Chicago')::date as date_
#                 , extract(hour from c_app.createddate at time zone 'America/Chicago') as hour_
#                 , ", getQueryString(numerator)," as numerator
#                 , ", getQueryString(denominator)," as denominator
#             from
#                 cloudlending.applications as c_app
#                 inner join
#                     cloudlending.application_status_history as c_ash
#                     on c_app.id = c_ash.application
#                 inner join
#                     (
#                         select
#                             max(max_) as c_max
#                         from
#                             (
#                                 select
#                                     max(createddate at time zone 'America/Chicago') as max_
#                                 from 
#                                     cloudlending.application_status_history

#                                 union all


#                                 select
#                                     max(createddate at time zone 'America/Chicago') as max_
#                                 from 
#                                     cloudlending.applications
#                             ) as c_maxes
#                     ) as c_maxes
#                     on TRUE
#             where
#                 c_app.source_type = 'Low'
#                 and (c_app.createddate at time zone 'America/Chicago')::date >= '", as.Date(date_) - daysBack,"'::date
#                 and (c_app.createddate at time zone 'America/Chicago')::date < '", date_,"'::date
#                 -- SNAPSHOT LOGIC
#                 and (c_ash.createddate at time zone 'America/Chicago')::date = (c_app.createddate at time zone 'America/Chicago')::date
#                 and 
#                 (
#                     (
#                         '", as.Date(Sys.time()),"'::date = '", date_, "'::date
#                         and extract(hour from c_ash.createddate at time zone 'America/Chicago') <= extract(hour from c_maxes.c_max)
#                         and extract(minute from c_ash.createddate at time zone 'America/Chicago') <= extract(minute from c_maxes.c_max)
#                     )
#                     or '", as.Date(Sys.time()),"'::date != '", date_, "'::date
#                 )
#             group by
#                 c_app.name
#                 , createddate_
#                 , date_
#                 , hour_
#             order by
#                 createddate_ desc
#             ",
#             sep = ''
#         )
#     )
    
#     runTime_end = Sys.time()
    
#     final = list(
#         runTime = runTime_end - runTime_start,
#         matchTimezones = checkTimezones,
#         df_training = dfTraining
#     )   
    
#     return(final)
# }


In [None]:
## Pull the training data, based on 
## DATE (string)
## DAYS BACK (integer)
## NUMERATOR (string)
## DENOMINATOR (string)
## HOUR OF DATE (can just use where (24 if not today or paste max(hour_)) from test data) - accounts for refresh.

## Makes sure that the refreshing follows that of the stalest table.
## Returns list of runtime, last refresh, and training data.


# Pull hourly numerator/denominator counts for the training window
# [date_ - daysBack, date_), i.e. the `daysBack` days before `date_`.
#
# Args:
#   numerator:   Funnel-step name passed to getQueryString().
#   denominator: Funnel-step name passed to getQueryString().
#   date_:       A Date, or a string as.Date() can parse (exclusive end).
#   daysBack:    Number of days before `date_` at which the window starts.
#
# Returns:
#   A list with
#     runTime        - elapsed time of the call,
#     matchTimezones - min/max createddate (America/Chicago) inside the
#                      window for the two source tables,
#     df_training    - one row per (date_, hour_) with numerator/denominator.
getTrainingData <- function(numerator, denominator, date_, daysBack) {

    runTime_start <- Sys.time()

    if (!is.numeric(daysBack) || length(daysBack) != 1L || is.na(daysBack)) {
        stop("daysBack must be a single non-missing number.", call. = FALSE)
    }

    # Normalise both window edges to ISO date strings so only well-formed
    # date literals are ever spliced into the SQL text.
    endDate <- tryCatch(as.Date(date_), error = function(e) as.Date(NA))
    if (length(endDate) != 1L || is.na(endDate)) {
        stop("date_ must be a single Date or a 'YYYY-MM-DD' string.", call. = FALSE)
    }
    endSql <- format(endDate, '%Y-%m-%d')
    startSql <- format(endDate - daysBack, '%Y-%m-%d')

    numeratorSql <- getQueryString(numerator)
    denominatorSql <- getQueryString(denominator)

    # One connection serves both queries and is closed when the function
    # exits; previously a new connection was opened per query and the last
    # one was left open.
    con <- createReportingConnectionObject()
    on.exit(dbDisconnect(con), add = TRUE)

    checkTimezones <- dbGetQuery(
        con,
        paste0(
            "
            select
                max(createddate at time zone 'America/Chicago') as max_,
                min(createddate at time zone 'America/Chicago') as min_
            from
                cloudlending.applications
            where
                (createddate at time zone 'America/Chicago')::date >= '", startSql, "'::date
                and (createddate at time zone 'America/Chicago')::date < '", endSql, "'::date

            union all

            select
                max(createddate at time zone 'America/Chicago') as max_,
                min(createddate at time zone 'America/Chicago') as min_
            from
                cloudlending.application_status_history
            where
                (createddate at time zone 'America/Chicago')::date >= '", startSql, "'::date
                and (createddate at time zone 'America/Chicago')::date < '", endSql, "'::date
            "
        )
    )

    dfTraining <- dbGetQuery(
        con,
        paste0(
            "
            select
                (c_ash.createddate at time zone 'America/Chicago')::date as date_
                , extract(hour from c_ash.createddate at time zone 'America/Chicago') as hour_
                , ", numeratorSql, " as numerator
                , ", denominatorSql, " as denominator
            from
                cloudlending.applications as c_app
                inner join
                    cloudlending.application_status_history as c_ash
                    on c_app.id = c_ash.application
                inner join
                    (
                        select
                            max(max_) as c_max
                        from
                            (
                                select
                                    max(createddate at time zone 'America/Chicago') as max_
                                from
                                    cloudlending.application_status_history

                                union all

                                select
                                    max(createddate at time zone 'America/Chicago') as max_
                                from
                                    cloudlending.applications
                            ) as c_maxes
                    ) as c_maxes
                    on TRUE
            where
                c_app.source_type = 'Low'
                and (c_ash.createddate at time zone 'America/Chicago')::date >= '", startSql, "'::date
                and (c_ash.createddate at time zone 'America/Chicago')::date < '", endSql, "'::date
                and c_app.createddate at time zone 'America/Chicago' <= c_maxes.c_max
                and c_ash.createddate at time zone 'America/Chicago' <= c_maxes.c_max
            group by
                date_
                , hour_
            order by
                date_ desc
            "
        )
    )

    runTime_end <- Sys.time()

    list(
        runTime = runTime_end - runTime_start,
        matchTimezones = checkTimezones,
        df_training = dfTraining
    )
}


In [None]:
# Pull 100 days of training data ending the day before the fixed test date;
# the commented-out lines are alternative dates.
dfTrainingPull = getTrainingData(
    numerator = 'bankverified',
    denominator = 'qualified',
#     date_ = as.Date(Sys.time()),
#     date_ = as.Date('2019-01-01'),
    date_ = as.Date('2018-12-12'),
    daysBack = 100
)
# Display the run time, the per-table min/max timestamps, and the first and
# last rows of the pulled data.
list(
    dfTrainingPull$runTime,
    dfTrainingPull$matchTimezones,
    rbind(head(dfTrainingPull$df_training), tail(dfTrainingPull$df_training))
)

### Step 3: Training Statistics

In [None]:
# Roll the training pull up to one row per (date_, hour_) with summed counts
# and the resulting conversion ratio, then show the first and last rows.
dfTrainingAggregated <- summarize(
    group_by(dfTrainingPull$df_training, date_, hour_),
    numerator = sum(numerator),
    denominator = sum(denominator),
    conversion = sum(numerator)/sum(denominator)
)
rbind(head(dfTrainingAggregated), tail(dfTrainingAggregated))

In [None]:
# Per-hour training statistics across days:
#   mean - pooled conversion r = sum(numerator) / sum(denominator)
#   se   - standard error of r from the ratio-estimator (linearisation)
#          formula  sqrt((s_y^2 + r^2 s_x^2 - 2 r s_xy) / n) / mean(x),
#          with y = numerator, x = denominator and n = number of days.
# The division by mean(x) was previously missing, which left `se` on the
# count scale instead of the ratio scale. cov() replaces cor()*sd()*sd():
# it is the same quantity but stays defined when a series has zero variance.
dfTrainingStats <-
    dfTrainingAggregated %>%
    group_by(
        hour_
    ) %>%
    summarize(
        mean = sum(numerator)/sum(denominator),
        se = sqrt(
            (
                var(numerator)
                + (sum(numerator)/sum(denominator))^2 * var(denominator)
                - 2 * (sum(numerator)/sum(denominator)) * cov(numerator, denominator)
            ) / n()
        ) / (sum(denominator)/n())
    )
dfTrainingStats

In [None]:
# Collapse the test pull to one row per hour: conversion ratio plus the
# summed numerator and denominator counts.
dfTestMeans <- summarize(
    group_by(dfTestPull$df_test, hour_),
    conversion = sum(numerator)/sum(denominator),
    numerator = sum(numerator),
    denominator = sum(denominator)
)
dfTestMeans

### Step 4: Create ggplot df

In [None]:
# Combine the test-day conversions with the training mean and a
# mean +/- 1.64 * se band, one row per test hour.
# Rows are matched on hour_ rather than by position, so a test day with
# fewer hours than the training set (or the reverse) no longer errors or
# misaligns; test hours without training statistics get NA bounds.
dfBounds <- local({
    joined <- merge(
        as.data.frame(dfTestMeans),
        as.data.frame(dfTrainingStats),
        by = 'hour_',
        all.x = TRUE
    )
    data.frame(
        hour_ = joined$hour_,
        y_ = joined$conversion,
        numerator = joined$numerator,
        denominator = joined$denominator,
        mean = joined$mean,
        lower = joined$mean - 1.64*joined$se,
        upper = joined$mean + 1.64*joined$se
    )
})
dfBounds

In [None]:
# Plot by hour: the lower bound as a line, then the test-day conversion as
# bars (layers are drawn in this order).
ggplot(dfBounds, aes(x = hour_)) +
    geom_line(mapping = aes(y = lower)) +
    geom_bar(mapping = aes(y = y_), stat = 'identity')

#### Checking

In [None]:
## Will adjust the training data to examine how far back to start the training data.

In [None]:
## First, create the data frame that aggregates over DOW and HOD.

# df1_training_means = 
#     df %>%
#     group_by(
#         dow_,
#         hour_
#     ) %>%
#     summarize(
#         bv_q = sum(bankverified)/sum(qualified),
#         sc_bv = sum(passscorecardratecard)/sum(bankverified),
#         cs_sc = sum(contractsigned)/sum(passscorecardratecard),
#         decisioned_cs = sum(cs_decisioned)/sum(contractsigned),
#         f_decisioned = sum(funded)/sum(cs_decisioned)
#     )
# df1_training_means

In [None]:
## How to set the default value for a function?
## Default is to compare to today; Specify if want to compare to previous day. 
## This logic will have to feed into data query.
## Take the DOW of the specified/default date, and keep HOD that is <= Sys.time() if date = today.
## Take input numerator/denominator.




## start over. below shouldn't aggregate on hour_ so early.




# Work-in-progress sketch: build per-hour conversion bounds from training
# data for the day-of-week of `dateCompare`.
#
# NOTE(review): getQuery(), getDOW(), getHour(), alphaPercentile() and
# alphaPercentileComp() are not defined in the visible notebook, and
# `timestart`, `timeend` and `alpha` are computed/accepted but not yet passed
# to anything. The function cannot run until those pieces exist.
#
# Args:
#   numerator, denominator: Presumably column names (strings) in the training
#                           data - confirm against the caller.
#   dateCompare: Date to compare against (must support `- daysBefore`).
#   daysBefore:  Length of the training window in days.
#   alpha:       Percentile level (currently unused).
#
# Returns:
#   Invisibly, the data frame built in the final assignment.
getSpecifiedTraining <- function(numerator, denominator, dateCompare, daysBefore, alpha) {

    ## pull this out to be own sub-function
    timestart <- dateCompare - daysBefore
    timeend <- dateCompare - 1
    df_training <- getQuery() ## be wary of data refresh - take the stalest refresh time of all tables used


    ## pull this out to be own sub-function
    # Previously referenced the undefined `date_` / `date`, compared a date to
    # Sys.time(), and passed Sys.time without calling it.
    dowCurrent <- getDOW(dateCompare)
    hourCurrent <- if (as.Date(dateCompare) == Sys.Date()) getHour(Sys.time()) else 23

    # Row filter (note the trailing comma). `<=` keeps hour 23 on past days,
    # matching the "<= 23 if not today" rule stated in the notes above.
    keepRows <- which(df_training$dow_ == dowCurrent &
                      df_training$hour_ <= hourCurrent)

    df_training_agg <-
        df_training[keepRows, , drop = FALSE] %>%
        group_by(
            hour_
        ) %>%
        summarize(
            conversion = sum(.data[[numerator]])/sum(.data[[denominator]])
        )

    ## pull this out to be own sub-function
    getLowerBound <- alphaPercentile(df_training_agg$conversion)
    getUpperBound <- alphaPercentileComp(df_training_agg$conversion)

    df_training_bounds <- data.frame(
        hour_ = df_training_agg
    )


}


### Calculate the mean.

In [None]:
# Pooled step-to-step conversion ratios per hour.
# NOTE(review): `df` is not defined in the visible notebook - confirm where
# it comes from.
df_training_means <- summarize(
    group_by(df, hour_),
    bv_q = sum(bankverified)/sum(qualified),
    sc_bv = sum(passscorecardratecard)/sum(bankverified),
    cs_sc = sum(contractsigned)/sum(passscorecardratecard),
    decisioned_cs = sum(cs_decisioned)/sum(contractsigned),
    f_decisioned = sum(funded)/sum(cs_decisioned)
)
head(df_training_means)

### Calculate the spread.

In [None]:
# Sum each funnel-step count per (date_, hour_).
# NOTE(review): `df` is not defined in the visible notebook - confirm where
# it comes from.
df_training_aggregate <- summarize(
    group_by(df, date_, hour_),
    qualified = sum(qualified),
    bankverified = sum(bankverified),
    passscorecardratecard = sum(passscorecardratecard),
    contractsigned = sum(contractsigned),
    cs_decisioned = sum(cs_decisioned),
    funded = sum(funded)
)
head(df_training_aggregate)

In [None]:
# Per-hour ingredients of the ratio-estimator standard error for
# bankverified / qualified (y = bankverified, x = qualified, across days):
#   r    - pooled ratio sum(y) / sum(x)
#   sy2  - sample variance of y
#   sx2  - sample variance of x
#   cor  - correlation of y and x
#   xbar - mean of x (needed to put the SE on the ratio scale)
#   n    - number of days
df_training_parameters <-
    df_training_aggregate %>%
    group_by(
        hour_
    ) %>%
    summarize(
        r = sum(bankverified)/sum(qualified),
        sy2 = var(bankverified),
        sx2 = var(qualified),
        xbar = mean(qualified),
        cor = cor(bankverified, qualified),
        n = n()
    )
df_training_parameters

# Standard error of r per hour:
#   sqrt((sy2 + r^2 sx2 - 2 r cor sqrt(sx2) sqrt(sy2)) / n) / xbar
# The division by xbar was previously missing (leaving the result on the
# count scale), and the result column had no name.
df_training_se <-
    df_training_parameters %>%
    group_by(
        hour_
    ) %>%
    summarize(
        se = sqrt((sy2 + r^2*sx2 - 2*r*cor*sqrt(sx2)*sqrt(sy2))/n)/xbar
    )
df_training_se


# df_training_se = data.frame(
#     df_training_parameters$hour_,
#     df_training_parameters
# )

# r = sum(df_training_aggregate$bankverified)/sum(df_training_aggregate$qualified)
# sy2 = var(df_training_aggregate$bankverified)
# sx2 = var(df_training_aggregate$qualified)
# cor = cor(df_training_aggregate$bankverified, df_training_aggregate$qualified)
# n = length(unique(df_training_aggregate$date_))

# df_training_se = sqrt((sy2 + r^2*sx2 - 2*r*cor*sqrt(sx2)*sqrt(sy2))/n)
# df_training_se

# df_training_r
# df_training_sy2
# df_training_sx2
# df_training_cor
# df_training_n

In [None]:
# Step-to-step conversion ratios for every (date_, hour_) row.
# Previously read from `df_aggregate`, which is not defined in this notebook;
# the per-(date_, hour_) table built above is `df_training_aggregate`.
df_training_ratio <- data.frame(
    date_ = df_training_aggregate$date_,
    hour_ = df_training_aggregate$hour_,
    bv_q = df_training_aggregate$bankverified/df_training_aggregate$qualified,
    sc_bv = df_training_aggregate$passscorecardratecard/df_training_aggregate$bankverified,
    cs_sc = df_training_aggregate$contractsigned/df_training_aggregate$passscorecardratecard,
    decisioned_cs = df_training_aggregate$cs_decisioned/df_training_aggregate$contractsigned,
    f_decisioned = df_training_aggregate$funded/df_training_aggregate$cs_decisioned
)
head(df_training_ratio,5)

In [None]:
# Per-hour standard deviation (across days) of each conversion ratio.
# Previously read from `df_ratio`, which is not defined in this notebook;
# the ratio table built above is `df_training_ratio`.
df_training_sd <-
    df_training_ratio %>%
    group_by(
        hour_
    ) %>%
    summarize(
        bv_q = sd(bv_q),
        sc_bv = sd(sc_bv),
        cs_sc = sd(cs_sc),
        decisioned_cs = sd(decisioned_cs),
        f_decisioned = sd(f_decisioned)
    )
head(df_training_sd)

# Create the plot.

### Format the test data.

In [None]:
# NOTE(review): `df_test` is not defined in the visible notebook - confirm
# where it comes from before running this section.
head(df_test)

In [None]:
# Pooled step-to-step conversion ratios per hour for the test data.
df_test_means <- summarize(
    group_by(df_test, hour_),
    bv_q = sum(bankverified)/sum(qualified),
    sc_bv = sum(passscorecardratecard)/sum(bankverified),
    cs_sc = sum(contractsigned)/sum(passscorecardratecard),
    decisioned_cs = sum(cs_decisioned)/sum(contractsigned),
    f_decisioned = sum(funded)/sum(cs_decisioned)
)
df_test_means

## Next steps

In [None]:
## Add logic to make the query account for actions only on day 1 of each daily cohort (snapshot logic).

####### Add DOW to data query. #######

## Modular: Make functions with inputs for DOW, training time range, hour, numerator/denominator, and confidence.
# For interval calculation.
# For data query (use paste())

## Approach 1: simply take confidence-th percentile on the data. (very easy)

## Approach 2: take the simple SD on the percents. 
