In [None]:
# default_exp core.caseload

## This code generates data for caseload analysis

> When a case spans many months, data is created for each month of its lifespan in order to perform a variety of analyses.  In this particular scenario, we want to assess the impact by month and calculate the effort required.


In [None]:
#hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#export
import pandas as pd
import numpy  as np
from datetime import datetime
import altair as alt

#from cheeky_monkey import *

pd.set_option('display.max_columns', 30) # set so can see all columns of the DataFrame

from vega_datasets import data
_=alt.data_transformers.disable_max_rows()


# Maps a detailed case-outcome description to one of three coarse labels:
# 'Deceased', 'Cessation' or 'Continuation'.
CESSATION_CONTINUATION = {
    'Driver Deceased': 'Deceased', 
    'Drivers found medically unfit to drive':'Cessation',
    'Drivers that did not respond; cancelled license':'Cessation',
    'Drivers that voluntarily surrendered their license':'Cessation',
    'Drivers ultimately found fit to drive':'Continuation',
    'Cases remaining open at time of reporting':'Continuation'
}



In [None]:
#export
def get_generated_caseload_data_bymonth(f_path):
    """Load the generated month-by-month caseload summary.

    Parameters
    ----------
    f_path : str
        Directory prefix, including its trailing path separator; the file
        name 'caseloaddata_by_month.csv' is appended to it as-is.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with 'CASE_OPENED_DT' parsed as dates and an extra
        'Opened Month' column formatted like 'Jan-2018'.
    """
    filepath = f_path + 'caseloaddata_by_month.csv'
    caseload_data_by_month = pd.read_csv(filepath, parse_dates=['CASE_OPENED_DT'])
    # Vectorised formatting: much cheaper than a row-wise apply, and a missing
    # date yields a missing label instead of raising on NaT.strftime.
    caseload_data_by_month['Opened Month'] = caseload_data_by_month['CASE_OPENED_DT'].dt.strftime('%b-%Y')
    return caseload_data_by_month

In [None]:
def get_processed_case_data(f_path):
    """Load 'cases_processed.csv' and add the derived analysis columns.

    Rows whose 'Ignore Case' flag is not 0 are dropped. The following columns
    are then added:

    * 'Age Category'             - 'Over 80 ' when age_bucket >= 80, else 'Under 80'
    * 'Type Origin'              - CASE_CD and ORIGIN_CD joined with '_'
    * 'Type & Origin Desc'       - CASE_DSC and ORIGIN_DSC joined with ' & '
    * 'Case Length Over 30 Days' - case_length_days >= 30
    * 'Case Length Over 60 Days' - case_length_days >= 60

    Parameters
    ----------
    f_path : str
        Directory prefix, including its trailing path separator.

    Returns
    -------
    pandas.DataFrame
        The filtered and enriched case data.
    """
    file_path = f_path + 'cases_processed.csv'
    cases_df = pd.read_csv(
        file_path,
        parse_dates=['BIRTHDATE', 'CASE_OPENED_DT', 'PREV_CASE_END_DT', 'LAST_STATUS_DATE'],
        dtype={'DRIVERS_LICENSE_NO': str},  # keep licence numbers as text (preserves leading zeros)
    )
    # Copy so the new columns are added to an independent frame rather than a
    # slice of the frame that was read (avoids SettingWithCopyWarning).
    cases_df = cases_df[cases_df['Ignore Case'] == 0].copy()

    # The trailing space in 'Over 80 ' is kept on purpose: it is the label
    # value this function has always produced.
    cases_df['Age Category'] = np.where(cases_df['age_bucket'] >= 80, 'Over 80 ', 'Under 80')
    cases_df['Type Origin'] = cases_df['CASE_CD'].map(str) + '_' + cases_df['ORIGIN_CD'].map(str)
    cases_df['Type & Origin Desc'] = cases_df['CASE_DSC'].map(str) + ' & ' + cases_df['ORIGIN_DSC'].map(str)
    cases_df['Case Length Over 30 Days'] = cases_df['case_length_days'] >= 30
    cases_df['Case Length Over 60 Days'] = cases_df['case_length_days'] >= 60

    # The frame must be returned: callers assign the result of this function.
    return cases_df


In [None]:
# NOTE(review): both locations are hard-coded, user-specific absolute Windows
# paths, so this cell only runs on that machine; consider a configurable,
# relative data directory. They are raw strings, so each one ends with two
# literal backslashes.
data_file_path = r'C:\Users\mbeaulieu\rsi_project_book\Data\\'
generated_data_file_path = r'C:\Users\mbeaulieu\rsi_project_book\Data\\'
# Module-level case data; generate_caseload_data reads this global.
cases_df = get_processed_case_data(data_file_path)

# cases_df = pd.read_csv(r'..\cases_processed.csv',parse_dates=['BIRTHDATE','CASE_OPENED_DT','PREV_CASE_END_DT','LAST_STATUS_DATE'], dtype={'DRIVERS_LICENSE_NO': str})
# cases_df = cases_df[cases_df['Ignore Case'] == 0]
# cases_df = cases_df[cases_df['Is Adjudicated'] == 'Adjudicated']
# cases_df['GENERAL_STATUS'].value_counts()

In [None]:
#hideinput

# def f(r):
#     backlog_diff = r['Backlog Date'] - r['CASE_OPENED_DT']
#     backlog_diff = backlog_diff/np.timedelta64(1,'M')
#     last_update_diff = r['LAST_STATUS_DATE'] - r['Backlog Date'] # if same month then
#     last_update_diff = last_update_diff/np.timedelta64(1,'M')
#     if backlog_diff < 2:
#         return 0
#     elif last_update_diff <= 0 and backlog_diff < 3 :
#         return 0
#     elif ( (r['Backlog Date'].year == r['LAST_STATUS_DATE'].year ) 
#            and (r['Backlog Date'].month - r['LAST_STATUS_DATE'].month == 0 ) 
#            and ( r['GENERAL_STATUS'] == 'Closed')   
#            and backlog_diff >= 2        
#          ):
#         return -1
#     elif backlog_diff >= 2:
#         return 1
#     else:
#         return 100

# def f_isopen(r):
#     status_diff = r['LAST_STATUS_DATE'] - r['CASE_OPENED_DT']
#     status_diff = status_diff/np.timedelta64(1,'M')
#     year_span_diff = r['Year Span'] - r['LAST_STATUS_DATE'] # if same month then
#     year_span_diff = year_span_diff/np.timedelta64(1,'M')    
# #     print(diff)
# #     print(year_span_diff)
#     if year_span_diff < 1 :
#         return 'Open'
#     else:
#         return 'Closed'

    
def f_caseload_isopen(r):
    """Classify one case/month row as open or closed for that month.

    Parameters
    ----------
    r : mapping or pandas.Series
        Must provide 'GENERAL_STATUS', 'Year Span' (the month being reported,
        a timestamp) and 'LAST_STATUS_DATE' (a timestamp).

    Returns
    -------
    tuple
        (status, open_count, closed_count): ('Open', 1, 0) or ('Closed', 0, 1).

    A case whose GENERAL_STATUS is 'Open' is open in every month. Any other
    case counts as open for months that fall on or before its last status
    date and as closed for months after it.
    """
    if r['GENERAL_STATUS'] == 'Open':
        return ('Open', 1, 0)

    # Only the sign of the difference matters, so compare against a zero
    # Timedelta instead of converting to months with np.timedelta64(1, 'M'),
    # an ambiguous unit that recent pandas versions refuse to convert.
    # A missing (NaT) difference compares False and falls through to 'Closed'.
    reported_on_or_before_last_status = (r['Year Span'] - r['LAST_STATUS_DATE']) <= pd.Timedelta(0)
    if reported_on_or_before_last_status:
        return ('Open', 1, 0)
    return ('Closed', 0, 1)


In [None]:
#hideinput
def build_range(start='1-1-2018', end='1-11-2021'):
    """Return the month-end dates covering the reporting window.

    Parameters
    ----------
    start, end : str
        First and last month of the window in '%d-%m-%Y' form. The defaults
        reproduce the original fixed window (January 2018 to November 2021).

    Returns
    -------
    pandas.DatetimeIndex
        One month-end timestamp per month, from the month of `start` through
        the month of `end`.
    """
    # A MonthEnd offset object is used instead of the 'M' alias, which pandas
    # deprecated in 2.2; the object form works across pandas versions.
    month_end = pd.offsets.MonthEnd()
    bounds = pd.to_datetime([datetime.strptime(start, '%d-%m-%Y'),
                             datetime.strptime(end, '%d-%m-%Y')]) + month_end
    return pd.date_range(bounds[0], bounds[1], freq=month_end)

def filter_data(d, year, month):
    """Select the rows of `d` opened in the given year and month.

    Matches on the 'opened_year' and 'opened_month' columns. The result gets
    a fresh index, with the previous index kept as a column.
    """
    same_year = d['opened_year'] == year
    same_month = d['opened_month'] == month
    selected = d.loc[same_year & same_month]
    return selected.reset_index()

def build_monthly_caseload(d, end_date, filename):
    """Expand cases to one row per month of their lifespan and summarise them.

    Parameters
    ----------
    d : pandas.DataFrame
        Case rows. Reads 'LAST_STATUS_DATE', 'CASE_OPENED_DT',
        'DRIVERS_LICENSE_NO', 'STATUS_COUNT' and 'Age Category', plus the
        columns f_caseload_isopen needs.
    end_date : timestamp-like
        Last date of the reporting window; every case is expanded up to it.
    filename : str
        CSV path for the expanded per-case/per-month rows. The summary is
        written next to it, with '.csv' replaced by '_Adjudicated_Summary.csv'.

    Returns
    -------
    pandas.DataFrame or None
        Counts grouped by opening month, reported month ('Year Span'),
        'Status' and 'Age Category'; None when there are no rows to expand
        (nothing is written in that case).
    """
    # Offset object instead of the 'M' alias, which pandas deprecated in 2.2.
    month_end = pd.offsets.MonthEnd()
    summaryfilename = filename.replace('.csv', '_Adjudicated_Summary.csv')

    # Only cases that have a last-status date can be classified per month.
    data = d[d['LAST_STATUS_DATE'].notna()].reset_index()
    data['Year Date'] = end_date

    # One month-end timestamp per month between opening and the window end.
    # NOTE(review): adding MonthEnd() to a date that already falls on a month
    # end moves it to the FOLLOWING month end, so such dates (for example an
    # end_date of 30 Nov) produce a span shifted one month later. Kept as-is
    # to preserve the generated data; confirm whether MonthEnd(0) was intended.
    data['Year Span'] = [
        pd.date_range(*(pd.to_datetime([opened, last]) + month_end), freq=month_end)
        for opened, last in zip(pd.to_datetime(data['CASE_OPENED_DT']),
                                pd.to_datetime(data['Year Date']))
    ]
    data = data.explode('Year Span')

    if data.empty:
        return None

    # Label every case/month row as open or closed.
    data[['Status', 'Open Count', 'Closed Count']] = data.apply(f_caseload_isopen, axis=1).to_list()
    data.to_csv(filename, index=False)

    # Impact is now known per case and month; aggregate it per opening month,
    # reported month, status and age category.
    aggregation = {
        'Open Count': ('Open Count', 'sum'),
        'Closed Count': ('Closed Count', 'sum'),
        'Case Count': ('DRIVERS_LICENSE_NO', 'count'),
        'Status Count': ('STATUS_COUNT', 'sum'),
    }
    monthly_counts = data.groupby([
        pd.Grouper(freq=month_end, key='CASE_OPENED_DT'),
        pd.Grouper(key='Year Span'),
        pd.Grouper(key='Status'),
        pd.Grouper(key='Age Category'),
    ]).agg(**aggregation).reset_index()

    monthly_counts.to_csv(summaryfilename, index=False)
    return monthly_counts

In [None]:
def generate_caseload_data(f_path=None):
    """Build the month-by-month caseload summary for every opening month.

    For each month of build_range() except the last one, the cases opened in
    that month are taken from the module-level `cases_df`, expanded and
    summarised by build_monthly_caseload (which also writes two CSV files per
    month), and the monthly summaries are stacked into one frame.

    Parameters
    ----------
    f_path : str, optional
        Directory prefix (with trailing path separator) for the generated CSV
        files. Defaults to the module-level `generated_data_file_path`, which
        is the location that was always used before this argument was honoured.

    Returns
    -------
    pandas.DataFrame
        All monthly summaries concatenated; an empty frame when no month
        produced data.
    """
    out_dir = generated_data_file_path if f_path is None else f_path

    months = build_range()
    end_date = months[-1]

    frames = []
    for date in months:
        # The final month is only the end of the window, not an opening month.
        if (date.year == end_date.year) and (date.month == end_date.month):
            break

        filename = out_dir + date.strftime("%b") + '_' + date.strftime("%Y") + '_' + 'toNov2021_ADJUDICATEDCASELOAD' + '.csv'

        monthly_data = filter_data(cases_df, date.year, date.month)
        caseload_month = build_monthly_caseload(monthly_data, end_date, filename)

        # NOTE(review): the first month without data stops the whole run
        # (original behaviour); use `continue` if later months should still
        # be processed.
        if caseload_month is None:
            break
        frames.append(caseload_month)

    if not frames:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead of
    # growing the frame inside the loop.
    return pd.concat(frames)

    

In [None]:
# Set to True to rebuild the caseload data from cases_df; when False the
# downstream cells report that there is nothing to do.
REGENERATE_CASELOAD = False

if REGENERATE_CASELOAD:
    # generate_caseload_data needs the output directory for its CSV files.
    caseload_data = generate_caseload_data(generated_data_file_path)
    caseload_data['Cases_cumsum'] = caseload_data.groupby(['CASE_OPENED_DT','Year Span'])['Case Count'].cumsum()
else:
    caseload_data = False

In [None]:
# `if caseload_data:` raises "truth value of a DataFrame is ambiguous" once
# data has actually been generated, so test the type explicitly.
if isinstance(caseload_data, pd.DataFrame) and not caseload_data.empty:

    aggregation = {
         'Open Count': ('Open Count','sum')
    }
    # Cases opened in January 2018 (the opening month is stored as its month end).
    jan_data = caseload_data[caseload_data['CASE_OPENED_DT'] == '2018-01-31']

    # Total open cases per reported month.
    backlog_data_byday = jan_data.groupby(['Year Span']) \
    .agg(** aggregation).reset_index()

else:
    backlog_data_byday = 'Nothing to be done'
backlog_data_byday

'Nothing to be done'

In [None]:
# Comparing a DataFrame with `!=` returns a DataFrame, which cannot be used
# as an `if` condition; check the type instead.
if isinstance(backlog_data_byday, pd.DataFrame):
    # Area chart of open cases per reported month.
    montly_backlog_all = alt.Chart(backlog_data_byday).mark_area(point=True).encode(
        y = alt.Y("Open Count:Q" ),
        x = alt.X("Year Span:T",  scale=alt.Scale(zero=False) ),
    ).properties(
        width=700,
        height=400
    )
else:
    montly_backlog_all = backlog_data_byday

montly_backlog_all

'Nothing to be done'

In [None]:
backlog_data_byday

'Nothing to be done'

In [None]:
# Comparing a DataFrame with `!=` returns a DataFrame, which cannot be used
# as an `if` condition; check the type instead.
if isinstance(backlog_data_byday, pd.DataFrame):
    aggregation = {
         'Open Count': ('Open Count','sum')
    }

    # Open cases per reported month, split by age category.
    backlog_data_byday = jan_data.groupby(['Year Span', 'Age Category']) \
    .agg(** aggregation).reset_index()

    montly_backlog_all = alt.Chart(backlog_data_byday).mark_area(point=True).encode(
        y = alt.Y("sum(Open Count):Q" ),
        x = alt.X("Year Span:T",  scale=alt.Scale(zero=False) ),
        color = "Age Category:N",
        tooltip=['sum(Open Count)']
    ).properties(
        width=700,
        height=400
    )
else:
    montly_backlog_all = backlog_data_byday

montly_backlog_all


'Nothing to be done'

In [None]:
# Comparing a DataFrame with `!=` returns a DataFrame, which cannot be used
# as an `if` condition; check the type instead.
if isinstance(backlog_data_byday, pd.DataFrame):
    aggregation = {
         'Open Count': ('Open Count','sum'),
         'Total Cases': ('Case Count','sum'),
    }

    # Open and total cases per reported month, age category and opening month.
    backlog_data_byday = jan_data.groupby(['Year Span', 'Age Category','CASE_OPENED_DT']) \
    .agg(** aggregation).reset_index()

    montly_backlog_all = alt.Chart(backlog_data_byday).mark_area(point=True).encode(
        y = alt.Y("sum(Open Count):Q" ),
        x = alt.X("Year Span:T",  scale=alt.Scale(zero=False) ),
        color = "CASE_OPENED_DT:N",
        tooltip=['CASE_OPENED_DT:T']
    ).properties(
        width=700,
        height=400
    )
else:
    montly_backlog_all = backlog_data_byday

montly_backlog_all



'Nothing to be done'

In [None]:
# `if caseload_data:` raises "truth value of a DataFrame is ambiguous" once
# data has actually been generated, so test the type explicitly.
if isinstance(caseload_data, pd.DataFrame) and not caseload_data.empty:
    # Persist the combined summary; get_generated_caseload_data_bymonth reads
    # a file with this name.
    filepath = generated_data_file_path + 'caseloaddata_by_month.csv' 
    caseload_data.to_csv(filepath, index = False)
else:
    print(montly_backlog_all)

Nothing to be done


In [None]:
#hide

# Run nbdev's notebook2script; the output below lists the notebooks it converted.
from nbdev.export import notebook2script; notebook2script()

Converted 00_core.ipynb.
Converted 01_core.baseline.ipynb.
Converted 02_core.caseload.ipynb.
No export destination, ignored:
#exporti
data_file_path = r'C:\Users\mbeaulieu\rsi_project_book\Data\\'
cases2018_df = get_2018processed_case_data(data_file_path)

Converted 10. Cost Analysis Baseline - 2018 Over 80 Cases by Case Origin.ipynb.
Converted 20. Caseload Modelling Introduction.ipynb.
Converted 21. Caseload Modelling - Monthly Totals Cases Opened.ipynb.
Converted 22. Caseload Modelling - Monthly Totals Cases Closed.ipynb.
Converted index.ipynb.
