# CMF Computation Tool

In [1]:
import pandas as pd

In [18]:
# Load the "Formatting Data" sheet of each input workbook into its own frame.
D1_data = pd.read_excel(
    'data/D1_Phonolite_Input.xlsx',
    sheet_name="Formatting Data",
)
D2_data = pd.read_excel(
    'data/D2_LWA_Input.xlsx',
    sheet_name="Formatting Data",
)
D6_data = pd.read_excel(
    'data/D6_HFST_Input.xlsx',
    sheet_name="Formatting Data",
)

# Show column names and data types
# for idx in range(len(crash_data.columns)):
#     print(crash_data.columns[idx], '\t' ,crash_data.dtypes[idx])


## Compute Naive CMFs

In [25]:
def get_pivot_counts(data: pd.DataFrame, index='curve_id', field='Relation_To_HFST_Treatment_No_Covid', unique_values=('before treatment', 'after treatment', 'unknown')):
    """Count, per group, how many rows of ``data`` carry each label in ``field``.

    Parameters
    ----------
    data : pd.DataFrame
        Table holding the grouping column(s) and the ``field`` column.
    index : label or list of labels, default 'curve_id'
        Column(s) of ``data`` to group by; they form the index of the result.
        Anything that is not a column label of ``data`` is handed to
        ``groupby`` unchanged.
    field : label
        Column whose values are compared against ``unique_values``.
    unique_values : iterable
        Labels to count. One output column is produced per label, in the
        order given.

    Returns
    -------
    pd.DataFrame
        One row per group and one integer count column per label, followed by
        a 'Grand Total' column holding the row-wise sum of those counts. Rows
        whose ``field`` value matches none of the labels (missing values
        included) add to no column. An empty ``data`` yields an empty table
        that still has all the columns.
    """
    labels = list(unique_values)

    # One 0/1 indicator column per label, so a single vectorized groupby-sum
    # replaces a Python callback invoked once per group per label.
    indicators = pd.DataFrame(
        {label: data[field].eq(label).astype('int64') for label in labels},
        index=data.index,
    )

    # The indicator frame does not contain the grouping columns, so column
    # labels are swapped for the actual columns taken from ``data``.
    keys = list(index) if isinstance(index, (list, tuple)) else [index]
    group_keys = [
        data[key] if pd.api.types.is_hashable(key) and key in data.columns else key
        for key in keys
    ]
    grouper = group_keys[0] if len(group_keys) == 1 else group_keys

    pivot_count = indicators.groupby(grouper).sum()
    pivot_count['Grand Total'] = pivot_count.sum(axis=1)
    return pivot_count


def compute_cmf(pivot_table: pd.DataFrame, years_before_treatment=3, years_after_treatment=3, before_columne_name='before treatment', after_column_name='after treatment'):
    """Return the ratio of the yearly 'after' count to the yearly 'before' count.

    Parameters
    ----------
    pivot_table : pd.DataFrame
        Table with one count column per treatment period.
    years_before_treatment, years_after_treatment : number
        Divisors applied to the summed 'before' and 'after' columns.
    before_columne_name, after_column_name : label
        Names of the two count columns. (The spelling of
        ``before_columne_name`` is kept so keyword callers keep working.)

    Returns
    -------
    number
        ``(after_total / years_after) / (before_total / years_before)``, or 1
        when the 'before' frequency is zero. Both branches return the same
        numeric type.
    """
    freq_before = pivot_table[before_columne_name].sum(axis=0) / years_before_treatment
    freq_after = pivot_table[after_column_name].sum(axis=0) / years_after_treatment
    if freq_before == 0:
        # Build the fallback in the type of the regular result: a bare ``1``
        # has no ``.round`` method, which callers use on the returned value.
        return type(freq_after)(1)
    return freq_after / freq_before

def naive_CMF(data: pd.DataFrame, years_before_treatment=3, years_after_treatment=3, field="Relation To Treatment"):
    """Return the naive before/after CMF from row counts in ``data``.

    Rows are counted by the value of ``data[field]``: "before treatment" and
    "after treatment". Each count is normalized by the length of its period,
    so the result is a ratio of yearly frequencies.

    Parameters
    ----------
    data : pd.DataFrame
        Table containing the ``field`` column.
    years_before_treatment, years_after_treatment : number
        Length of the before and after periods. With equal periods the result
        equals the plain ratio of the two counts.
    field : label, default "Relation To Treatment"
        Column holding the treatment-period label of each row.

    Returns
    -------
    float
        ``(after_count / years_after) / (before_count / years_before)``, or
        1.0 when there are no "before treatment" rows.
    """
    relation = data[field]
    crashes_before = int((relation == "before treatment").sum())
    crashes_after = int((relation == "after treatment").sum())
    if crashes_before == 0:
        return 1.0
    # Cross-multiplied form of (after / years_after) / (before / years_before):
    # a single division, so equal integer periods reproduce after / before exactly.
    return (crashes_after * years_before_treatment) / (crashes_before * years_after_treatment)

In [26]:
# Naive CMF for the frame read from 'data/D6_HFST_Input.xlsx', using the default 3-year periods.
naive_CMF(data=D6_data)

0.5104790419161677

### Total CMF

In [6]:
# Counts per `curve_id` (the default grouping) for each treatment-period label.
# NOTE(review): `crash_data` is not defined in any cell visible here — confirm
# which input frame it refers to so the notebook runs on a fresh kernel.
total_cmf_table = get_pivot_counts(
    data=crash_data,
    field='Relation_To_HFST_Treatment_No_Covid',
)
total_cmf_table


Unnamed: 0_level_0,before treatment,after treatment,unknown,Grand Total
curve_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2,1,1,4
2,3,1,0,4
3,1,0,0,1
4,0,0,1,1
6,2,1,2,5
...,...,...,...,...
269,1,0,1,2
270,2,0,0,2
271,0,3,0,3
272,2,1,0,3


In [7]:
# cmf results
# NOTE(review): `crash_data` is not defined in any cell visible here — confirm its source.
# Built-in round() accepts any numeric result, whereas the `.round` method
# exists only on NumPy scalars and fails on a plain Python number.
total_cmf_table = get_pivot_counts(data=crash_data, field='Relation_To_HFST_Treatment_No_Covid')
total_cmf = compute_cmf(total_cmf_table, years_after_treatment=3, years_before_treatment=3)
print("total cmf without covid data: ", round(total_cmf, 2))

# The second figure uses a different field and a 4-year "after" period.
total_cmf_table = get_pivot_counts(data=crash_data, field='Relation_To_HFST_Treatment')
total_cmf = compute_cmf(total_cmf_table, years_after_treatment=4, years_before_treatment=3)
print("total cmf with covid data: ", round(total_cmf, 2))


total cmf without covid data:  0.7
total cmf with covid data:  0.69


### Single Vehicle CMF


In [8]:
# cmf results
# NOTE(review): `crash_data` is not defined in any cell visible here — confirm its source.
# Boolean-mask selection; assumes 'Single_Vehicle' holds booleans — TODO confirm.
filtered_data = crash_data[crash_data['Single_Vehicle']]

# Built-in round() accepts any numeric result, whereas the `.round` method
# exists only on NumPy scalars and fails on a plain Python number.
pivot_table = get_pivot_counts(data=filtered_data, field='Relation_To_HFST_Treatment_No_Covid')
cmf = compute_cmf(pivot_table, years_after_treatment=3, years_before_treatment=3)
print("total cmf without covid data: ", round(cmf, 2))

# Same filtered rows, counted on the other field with a 4-year "after" period;
# the filter is not recomputed because its inputs have not changed.
pivot_table = get_pivot_counts(data=filtered_data, field='Relation_To_HFST_Treatment')
cmf = compute_cmf(pivot_table, years_after_treatment=4, years_before_treatment=3)
print("total cmf with covid data: ", round(cmf, 2))


total cmf without covid data:  0.62
total cmf with covid data:  0.6


### Wet Surface Condition CMF

In [9]:
# cmf results
# NOTE(review): `crash_data` is not defined in any cell visible here — confirm its source.
# Keep only the rows whose 'Surface_Co' value is exactly 'Wet'.
filtered_data = crash_data[crash_data['Surface_Co']=='Wet']

# Built-in round() accepts any numeric result, whereas the `.round` method
# exists only on NumPy scalars and fails on a plain Python number.
pivot_table = get_pivot_counts(data=filtered_data, field='Relation_To_HFST_Treatment_No_Covid')
cmf = compute_cmf(pivot_table, years_after_treatment=3, years_before_treatment=3)
print("total cmf without covid data: ", round(cmf, 2))

# Same filtered rows, counted on the other field with a 4-year "after" period;
# the filter is not recomputed because its inputs have not changed.
pivot_table = get_pivot_counts(data=filtered_data, field='Relation_To_HFST_Treatment')
cmf = compute_cmf(pivot_table, years_after_treatment=4, years_before_treatment=3)
print("total cmf with covid data: ", round(cmf, 2))


total cmf without covid data:  0.47
total cmf with covid data:  0.44
