In [None]:
import os
from simple_cysh import * # includes pandas as pd
# Switch the working directory to the shared ACM-analysis folder; relative
# paths used later in the notebook are resolved against it.
ACM_ANALYSIS_DIR = r'Z:\ChiPrivate\Chicago Reports and Evaluation\SY18\Eval Management\ACM_analysis'
os.chdir(ACM_ANALYSIS_DIR)

In [None]:
# Load the three objects used to describe each section: the section itself,
# its program, and its primary intervention staff member. The intent is one
# staff member per section, so the later joins should not duplicate sections.
section_fields = [
    'Id', 'Name', 'School__c', 'Program__c',
    'In_After_School__c', 'Intervention_Primary_Staff__c',
]
section_df = get_cysh_df('Section__c', section_fields, rename_id=True)
# Expose the primary-staff lookup under the name 'Staff__c', the key used when
# the staff table is joined on.
section_df = section_df.rename(columns={'Intervention_Primary_Staff__c': 'Staff__c'})
program_df = get_cysh_df('Program__c', ['Id', 'Name'], rename_id=True, rename_name=True)
staff_df = get_cysh_df('Staff__c', ['Id', 'Name', 'Role__c'], rename_id=True, rename_name=True)

In [None]:
# Attach the program record, then the staff member's name and role, to every
# section (left joins, so sections without a match are kept at this stage).
df = (
    section_df
    .merge(program_df, how='left', on='Program__c')
    .merge(staff_df, how='left', on='Staff__c')
)
# Keep only in-school tutoring sections. Rows with no program name are removed
# first so the string match below only sees real names.
df = df.loc[df['Program__c_Name'].notnull()]
is_tutoring = df['Program__c_Name'].str.contains('Tutoring')
is_in_school = df['In_After_School__c'] == 'In School'
df = df.loc[is_tutoring & is_in_school]

In [None]:
# Build per-student, per-program time-on-task (ToT) totals from the
# intervention session results.
student_section_df = get_cysh_df(
    'Student_Section__c',
    ['Id', 'Section__c', 'Student__c', 'Student_Id__c'],
    rename_id=True,
)
isr_fields = [
    'CreatedDate', 'Intervention_Session_Date__c',
    'Amount_of_Time__c', 'Primary_Skill__c',
    'Student_Section__c', 'Intervention_Session__c',
    'Section_Name__c', 'Program__c', 'Program_Name__c',
]
ISR_df = get_cysh_df('Intervention_Session_Result__c', isr_fields)

# Bring in the student/section ids for each result, build the grouping key
# "<student>_<program>", and parse both date columns (CreatedDate is truncated
# to midnight so it compares at day granularity).
ISR_df = (
    ISR_df
    .merge(student_section_df, how='left', on='Student_Section__c')
    .assign(
        student_program=lambda d: d['Student__c'] + '_' + d['Program__c'],
        Intervention_Session_Date__c=lambda d: pd.to_datetime(d['Intervention_Session_Date__c']),
        CreatedDate=lambda d: pd.to_datetime(d['CreatedDate']).dt.normalize(),
    )
)
# Optional date cutoff, currently disabled:
# ISR_df = ISR_df.loc[ISR_df['Intervention_Session_Date__c'] < '2017-12-11']

# One row per student_program with the sum / mean / count of minutes, exposed
# as ToT_sum, ToT_mean and ToT_count.
aggs = ['sum', 'mean', 'count']
tot_names = {agg: 'ToT_' + agg for agg in aggs}
ISR_df_agg = (
    ISR_df
    .groupby('student_program')['Amount_of_Time__c']
    .agg(aggs)
    .reset_index()
    .rename(columns=tot_names)
)

In [None]:
# Number of session results for each program / primary-skill combination
# (count() tallies the non-null Amount_of_Time__c values in each group).
by_program_skill = ISR_df.groupby(['Program_Name__c', 'Primary_Skill__c'])
by_program_skill['Amount_of_Time__c'].count()

In [None]:
# Logging delay: whole days between the session date and the day its result
# record was created.
# NOTE(review): if CreatedDate parses as timezone-aware while the session date
# is naive, this subtraction would raise — confirm against real data.
ISR_df['LogDaysDiff'] = (ISR_df['CreatedDate'] - ISR_df['Intervention_Session_Date__c']).dt.days

student_df = get_cysh_df('Student__c', ['Id', 'School_Name__c'], rename_id=True)
# Attach the school name only once. Without this guard, re-running the cell
# merges student_df a second time, pandas suffixes the clashing column into
# School_Name__c_x / School_Name__c_y, and the groupby below fails with a
# KeyError. The merge is an inner join (pandas default), so results whose
# student is absent from student_df are dropped.
if 'School_Name__c' not in ISR_df.columns:
    ISR_df = ISR_df.merge(student_df, on='Student__c')

# Mean logging delay per school for sessions after 2018-02-05, smallest first.
(
    ISR_df
    .loc[ISR_df['Intervention_Session_Date__c'] > '2018-02-05']
    .groupby('School_Name__c')['LogDaysDiff']
    .mean()
    .sort_values()
)

In [None]:
# Disabled one-off export: Quarter 3 time-on-task "freeze" per student and program.
# NOTE(review): as written, the second line groups ISR_df rather than the
# date-filtered Q3_ToT from the first line, so the 2018-04-12 cutoff would be
# discarded if this were re-enabled — confirm the intent before uncommenting.
# Q3_ToT = ISR_df.loc[ISR_df['Intervention_Session_Date__c']<='2018-04-12']
# Q3_ToT = ISR_df.groupby(['Student_Id__c', 'Program_Name__c'])['Amount_of_Time__c'].sum().reset_index()
# Q3_ToT.to_csv(r'Z:\ChiPrivate\Chicago Reports and Evaluation\SY18\SY18 Quarterly Reports\Quarter 3\Q3 ToT Freeze.csv', index=False)

In [None]:
# Put the per-student_program ToT aggregates on every session result, then
# collapse to a single row per student-section enrollment.
# NOTE: this cell rebinds both ISR_df and df; running it a second time without
# re-running the cells above would merge the same columns again.
ISR_df = (
    ISR_df
    .merge(ISR_df_agg, how='left', on='student_program')
    .drop_duplicates('Student_Section__c')
)

# Carry only the join key, the student id and the aggregate columns over to
# the section table (inner join on Section__c).
tot_columns = ['Section__c', 'Student__c'] + ISR_df_agg.columns.tolist()
df = df.merge(ISR_df[tot_columns], on='Section__c')

In [None]:
# Save the section-level time-on-task table (relative path, index column omitted).
TOT_EXPORT_FILE = 'time_on_task_2017-12-11.csv'
df.to_csv(TOT_EXPORT_FILE, index=False)

In [None]:
# Open question: should "active" students be defined as those who received more than 200 minutes of tutoring?
# Other factors to add at some point: the number of ACMs who contributed a meaningful amount of time.