In [None]:
from simple_cysh import * # includes pandas as pd

In [None]:
# Determine which fields are actively used in our system within the Assesment__c
# object by looking at one sample record and listing the fields that hold a value.
sample_record = cysh.Assesment__c.get('a041a00000EmURoAAN')
# Two-column frame: column 0 is the field name, column 1 is the field's value.
single_assmt = pd.DataFrame(list(sample_record.items()))
has_value = single_assmt[1].notnull()
list(single_assmt.loc[has_value, 0])

In [None]:
# get all assessments with desired fields
assmt_df = get_cysh_df('Assesment__c', ['Id', 'Type__c', 'Date_Administered__c',
                                        'X0_to_300_Scaled_Score__c', 'Student__c'], rename_id=True)

# get assessment type: Type__c is matched against the Picklist_Value__c Id so the
# readable picklist Name can replace it
assmt_types = get_cysh_df('Picklist_Value__c', ['Id', 'Name'])
assmt_types = assmt_types.rename(columns={'Id': 'Type__c', 'Name':'Assessment Type'})
assmt_df = assmt_df.merge(assmt_types, how='left', on='Type__c')
del assmt_df['Type__c']

# filter for valid assessments
# na=False: the left merge above leaves 'Assessment Type' as NaN for any unmatched
# Type__c; without it str.contains returns NaN for those rows instead of False.
is_nwea = assmt_df['Assessment Type'].str.contains('NWEA', na=False)
# NaN > 0 evaluates to False, so this also drops rows with a missing score.
has_positive_score = assmt_df['X0_to_300_Scaled_Score__c'] > 0
# .copy() so that adding columns to the filtered frame later does not raise
# SettingWithCopyWarning.
assmt_df = assmt_df.loc[is_nwea & has_positive_score].copy()

In [None]:
# pivot: one student per row, assessments as columns
# Column key is "<assessment type>_<date administered>".
type_part = assmt_df['Assessment Type']
date_part = assmt_df['Date_Administered__c']
assmt_df['Key'] = type_part + '_' + date_part

# pivot_table's default aggregation (mean) applies when a student has several
# scores under the same key.
assmt_df = pd.pivot_table(
    assmt_df,
    index=['Student__c'],
    columns='Key',
    values='X0_to_300_Scaled_Score__c',
)
# Show only the header row, i.e. the resulting column names.
assmt_df.head(0)

In [None]:
# get current year grade level for each student
# Keep the first Student_Section__c row per student and index it by student so it
# can be joined onto the pivoted assessment frame (which is indexed by Student__c).
stu_sec_df = (
    get_cysh_df('Student_Section__c', ['Student__c', 'Student_Grade__c'])
    .drop_duplicates('Student__c')
    .set_index('Student__c')
)
assmt_df = assmt_df.merge(stu_sec_df, how='left', left_index=True, right_index=True)

In [None]:
# set baseline and calculate growth
# Baseline = the first listed score that is present, falling back to the second.
baseline_specs = [
    ('NWEA - ELA_Baseline', ['NWEA - ELA_2017-07-01', 'NWEA - ELA_2017-09-20']),
    ('NWEA - MATH_Baseline', ['NWEA - MATH_2017-07-01', 'NWEA - MATH_2017-09-20']),
]
for baseline_col, candidate_cols in baseline_specs:
    # bfill across the row pulls the later score into the first column when the
    # earlier one is missing; the first column is then the baseline.
    assmt_df[baseline_col] = assmt_df[candidate_cols].bfill(axis=1).iloc[:, 0]

# Growth = 2018-01-09 score minus baseline.
growth_specs = [
    ('NWEA - ELA_Growth', 'NWEA - ELA_2018-01-09', 'NWEA - ELA_Baseline'),
    ('NWEA - MATH_Growth', 'NWEA - MATH_2018-01-09', 'NWEA - MATH_Baseline'),
]
for growth_col, latest_col, baseline_col in growth_specs:
    assmt_df[growth_col] = assmt_df[latest_col] - assmt_df[baseline_col]

In [None]:
# load in targets reference (produced in 'Generate Student Targets.ipynb')
targets_df = pd.read_csv('privatedata/assessment_targets_fall_to_winter.csv')

# Lookup key is "<grade>_<score>.0". The '.0' suffix presumably makes the key match
# the string form of a float baseline score on the assessment side -- confirm.
grade_part = targets_df['Grade'].astype(str)
score_part = targets_df['Score'].astype(str) + '.0'
targets_df['Key'] = grade_part + '_' + score_part

# Split by subject and prefix every column so the two sets can be merged side by side.
is_reading = targets_df['Subject'] == 'Reading'
is_math = targets_df['Subject'] == 'Mathematics'
ELA_targets_df = targets_df[is_reading].add_prefix('ELA_')
MATH_targets_df = targets_df[is_math].add_prefix('MATH_')

In [None]:
# merge in growth targets and find difference with actual growth
# Per-subject lookup key: "<grade>_<baseline score>".
grade_str = assmt_df['Student_Grade__c'].astype(str)
assmt_df['ELA_Key'] = grade_str + '_' + assmt_df['NWEA - ELA_Baseline'].astype(str)
assmt_df['MATH_Key'] = grade_str + '_' + assmt_df['NWEA - MATH_Baseline'].astype(str)

# Merging on a column discards the left index, so the student index is captured
# once and re-applied after each merge. This relies on the left merge returning
# exactly one row per student, in the original order.
student_index = assmt_df.index
target_merges = [
    ('ELA_Key', ELA_targets_df, ['ELA_Target_Growth', 'ELA_Target_Growth_SD', 'ELA_Key']),
    ('MATH_Key', MATH_targets_df, ['MATH_Target_Growth', 'MATH_Target_Growth_SD', 'MATH_Key']),
]
for key_col, subject_targets, wanted_cols in target_merges:
    merged = assmt_df.merge(subject_targets[wanted_cols], how='left', on=key_col)
    assmt_df = merged.set_index(student_index)

# Actual growth minus target growth, per subject.
comparisons = [
    ('ELA_Growth_v_Target', 'NWEA - ELA_Growth', 'ELA_Target_Growth'),
    ('MATH_Growth_v_Target', 'NWEA - MATH_Growth', 'MATH_Target_Growth'),
]
for result_col, actual_col, target_col in comparisons:
    assmt_df[result_col] = assmt_df[actual_col] - assmt_df[target_col]

# TODO
* associate students with ACM, filter for active students

# Analysis Questions
* visualize tutoring time against growth
* visualize O&C scores against growth
* visualize growth against test date (whether baseline came from prior year or from fall)