In [None]:
import numpy as np
import os
from simple_cysh import * # includes pandas as pd
%matplotlib inline
# Switch the working directory to the shared analysis folder so that the relative file
# names used later in this notebook ('assessment_targets.csv', 'assessment_growth.csv')
# resolve against it.
# NOTE(review): hard-coded, machine-specific path; os.chdir raises if this location is not available.
os.chdir(r'Z:\ChiPrivate\Chicago Reports and Evaluation\SY18\Eval Management\ACM_analysis')

In [None]:
# NOTE(review): init_cysh is provided by simple_cysh; presumably it sets up the Salesforce
# session that the get_cysh_df calls below depend on — confirm in that module.
init_cysh()

def get_assmt_df():
    """Load all assessment records and attach a readable assessment type name.

    Pulls the Assesment__c object with its id, type, administration date,
    0-300 scaled score and student reference, then left-joins the
    Picklist_Value__c names onto the Type__c column.

    Returns:
        DataFrame with the requested assessment fields, minus 'Type__c', plus an
        'Assessment Type' column. 'Assessment Type' is NaN for any row whose
        Type__c has no matching Picklist_Value__c Id (left join).
    """
    assmt_fields = ['Id', 'Type__c', 'Date_Administered__c',
                    'X0_to_300_Scaled_Score__c', 'Student__c']
    # NOTE(review): rename_id=True presumably renames 'Id' to the object name — confirm in simple_cysh
    assessments = get_cysh_df('Assesment__c', assmt_fields, rename_id=True)

    # lookup table: picklist Id -> name, relabelled so it joins directly on Type__c
    type_lookup = get_cysh_df('Picklist_Value__c', ['Id', 'Name'])
    type_lookup = type_lookup.rename(columns={'Id': 'Type__c', 'Name': 'Assessment Type'})

    labelled = assessments.merge(type_lookup, how='left', on='Type__c')
    # the raw type Id is no longer needed once the name is attached
    return labelled.drop('Type__c', axis=1)

In [None]:
assmt_df = get_assmt_df()
# filter for valid assessments: NWEA assessment types with a scaled score of at least 100.
# na=False: get_assmt_df left-merges the type names, so 'Assessment Type' is NaN when a
# type Id has no picklist match; count those rows as non-NWEA rather than letting NaN
# leak into the boolean mask.
is_nwea = assmt_df['Assessment Type'].str.contains('NWEA', na=False)
has_valid_score = assmt_df['X0_to_300_Scaled_Score__c'] >= 100
assmt_df = assmt_df.loc[is_nwea & has_valid_score]
# pivot: one row per student and assessment type, one column per administration date.
# pivot_table's default aggregation is the mean, so duplicate records for the same
# student/type/date are averaged into one score.
assmt_df = assmt_df.pivot_table(index=['Student__c', 'Assessment Type'],
                                columns='Date_Administered__c',
                                values='X0_to_300_Scaled_Score__c')
assmt_df = assmt_df.reset_index()
# determine baseline and calculate growth (use spring where both spring and fall scores exist):
# bfill across the two columns copies the '2017-09-20' (fall) score into '2017-07-01' (spring)
# only where spring is missing, so the first column becomes "spring if present, else fall".
assmt_df['Baseline'] = assmt_df[['2017-07-01', '2017-09-20']].bfill(axis=1).iloc[:, 0]
assmt_df['Growth'] = assmt_df['2018-01-09'] - assmt_df['Baseline']
# remove records where no baseline present
assmt_df = assmt_df.loc[assmt_df['Baseline'].notnull()]

In [None]:
# Attach each student's current-year grade level, then derive the grade and season
# that apply to the baseline score.
# assumption: all students advanced 1 grade between spring and fall
stu_sec_df = get_cysh_df('Student_Section__c', ['Student__c', 'Student_Grade__c'])
# a student can appear in several sections; keep the first row per student
stu_sec_df = stu_sec_df.drop_duplicates('Student__c')
assmt_df = assmt_df.merge(stu_sec_df, how='left', on='Student__c')

current_grade = assmt_df['Student_Grade__c'].astype(float)
# no '2017-07-01' (spring) score means the baseline came from the fall column
fall_baseline = assmt_df['2017-07-01'].isnull()
# fall baseline -> current grade; spring baseline -> one grade lower
assmt_df['Grade at Assessment'] = current_grade.where(fall_baseline, current_grade - 1)
assmt_df['Season at Assessment'] = fall_baseline.map({True: 'Fall', False: 'Spring'})

In [None]:
# Investigate how prior year spring scores compare to current year fall.
# If a student has identical spring and fall scores it is likely an entry error,
#     and we need to determine which date the score actually occurred on.
StoF_assmt_df = assmt_df.dropna(subset=['2017-07-01', '2017-09-20']).copy()
# spring-to-fall change: fall score minus spring score
StoF_assmt_df['StoF'] = StoF_assmt_df['2017-09-20'] - StoF_assmt_df['2017-07-01']
StoF_assmt_df['StoF'].hist(bins=range(-30, 30))
# students whose score did not change at all between the two dates
StoF_0diff_df = StoF_assmt_df.loc[StoF_assmt_df['StoF'] == 0]

# attach one school reference per student (first Student_Section__c row per student)
StoF_0diff_df = StoF_0diff_df.merge(
    get_cysh_df('Student_Section__c', ['Student__c', 'School_Reference_Id__c'])
    .drop_duplicates('Student__c'),
    how='left',
    on='Student__c',
)

In [None]:
# # Merge in assessment id's so they can be deleted
# assmt_df = get_assmt_df()
# # filter for valid assessments
# assmt_df = assmt_df.loc[assmt_df['Assessment Type'].str.contains('NWEA')
#                         & (assmt_df['X0_to_300_Scaled_Score__c']>=100)]
# # pivot: one student per row, assessments as columns
# assmt_df['Key'] = assmt_df['Student__c']+ '_' + assmt_df['Assessment Type'] + '_' + assmt_df['Date_Administered__c']

# StoF_0diff_df['Key'] = StoF_0diff_df['Student__c']+ '_' + StoF_0diff_df['Assessment Type'] + '_2017-09-20'

# fuller_delete = assmt_df.loc[assmt_df['Key'].isin(StoF_0diff_df.loc[StoF_0diff_df['School_Reference_Id__c'].str.contains('Fuller'), 'Key'])]

# # Delete assessments
# for index, row in fuller_delete.iterrows():
#     try:
#         cysh.Assesment__c.delete(row['Assesment__c'])
#     except Exception as e:
#         print(e)
#         pass

In [None]:
def _underscore_join(*parts):
    """Concatenate the given Series element-wise, separating them with underscores."""
    joined = parts[0]
    for piece in parts[1:]:
        joined = joined + '_' + piece
    return joined


# load in targets reference (produced in 'Generate Student Targets.ipynb')
targets_df = pd.read_csv('assessment_targets.csv')
# relabel subjects so they match the 'Assessment Type' values on assmt_df
targets_df['Subject'] = targets_df['Subject'].replace(
    {'Reading': 'NWEA - ELA', 'Mathematics': 'NWEA - MATH'})
# Build a Season_Subject_Grade_Score lookup key. The '.0' suffixes make the target
# grade/score text line up with the float-formatted values on assmt_df.
# NOTE(review): this only lines up if Grade and Score are read from the CSV as integers — confirm.
targets_df['Key'] = _underscore_join(
    targets_df['Season'],
    targets_df['Subject'],
    targets_df['Grade'].astype(str) + '.0',
    targets_df['Score'].astype(str) + '.0',
)

# same key layout on the assessment side, using the baseline's season, grade and score
assmt_df['Key'] = _underscore_join(
    assmt_df['Season at Assessment'],
    assmt_df['Assessment Type'],
    assmt_df['Grade at Assessment'].astype(str),
    assmt_df['Baseline'].astype(str),
)
target_columns = ['Key', 'Target Growth', 'Target Growth SD']
assmt_df = assmt_df.merge(targets_df[target_columns], how='left', on='Key')
# positive values mean the student grew more than the target
assmt_df['Growth_v_Target'] = assmt_df['Growth'] - assmt_df['Target Growth']

In [None]:
# # Investigate FL students who are outside the range of students eligible for FL
# nwea_avgs = pd.read_excel('NWEA_2015_averages.xlsx')
# nwea_avgs.loc[:, 'Subject'] = nwea_avgs['Subject'].map({'Reading':'NWEA - ELA', 'Math':'NWEA - MATH'})

# for subject in ['NWEA - ELA', 'NWEA - MATH']:
#     nwea_avgs_subj = nwea_avgs.loc[nwea_avgs['Subject']== subject]
    
#     for index, row in assmt_df.loc[assmt_df['Assessment Type']==subject].iterrows():
#         if row['Season at Assessment'] == 'Spring':
#             norm_col = 6
#         elif row['Season at Assessment'] == 'Fall':
#             norm_col = 2
#         for x in range(len(nwea_avgs_subj)):
#             if row['Baseline'] < nwea_avgs_subj.iloc[x,norm_col]:
#                 if nwea_avgs_subj.iloc[x,1] == 'K':
#                     assmt_df.loc[index, 'Grade Performance'] = -1
#                 elif nwea_avgs_subj.iloc[x,1] == 1:
#                     assmt_df.loc[index, 'Grade Performance'] = 0
#                 else:
#                     assmt_df.loc[index, 'Grade Performance'] = nwea_avgs_subj.iloc[x-1,1]
#                 break

# assmt_df['Grade_Performance_Diff'] = pd.to_numeric(assmt_df['Grade Performance'], errors='coerce') - pd.to_numeric(assmt_df['Grade at Assessment'], errors='coerce')

# # load salesforce tables
# student_section_df = get_cysh_df('Student_Section__c', ['Id', 'Name', 'Student_Program__c', 'Program__c', 'Section__c',
#                                                         'Active__c', 'Enrollment_End_Date__c', 'Student__c', 
#                                                         'Student_Name__c', 'Dosage_to_Date__c', 'School_Reference_Id__c'],
#                                  rename_id=True)
# section_df = get_cysh_df('Section__c', ['Id', 'Intervention_Primary_Staff__c'], rename_id=True)
# staff_df = get_cysh_df('Staff__c', ['Id', 'Name'], rename_id=True, rename_name=True)
# program_df = get_cysh_df('Program__c', ['Id', 'Name'], rename_id=True, rename_name=True)

# # merge salesforce tables
# df = student_section_df.merge(section_df, how='left', on='Section__c')
# df = df.merge(staff_df, how='left', left_on='Intervention_Primary_Staff__c', right_on='Staff__c')
# df = df.merge(program_df, how='left', on='Program__c')

# # group by Student_Program__c, sum ToT 
# df = df.join(df.groupby('Student_Program__c')['Dosage_to_Date__c'].sum(), how='left', on='Student_Program__c', rsuffix='_r')
# df.drop_duplicates('Student_Program__c', inplace=True)

# # filter out inactive students
# df = df[(df['Active__c']==True)
#         & df['Enrollment_End_Date__c'].isnull()]

# df.loc[:, 'Program__c_Name'] = df['Program__c_Name'].map({'Tutoring: Literacy':'NWEA - ELA', 'Tutoring: Math':'NWEA - MATH'})

# df['Key'] = df['Student__c'] + df['Program__c_Name']
# assmt_df['Key'] = assmt_df['Student__c'] + assmt_df['Assessment Type']

# df = df[['School_Reference_Id__c', 'Program__c_Name', 'Key']].merge(assmt_df, on='Key')

# df.loc[df['Grade_Performance_Diff']<-4, '4 Grades/More Behind'] = 1
# df.loc[(df['Grade_Performance_Diff']>=-4) & (df['Grade_Performance_Diff']<0), 'Within Bounds'] = 1
# df.loc[df['Grade_Performance_Diff']>=0, 'Current Grade/More Ahead'] = 1

# df.groupby(['School_Reference_Id__c'])['4 Grades/More Behind', 'Within Bounds', 'Current Grade/More Ahead'].count()

# df['Grade_Performance_Diff'].hist()

# df.to_csv('assessment_grade_performance.csv')

In [None]:
# keep only rows where growth could be compared against a target
assmt_df = assmt_df.loc[assmt_df['Growth_v_Target'].notnull()]
# encode target attainment: 1.0 when growth met or exceeded the target, 0.0 when it fell short
assmt_df = assmt_df.assign(**{'Hit_Target?': (assmt_df['Growth_v_Target'] >= 0).astype(float)})

In [None]:
# write the growth-vs-target results to CSV in the working directory (row index omitted)
export_columns = ['Student__c', 'Assessment Type', 'Growth_v_Target', 'Hit_Target?']
assmt_df[export_columns].to_csv('assessment_growth.csv', index=False)