In [15]:
import pandas as pd
import numpy as np
import util

proficiency_path = './data/tn/proficiency/'

years = [
    2012
    , 2013
    , 2014
    , 2015
    , 2016
    , 2017
    , 2018
    , 2019
]

proficiency_files = [
    'tn_proficiency_2012_cleaned.csv'
    , 'tn_proficiency_2013_cleaned.csv'
    , 'tn_proficiency_2014_cleaned.csv'
    , 'tn_proficiency_2015_cleaned.csv'
    , 'tn_proficiency_2016_cleaned.csv'
    , 'tn_proficiency_2017_cleaned.csv'
    , 'tn_proficiency_2018_cleaned.csv'
    , 'tn_proficiency_2019_cleaned.csv'
]

df = pd.read_csv(proficiency_path + proficiency_files[0])
df['year'] = df.apply(lambda x: years[0], axis=1)

for i in range(1, len(proficiency_files)):
    df2 = pd.read_csv(proficiency_path + proficiency_files[i])
    df2['year'] = df2.apply(lambda x: years[i], axis=1)
    df = df.append(df2, ignore_index = True, sort=True)

In [None]:
# get school and district names for 2017 data
df_2017 = df[df['year'] = 2017]
df_all = df[df['year'] != 2017]

df_ids = df_all[['district_id', 'district', 'school_id', 'school']].drop_duplicates()
df_2017 = pd.merge(df_2017, df_ids, on=['district_id', 'school_id'])
print(df_2017.head())
df = df_all.append(df_2017, ignore_index = True)
del df_2017
del df_all
del df_ids

In [16]:
# rename aggregate group names
df['temp'] = [x if x != 'All' else 'All Groups' for x in df['group_state']]
del df['group_state']
df = df.rename(columns={"temp": "group_state"})

df['temp'] = [x if x != 'All Students' else 'All Groups' for x in df['group_state']]
del df['group_state']
df = df.rename(columns={"temp": "group_state"})

In [17]:
# create subject map
subject_map = [['Algebra I', 'Math']
               , ['Algebra II', 'Math']
               , ['Biology I', 'Science']
               , ['English I', 'ELA']
               , ['English II', 'ELA']
               , ['English III', 'ELA']
               , ['Math', 'Math']
               , ['Reading/Language', 'ELA']
               , ['Science', 'Science']
               , ['US History', 'US History']
               , ['RLA', 'ELA']
               , ['Social Studies', 'Social Studies']
               , ['Chemistry', 'Science']
               , ['Integrated Math I', 'Math']
               , ['Integrated Math II', 'Math']
               , ['Geometry', 'Math']
               , ['Integrated Math III', 'Math']
               , ['ELA', 'ELA']
              ]
df_subjects = pd.DataFrame(subject_map, columns = ['subject', 'subject_cleaned']) 

In [18]:
# join subject map to assessment data
df = pd.merge(df, df_subjects, on='subject')

In [19]:
del df['subject']
df = df.rename(columns={"subject_cleaned": "subject"})

In [20]:
id_vars = ['year', 'district_id', 'district', 'school_id', 'school', 'grade', 'group_state', 'subject', 'num_tested']
# reshape scores
df = pd.melt(df, id_vars=id_vars
        , value_vars=[
            'Percent Advanced'
            , 'Percent Approaching'
            , 'Percent Basic'
            , 'Percent Below'
            , 'Percent Below Basic'
            , 'Percent Below Basic or Basic'
            , 'Percent Mastered'
            , 'Percent On Track'
            , 'Percent On Track or Mastered'
            , 'Percent Proficient'
            , 'Percent Proficient or Advanced'],
        var_name='performance_level'
        , value_name='pct_at_level')
df = df[df['pct_at_level'] != '*']
df = df[df['pct_at_level'] != '**']

In [24]:
# clean performance level names
df['performance_level'] = df['performance_level'].str.replace('Percent ', '', regex=False)

0                         Advanced
2883641                Approaching
5767282                      Basic
8650923                      Below
11534564               Below Basic
14418205      Below Basic or Basic
17301846                  Mastered
20185487                  On Track
23069128      On Track or Mastered
25952769                Proficient
28836410    Proficient or Advanced
Name: performance_level, dtype: object


In [28]:
# create proficient_tf
def proficientTF (row):
    if row['performance_level'] == 'Advanced' \
        or row['performance_level'] == 'Mastered' \
        or row['performance_level'] == 'On Track' \
        or row['performance_level'] == 'Proficient' \
        or row['performance_level'] == 'Proficient or Advanced' \
        or row['performance_level'] == 'On Track or Mastered':
        return 1
    return 0
df['proficient_tf'] = df.apply(lambda row: proficientTF(row), axis=1)

In [29]:
# change pct_at_level to decimal
df['temp'] = float(df['pct_at_level']) / 100
del df['pct_at_level']
df = df.rename(columns={'temp': 'pct_at_level'})

TypeError: unsupported operand type(s) for /: 'str' and 'int'

In [None]:
# save as cleaned proficiency
df.to_csv('./data/finalized/tn_proficiency.csv', index=False)