## NYC Area School Research

### Goals:
- Gather all information from nyc, westchester, NJ (and maybe LI & CT) Schools
- Draw equivalencies between results/scores
- Map results, by grade

In [24]:
import os
import pyodbc
import requests
import subprocess
import zipfile

import pandas as pd


In [3]:
# convenience function to check for local version of file
def download_if_not_local(url, local_dir_for_data="."):
    filename = url.split("/")[-1]
    local_data_files = os.listdir(local_dir_for_data)
    try:
        os.listdir(local_dir_for_data).index(filename)
        print(filename + " present locally")
    except ValueError:
        print("Downloading " + filename)
        response = requests.get(url)
        with open(local_dir_for_data + "/" + filename, "wb") as f:
            f.write(response.content)
        print(filename + " downloaded")

In [4]:
# NJ Data
download_if_not_local(
    url="https://rc.doe.state.nj.us/ReportsDatabase/PerformanceReports.xlsx",
    local_dir_for_data="../data"
)

PerformanceReports.xlsx present locally


In [5]:
# NYS 3-8 Assessment
download_if_not_local(
    url="https://data.nysed.gov/files/assessment/16-17/3-8-2016-17.zip",
    local_dir_for_data="../data"
)

3-8-2016-17.zip present locally


In [6]:
# NYS Report Card database
download_if_not_local(
    url="https://data.nysed.gov/files/reportcards/16-17/SRC2017.zip",
    local_dir_for_data="../data"
)

SRC2017.zip present locally


In [7]:
# NYS Grad Rate Database
download_if_not_local(
    url="https://data.nysed.gov/files/gradrate/16-17/gradrate_2017.zip",
    local_dir_for_data="../data"
)

gradrate_2017.zip present locally


In [8]:
# unzip files (key), select file name (value[0])
# write to file (value[1])
zipped_files = {
    'gradrate_2017.zip' : ['GRAD_RATE_AND_OUTCOMES_2017.csv', 'nys_grad_rate.csv'],
    'SRC2017.zip' : ['SRC2017GroupIIRelease.mdb', 'nys_school_report_card.mdb'],
    '3-8-2016-17.zip' : ['3-8_ELA_AND_MATH_RESEARCHER_FILE_2017.csv', 'nys_grades_3_to_8.csv']
}


In [9]:
# unzip & rename
for z in zipped_files.keys():
    name_list = zipped_files.get(z)
    try:
        os.listdir('../data').index(name_list[1])
        print(name_list[1] + " present locally")
    except ValueError:
        target = zipfile.ZipFile('../data/' + z)
        with open('../data/' + name_list[1], 'wb') as output_file:
            output_file.write(target.read(name_list[0]))
        print(print(name_list[1] + " extracted"))

nys_grad_rate.csv present locally
nys_school_report_card.mdb present locally
nys_grades_3_to_8.csv present locally


In [10]:
# prep data into csvs for manipulation
# 1 Already prepped data
os.system('cp ../data/nys_grad_rate.csv ../prepared_data/nys_grad_rate.csv')
os.system('cp ../data/nys_grades_3_to_8.csv ../prepared_data/nys_grades_3_to_8.csv')

0

In [12]:
# 2 pull apart xlsx
xlsx_data = pd.read_excel('../data/PerformanceReports.xlsx', None)

In [18]:
# and write to individual csv files
name_prefix = '../prepared_data/nj_'
for k in xlsx_data.keys():
    xlsx_data[k].to_csv(name_prefix + k + '.csv', index = False)

In [40]:
# 3 pull apart mdb
mdb_file = '../data/nys_school_report_card.mdb'
table_list = subprocess.check_output(
    ['mdb-tables', '-d,', mdb_file]
).decode('utf-8').split(',')


In [49]:
name_prefix = '../prepared_data/nys_reportcard_'
for t in table_list:
    proc = subprocess.Popen(
        ['mdb-export', mdb_file, t],
        stdout=subprocess.PIPE
    )
    _ = pd.read_csv(proc.stdout)
    _.to_csv(name_prefix + t.replace('/','').replace(' ', '_') + '.csv', index = False)
    print('Wrote table ' + t)

  interactivity=interactivity, compiler=compiler, result=result)


Wrote table Accountability
Wrote table Attendance and Suspensions
Wrote table Average Class Size
Wrote table BEDS Day Enrollment
Wrote table BEDS_Day_Enrollment_By_Subgroup
Wrote table BOCES and N/RC
Wrote table ELA3 Subgroup Results
Wrote table ELA4 Subgroup Results
Wrote table ELA5 Subgroup Results
Wrote table ELA6 Subgroup Results
Wrote table ELA7 Subgroup Results
Wrote table ELA8 Subgroup Results
Wrote table High School Noncompleters
Wrote table High School Post-Graduation Plans of Completers
Wrote table Institution Grouping
Wrote table Math3 Subgroup Results
Wrote table Math4 Subgroup Results
Wrote table Math5 Subgroup Results
Wrote table Math6 Subgroup Results
Wrote table Math8 Subgroup Results
Wrote table New York State Alternate Assessment (NYSAA) Annual Results
Wrote table NYSESLAT Annual Results
Wrote table Recently Arrived ELL Students NOT Tested on ELA NYSTP
Wrote table Regents Common Core Examination Annual Results
Wrote table Regents Competency Test (RCT) Annual Results
W

EmptyDataError: No columns to parse from file