# Transform

Clean up the raw data from the state.

In [46]:
import os
import warnings
import pandas as pd

In [47]:
warnings.simplefilter("ignore")

In [48]:
# Source files are read from the "input" folder under the current working directory.
input_dir = os.path.join(
    os.getcwd(),
    "input",
)

In [49]:
# Results are written to the "output" folder under the current working directory.
output_dir = os.path.join(
    os.getcwd(),
    "output",
)

In [50]:
# Create the output directory if it is missing. makedirs(exist_ok=True) replaces
# the exists-or-mkdir expression: it has no check-then-create race and it also
# builds any missing parent directories.
os.makedirs(output_dir, exist_ok=True)
# Show whether the directory is now in place.
os.path.isdir(output_dir)

True

Read in the school roster

In [51]:
# Load the tab-delimited roster file, reading CDS codes as strings rather than numbers.
roster_df = pd.read_csv(
    os.path.join(input_dir, "pubschls.txt"),
    sep="\t",
    encoding="latin-1",
    dtype={"CDSCode": str},
)

In [52]:
# Number of rows in the full roster.
roster_df.shape[0]

17816

Drop non-schools: rows with no value in the `School` column

In [53]:
# Keep only the rows that have a value in the School column.
schools_df = roster_df[roster_df["School"].notna()]

In [54]:
# Number of rows left after removing entries without a school name.
schools_df.shape[0]

16442

Filter it down to active schools

In [55]:
# Keep only the rows whose StatusType is exactly "Active".
active_df = schools_df.loc[schools_df["StatusType"].eq("Active")]

In [56]:
# Number of active schools.
active_df.shape[0]

10611

In [57]:
# Keep only the columns that go into the published file.
# The explicit .copy() makes trimmed_df an independent frame, so columns can be
# added to it afterwards without raising SettingWithCopyWarning or leaving any
# doubt about whether active_df is affected.
trimmed_df = active_df[[
    'CDSCode',
    'School',
    'District',
    'StreetAbr',
    'City',
    'County',
    'Zip',
    'Charter',
    'FundingType',
    'Latitude',
    'Longitude',
    'SOCType',
    'EILCode',
    'GSserved',
]].copy()

Split the grade span into the lowest and highest grades served

In [58]:
# Lowest grade: the first hyphen-separated piece of the grade span.
trimmed_df["low_grade_served"] = trimmed_df["GSserved"].str.split("-").str[0]

In [59]:
# Highest grade: the second hyphen-separated piece of the grade span
# (missing when the span contains no hyphen).
trimmed_df["high_grade_served"] = trimmed_df["GSserved"].str.split("-").str[1]

Rename the columns to clean, snake_case header names

In [62]:
# Map the state's column headers to snake_case names. Only the labels change;
# the values in each column are left as they are.
cleaned_df = trimmed_df.rename(
    {
        "CDSCode": "cds_code",
        "School": "name",
        "District": "district",
        "StreetAbr": "street",
        "City": "city",
        "County": "county",
        "Zip": "zipcode",
        "Charter": "is_charter",
        "FundingType": "funding_type",
        "Latitude": "latitude",
        "Longitude": "longitude",
        "SOCType": "ownership",
        "EILCode": "instructional_level",
        "GSserved": "grades_served",
    },
    axis="columns",
)

In [63]:
# Preview the first five cleaned rows.
cleaned_df.head(n=5)

Unnamed: 0,cds_code,name,district,street,city,county,zipcode,is_charter,funding_type,latitude,longitude,ownership,instructional_level,grades_served,low_grade_served,high_grade_served
2,1100170112607,Envision Academy for Arts & Technology,Alameda County Office of Education,1515 Webster St.,Oakland,Alameda,94612-3355,Y,Directly funded,37.80452,-122.26815,High Schools (Public),HS,9-12,9,12
4,1100170123968,Community School for Creative Education,Alameda County Office of Education,2111 International Blvd.,Oakland,Alameda,94606-4903,Y,Directly funded,37.784648,-122.23863,Elementary Schools (Public),ELEM,K-8,K,8
5,1100170124172,Yu Ming Charter,Alameda County Office of Education,1086 Alcatraz Ave.,Oakland,Alameda,94608-1265,Y,Directly funded,37.847375,-122.28356,Elementary Schools (Public),ELEM,K-6,K,6
6,1100170125567,Urban Montessori Charter,Alameda County Office of Education,5328 Brann St.,Oakland,Alameda,94619-3312,Y,Directly funded,37.778352,-122.1895,Elementary Schools (Public),ELEM,K-6,K,6
8,1100170130401,Alameda County Juvenile Hall/Court,Alameda County Office of Education,2500 Fairmont Ave.,San Leandro,Alameda,94578-1005,N,,37.712878,-122.11173,Juvenile Court Schools,HS,8-12,8,12


In [64]:
# Write the cleaned roster to the output directory as UTF-8, leaving out the row index.
cleaned_df.to_csv(
    os.path.join(output_dir, "public_schools.csv"),
    index=False,
    encoding="utf-8",
)