# Merge local and federal list

Merges the federal (NCES) list of schools with the California Department of Education (CDE) list to identify schools that moved.

In [36]:
import os
import pandas as pd

In [37]:
# Restore every variable previously saved with %store. output_dir is used below
# but never assigned in this notebook, so it presumably comes from here — confirm.
%store -r

Read in the transformed federal lists (four files, `df_1314.csv` through `df_1617.csv`).

In [38]:
def open_df(name):
    """Read a CSV file from the shared output directory into a DataFrame.

    Parameters
    ----------
    name : str
        File name, relative to ``output_dir``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file, using ``pd.read_csv`` defaults.
    """
    # output_dir is looked up at call time, so it only has to exist by the
    # time the first file is read.
    return pd.read_csv(os.path.join(output_dir, name))

In [39]:
# Transformed federal list from df_1314.csv (presumably the 2013-14 school year — confirm).
df1314 = open_df('df_1314.csv')

In [40]:
# Transformed federal list from df_1415.csv (presumably the 2014-15 school year — confirm).
df1415 = open_df('df_1415.csv')

In [41]:
# Transformed federal list from df_1516.csv (presumably the 2015-16 school year — confirm).
df1516 = open_df('df_1516.csv')

In [42]:
# Transformed federal list from df_1617.csv (presumably the 2016-17 school year — confirm).
df1617 = open_df('df_1617.csv')

Outer merge all the files

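This merge returns a list of all unique combinations of school code, street and city found in any of the four files. A school that appears more than once was listed with a different street or city in at least one file — either a real move or simply a different spelling of the same address, which is why the duplicates are exported for manual review at the end.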

In [43]:
# Shared merge options: match rows on all three columns and keep rows that
# appear on either side.
join_kwargs = {
    'on': ['school_code', 'street', 'city'],
    'how': 'outer',
}

In [44]:
# Start from the df1314 frame and outer-merge each later frame into the
# running result, in the same left-to-right order.
nces_total = df1314
for later_list in (df1415, df1516, df1617):
    nces_total = nces_total.merge(later_list, **join_kwargs)

In [45]:
# Number of rows in the merged federal list.
nces_total.shape[0]

2554

Read in LA high schools from the state

In [46]:
# State (CDE) list of LA high schools, read from the shared output directory.
la_high_schools = open_df('cleaned-la-high-schools.csv')

Filter down the NCES list to only those that are also in our CDE list.

In [47]:
# Keep only the federal rows whose school code also occurs in the CDE list.
nces_la = nces_total.loc[
    lambda df: df['school_code'].isin(la_high_schools['school_code'])
]

Merge the two lists to expand our pool of unique addresses

In [48]:
# Outer-merge the filtered federal rows with the CDE rows on the shared keys.
nces_and_local = pd.merge(nces_la, la_high_schools, **join_kwargs)

In [49]:
# Distinct school codes in the merged table.
nces_and_local['school_code'].nunique()

356

In [50]:
# Distinct school codes in the CDE list, for comparison with the merged table.
la_high_schools['school_code'].nunique()

356

Count the address rows that belong to schools with more than one unique address.

In [51]:
# keep=False flags every row of a repeated school code, not just the repeats,
# so all of a school's addresses are retained.
dupes = nces_and_local.loc[
    lambda df: df['school_code'].duplicated(keep=False)
]

In [52]:
# count() tallies non-null rows per column, so this is the number of address
# rows among schools listed more than once, not the number of schools. Row
# order cannot change a count, so the frame is not sorted first.
dupes.count()

school_code    149
street         149
city           149
dtype: int64

Output the duplicates for manual review.

In [53]:
# Sorting by school code places each school's addresses on adjacent rows.
# NOTE(review): reset_index() without drop=True writes the pre-sort row labels
# out as an extra "index" column — confirm that column is wanted.
(
    dupes
    .sort_values('school_code')
    .reset_index()
    .to_csv(os.path.join(output_dir, 'address-change-check.csv'), index=False)
)