<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Load-data" data-toc-modified-id="Load-data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load data</a></span></li><li><span><a href="#Spatial-join-of-schools-and-blocks" data-toc-modified-id="Spatial-join-of-schools-and-blocks-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Spatial join of schools and blocks</a></span></li><li><span><a href="#Fix-treatment-status-of-schools" data-toc-modified-id="Fix-treatment-status-of-schools-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Fix treatment status of schools</a></span></li><li><span><a href="#Save" data-toc-modified-id="Save-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Save</a></span></li></ul></div>

**Description**: Reads in school data already merged with FOIA data,
performs a spatial join with the locations of census blocks,
fixes the treatment status of schools, and saves the resulting dataset.

---

In [1]:
import pickle
from pathlib import Path

import geopandas as gpd
import pandas as pd
from tqdm import tqdm_notebook

# Load data

In [2]:
# Root of the data directory. This is a relative path, so it is resolved
# against the current working directory when files are opened.
data_path = Path('../../data')

In [3]:
def _load_pickle(relative_path):
    """Unpickle and return the object stored at ``data_path / relative_path``."""
    with (data_path / relative_path).open('rb') as handle:
        return pickle.load(handle)


# NOTE: unpickling can execute arbitrary code; only load trusted files.
# School records (already merged with FOIA data) and census blocks.
schools = _load_pickle('interim/schools_foia.pkl')
blocks = _load_pickle('processed/blocks.pkl')

# Spatial join of schools and blocks

Iterate through the school years and, within each year, match every school to the census block it intersects (left spatial join).

In [4]:
# Rename the block-level treatment flag so it stays distinguishable from the
# `treated` column that the joined frame already carries (it is dropped in a
# later cell in favour of this block-level flag).
blocks = blocks.rename({'treated': 'treated_block'}, axis='columns')
schools_blocks = []

# Join one school year at a time so that schools are only matched against
# the block records of the same school year.
for sy in tqdm_notebook(
        blocks['school_year'].unique(),
        total=blocks['school_year'].nunique()):
    schools_temp = schools[schools['school_year'] == sy]
    # Keep only the block columns needed downstream.
    blocks_temp = blocks[blocks['school_year'] == sy][[
        'tract_bloc', 'geometry', 'treated_block'
    ]]
    schools_blocks_temp = gpd.sjoin(
        # Deep copies should not be needed anymore in
        # future version of geopandas (current 0.3.0)
        schools_temp.copy(),
        blocks_temp.copy(),
        how='left',
        op='intersects').reset_index(drop=True).drop(
            'index_right', axis='columns')
    schools_blocks.append(schools_blocks_temp)
schools_blocks = pd.concat(schools_blocks, ignore_index=True)

# Sanity checks on the join result.
# Every school row must have been matched to a block (left join -> no NaN).
assert not schools_blocks['tract_bloc'].isnull().any(), (
    'some schools were not matched to any census block')
# `.all()` must be *called*: asserting the bare bound method `.all` is always
# truthy, so the check would silently never run.
# NOTE(review): 5 is presumably the maximum number of rows (school years)
# expected per school -- TODO confirm against the input data.
assert (schools_blocks.groupby('school_id').size() <= 5).all(), (
    'at least one school_id appears in more than 5 rows')
# The join must neither drop nor duplicate school rows.
assert schools_blocks.shape[0] == schools.shape[0], (
    'spatial join changed the number of school rows')
# The inputs are no longer needed once the joined frame has been validated.
del schools
del blocks

HBox(children=(IntProgress(value=0, max=11), HTML(value='')))




# Fix treatment status of schools
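Assigns a school a treatment status of 1 (treated) if the school is
located in a census block that intersects with a Safe Passage route.
The `treated` column carried over from the input school data is dropped and
replaced by the block-level indicator (`treated_block`) obtained from the spatial join.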

In [5]:
# Discard the existing `treated` column and promote the block-level
# indicator from the spatial join to be the new `treated` column.
schools_blocks = (
    schools_blocks
    .drop('treated', axis='columns')
    .rename({'treated_block': 'treated'}, axis='columns')
)


# Save

In [6]:
# Order rows by school name, then school year, before writing to disk.
schools_blocks = schools_blocks.sort_values(by=['school_name', 'school_year'])

# Persist the joined dataset as a pickle under the processed-data folder.
output_path = data_path / 'processed/schools_blocks.pkl'
with output_path.open('wb') as out_file:
    pickle.dump(schools_blocks, out_file)