In [44]:
# Dependencies
import requests
import json
from pprint import pprint
import pandas as pd
import numpy as np
import re
import hvplot.pandas
from pathlib import Path
from single_zip_function import get_schools_in_zip
from outliers_utility import single_year_outliers

# Import the API key
from config import geoapify_key

In [45]:
# Year whose outliers drive the rest of the notebook; it is kept as a string
# because that is the form handed to single_year_outliers below.
my_year = "2022"

# Fetch that year's outlier rows through the project helper.
outliers_df = single_year_outliers(my_year)

In [46]:
# Build the list of ZIP codes to look up, as strings.
# Rows with a missing zip_code are dropped first: int() fails on missing values,
# which would otherwise abort the whole cell. Each value is converted only once.
zip_strings = (str(int(z)) for z in outliers_df["zip_code"].dropna().tolist())

# Keep only values that are exactly five characters long after conversion.
# NOTE(review): int() strips leading zeros, so a ZIP code such as 01001 becomes
# "1001" and is excluded here. Zero-padding instead would also require padding the
# keys of any table these values are later joined against.
zip_list = [z for z in zip_strings if len(z) == 5]

In [51]:
results = []

# Raise or lower the slice bound to control how many zip codes are looked up.
for zipcode in zip_list[:200]:
    # The helper returns two frames; their row counts are recorded below.
    area_df, zip_df = get_schools_in_zip(zipcode)

    if len(area_df) > 0:
        # Use the coordinates stored on the first row of the area result.
        first_row = area_df.iloc[0]
        lat, lon = first_row['lat'], first_row['lon']
    else:
        # The area result is empty, so there are no coordinates to record.
        lat = lon = None

    record = {
        'Zip Code': zipcode,
        'Lat': lat,
        'Lon': lon,
        'Schools in Area': len(area_df),
        'Schools in Zip Code': len(zip_df),
    }
    results.append(record)

In [53]:
# Assemble the per-zip-code records into a table with a fixed column order.
column_order = ['Lat', 'Lon', 'Schools in Area', 'Schools in Zip Code', 'Zip Code']
results_df = pd.DataFrame(results, columns=column_order)
results_df

Unnamed: 0,Lat,Lon,Schools in Area,Schools in Zip Code,Zip Code
0,42.232265,-85.106929,20,5,49033
1,37.079668,-77.581658,19,0,23840
2,40.564264,-87.241869,17,2,47971
3,40.943255,-87.451713,16,2,47963
4,38.982488,-85.738818,18,0,47273
...,...,...,...,...,...
195,32.989616,-79.643685,15,2,29429
196,41.663111,-80.858144,16,0,44085
197,35.701723,-81.912847,20,3,28761
198,35.490937,-82.992081,20,0,28751


In [70]:
# Attach the total-market (HPI) data to the per-zip-code lookup results.
total_market_csv = Path('../data/cleaned data/total_market_data_merged.csv')
total_market = pd.read_csv(total_market_csv).rename(columns={'zip_code': 'Zip Code'})
# Cast the join key to text so it can be matched against the string zip codes in results_df.
total_market['Zip Code'] = total_market['Zip Code'].apply(str)

# Market-data columns that are not needed for the maps.
unneeded_columns = [
    'RECESSION_FLAG', 'avg_rate_for_year', 'Median_hh_income', 'Year_Avg_Unempl',
    'state', 'bucketed_year', 'State_Num', 'Percent Change in Resident Population',
]

# A left join keeps every looked-up zip code, including those with no market rows.
zip_mapping = (
    results_df
    .merge(total_market, on='Zip Code', how="left")
    .drop(columns=unneeded_columns)
)

# One frame per year of interest.
zipmap2009 = zip_mapping.loc[zip_mapping['year'] == 2009]
zipmap2022 = zip_mapping.loc[zip_mapping['year'] == 2022]
zipmap2022

Unnamed: 0.1,Lat,Lon,Schools in Area,Schools in Zip Code,Zip Code,Unnamed: 0,Five-Digit ZIP Code,year,Annual Change (%),HPI,HPI from 2012,HPI with 2012 base,normalized_sale_price,Resident Population
30,42.232265,-85.106929,20,5,49033,321205,49033.0,2022,-6.36,257.41,162.03,1.588656,250445.0,10077331.0
50,37.079668,-77.581658,19,0,23840,420358,23840.0,2022,41.82,250.38,133.00,1.882556,235644.0,8631393.0
75,40.564264,-87.241869,17,2,47971,378858,47971.0,2022,-2.77,201.05,106.54,1.887085,135643.0,6785528.0
97,40.943255,-87.451713,16,2,47963,401621,47963.0,2022,36.44,230.53,133.25,1.730056,154467.0,6785528.0
122,38.982488,-85.738818,18,0,47273,378483,47273.0,2022,46.97,285.01,118.40,2.407179,199194.0,6785528.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5362,32.989616,-79.643685,15,2,29429,372483,29429.0,2022,33.73,387.79,174.35,2.224204,650542.0,5118425.0
5393,41.663111,-80.858144,16,0,44085,317051,44085.0,2022,-0.50,234.94,159.57,1.472332,220867.0,11799448.0
5415,35.701723,-81.912847,20,3,28761,396891,28761.0,2022,40.01,280.90,137.12,2.048571,409877.0,10439388.0
5441,35.490937,-82.992081,20,0,28751,365339,28751.0,2022,31.49,314.57,155.69,2.020489,357284.0,10439388.0


In [82]:
# Map the 2022 rows: one point per zip code, placed at its Lon/Lat on OSM tiles.
# Points are coloured by 'Annual Change (%)', so the colorbar label and the title
# name that column (they previously said "HPI Value(s)", which is not what is plotted).
# Marker size is driven by 'Resident Population'.
# NOTE(review): in the table displayed above, that value repeats across zip codes
# that appear to share a state, so it looks like a state-level figure; confirm.
schoolsmap2022 = zipmap2022.hvplot.points(
    'Lon', 'Lat',
    geo=True,
    tiles='OSM',
    frame_width=800,
    frame_height=600,
    hover_cols=['Schools in Zip Code', 'Zip Code', 'Annual Change (%)'],
    size='Resident Population',
    scale=0.0015,
    color='Annual Change (%)',
    cnorm='linear',
    clabel='Annual Change (%)',
    title="2022 HPI Annual Change (%) and Number of Schools Mapped by Zip Code",
)
schoolsmap2022

In [79]:
# Map the 2009 rows: one point per zip code, placed at its Lon/Lat on OSM tiles.
# The original passed colorbar='HPI'; `colorbar` is an on/off switch, not a column
# selector, so no colour dimension was set. `color='HPI'` maps the HPI column to
# point colour, which is what the colorbar label and title describe.
# Marker size is driven by 'Resident Population'.
schoolmap2009 = zipmap2009.hvplot.points(
    'Lon', 'Lat',
    geo=True,
    tiles='OSM',
    frame_width=800,
    frame_height=600,
    hover_cols=['Schools in Zip Code', 'Zip Code', 'HPI'],
    size='Resident Population',
    scale=0.0015,
    color='HPI',
    colorbar=True,
    clabel='HPI Value',
    title="2009 HPI Values and Number of Schools Mapped by Zip Code",
)
schoolmap2009