# Attribute and Spatial Joins Solutions

In [None]:
import pandas as pd
import geopandas as gpd

import matplotlib
import matplotlib.pyplot as plt

%matplotlib inline  

In [None]:
# Load the California ACS 5-year census variables. The 11-digit FIPS code is
# read as a string (presumably so leading zeros survive and it can be matched
# against the tracts' GEOID later -- confirm against the data).
acs5_df = pd.read_csv(
    "../data/census/ACS5yr/census_variables_CA.csv",
    dtype={'FIPS_11_digit': str},
)
# Keep only the 2018 rows whose county FIPS is 1
acs5_df_ac = acs5_df.loc[
    (acs5_df['year'] == 2018) & (acs5_df['county_fips'] == 1)
]

In [None]:
# Read the zipped census tract boundaries into a GeoDataFrame
tracts_gdf = gpd.read_file(
    "zip://../data/census/Tracts/cb_2013_06_tract_500k.zip"
)
# Keep only the tracts whose county code is '001' (stored as a string here,
# unlike the numeric county_fips used in the ACS table)
tracts_gdf_ac = tracts_gdf.loc[tracts_gdf['COUNTYFP'].eq('001')]

In [None]:
# Attribute join: attach the ACS columns to the tract geometries.
# The tracts are the left table and how='left' keeps every tract row,
# matching the tracts' GEOID against the ACS 11-digit FIPS code.
tracts_acs_gdf_ac = tracts_gdf_ac.merge(
    acs5_df_ac,
    how='left',
    left_on='GEOID',
    right_on='FIPS_11_digit',
)

In [None]:
# Load the school records from CSV (a plain pandas DataFrame at this point)
schools_df = pd.read_csv('../data/alco_schools.csv')
# Build point geometries from the X/Y columns and declare their CRS as
# EPSG:4326 when the GeoDataFrame is created (this only labels the
# coordinates; presumably X/Y hold longitude/latitude -- confirm).
schools_gdf = gpd.GeoDataFrame(
    schools_df,
    geometry=gpd.points_from_xy(schools_df['X'], schools_df['Y']),
    crs="epsg:4326",
)
# Reproject the schools into the tracts' CRS so both layers share one CRS
schools_gdf = schools_gdf.to_crs(tracts_acs_gdf_ac.crs)

---

### Challenge 1: Choropleth Map

We can now make choropleth maps using our attribute-joined GeoDataFrame. Go ahead and pick one variable to color the map, then map it. You can go back to lesson 5 if you need a refresher on how to make this!

---

In [None]:
# Choropleth of the 'p_renters' column, binned with the quantiles scheme
fig, ax = plt.subplots(figsize=(10, 5))
tracts_acs_gdf_ac.plot(
    ax=ax,
    column='p_renters',
    scheme="quantiles",
    cmap='magma',
    legend=True,
)
ax.set_title("Percentage of Renters")
plt.show()

---

### Challenge 2: Confidence Checks

As always, we want to perform a confidence check on our intermediate result before we rush ahead.

One way to do that is to introspect the structure of the result object a bit.

1. What type of object should that have given us?
2. What should the dimensions of that object be, and why?
3. If we wanted a visual check of our results (i.e. a plot or map), what could we do?

---

In [None]:
# Keep only the schools with a positive API value (drops API == 0 rows)
schools_gdf_api = schools_gdf.loc[schools_gdf['API'].gt(0)]

In [None]:
# Spatial join with the schools as the left layer: how='left' keeps the
# schools' rows and geometry and attaches the attributes of the matching tract
schools_jointracts = gpd.sjoin(
    schools_gdf_api,
    tracts_acs_gdf_ac,
    how='left',
)

In [None]:
# Confidence check 1: inspect the class of the spatial-join result
# (gpd.sjoin is expected to hand back a GeoDataFrame)
type(schools_jointracts)

In [None]:
# Confidence check 2: compare the dimensions of the join inputs and output.
# The left input of the join was the API-filtered schools (schools_gdf_api),
# so that is the frame whose row count the result should be compared against.
print(schools_gdf_api.shape)
print(tracts_acs_gdf_ac.shape)
# With how='left' the result should have one row per school (assuming each
# school falls in at most one tract) and the columns of both inputs combined.
print(schools_jointracts.shape)

In [None]:
# Confidence check 3: quick default plot of the joined result's geometries
schools_jointracts.plot()

In [None]:
# Draw the tract outlines as a base layer, then overlay the first 16 joined
# schools colored by the GEOID of the tract each one was matched to
ax = tracts_acs_gdf_ac.plot(
    figsize=(18, 18),
    color='white',
    edgecolor='black',
)
schools_jointracts.head(16).plot(ax=ax, column='GEOID', legend=True)

---

### Challenge 3: Aggregation

What is the mean API of each Census tract?

As we mentioned, the spatial aggregation workflow that we just put together above could have been used not only to generate a new count variable, but also to generate any other new variable that results from calling an aggregation function on an attribute column.

In this case, we want to calculate and map the mean API of the schools in each Census tract.

Copy and paste code from above where useful, then tweak and/or add to that code. Do the following:

1. Join the schools onto the tracts (**HINT**: make sure to decide whether or not you want to include schools with API = 0!).
2. Dissolve that joined object by the tract IDs, giving you a new GeoDataFrame with each tract's mean API (**HINT**: because this is now a different calculation, different problems may arise and need handling!).
3. Plot the tracts, colored by API scores (**HINT**: overlay the schools points again, visualizing them in a way that will help you visually check your results!).

---

In [None]:
# Spatially join the API > 0 schools onto the tracts. how='right' keeps the
# rows and geometry of the right layer (the tracts).
tracts_joinschools_api = gpd.sjoin(
    schools_gdf_api,
    tracts_acs_gdf_ac,
    how='right',
)

In [None]:
# Compute each tract's mean API:
#   1. keep just the tract ID, the API score and the geometry
#   2. drop any row with a missing value in those columns
#   3. dissolve by GEOID, aggregating the remaining column (API) with 'mean'
tracts_mean_api = (
    tracts_joinschools_api[['GEOID', 'API', 'geometry']]
    .dropna(how='any')
    .dissolve(by='GEOID', aggfunc='mean')
)

In [None]:
# Map the tracts, colored by their mean API (equal-interval classes)
fig, ax = plt.subplots(figsize=(20, 20))
tracts_mean_api.plot(
    column='API',
    scheme='equalinterval',
    cmap="plasma",
    edgecolor="grey",
    legend=True,
    legend_kwds=dict(title='Mean API'),
    ax=ax,
)
# Overlay the schools, colored by their own API with the same colormap, so we
# can check visually that the mean APIs we calculated are reasonable
schools_gdf_api.plot(
    column='API',
    cmap='plasma',
    markersize=25,
    edgecolor='black',
    linewidth=0.3,
    legend=True,
    legend_kwds=dict(
        label="API (schools)",
        orientation="horizontal",
        location='bottom',
        shrink=0.75,
        pad=0.05,
    ),
    ax=ax,
)