# RQ2: Are there any counties that don't have any higher education institutions (_education deserts_)?

## What are the (demographic) characteristics of those counties?

In [None]:
import pandas as pd
import geopandas as gpd
import geoplot as gplt
import geoplot.crs as gcrs
import matplotlib.pyplot as plt

from tools import tree
from pathlib import Path
from datetime import datetime as dt
# Date stamp for this run in day-abbreviated month-two digit year form
# (e.g. "07-Mar-21"); the bare expression below echoes it as the cell output.
today = f"{dt.today():%d-%b-%y}"

today

In [None]:
# Data folders used by this notebook, all relative to the notebook's working
# directory. Each name is bound to a pathlib.Path so files can be addressed
# with the `/` operator further down.
RAW_DATA, INTERIM_DATA, PROCESSED_DATA, FINAL_DATA, EXTERNAL_DATA = (
    Path(folder)
    for folder in (
        "../data/raw/",
        "../data/interim/",
        "../data/processed/",
        "../data/final/",
        "../data/external/",
    )
)

In [None]:
# `tree` is the helper imported from the project-local `tools` module.
# NOTE(review): presumably it lists the contents of the processed-data folder
# so the file names used in the next cell can be checked -- confirm in tools.py.
tree(PROCESSED_DATA)

In [None]:
# Load the two processed shapefiles this analysis works from: the county layer
# and the institutions layer.
geodata_dir = PROCESSED_DATA / 'processed_geodata'
institutions_dir = PROCESSED_DATA / 'processed_institutions'

contiguous_us = gpd.read_file(geodata_dir / 'contiguous_us.shp')
geo_institutions = gpd.read_file(institutions_dir / 'geo_institutions.shp')

In [None]:
# Preview the county layer to see its columns before they are relabelled.
contiguous_us.head()

In [None]:
# Replace all column labels at once. The assignment is positional: it relies on
# the frame having exactly these five columns in this order, and pandas raises
# if the number of labels does not match the number of columns.
contiguous_us.columns = ['geoid', 'county_name', 'name', 'share_urm', 'geometry']

In [None]:
# Preview the institutions layer.
geo_institutions.head()

In [None]:
import inspect

# Spatially join counties to institutions, keeping only counties whose polygon
# contains at least one institution (inner join, "contains" test).
#
# geopandas renamed the `op` keyword of `sjoin` to `predicate` (deprecated in
# 0.10, later removed), so a hard-coded `op=` fails on current releases while
# `predicate=` fails on old ones. Pick whichever keyword the installed version
# accepts so the cell runs on both.
_sjoin_predicate_kw = (
    'predicate' if 'predicate' in inspect.signature(gpd.sjoin).parameters else 'op'
)
working_gdf = gpd.sjoin(
    contiguous_us,
    geo_institutions,
    how='inner',
    **{_sjoin_predicate_kw: 'contains'},
)

In [None]:
working_gdf.head()

In [None]:
working_gdf = working_gdf.drop_duplicates('name')

In [None]:
working_gdf.plot();

In [None]:
working_gdf.head()

In [None]:
counties_with_institutions = working_gdf[['geoid', 'county_name', 'name', 'share_urm', 'geometry']].copy()

In [None]:
counties_with_institutions.head()

In [None]:
list_of_counties_with_institutions = counties_with_institutions['name'].values

In [None]:
list_of_counties_with_institutions

In [None]:
mask_counties_without_institutions = ~contiguous_us['name'].isin(list_of_counties_with_institutions)

In [None]:
counties_without_institutions = contiguous_us[mask_counties_without_institutions].copy()

In [None]:
counties_without_institutions.plot();

In [None]:
counties_with_institutions.head()

In [None]:
# Average `share_urm` over counties that contain at least one institution.
# This is a plain mean of the county values: each county counts equally,
# regardless of its population.
counties_with_institutions['share_urm'].mean()

In [None]:
# Same unweighted average for counties without any institution, for comparison.
counties_without_institutions['share_urm'].mean()

# Run `censusdatadownloader` to get median age and median household income 

In [None]:
median_age = pd.read_csv(EXTERNAL_DATA / 'processed' / 'acs5_2018_medianage_counties.csv')
median_hh_income = pd.read_csv(EXTERNAL_DATA / 'processed' / 'acs5_2018_medianhouseholdincome_counties.csv')

In [None]:
median_age.head()

In [None]:
median_hh_income.head()

In [None]:
median_age = median_age[['geoid', 'name', 'median']]
median_hh_income = median_hh_income[['geoid', 'name', 'median']]

In [None]:
median_age.columns = ['geoid', 'name', 'median_age']
median_hh_income.columns = ['geoid', 'name', 'median_hh_income']

In [None]:
median_age.head()

In [None]:
median_age.dtypes

In [None]:
median_age['geoid'] = median_age['geoid'].astype(str).str.zfill(5)
median_hh_income['geoid'] = median_hh_income['geoid'].astype(str).str.zfill(5)

In [None]:
median_age.set_index('geoid', inplace = True)
median_hh_income.set_index('geoid', inplace = True)

In [None]:
median_age.head()

In [None]:
median_age.drop(columns = 'name', inplace = True)
median_hh_income.drop(columns = 'name', inplace = True)

In [None]:
counties_with_institutions.set_index('geoid', inplace = True)
counties_without_institutions.set_index('geoid', inplace = True)

In [None]:
counties_with_institutions = counties_with_institutions.join(median_age).join(median_hh_income)
counties_without_institutions = counties_without_institutions.join(median_age).join(median_hh_income)

In [None]:
counties_with_institutions.head()

In [None]:
counties_with_institutions.describe()

In [None]:
counties_without_institutions.describe()