This notebook loads the data, filters it to the counties and census tracts in the Orlando-Kissimmee-Sanford CBSA, and demonstrates how the chart functions visualize that data.
I have not yet examined the results against the getting-started questions, but these charts help visualize the target community.
Next steps will focus on the housing inventory questions.

In [1]:
import pandas as pd
import numpy as np
import itertools
import requests
import json
import plotly
import geopandas as gpd
import plotly.express as px
from shapely.geometry import shape

In [2]:
from create_plot_functions import create_county_barchart
from create_plot_functions import create_tract_barchart
from create_plot_functions import create_heatmap
from create_plot_functions import create_county_income_race_dotplot
from create_plot_functions import create_tract_income_race_dotplot
from create_plot_functions import create_county_income_age_dotplot
from create_plot_functions import create_tract_income_age_dotplot

In [3]:
# Load, in order: the Florida housing table, its data dictionary, and the CBSA-to-county
# FIPS crosswalk. Crosswalk source:
# https://www.nber.org/research/data/census-core-based-statistical-area-cbsa-federal-information-processing-series-fips-county-crosswalk
df_fl, df_data_dic, geo_crosswalk = (
    pd.read_csv(csv_path)
    for csv_path in (
        "datakit-housing-fall-2024-1.0/housing-data/FL/data_1-FL.csv",
        "datakit-housing-fall-2024-1.0/housing-data/FL/data_dictionary_1-FL.csv",
        "datakit-housing-fall-2024-1.0/housing-data/FL/cbsa2fipsxw.csv",
    )
)

In [4]:
# Crosswalk rows belonging to the Orlando-Kissimmee-Sanford CBSA.
oks_counties = geo_crosswalk.loc[
    geo_crosswalk["cbsatitle"].eq("Orlando-Kissimmee-Sanford, FL")
]
# Keep only the Florida rows whose county code appears among those crosswalk rows.
df_oks = df_fl.loc[df_fl["county"].isin(oks_counties["fipscountycode"])]

In [5]:
# One-time HUD pull, kept for provenance. The commented-out lines below used the
# HUD_API_Calls helpers to fetch Florida county data (presumably HUD income limits,
# given the "il" naming -- confirm) and saved the result to Excel.
# hud_token is blank here; fill it in before re-running these lines.
#from HUD_API_Calls import get_county_codes
#from HUD_API_Calls import get_county_il_data
#hud_token = ""
#hud_fl_county_codes = get_county_codes(hud_token, "FL")
#hud_fl_il = get_county_il_data(hud_token, hud_fl_county_codes)
#hud_fl_il.to_excel('datakit-housing-fall-2024-1.0/housing-data/FL/hud_fl_il.xlsx', index=False) 

# The code above was run once and its output saved; read that saved Excel file back
# into a DataFrame here.
hud_fl_il = pd.read_excel('datakit-housing-fall-2024-1.0/housing-data/FL/hud_fl_il.xlsx')

In [6]:
# Build a field_name -> dk_column_name lookup from the data dictionary and use it to
# relabel the columns of the Orlando-area table.
var_rename = {
    field: label
    for field, label in zip(df_data_dic["field_name"], df_data_dic["dk_column_name"])
}
df_oks = df_oks.rename(var_rename, axis="columns")

In [7]:
# Derive tract and county keys from the geoid string. New columns are appended in the
# same order as before (census_tract, then county_fips) because later cells select
# columns by position.
geoid_text = df_oks["geoid"].astype(str)
df_oks["geoid"] = geoid_text
df_oks["census_tract"] = geoid_text.str[-6:]  # last six characters of the geoid
df_oks["county_fips"] = geoid_text.str[:5]    # first five characters of the geoid

# Give the HUD table the same five-character key, then attach its columns to every tract.
hud_fl_il["county_fips"] = hud_fl_il["fips_code"].astype(str).str[:5]
df_oks = df_oks.merge(hud_fl_il, how="inner", on="county_fips")

In [8]:
# Tract shapes: read the Florida tract shapefile, keep the tracts present in df_oks by
# joining GEOID to geoid, and convert the joined frame to a GeoJSON dict.
shp_fl = gpd.read_file("datakit-housing-fall-2024-1.0/housing-data/FL/cb_2020_12_tract_500k.shp")
df_oks_geo = shp_fl.merge(df_oks, how="inner", left_on="GEOID", right_on="geoid")
geojson_data = json.loads(df_oks_geo.to_json())

# County shapes for the background layer: read the national county file and keep Florida.
county_gdf = gpd.read_file("cb_2020_us_county_20m/cb_2020_us_county_20m.shp").loc[
    lambda counties: counties["STUSPS"] == "FL"
]
county_geojson = json.loads(county_gdf.to_json())

In [9]:
# List every df_oks column next to its position, for reference when building charts.
for position, column_name in enumerate(df_oks.columns):
    print(f"{position} {column_name}")

0 geoid
1 geoid_year
2 state
3 county
4 state_fips_code
5 county_fips_code
6 ACS - Gini Index of Income Inequality - Estimate
7 ACS - Gini Index of Income Inequality - Margin of Error
8 CDFI Fund (Areas of Economic Distress) - Aggregated by Land
9 CDFI Fund (Areas of Economic Distress) - Simple Aggregation
10 CDFI Fund Investment Areas
11 HUD - Opportunity Zones (U.S. Department of Housing and Urban Development)
12 ACS - Employment Status for the Population 16 Years and Over (In the Labor Forces) - Estimate
13 ACS - Employment Status for the Population 16 Years and Over (In the Labor Forces) - Margin of Error
14 ACS - Employment Status for the Population 16 Years and Over (In the Labor Forces) - By Employment Status - Number Employed - Estimate
15 ACS - Employment Status for the Population 16 Years and Over (In the Labor Forces) - By Employment Status - Number Employed - Margin of Error
16 ACS - Employment Status for the Population 16 Years and Over (In the Labor Forces) - By Employmen

In [10]:
# Population-distribution columns for the bar charts. The race percentage columns have
# no "Estimate" suffix, so they are matched on "by Race" alone; the parentheses spell out
# the grouping that `and`/`or` precedence already applies.
pop_cols = [
    column_name
    for column_name in df_oks.columns
    if ("Population Percentage" in column_name and "Estimate" in column_name)
    or "by Race" in column_name
]

In [11]:
# List each population column next to its index in pop_cols, for reference in chart calls.
for position, column_name in enumerate(pop_cols):
    print(f"{position} {column_name}")

0 ACS - Population Percentage by Race (Two or More Races)
1 ACS - Population Percentage by Race (White alone)
2 ACS - Population Percentage by Race (Black or African American alone)
3 ACS - Population Percentage by Race (American Indian and Alaska Native alone)
4 ACS - Population Percentage by Race (Asian alone)
5 ACS - Population Percentage by Race (Native Hawaiian and Other Pacific Islander alone)
6 ACS - Population Percentage by Race (Some Other Race Alone)
7 ACS - Population Percentage by Age (5 to 14 years) - Estimate
8 ACS - Population Percentage by Age (5 to 14 years) - By Sex - Percentage of Total Male Population - Estimate
9 ACS - Population Percentage by Age (5 to 14 years) - By Sex - Percentage of Total Female Population - Estimate
10 ACS - Population Percentage by Age (15 to 17 years) - Estimate
11 ACS - Population Percentage by Age (15 to 17 years) - By Sex - Percentage of Total Male Population - Estimate
12 ACS - Population Percentage by Age (15 to 17 years) - By Sex - Pe

In [12]:
# Distinct county names present in df_oks, in order of first appearance; use these as the
# county argument to the chart functions.
pd.unique(df_oks["county_name"])

array(['Lake County', 'Orange County', 'Osceola County',
       'Seminole County'], dtype=object)

In [13]:
# County bar chart: pass the dataframe, one of the county names listed above, and a single
# population column. The column is picked from pop_cols by position (index 19 here), so
# check the printed pop_cols listing to see which variable that is.
create_county_barchart(df_oks, "Lake County", pop_cols[19])

In [14]:
# Tract bar chart: pass the dataframe, a six-character census tract code, and a list of
# population columns to compare within that tract (pop_cols entries 20 and 21 here).
# Presumably the tract code is matched against df_oks["census_tract"] -- confirm in
# create_plot_functions.
create_tract_barchart(df_oks, "030104", [pop_cols[20], pop_cols[21]])

In [15]:
# Tract heatmap: pass the merged geo dataframe, its GeoJSON dict, the name of the column to
# map across census tracts, and the Florida county GeoJSON loaded for the background layer.
create_heatmap(df_oks_geo, geojson_data, "ACS - Median Monthly Housing Cost (Occupied Housing Units) - Estimate",county_geojson)

In [16]:
# Assemble the column subset passed to the income dot plots.
# NOTE(review): the housing-cost and HUD columns are chosen by position, so this cell
# silently picks different columns if the column order of df_oks ever changes; selecting
# them by name would be sturdier.
geo_vars = df_oks.columns[0:5].tolist()
income_vars = [
    column_name
    for column_name in df_oks.columns
    if "Median Household Income" in column_name and "Estimate" in column_name
]
housing_cost_vars = df_oks.columns[[379, 381, 383]].tolist()
hud_vars = df_oks.columns[385:413].tolist()
df_oks_income = df_oks[[*geo_vars, *income_vars, *housing_cost_vars, *hud_vars]]

In [17]:
# County income/race dot plot: pass the income subset dataframe, a county name, and the
# income-limit column to use; that limit is displayed as the vertical red dotted line.
create_county_income_race_dotplot(df_oks_income, "Lake County", "very_low-il50_p4")

In [18]:
# Tract-level version of the income/race dot plot: pass the income subset dataframe, a
# county name, a list of six-character tract codes, and the income-limit column.
# Presumably the limit is drawn as a reference line as in the county plot -- confirm.
create_tract_income_race_dotplot(df_oks_income, "Lake County", ["031312","030702","030601","030206"], "very_low-il50_p4")

In [19]:
# County income/race dot plot for Orange County, using the same income-limit column.
create_county_income_race_dotplot(df_oks_income, "Orange County", "very_low-il50_p4")

In [20]:
# County income/race dot plot for Osceola County, using the same income-limit column.
create_county_income_race_dotplot(df_oks_income, "Osceola County", "very_low-il50_p4")

In [21]:
# County income/race dot plot for Seminole County, using the same income-limit column.
create_county_income_race_dotplot(df_oks_income, "Seminole County", "very_low-il50_p4")

In [22]:
# County income/age dot plot: pass the income subset dataframe, a county name, and the
# income-limit column. Presumably the limit is drawn as a reference line as in the
# income/race plot -- confirm in create_plot_functions.
create_county_income_age_dotplot(df_oks_income, "Lake County", "very_low-il50_p4")

In [23]:
# County income/age dot plot for Orange County, using the same income-limit column.
create_county_income_age_dotplot(df_oks_income, "Orange County", "very_low-il50_p4")

In [24]:
# County income/age dot plot for Osceola County, using the same income-limit column.
create_county_income_age_dotplot(df_oks_income, "Osceola County", "very_low-il50_p4")

In [25]:
# County income/age dot plot for Seminole County, using the same income-limit column.
create_county_income_age_dotplot(df_oks_income, "Seminole County", "very_low-il50_p4")