In [36]:
import numpy as np
import pandas as pd
import geopandas as gpd

In [20]:
# import google drive folder
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [27]:
# read data
median_income = pd.read_csv('merged_file.csv')

# read hispanic median income
hisp_income = pd.read_csv('medianHouseholdIncomeHispanic.csv')

# read white median income
white_income = pd.read_csv('medianHouseholdIncomeWhite.csv')

# read black median income
black_income = pd.read_csv('medianHouseholdIncomeBlack.csv')

In [28]:
hisp_income.sample()
white_income.sample()
black_income.sample()

Unnamed: 0,Label (Grouping),Census Tract 101; Cook County; Illinois!!Estimate,Census Tract 102.01; Cook County; Illinois!!Estimate,Census Tract 102.02; Cook County; Illinois!!Estimate,Census Tract 103; Cook County; Illinois!!Estimate,Census Tract 104; Cook County; Illinois!!Estimate,Census Tract 105.01; Cook County; Illinois!!Estimate,Census Tract 105.02; Cook County; Illinois!!Estimate,Census Tract 105.03; Cook County; Illinois!!Estimate,Census Tract 106; Cook County; Illinois!!Estimate,...,Census Tract 8435; Cook County; Illinois!!Estimate,Census Tract 8436; Cook County; Illinois!!Estimate,Census Tract 8437; Cook County; Illinois!!Estimate,Census Tract 8438; Cook County; Illinois!!Estimate,Census Tract 8439; Cook County; Illinois!!Estimate,Census Tract 8446; Cook County; Illinois!!Estimate,Census Tract 8447; Cook County; Illinois!!Estimate,Census Tract 9800; Cook County; Illinois!!Estimate,Census Tract 9801; Cook County; Illinois!!Estimate,Census Tract 9900; Cook County; Illinois!!Estimate
0,Median household income in the past 12 months ...,24632,34330,41792,68632,62577,34063,44375,18214,58125,...,-,44625,-,26125,62944,57917,52900,-,-,-


In [29]:
# Remove the last 33 characters from all column headers except for the first column
new_columns = [hisp_income.columns[0]] + [column[:-33] for column in hisp_income.columns[1:]]
hisp_income.columns = new_columns

new_columns = [white_income.columns[0]] + [column[:-33] for column in white_income.columns[1:]]
white_income.columns = new_columns

new_columns = [black_income.columns[0]] + [column[:-33] for column in black_income.columns[1:]]
black_income.columns = new_columns

In [30]:
# Assuming df is your DataFrame
hisp_income.drop(hisp_income.columns[0], axis=1, inplace=True)
white_income.drop(white_income.columns[0], axis=1, inplace=True)
black_income.drop(black_income.columns[0], axis=1, inplace=True)

In [31]:
# Transpose each DataFrame
hisp_income = hisp_income.T
hisp_income.sample(5)

white_income = white_income.T
white_income.sample(5)

black_income = black_income.T
black_income.sample(5)

Unnamed: 0,0
Census Tract 8102,61771
Census Tract 2925,52214
Census Tract 8048.05,-
Census Tract 6706,-
Census Tract 8340,39866


In [32]:
# Change the names of the two columns
new_column_names = ['Hispanic Household Income']
hisp_income.columns = new_column_names

new_column_names = ['White Household Income']
white_income.columns = new_column_names

new_column_names = ['Black Household Income']
black_income.columns = new_column_names

In [33]:
# Merge the DataFrame with the GeoDataFrame based on the common column 'New_Column_1'
median_income = pd.merge(left=hisp_income, right=median_income, left_on=hisp_income.index, right_on='namelsad10', how='inner')
median_income = pd.merge(left=white_income, right=median_income, left_on=white_income.index, right_on='namelsad10', how='inner')
median_income = pd.merge(left=black_income, right=median_income, left_on=black_income.index, right_on='namelsad10', how='inner')

median_income.sample()

Unnamed: 0,Black Household Income,White Household Income,Hispanic Household Income,Median_Household_Income,statefp10,name10,commarea_n,namelsad10,commarea,geoid10,notes,tractce10,countyfp10,geometry
429,20950,-,-,21275.0,17,4201.0,42,Census Tract 4201,42,17031420100,,420100,31,MULTIPOLYGON (((-87.56441000004487 41.78666299...


In [34]:
# Save the merged DataFrame to a new CSV file
median_income.to_csv('censusIncome.csv', index=False)

In [51]:
# read census tract bouandareis
tracts = gpd.read_file('/content/drive/My Drive/480-497-Demographic TIF Team/Boundaries/Boundaries - Census Tracts - 2010.geojson')
# link: https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Census-Tracts-2010/5jrd-6zik

#read new census income
censusIncome = pd.read_csv('censusIncomeCopy.csv')

In [52]:
tracts.sample()

Unnamed: 0,statefp10,name10,commarea_n,namelsad10,commarea,geoid10,notes,tractce10,countyfp10,geometry
596,17,2511,25,Census Tract 2511,25,17031251100,,251100,31,"MULTIPOLYGON (((-87.75377 41.89496, -87.75407 ..."


In [53]:
censusIncome = median_income.merge(tracts, on='namelsad10')

In [54]:
censusIncome.sample()

Unnamed: 0,Black Household Income,White Household Income,Hispanic Household Income,Median_Household_Income,statefp10_x,name10_x,commarea_n_x,namelsad10,commarea_x,geoid10_x,...,geometry_x,statefp10_y,name10_y,commarea_n_y,commarea_y,geoid10_y,notes_y,tractce10_y,countyfp10_y,geometry_y
71,-,211000,118125,180500.0,17,510.0,5,Census Tract 510,5,17031051000,...,MULTIPOLYGON (((-87.68713100001588 41.93946700...,17,510,5,5,17031051000,,51000,31,"MULTIPOLYGON (((-87.68713 41.93947, -87.68757 ..."


In [55]:
print(censusIncome.columns)

Index(['Black Household Income', 'White Household Income',
       'Hispanic Household Income', 'Median_Household_Income', 'statefp10_x',
       'name10_x', 'commarea_n_x', 'namelsad10', 'commarea_x', 'geoid10_x',
       'notes_x', 'tractce10_x', 'countyfp10_x', 'geometry_x', 'statefp10_y',
       'name10_y', 'commarea_n_y', 'commarea_y', 'geoid10_y', 'notes_y',
       'tractce10_y', 'countyfp10_y', 'geometry_y'],
      dtype='object')


In [56]:
print(censusIncome.dtypes)

Black Household Income         object
White Household Income         object
Hispanic Household Income      object
Median_Household_Income       float64
statefp10_x                     int64
name10_x                      float64
commarea_n_x                    int64
namelsad10                     object
commarea_x                      int64
geoid10_x                       int64
notes_x                        object
tractce10_x                     int64
countyfp10_x                    int64
geometry_x                     object
statefp10_y                    object
name10_y                       object
commarea_n_y                   object
commarea_y                     object
geoid10_y                      object
notes_y                        object
tractce10_y                    object
countyfp10_y                   object
geometry_y                   geometry
dtype: object


In [None]:
censusIncome.drop(columns=['statefp10_x','name10_x','commarea_n_x','geoid10_x','notes_x','tractce10_x','countyfp10_x','geometry_x'])

In [73]:
# Save the merged DataFrame to a new CSV file
censusIncome.to_csv('censusIncomeFinal.csv', index=False)

In [44]:
# import packages
import plotly.express as px
import plotly.graph_objects as go

In [71]:
# Create a base layer for the interactive heatmap for census tracts
fig = px.choropleth_mapbox(
    censusIncome,
    geojson=tracts.geometry,
    locations=censusIncome.index,
    color='Median_Household_Income',  # Color based on population
    color_continuous_scale="Viridis",  # Choose a color scale
    range_color=[min(censusIncome['Median_Household_Income']), 190000],  # Set color range explicitly
    mapbox_style="carto-positron",
    center={"lat": tracts.centroid.y.mean(), "lon": tracts.centroid.x.mean()},
    zoom=10,
    opacity=0.5,
    labels={'namelsad10': 'tract'},
    hover_name='namelsad10',  # Display community area name on hover
)


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [72]:
fig.show()

Output hidden; open in https://colab.research.google.com to view.