# US CT Interactive Map

In [1]:
import pandas as pd
import geopandas as gpd
import plotly.express as px
import pydeck as pdk

## Population Data

In [2]:
df = pd.read_csv("./data/DECENNIALDHC2020.P1-2023-09-13T170415.csv")

In [3]:
df.shape

(1, 884)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Columns: 884 entries, Label (Grouping) to Census Tract 9081, Windham County, Connecticut
dtypes: int64(10), object(874)
memory usage: 7.0+ KB


In [5]:
df.drop(columns=['Label (Grouping)'], inplace=True)

In [6]:
df = df.T

In [7]:
df.columns = ['POPULATION']

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 883 entries, Census Tract 101.01, Fairfield County, Connecticut to Census Tract 9081, Windham County, Connecticut
Data columns (total 1 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   POPULATION  883 non-null    object
dtypes: object(1)
memory usage: 13.8+ KB


In [9]:
df = df.rename_axis('Info').reset_index()

In [10]:
df["NAME"] = df.apply(lambda x: x['Info'].split(",")[0].split(" ")[2], axis=1)

In [11]:
df['COUNTY'] = df.apply(lambda x: x['Info'].split(",")[1], axis=1)

In [12]:
df.head(3)

Unnamed: 0,Info,POPULATION,NAME,COUNTY
0,"Census Tract 101.01, Fairfield County, Connect...",4401,101.01,Fairfield County
1,"Census Tract 101.02, Fairfield County, Connect...",4276,101.02,Fairfield County
2,"Census Tract 102.01, Fairfield County, Connect...",3395,102.01,Fairfield County


In [13]:
df.drop(columns=['Info'], inplace=True)

In [14]:
df.head()

Unnamed: 0,POPULATION,NAME,COUNTY
0,4401,101.01,Fairfield County
1,4276,101.02,Fairfield County
2,3395,102.01,Fairfield County
3,5904,102.02,Fairfield County
4,4370,103.0,Fairfield County


In [15]:
df.shape

(883, 3)

## Shapefiles

In [16]:
gdf = gpd.read_file("./data/tl_2020_09_tract/tl_2020_09_tract.shp")

In [17]:
gdf.shape

(883, 13)

In [18]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 883 entries, 0 to 882
Data columns (total 13 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   STATEFP   883 non-null    object  
 1   COUNTYFP  883 non-null    object  
 2   TRACTCE   883 non-null    object  
 3   GEOID     883 non-null    object  
 4   NAME      883 non-null    object  
 5   NAMELSAD  883 non-null    object  
 6   MTFCC     883 non-null    object  
 7   FUNCSTAT  883 non-null    object  
 8   ALAND     883 non-null    int64   
 9   AWATER    883 non-null    int64   
 10  INTPTLAT  883 non-null    object  
 11  INTPTLON  883 non-null    object  
 12  geometry  883 non-null    geometry
dtypes: geometry(1), int64(2), object(10)
memory usage: 89.8+ KB


## Merging Population to Shapefiles

In [20]:
gdf.shape

(883, 13)

In [21]:
df.shape

(883, 3)

In [22]:
complete_df = gdf.merge(df, how="left")

In [24]:
complete_df.head(1)

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,POPULATION,COUNTY
0,9,9,350400,9009350400,3504,Census Tract 3504,G5020,S,424642,0,41.5572464,-73.0300616,"POLYGON ((-73.03440 41.55468, -73.03428 41.555...",2846,New Haven County


In [25]:
complete_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 887 entries, 0 to 886
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   STATEFP     887 non-null    object  
 1   COUNTYFP    887 non-null    object  
 2   TRACTCE     887 non-null    object  
 3   GEOID       887 non-null    object  
 4   NAME        887 non-null    object  
 5   NAMELSAD    887 non-null    object  
 6   MTFCC       887 non-null    object  
 7   FUNCSTAT    887 non-null    object  
 8   ALAND       887 non-null    int64   
 9   AWATER      887 non-null    int64   
 10  INTPTLAT    887 non-null    object  
 11  INTPTLON    887 non-null    object  
 12  geometry    887 non-null    geometry
 13  POPULATION  887 non-null    object  
 14  COUNTY      887 non-null    object  
dtypes: geometry(1), int64(2), object(12)
memory usage: 110.9+ KB


In [30]:
complete_df.head(1)

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,POPULATION,COUNTY
0,9,9,350400,9009350400,3504,Census Tract 3504,G5020,S,424642,0,41.5572464,-73.0300616,"POLYGON ((-73.03440 41.55468, -73.03428 41.555...",2846,New Haven County


In [136]:
complete_df.to_file("./data/connecticut_tracts.json", driver="GeoJSON")

# Mapping

In [31]:
ct_tracts = gpd.read_file("./data/connecticut_tracts.json")

In [32]:
ct_tracts.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 887 entries, 0 to 886
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   STATEFP     887 non-null    object  
 1   COUNTYFP    887 non-null    object  
 2   TRACTCE     887 non-null    object  
 3   GEOID       887 non-null    object  
 4   NAME        887 non-null    object  
 5   NAMELSAD    887 non-null    object  
 6   MTFCC       887 non-null    object  
 7   FUNCSTAT    887 non-null    object  
 8   ALAND       887 non-null    int64   
 9   AWATER      887 non-null    int64   
 10  INTPTLAT    887 non-null    object  
 11  INTPTLON    887 non-null    object  
 12  POPULATION  887 non-null    object  
 13  COUNTY      887 non-null    object  
 14  geometry    887 non-null    geometry
dtypes: geometry(1), int64(2), object(12)
memory usage: 104.1+ KB


In [33]:
ct_tracts.head(2)

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,POPULATION,COUNTY,geometry
0,9,9,350400,9009350400,3504,Census Tract 3504,G5020,S,424642,0,41.5572464,-73.0300616,2846,New Haven County,"POLYGON ((-73.03440 41.55468, -73.03428 41.555..."
1,9,9,350500,9009350500,3505,Census Tract 3505,G5020,S,781540,0,41.5477163,-73.0339,2693,New Haven County,"POLYGON ((-73.04241 41.55167, -73.04230 41.551..."


> Was having a problem with the above geojson file. As it was exactly what I wanted but when I created the map only a few tracts where being extruded. After looking at the dataframe it is clear now that the reason has to do with the POPULATION column data type. The comma is throwing it off and it's not multiplying it for the elevation parameter in the Layer constructor.  

In [34]:
ct_tracts['POPULATION'] = ct_tracts['POPULATION'].apply(lambda x: int(x.replace(",", "")))

In [35]:
ct_tracts.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 887 entries, 0 to 886
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   STATEFP     887 non-null    object  
 1   COUNTYFP    887 non-null    object  
 2   TRACTCE     887 non-null    object  
 3   GEOID       887 non-null    object  
 4   NAME        887 non-null    object  
 5   NAMELSAD    887 non-null    object  
 6   MTFCC       887 non-null    object  
 7   FUNCSTAT    887 non-null    object  
 8   ALAND       887 non-null    int64   
 9   AWATER      887 non-null    int64   
 10  INTPTLAT    887 non-null    object  
 11  INTPTLON    887 non-null    object  
 12  POPULATION  887 non-null    int64   
 13  COUNTY      887 non-null    object  
 14  geometry    887 non-null    geometry
dtypes: geometry(1), int64(3), object(11)
memory usage: 104.1+ KB


> Now adding a column with the density of population per square mile

In [36]:
ct_tracts["POPDEN"]=ct_tracts['POPULATION']/(ct_tracts['ALAND']*0.0000003861)

In [37]:
ct_tracts.head(2)

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,POPULATION,COUNTY,geometry,POPDEN
0,9,9,350400,9009350400,3504,Census Tract 3504,G5020,S,424642,0,41.5572464,-73.0300616,2846,New Haven County,"POLYGON ((-73.03440 41.55468, -73.03428 41.555...",17358.498149
1,9,9,350500,9009350500,3505,Census Tract 3505,G5020,S,781540,0,41.5477163,-73.0339,2693,New Haven County,"POLYGON ((-73.04241 41.55167, -73.04230 41.551...",8924.529742


> Adding a column with the POPDEN normalized using Min-Max feature scaling to have a value between 0 and 1 for the alpha in fill color

In [38]:
ct_tracts["POPDENNORM"] = ct_tracts['POPDEN'].apply(
    lambda x: (255+((x - ct_tracts['POPDEN'].min())*(255)) )
            / (ct_tracts['POPDEN'].max() - ct_tracts['POPDEN'].min()) )

In [39]:
ct_tracts.head(2)

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,POPULATION,COUNTY,geometry,POPDEN,POPDENNORM
0,9,9,350400,9009350400,3504,Census Tract 3504,G5020,S,424642,0,41.5572464,-73.0300616,2846,New Haven County,"POLYGON ((-73.03440 41.55468, -73.03428 41.555...",17358.498149,117.144622
1,9,9,350500,9009350500,3505,Census Tract 3505,G5020,S,781540,0,41.5477163,-73.0339,2693,New Haven County,"POLYGON ((-73.04241 41.55167, -73.04230 41.551...",8924.529742,60.230878


In [41]:
ct_tracts.isnull().sum()

STATEFP       0
COUNTYFP      0
TRACTCE       0
GEOID         0
NAME          0
NAMELSAD      0
MTFCC         0
FUNCSTAT      0
ALAND         0
AWATER        0
INTPTLAT      0
INTPTLON      0
POPULATION    0
COUNTY        0
geometry      0
POPDEN        8
POPDENNORM    8
dtype: int64

In [42]:
ct_tracts.fillna(0, inplace=True)

In [43]:
ct_tracts.isnull().sum()

STATEFP       0
COUNTYFP      0
TRACTCE       0
GEOID         0
NAME          0
NAMELSAD      0
MTFCC         0
FUNCSTAT      0
ALAND         0
AWATER        0
INTPTLAT      0
INTPTLON      0
POPULATION    0
COUNTY        0
geometry      0
POPDEN        0
POPDENNORM    0
dtype: int64

In [45]:
ct_tracts.head(1)

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,POPULATION,COUNTY,geometry,POPDEN,POPDENNORM
0,9,9,350400,9009350400,3504,Census Tract 3504,G5020,S,424642,0,41.5572464,-73.0300616,2846,New Haven County,"POLYGON ((-73.03440 41.55468, -73.03428 41.555...",17358.498149,117.144622


In [46]:
INITIAL_VIEW_STATE = pdk.ViewState(latitude=41.76375, 
                                   longitude=-72.69102, 
                                   zoom=9, max_zoom=16, pitch=60, bearing=0)

tracts = pdk.Layer(
    "GeoJsonLayer",
    ct_tracts,
    opacity=1,
    stroked=True,
    filled=True,
    extruded=True,
    wireframe=True,
    pickable=True,
    get_elevation="POPDEN", # Converting to population density per sq m to per sq mile
    get_fill_color="POPULATION==0?[0,0,0,0]:[POPDENNORM+95, POPDENNORM+95, POPDENNORM+95]",
    get_line_color="POPULATION==0?[0,0,0,0]:[POPDENNORM+50, POPDENNORM+50, POPDENNORM+50]",
)

In [47]:
r = pdk.Deck(layers=[tracts], initial_view_state=INITIAL_VIEW_STATE)

In [49]:
#r.to_html('./maps/ct_tracts.html')