In [49]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import pickle

In [None]:
# Read the census-tract GeoJSON file into `census_geo`.
census_geo = gpd.read_file(
    './data/processed/geo/censustracts2020.geojson'
)

In [None]:
# Path of the pickled significant-tract data.
input_filepath = './data/interim/significant-tract.pkl'

# Deserialize the object stored at that path (binary read).
# NOTE: unpickling can execute arbitrary code, so this should only ever be
# pointed at pickle files produced by a trusted source.
with open(input_filepath, mode='rb') as pickle_in:
    sig_census_tracts = pickle.load(pickle_in)

In [52]:
# Replace positive-infinity chi2 statistics with the finite placeholder 2300.
# Only +inf is targeted; every other value is left unchanged.
chi2_without_inf = sig_census_tracts['chi2_statistic'].replace({np.inf: 2300})
sig_census_tracts['chi2_statistic'] = chi2_without_inf

In [53]:
# For better plotting, cap the statistic: every value above 100 is set to 100.
exceeds_cap = sig_census_tracts['chi2_statistic'].gt(100)
sig_census_tracts.loc[exceeds_cap, 'chi2_statistic'] = 100

In [54]:
# Move the index into the frame as an ordinary column (the index is expected
# to be BoroCT2020 here) and fall back to a default integer index.
# drop=False is the default; it is spelled out to show the index is kept.
sig_census_tracts = sig_census_tracts.reset_index(drop=False)

In [55]:
# Keep only the join key and the statistic, i.e. the columns needed for the
# merge into census_geo.
columns_to_merge = ['BoroCT2020', 'chi2_statistic']
chi2_only = sig_census_tracts[columns_to_merge]

In [56]:
# Attach each tract's chi2_statistic to census_geo, joining on BoroCT2020.
#
# This cell reassigns census_geo from itself, so it must be safe to re-run:
# any chi2_statistic column left over from a previous execution is dropped
# first (errors='ignore' makes the drop a no-op on the first run). Without
# this, a second run would merge into a frame that already has the column
# and pandas would emit chi2_statistic_x / chi2_statistic_y instead of a
# single chi2_statistic column.
#
# how='left' keeps every row of census_geo; rows with no matching key in
# chi2_only get NaN for chi2_statistic.
census_geo = (
    census_geo
    .drop(columns='chi2_statistic', errors='ignore')
    .merge(chi2_only, on='BoroCT2020', how='left')
)

In [None]:
# Export the merged table as CSV, leaving the row index out of the file.
csv_filepath = './data/modeling/census_geo.csv'
census_geo.to_csv(csv_filepath, index=False)

In [None]:
# Destination of the pickled copy of census_geo.
output_filepath = './data/modeling/census_geo.pkl'

# Serialize census_geo to that path (binary write); the `with` block closes
# the file once the dump has finished.
with open(output_filepath, mode='wb') as pickle_out:
    pickle.dump(census_geo, pickle_out)