#### Example of using crosswalks between census tracts of different years

This notebook used to provide example code for using the output from the script in creating geojsons with multiple years of census data for the same tract. As I'm migrating it to a dedicated script in main.py, it's now for testing different parts of that script.  

#### 1.) Download the crosswalks from the Azure container 

In [72]:
os.makedirs

<function os.makedirs(name, mode=511, exist_ok=False)>

In [2]:
import json
import yaml
import pandas as pd 
import geopandas
import pygris 
from census import Census 
from utils import AzureBlobStorageManager
from logger import logger
import os
import sys
import numpy as np
import geopandas as gpd
import utils

## Configure APIs 
with open('config.yaml', 'r') as file: 
    data = yaml.full_load(file)
    census_api_key = data['api-info']['census']['key']
    azure_conn_str = data['api-info']['azure']['connection-str']
    azure_container_name = data['api-info']['azure']['container-name']
    
c = Census(year=2020, key=census_api_key)
azure_manager = AzureBlobStorageManager(connection_str=azure_conn_str, 
                                        container_name=azure_container_name,
                                        download_dir='data')

In [2]:
## Download from Azure container (if required)
for fp in azure_manager.list_blobs(): 
    dl_path = os.path.join(azure_manager.download_dir, fp)
    if os.path.isfile(dl_path):
        logger.info(f'{fp} already exists in specified download directory ({azure_manager.download_dir})')
    else: 
        azure_manager.download_blob(fp)

2023-12-12 11:24:40,309 - logger - INFO - DE_2010-2020_med-hh-income_tracts.json already exists in specified download directory (data)
2023-12-12 11:24:40,310 - logger - INFO - convert-ctracts_pct-area_2010-to-2020.json already exists in specified download directory (data)
2023-12-12 11:24:40,310 - logger - INFO - convert-ctracts_pct-area_2020-to-2010.json already exists in specified download directory (data)
2023-12-12 11:24:40,311 - logger - INFO - tract_conversion_table_2010-2010_raw.csv already exists in specified download directory (data)


In [2]:
## Read in crosswalks: 
# Convert from 2010 tract values to 2020 tract values (most common use)
map_2010_to_2020 = utils.read_json_rows(os.path.join('data', 'convert-ctracts_pct-area_2010-to-2020.json'))

# Convert from 2020 tract values to 2010 tract values (not sure when we'd ever want to do this, but the file is available)
# map_2020_to_2010 = utils.read_json_rows(os.path.join('data', 'convert-ctracts_pct-area_2020-to-2010.json'))

map_2010_to_2020[:3] 

[{'STATENAME': 'Alabama',
  'GEOID_TRACT_10': '01001020100',
  'GEOID_TRACT_20_overlap': {'01001020100': 0.999, '01001020803': 0.0}},
 {'STATENAME': 'Alabama',
  'GEOID_TRACT_10': '01001020200',
  'GEOID_TRACT_20_overlap': {'01001020100': 0.0, '01001020200': 1.0}},
 {'STATENAME': 'Alabama',
  'GEOID_TRACT_10': '01001020300',
  'GEOID_TRACT_20_overlap': {'01001020300': 1.0}}]

#### 2.) Obtain your Census Tract data from multiple years

In this example we will be converting median household income from tracts in 2010-2019 to DE tracts in 2020.

I will generalize this further into a dedicated script. 

In [51]:
## Obtain yearly data redistricting (single-variable)
state_list = utils.load_state_list()

## Pull your data from census tracts of different years, e.g. median income 

CENSUS_VARS = ['B19013_001E']
YEARS = range(2010,2021)

logger.info(f'Downloading {CENSUS_VARS} from {YEARS[0]} to {YEARS[-1]}')

dataframes = []
failed_downloads = []
max_retries = 2

for state in state_list: 
    n = 0 
    retries = 0
    
    while (n < len(YEARS)):  
        logger.info(f"Downloading ({state['usps']}, {str(year)})")
        mhhi_data_year = None
        year = YEARS[n]
 
        try: 
           
            mhhi_data_year = c.acs5.state_county_tract(fields = ['NAME'] + CENSUS_VARS,
                                                state_fips = state['fips'], # Delaware
                                                county_fips = "*",
                                                tract="*",
                                                year=year)
            df = pd.DataFrame(mhhi_data_year)
            df['year'] = year
            df['state_fips'] = state['fips']
            df['state_name'] = state['name']
            df['state_usps'] = state['usps']
        
            dataframes.append(df)
            n += 1 
            retries = 0

        except Exception as e:
            retries += 1
            logger.warning(f"({state['USPS']}, {str(year)}): {str(e)} (Retrying {retries}/{max_retries})")
    
        if (mhhi_data_year is None) and (retries == max_retries):
            logger.warning(f"Failed download {retries}/{max_retries} -- skipping to next year")
            n += 1 
            retries = 0 
            failed_downloads.append({'state_name':state['name'], 'year':state['year']}) 

df = pd.concat(dataframes)
df.to_csv('temp_df_store.csv')

2023-12-13 12:10:59,869 - logger - INFO - Downloading ['B19013_001E'] from 2010 to 2020
2023-12-13 12:10:59,870 - logger - INFO - Downloading (AL, 2020)
2023-12-13 12:11:01,573 - logger - INFO - Downloading (AL, 2010)
2023-12-13 12:11:03,098 - logger - INFO - Downloading (AL, 2011)
2023-12-13 12:11:05,047 - logger - INFO - Downloading (AL, 2012)
2023-12-13 12:11:06,448 - logger - INFO - Downloading (AL, 2013)
2023-12-13 12:11:07,760 - logger - INFO - Downloading (AL, 2014)
2023-12-13 12:11:09,030 - logger - INFO - Downloading (AL, 2015)
2023-12-13 12:11:10,703 - logger - INFO - Downloading (AL, 2016)
2023-12-13 12:11:12,079 - logger - INFO - Downloading (AL, 2017)
2023-12-13 12:11:14,198 - logger - INFO - Downloading (AL, 2018)
2023-12-13 12:11:16,087 - logger - INFO - Downloading (AL, 2019)
2023-12-13 12:11:17,598 - logger - INFO - Downloading (AK, 2020)
2023-12-13 12:11:18,198 - logger - INFO - Downloading (AK, 2010)
2023-12-13 12:11:18,823 - logger - INFO - Downloading (AK, 2011)
20

In [67]:
## Obtain yearly data redistricting (multi-variable)

import us 
state_list = [{'name':x.name, 'fips':x.fips, 'usps':x.abbr} for x in us.states.STATES] \
    + [{'name':'District of Columbia', 'fips':'11', 'usps':'DC'}]

## Pull your data from census tracts of different years, e.g. median income 

CENSUS_VARS = ['B01001A_012E','B01001A_017E', 'B00001_001E']
YEARS = range(2010,2021)

logger.info(f'Downloading {CENSUS_VARS} from {YEARS[0]} to {YEARS[-1]}')

dataframes = []
failed_downloads = []
max_retries = 2

for state in state_list: 
    n = 0 
    retries = 0
    
    while (n < len(YEARS)):  
        logger.info(f"Downloading ({state['usps']}, {str(year)})")
        mhhi_data_year = None
        year = YEARS[n]
 
        try: 
           
            mhhi_data_year = c.acs5.state_county_tract(fields = ['NAME'] + CENSUS_VARS,
                                                state_fips = state['fips'], # Delaware
                                                county_fips = "*",
                                                tract="*",
                                                year=year)
            df = pd.DataFrame(mhhi_data_year)
            df['year'] = year
            df['state_fips'] = state['fips']
            df['state_name'] = state['name']
            df['state_usps'] = state['usps']
        
            dataframes.append(df)
            n += 1 
            retries = 0

        except Exception as e:
            retries += 1
            logger.warning(f"({state['usps']}, {str(year)}): {str(e)} (Retrying {retries}/{max_retries})")
    
        if (mhhi_data_year is None) and (retries == max_retries):
            logger.warning(f"Failed download {retries}/{max_retries} -- skipping to next year")
            n += 1 
            retries = 0 
            failed_downloads.append({'state_name':state['name'], 'year':year})
    break

df = pd.concat(dataframes)
df.to_csv('temp_df_store_multi-var.csv')

2023-12-13 12:43:55,012 - logger - INFO - Downloading ['B01001A_012E', 'B01001A_017E', 'B00001_001E'] from 2010 to 2020
2023-12-13 12:43:55,015 - logger - INFO - Downloading (AL, 2019)
2023-12-13 12:43:55,682 - logger - INFO - Downloading (AL, 2010)
2023-12-13 12:43:56,287 - logger - INFO - Downloading (AL, 2011)
2023-12-13 12:43:56,903 - logger - INFO - Downloading (AL, 2012)
2023-12-13 12:43:57,503 - logger - INFO - Downloading (AL, 2013)
2023-12-13 12:43:58,127 - logger - INFO - Downloading (AL, 2014)
2023-12-13 12:43:58,841 - logger - INFO - Downloading (AL, 2015)
2023-12-13 12:43:59,632 - logger - INFO - Downloading (AL, 2016)
2023-12-13 12:44:00,565 - logger - INFO - Downloading (AL, 2017)
2023-12-13 12:44:01,294 - logger - INFO - Downloading (AL, 2018)
2023-12-13 12:44:01,330 - logger - INFO - Downloading (AL, 2019)
2023-12-13 12:44:01,364 - logger - INFO - Downloading (AL, 2019)
2023-12-13 12:44:01,397 - logger - INFO - Downloading (AL, 2020)


In [9]:
## Single var test 
df = pd.read_csv('temp_df_store.csv', index_col=0, dtype={'state':str,'county':str,'state_fips':str})
CENSUS_VARS =  ['B19013_001E']

# Split/edit location columns:  
df[['tract_dec', 'county_name', 'STATENAME']] = df['NAME'].str.split(', ', expand=True)
# Check that state names line up 
state_errors = df[df['STATENAME'] != df['state_name']]
if state_errors.shape[0] > 0: 
    # Kills script -- inspect the raw file in `output_path` 
    logger.exception(f'States do not match expected values in {state_errors.shape[0]} rows:\n {utils.df_to_print(state_errors, rows=min(state_errors.shape[0], 20))}')
df['tract_dec'] = df['tract_dec'].str.lstrip('Census Tract ').str.strip()
df['county_name'] = df['county_name'].str.rstrip('County')
df.rename({'county':'county_fips'}, axis=1, inplace=True)

# Dropping/reordering columns 
df = df[['state_fips', 'state_name', 'county_fips', 'county_name', 'tract', 'tract_dec', 'year'] + CENSUS_VARS]

# Handle NaN values:  
# Negative values are sometimes used to signal missing data -- replacing these with NaNs
rows_w_negative = df[df[CENSUS_VARS].apply(lambda row: any(val < 0 for val in row.values), axis=1)]
# rows_w_negative
n_rows_w_negative = rows_w_negative.shape[0]
if n_rows_w_negative > 0: 
    logger.info(f'{n_rows_w_negative} rows have negative placeholder values for census variables:\n {utils.df_to_print(rows_w_negative.value_counts().reset_index(), index=True)}')
    logger.info(f'Replacing these values with NaN.')
    df[CENSUS_VARS] = df[CENSUS_VARS].applymap(lambda x: np.nan if x < 0 else x)
nan_counts = df.isna().sum()
logger.info(f'NaN counts:\n {utils.df_to_print(nan_counts)}')

# Handle Duplicates
dups = df[df.duplicated()]
if dups.shape[0] > 0: 
    logger.info(f'{dups.shape[0]} duplicate rows (dropping)')
    df = df[~df.index.isin(dups.index)]

logger.info(f'Transformed Census data (Long):\n{utils.df_to_print(df)}\n{df.shape}')

## Widen dataframe - test that this works for one census variable and multiple census variables
df_wider = pd.pivot(data=df, 
            index=df.filter(regex='state|tract|county').columns,
            columns=['year'], 
            values=CENSUS_VARS)

df_wider

2023-12-13 13:15:21,265 - logger - INFO - 11041 rows have negative placeholder values for census variables:
 , state_fips, state_name, county_fips, county_name, tract, tract_dec, year, B19013_001E, count
0, 01, Alabama, 003, Baldwin , 990000, 9900, 2010, -666666666.0, 1
1, 36, New York, 081, Queens , 22900, 229, 2011, -666666666.0, 1
2, 36, New York, 081, Queens , 21900, 219, 2014, -666666666.0, 1
3, 36, New York, 081, Queens , 21900, 219, 2015, -666666666.0, 1

2023-12-13 13:15:21,265 - logger - INFO - Replacing these values with NaN.
2023-12-13 13:15:21,452 - logger - INFO - NaN counts:
 0
0
0
0
0

2023-12-13 13:15:23,119 - logger - INFO - Transformed Census data (Long):
state_fips, state_name, county_fips, county_name, tract, tract_dec, year, B19013_001E
01, Alabama, 001, Autauga , 20100, 201, 2010, 70222.0
01, Alabama, 001, Autauga , 20200, 202, 2010, 41091.0
01, Alabama, 001, Autauga , 20300, 203, 2010, 44031.0
01, Alabama, 001, Autauga , 20400, 204, 2010, 56627.0

(814975, 8)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,B19013_001E,B19013_001E,B19013_001E,B19013_001E,B19013_001E,B19013_001E,B19013_001E,B19013_001E,B19013_001E,B19013_001E,B19013_001E
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,year,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
state_fips,state_name,county_fips,county_name,tract,tract_dec,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
01,Alabama,001,Autauga,20100,201,70222.0,58846.0,56500.0,63030.0,60000.0,61838.0,66000.0,67826.0,58625.0,60208.0,60388.0
01,Alabama,001,Autauga,20200,202,41091.0,42019.0,41250.0,44019.0,42971.0,32303.0,41107.0,41287.0,43531.0,43958.0,49144.0
01,Alabama,001,Autauga,20300,203,44031.0,43145.0,43088.0,43201.0,43717.0,44922.0,51250.0,46806.0,51875.0,55345.0,62423.0
01,Alabama,001,Autauga,20400,204,56627.0,58419.0,54503.0,54730.0,55814.0,54329.0,52704.0,55895.0,54050.0,59663.0,64310.0
01,Alabama,001,Autauga,20500,205,68317.0,68301.0,65027.0,65132.0,57349.0,51965.0,52463.0,68143.0,72417.0,66108.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,Wyoming,043,Washakie,200,2,53686.0,60560.0,52804.0,55938.0,51799.0,54545.0,51024.0,59706.0,54936.0,55859.0,55268.0
56,Wyoming,043,Washakie,301,3.01,40304.0,40086.0,37700.0,34619.0,35691.0,34643.0,38221.0,41496.0,50428.0,46500.0,52074.0
56,Wyoming,043,Washakie,302,3.02,56086.0,61298.0,58026.0,58534.0,49844.0,55192.0,52581.0,52214.0,56141.0,58634.0,65543.0
56,Wyoming,045,Weston,951100,9511,52440.0,64103.0,64412.0,66653.0,66515.0,69222.0,69048.0,62435.0,50000.0,52763.0,51694.0


In [16]:
fixed_columns = ['-'.join((cvar, str(year))) for (cvar, year) in df_wider.columns]
fixed_columns 

['B19013_001E-2010',
 'B19013_001E-2011',
 'B19013_001E-2012',
 'B19013_001E-2013',
 'B19013_001E-2014',
 'B19013_001E-2015',
 'B19013_001E-2016',
 'B19013_001E-2017',
 'B19013_001E-2018',
 'B19013_001E-2019',
 'B19013_001E-2020']

In [7]:
utils.df_to_print(rows_w_negative.value_counts().reset_index())

'state_fips, state_name, county_fips, county_name, tract, tract_dec, year, B19013_001E, count\n01, Alabama, 003, Baldwin , 990000, 9900, 2010, -666666666.0, 1\n36, New York, 081, Queens , 22900, 229, 2011, -666666666.0, 1\n36, New York, 081, Queens , 21900, 219, 2014, -666666666.0, 1\n36, New York, 081, Queens , 21900, 219, 2015, -666666666.0, 1\n'

In [5]:
## Multi var check

df = pd.read_csv('temp_df_store_multi-var.csv', index_col=0, dtype={'state':str,'county':str,'state_fips':str})
CENSUS_VARS =  ['B01001A_012E','B01001A_017E', 'B00001_001E']

# Split/edit location columns:  
df[['tract_dec', 'county_name', 'STATENAME']] = df['NAME'].str.split(', ', expand=True)
# Check that state names line up 
state_errors = df[df['STATENAME'] != df['state_name']]
if state_errors.shape[0] > 0: 
    # Kills script -- inspect the raw file in `output_path` 
    logger.exception(f'States do not match expected values in {state_errors.shape[0]} rows:\n {utils.df_to_print(state_errors, rows=min(state_errors.shape[0], 20))}')
df['tract_dec'] = df['tract_dec'].str.lstrip('Census Tract ').str.strip()
df['county_name'] = df['county_name'].str.rstrip('County')
df.rename({'county':'county_fips'}, axis=1, inplace=True)

# Dropping/reordering columns 
df = df[['state_fips', 'state_name', 'county_fips', 'county_name', 'tract', 'tract_dec', 'year'] + CENSUS_VARS]

# Handle NaN values:  
# Negative values are sometimes used to signal missing data -- replacing these with NaNs
rows_w_negative = df[df[CENSUS_VARS].apply(lambda row: any(val < 0 for val in row.values), axis=1)]
# rows_w_negative
n_rows_w_negative = rows_w_negative.shape[0]
if n_rows_w_negative > 0: 
    logger.info(f'{n_rows_w_negative} rows have negative placeholder values for census variables:\n {utils.df_to_print(rows_w_negative.value_counts().reset_index())}')
    logger.info(f'Replacing these values with NaN.')
    df[CENSUS_VARS] = df[CENSUS_VARS].applymap(lambda x: np.nan if x < 0 else x)
nan_counts = df.isna().sum()
logger.info(f'NaN counts:\n {utils.df_to_print(nan_counts)}')

# Handle Duplicates
dups = df[df.duplicated()]
if dups.shape[0] > 0: 
    logger.info(f'{dups.shape[0]} duplicate rows (dropping)')
    df = df[~df.index.isin(dups.index)]

logger.info(f'Transformed Census data (Long):\n{utils.df_to_print(df)}\n{df.shape}')

## Widen dataframe - test that this works for one census variable and multiple census variables
df_wider = pd.pivot(data=df, 
            index=df.filter(regex='state|tract|county').columns,
            columns=['year'], 
            values=CENSUS_VARS)

fixed_columns = ['-'.join((cvar, str(year))) for (cvar, year) in df_wider.columns]
df_wider.columns = fixed_columns

df_wider

2023-12-13 14:39:01,307 - logger - INFO - NaN counts:
 0
0
0
0
0

2023-12-13 14:39:01,340 - logger - INFO - Transformed Census data (Long):
state_fips, state_name, county_fips, county_name, tract, tract_dec, year, B01001A_012E, B01001A_017E, B00001_001E
01, Alabama, 001, Autauga , 20100, 201, 2010, 82.0, 726.0, 149.0
01, Alabama, 001, Autauga , 20200, 202, 2010, 10.0, 419.0, 141.0
01, Alabama, 001, Autauga , 20300, 203, 2010, 190.0, 1633.0, 273.0
01, Alabama, 001, Autauga , 20400, 204, 2010, 254.0, 2514.0, 1079.0

(10629, 10)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,B01001A_012E-2010,B01001A_012E-2011,B01001A_012E-2012,B01001A_012E-2013,B01001A_012E-2014,B01001A_012E-2015,B01001A_012E-2016,B01001A_012E-2017,B01001A_012E-2018,B01001A_017E-2010,...,B01001A_017E-2018,B00001_001E-2010,B00001_001E-2011,B00001_001E-2012,B00001_001E-2013,B00001_001E-2014,B00001_001E-2015,B00001_001E-2016,B00001_001E-2017,B00001_001E-2018
state_fips,state_name,county_fips,county_name,tract,tract_dec,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
1,Alabama,001,Autauga,20100,201,82.0,100.0,93.0,76.0,74.0,84.0,86.0,123.0,140.0,726.0,...,853.0,149.0,180.0,195.0,208.0,232.0,251.0,245.0,239.0,250.0
1,Alabama,001,Autauga,20200,202,10.0,18.0,14.0,17.0,34.0,22.0,70.0,84.0,111.0,419.0,...,354.0,141.0,200.0,222.0,249.0,261.0,274.0,267.0,250.0,250.0
1,Alabama,001,Autauga,20300,203,190.0,217.0,158.0,186.0,161.0,132.0,128.0,135.0,157.0,1633.0,...,1295.0,273.0,279.0,261.0,264.0,264.0,252.0,258.0,282.0,300.0
1,Alabama,001,Autauga,20400,204,254.0,261.0,271.0,293.0,208.0,232.0,208.0,194.0,151.0,2514.0,...,1728.0,1079.0,905.0,764.0,643.0,499.0,375.0,371.0,358.0,350.0
1,Alabama,001,Autauga,20500,205,731.0,675.0,659.0,678.0,603.0,480.0,616.0,504.0,610.0,4208.0,...,3543.0,643.0,590.0,540.0,473.0,391.0,325.0,317.0,320.0,350.0
1,Alabama,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,Alabama,133,Winston,965503,9655.03,300.0,360.0,364.0,340.0,266.0,258.0,219.0,202.0,111.0,1503.0,...,1348.0,176.0,179.0,207.0,238.0,255.0,283.0,309.0,305.0,300.0
1,Alabama,133,Winston,965600,9656,508.0,404.0,316.0,259.0,405.0,401.0,427.0,469.0,496.0,2858.0,...,2329.0,387.0,434.0,417.0,421.0,425.0,383.0,375.0,382.0,400.0
1,Alabama,133,Winston,965700,9657,257.0,286.0,371.0,331.0,297.0,312.0,321.0,237.0,281.0,2262.0,...,2054.0,224.0,249.0,299.0,315.0,360.0,392.0,409.0,367.0,350.0
1,Alabama,133,Winston,965800,9658,317.0,275.0,290.0,318.0,311.0,308.0,304.0,302.0,228.0,2060.0,...,2179.0,224.0,249.0,282.0,306.0,330.0,347.0,358.0,352.0,350.0


In [24]:
CENSUS_VARS

['B01001A_012E', 'B01001A_017E', 'B00001_001E']

In [61]:
df_wider

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,B01001A_012E-2010,B01001A_012E-2011,B01001A_012E-2012,B01001A_012E-2013,B01001A_012E-2014,B01001A_012E-2015,B01001A_012E-2016,B01001A_012E-2017,B01001A_012E-2018,B01001A_017E-2010,...,B01001A_017E-2018,B00001_001E-2010,B00001_001E-2011,B00001_001E-2012,B00001_001E-2013,B00001_001E-2014,B00001_001E-2015,B00001_001E-2016,B00001_001E-2017,B00001_001E-2018
state_fips,state_name,county_fips,county_name,tract,tract_dec,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
1,Alabama,001,Autauga,20100,201,82.0,100.0,93.0,76.0,74.0,84.0,86.0,123.0,140.0,726.0,...,853.0,149.0,180.0,195.0,208.0,232.0,251.0,245.0,239.0,250.0
1,Alabama,001,Autauga,20200,202,10.0,18.0,14.0,17.0,34.0,22.0,70.0,84.0,111.0,419.0,...,354.0,141.0,200.0,222.0,249.0,261.0,274.0,267.0,250.0,250.0
1,Alabama,001,Autauga,20300,203,190.0,217.0,158.0,186.0,161.0,132.0,128.0,135.0,157.0,1633.0,...,1295.0,273.0,279.0,261.0,264.0,264.0,252.0,258.0,282.0,300.0
1,Alabama,001,Autauga,20400,204,254.0,261.0,271.0,293.0,208.0,232.0,208.0,194.0,151.0,2514.0,...,1728.0,1079.0,905.0,764.0,643.0,499.0,375.0,371.0,358.0,350.0
1,Alabama,001,Autauga,20500,205,731.0,675.0,659.0,678.0,603.0,480.0,616.0,504.0,610.0,4208.0,...,3543.0,643.0,590.0,540.0,473.0,391.0,325.0,317.0,320.0,350.0
1,Alabama,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,Alabama,133,Winston,965503,9655.03,300.0,360.0,364.0,340.0,266.0,258.0,219.0,202.0,111.0,1503.0,...,1348.0,176.0,179.0,207.0,238.0,255.0,283.0,309.0,305.0,300.0
1,Alabama,133,Winston,965600,9656,508.0,404.0,316.0,259.0,405.0,401.0,427.0,469.0,496.0,2858.0,...,2329.0,387.0,434.0,417.0,421.0,425.0,383.0,375.0,382.0,400.0
1,Alabama,133,Winston,965700,9657,257.0,286.0,371.0,331.0,297.0,312.0,321.0,237.0,281.0,2262.0,...,2054.0,224.0,249.0,299.0,315.0,360.0,392.0,409.0,367.0,350.0
1,Alabama,133,Winston,965800,9658,317.0,275.0,290.0,318.0,311.0,308.0,304.0,302.0,228.0,2060.0,...,2179.0,224.0,249.0,282.0,306.0,330.0,347.0,358.0,352.0,350.0


In [6]:
## Idea one: loop through the census variables and collapse each into its own column (keeps lambda function the same as from before)
df_wider2 = df_wider.copy()
for var in CENSUS_VARS: 
    print(var)
    var_year_columns = df_wider2.filter(regex=var).columns
    df_wider2[var] = df_wider2[var_year_columns] \
        .apply(lambda row: {str(col.split('-')[1]):np.round(row_value, 2) for col, row_value in row.items()}, axis=1)
    # x.name = var 
    # print(x)
    df_wider2.drop(var_year_columns, axis=1, inplace=True)

df_wider2
    # display(x)

B01001A_012E
B01001A_017E
B00001_001E


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,B01001A_012E,B01001A_017E,B00001_001E
state_fips,state_name,county_fips,county_name,tract,tract_dec,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Alabama,001,Autauga,20100,201,"{'2010': 82.0, '2011': 100.0, '2012': 93.0, '2...","{'2010': 726.0, '2011': 718.0, '2012': 759.0, ...","{'2010': 149.0, '2011': 180.0, '2012': 195.0, ..."
1,Alabama,001,Autauga,20200,202,"{'2010': 10.0, '2011': 18.0, '2012': 14.0, '20...","{'2010': 419.0, '2011': 461.0, '2012': 393.0, ...","{'2010': 141.0, '2011': 200.0, '2012': 222.0, ..."
1,Alabama,001,Autauga,20300,203,"{'2010': 190.0, '2011': 217.0, '2012': 158.0, ...","{'2010': 1633.0, '2011': 1440.0, '2012': 1412....","{'2010': 273.0, '2011': 279.0, '2012': 261.0, ..."
1,Alabama,001,Autauga,20400,204,"{'2010': 254.0, '2011': 261.0, '2012': 271.0, ...","{'2010': 2514.0, '2011': 2237.0, '2012': 2190....","{'2010': 1079.0, '2011': 905.0, '2012': 764.0,..."
1,Alabama,001,Autauga,20500,205,"{'2010': 731.0, '2011': 675.0, '2012': 659.0, ...","{'2010': 4208.0, '2011': 4118.0, '2012': 4187....","{'2010': 643.0, '2011': 590.0, '2012': 540.0, ..."
1,Alabama,...,...,...,...,...,...,...
1,Alabama,133,Winston,965503,9655.03,"{'2010': 300.0, '2011': 360.0, '2012': 364.0, ...","{'2010': 1503.0, '2011': 1393.0, '2012': 1430....","{'2010': 176.0, '2011': 179.0, '2012': 207.0, ..."
1,Alabama,133,Winston,965600,9656,"{'2010': 508.0, '2011': 404.0, '2012': 316.0, ...","{'2010': 2858.0, '2011': 2748.0, '2012': 2732....","{'2010': 387.0, '2011': 434.0, '2012': 417.0, ..."
1,Alabama,133,Winston,965700,9657,"{'2010': 257.0, '2011': 286.0, '2012': 371.0, ...","{'2010': 2262.0, '2011': 2276.0, '2012': 2331....","{'2010': 224.0, '2011': 249.0, '2012': 299.0, ..."
1,Alabama,133,Winston,965800,9658,"{'2010': 317.0, '2011': 275.0, '2012': 290.0, ...","{'2010': 2060.0, '2011': 2163.0, '2012': 2224....","{'2010': 224.0, '2011': 249.0, '2012': 282.0, ..."


#### 3.) Apply the years crosswalk to your data

In [107]:
# Create map dataframe
df_map_2010_to_2020 = pd.DataFrame(map_2010_to_2020).set_index(['GEOID_TRACT_10'])
display(df_map_2010_to_2020)

Unnamed: 0_level_0,STATENAME,GEOID_TRACT_20_overlap
GEOID_TRACT_10,Unnamed: 1_level_1,Unnamed: 2_level_1
01001020100,Alabama,"{'01001020100': 99.909, '01001020803': 0.001}"
01001020200,Alabama,"{'01001020100': 0.049, '01001020200': 99.96}"
01001020300,Alabama,{'01001020300': 100.0}
01001020400,Alabama,{'01001020400': 99.991}
01001020500,Alabama,"{'01001020501': 100.0, '01001020502': 100.0, '..."
...,...,...
56043000200,Wyoming,"{'56043000200': 99.996, '56043000302': 0.826}"
56043000301,Wyoming,{'56043000301': 100.0}
56043000302,Wyoming,"{'56043000200': 0.004, '56043000302': 99.174}"
56045951100,Wyoming,"{'56045951100': 100.0, '56045951300': 0.016}"


In [83]:
[''.join([idx[0], idx[2], str(idx[4])]) for idx in df_wider.index]

['0100120100',
 '0100120200',
 '0100120300',
 '0100120400',
 '0100120500',
 '0100120600',
 '0100120700',
 '0100120801',
 '0100120802',
 '0100120900',
 '0100121000',
 '0100121100',
 '0100310100',
 '0100310200',
 '0100310300',
 '0100310400',
 '0100310500',
 '0100310600',
 '0100310701',
 '0100310703',
 '0100310704',
 '0100310705',
 '0100310800',
 '0100310903',
 '0100310904',
 '0100310905',
 '0100310906',
 '0100311000',
 '0100311101',
 '0100311102',
 '0100311201',
 '0100311202',
 '0100311300',
 '0100311401',
 '0100311403',
 '0100311405',
 '0100311406',
 '0100311407',
 '0100311408',
 '0100311501',
 '0100311502',
 '0100311601',
 '0100311602',
 '01003990000',
 '01005950100',
 '01005950200',
 '01005950300',
 '01005950400',
 '01005950500',
 '01005950600',
 '01005950700',
 '01005950800',
 '01005950900',
 '0100710001',
 '0100710002',
 '0100710003',
 '0100710004',
 '0100950101',
 '0100950102',
 '0100950200',
 '0100950300',
 '0100950400',
 '0100950500',
 '0100950601',
 '0100950602',
 '0100950700',


In [85]:
df_wider2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,B01001A_012E,B01001A_017E,B00001_001E
state_fips,state_name,county_fips,county_name,tract,tract_dec,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Alabama,001,Autauga,20100,201,"{'2010': 82.0, '2011': 100.0, '2012': 93.0, '2...","{'2010': 726.0, '2011': 718.0, '2012': 759.0, ...","{'2010': 149.0, '2011': 180.0, '2012': 195.0, ..."
1,Alabama,001,Autauga,20200,202,"{'2010': 10.0, '2011': 18.0, '2012': 14.0, '20...","{'2010': 419.0, '2011': 461.0, '2012': 393.0, ...","{'2010': 141.0, '2011': 200.0, '2012': 222.0, ..."
1,Alabama,001,Autauga,20300,203,"{'2010': 190.0, '2011': 217.0, '2012': 158.0, ...","{'2010': 1633.0, '2011': 1440.0, '2012': 1412....","{'2010': 273.0, '2011': 279.0, '2012': 261.0, ..."
1,Alabama,001,Autauga,20400,204,"{'2010': 254.0, '2011': 261.0, '2012': 271.0, ...","{'2010': 2514.0, '2011': 2237.0, '2012': 2190....","{'2010': 1079.0, '2011': 905.0, '2012': 764.0,..."
1,Alabama,001,Autauga,20500,205,"{'2010': 731.0, '2011': 675.0, '2012': 659.0, ...","{'2010': 4208.0, '2011': 4118.0, '2012': 4187....","{'2010': 643.0, '2011': 590.0, '2012': 540.0, ..."
1,Alabama,...,...,...,...,...,...,...
1,Alabama,133,Winston,965503,9655.03,"{'2010': 300.0, '2011': 360.0, '2012': 364.0, ...","{'2010': 1503.0, '2011': 1393.0, '2012': 1430....","{'2010': 176.0, '2011': 179.0, '2012': 207.0, ..."
1,Alabama,133,Winston,965600,9656,"{'2010': 508.0, '2011': 404.0, '2012': 316.0, ...","{'2010': 2858.0, '2011': 2748.0, '2012': 2732....","{'2010': 387.0, '2011': 434.0, '2012': 417.0, ..."
1,Alabama,133,Winston,965700,9657,"{'2010': 257.0, '2011': 286.0, '2012': 371.0, ...","{'2010': 2262.0, '2011': 2276.0, '2012': 2331....","{'2010': 224.0, '2011': 249.0, '2012': 299.0, ..."
1,Alabama,133,Winston,965800,9658,"{'2010': 317.0, '2011': 275.0, '2012': 290.0, ...","{'2010': 2060.0, '2011': 2163.0, '2012': 2224....","{'2010': 224.0, '2011': 249.0, '2012': 282.0, ..."


In [None]:
from tract_crosswalk import get_tract_crosswalks 
map_10_to_20 = get_tract_crosswalks()[0] # We want the crosswalk from 2010 to 2020. TO-DO: set the direction of the conversion as a parameter in config.yaml, pass into get_tract_crosswalks
# df_map_10_to_20 = pd.DataFrame(map_10_to_20)

In [21]:
df_map_10_to_20 = pd.DataFrame(map_10_to_20).set_index(['GEOID_TRACT_10'])
df_map_10_to_20.dtypes

STATENAME                 object
GEOID_TRACT_20_overlap    object
dtype: object

In [14]:
df_wider2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,B01001A_012E,B01001A_017E,B00001_001E,GEOID_TRACT_10
state_fips,state_name,county_fips,county_name,tract,tract_dec,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Alabama,001,Autauga,20100,201,"{'2010': 82.0, '2011': 100.0, '2012': 93.0, '2...","{'2010': 726.0, '2011': 718.0, '2012': 759.0, ...","{'2010': 149.0, '2011': 180.0, '2012': 195.0, ...",0100120100
1,Alabama,001,Autauga,20200,202,"{'2010': 10.0, '2011': 18.0, '2012': 14.0, '20...","{'2010': 419.0, '2011': 461.0, '2012': 393.0, ...","{'2010': 141.0, '2011': 200.0, '2012': 222.0, ...",0100120200
1,Alabama,001,Autauga,20300,203,"{'2010': 190.0, '2011': 217.0, '2012': 158.0, ...","{'2010': 1633.0, '2011': 1440.0, '2012': 1412....","{'2010': 273.0, '2011': 279.0, '2012': 261.0, ...",0100120300
1,Alabama,001,Autauga,20400,204,"{'2010': 254.0, '2011': 261.0, '2012': 271.0, ...","{'2010': 2514.0, '2011': 2237.0, '2012': 2190....","{'2010': 1079.0, '2011': 905.0, '2012': 764.0,...",0100120400
1,Alabama,001,Autauga,20500,205,"{'2010': 731.0, '2011': 675.0, '2012': 659.0, ...","{'2010': 4208.0, '2011': 4118.0, '2012': 4187....","{'2010': 643.0, '2011': 590.0, '2012': 540.0, ...",0100120500
1,Alabama,...,...,...,...,...,...,...,...
1,Alabama,133,Winston,965503,9655.03,"{'2010': 300.0, '2011': 360.0, '2012': 364.0, ...","{'2010': 1503.0, '2011': 1393.0, '2012': 1430....","{'2010': 176.0, '2011': 179.0, '2012': 207.0, ...",01133965503
1,Alabama,133,Winston,965600,9656,"{'2010': 508.0, '2011': 404.0, '2012': 316.0, ...","{'2010': 2858.0, '2011': 2748.0, '2012': 2732....","{'2010': 387.0, '2011': 434.0, '2012': 417.0, ...",01133965600
1,Alabama,133,Winston,965700,9657,"{'2010': 257.0, '2011': 286.0, '2012': 371.0, ...","{'2010': 2262.0, '2011': 2276.0, '2012': 2331....","{'2010': 224.0, '2011': 249.0, '2012': 299.0, ...",01133965700
1,Alabama,133,Winston,965800,9658,"{'2010': 317.0, '2011': 275.0, '2012': 290.0, ...","{'2010': 2060.0, '2011': 2163.0, '2012': 2224....","{'2010': 224.0, '2011': 249.0, '2012': 282.0, ...",01133965800


In [22]:
df_map_10_to_20

Unnamed: 0_level_0,STATENAME,GEOID_TRACT_20_overlap
GEOID_TRACT_10,Unnamed: 1_level_1,Unnamed: 2_level_1
01001020100,Alabama,"{'01001020100': 0.999, '01001020803': 0.0}"
01001020200,Alabama,"{'01001020100': 0.0, '01001020200': 1.0}"
01001020300,Alabama,{'01001020300': 1.0}
01001020400,Alabama,{'01001020400': 1.0}
01001020500,Alabama,"{'01001020501': 1.0, '01001020502': 1.0, '0100..."
...,...,...
56043000200,Wyoming,"{'56043000200': 1.0, '56043000302': 0.008}"
56043000301,Wyoming,{'56043000301': 1.0}
56043000302,Wyoming,"{'56043000200': 0.0, '56043000302': 0.992}"
56045951100,Wyoming,"{'56045951100': 1.0, '56045951300': 0.0}"


In [25]:
# Join with pulled data 
df_wider2['GEOID_TRACT_10'] = [''.join([idx[0], idx[2], str(idx[4])]) for idx in df_wider2.index]
df_wider2

joined = df_wider2.merge(df_map_10_to_20, how='left', left_on=['GEOID_TRACT_10'], right_index=True).drop(['STATENAME'], axis=1)
joined

# print('Joining Crosswalk')
# display(joined.head())

# ## Row-wise apply function (issue w/ creating these row-wise dicts is recombining values from same 2020 tracts scattered between multiple rows)
# # def apply_crosswalk(raw_values, overlap_map, dec_round=2):
# #     conversion_dict = {}
# #     for tract_2020, pct_overlap in overlap_map.items(): 
# #         if pct_overlap > 1: 
# #             # This would be an older version of the conversion map before I removed \'* 100\' from the pct_overlap function
# #             pct_overlap /= 100
# #         conversion_dict[tract_2020] = {year:(val * pct_overlap).round(dec_round) for year,val in raw_values.items()}
# #     return conversion_dict

# # joined['GEOID_TRACT_20_conversion'] = joined.apply(lambda row: apply_crosswalk(row['income_raw'], row['GEOID_TRACT_20_overlap']), axis=1)

## Column-wise function 
def apply_crosswalk(raw_values, overlaps) -> dict: 
    """Create historical data for 2020 Tracts by multiplying past year's values by their respective crosswalk weights"""
    output_dict = {}
    for rv, ov in zip(raw_values, overlaps):
        for tract_2020, pct in ov.items(): 
            # Convert the raw values for the current 2020 census tract
            pct = pct if pct <= 1 else pct / 100
            converted_raw_values = {year:(val*pct).round(2) for year, val in rv.items()}
            # Add values to the output dictionary 
            if tract_2020 in output_dict.keys(): 
                # Add to the values in the current dictionary
                for year in output_dict[tract_2020].keys(): 
                    output_dict[tract_2020][year] += converted_raw_values[year]
            else: 
                output_dict[tract_2020] = converted_raw_values
    return output_dict

print('Create New DF by multiplying year variables by crosswalk weights')
df = pd.DataFrame(apply_crosswalk(joined['income_raw'], joined['GEOID_TRACT_20_overlap'])).T.rename_axis('GEOID_TRACT_20')
# Also collapsing this one
df = df.apply(lambda row: {str(col):np.round(row_value, 2) for col, row_value in row.items()}, axis=1)\
    .reset_index()\
    .rename({0:'B19013_001E'}, axis=1)

# df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,B01001A_012E,B01001A_017E,B00001_001E,GEOID_TRACT_10,GEOID_TRACT_20_overlap
state_fips,state_name,county_fips,county_name,tract,tract_dec,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Alabama,001,Autauga,20100,201,"{'2010': 82.0, '2011': 100.0, '2012': 93.0, '2...","{'2010': 726.0, '2011': 718.0, '2012': 759.0, ...","{'2010': 149.0, '2011': 180.0, '2012': 195.0, ...",0100120100,
1,Alabama,001,Autauga,20200,202,"{'2010': 10.0, '2011': 18.0, '2012': 14.0, '20...","{'2010': 419.0, '2011': 461.0, '2012': 393.0, ...","{'2010': 141.0, '2011': 200.0, '2012': 222.0, ...",0100120200,
1,Alabama,001,Autauga,20300,203,"{'2010': 190.0, '2011': 217.0, '2012': 158.0, ...","{'2010': 1633.0, '2011': 1440.0, '2012': 1412....","{'2010': 273.0, '2011': 279.0, '2012': 261.0, ...",0100120300,
1,Alabama,001,Autauga,20400,204,"{'2010': 254.0, '2011': 261.0, '2012': 271.0, ...","{'2010': 2514.0, '2011': 2237.0, '2012': 2190....","{'2010': 1079.0, '2011': 905.0, '2012': 764.0,...",0100120400,
1,Alabama,001,Autauga,20500,205,"{'2010': 731.0, '2011': 675.0, '2012': 659.0, ...","{'2010': 4208.0, '2011': 4118.0, '2012': 4187....","{'2010': 643.0, '2011': 590.0, '2012': 540.0, ...",0100120500,
1,Alabama,...,...,...,...,...,...,...,...,...
1,Alabama,133,Winston,965503,9655.03,"{'2010': 300.0, '2011': 360.0, '2012': 364.0, ...","{'2010': 1503.0, '2011': 1393.0, '2012': 1430....","{'2010': 176.0, '2011': 179.0, '2012': 207.0, ...",01133965503,{'01133965503': 1.0}
1,Alabama,133,Winston,965600,9656,"{'2010': 508.0, '2011': 404.0, '2012': 316.0, ...","{'2010': 2858.0, '2011': 2748.0, '2012': 2732....","{'2010': 387.0, '2011': 434.0, '2012': 417.0, ...",01133965600,"{'01133965501': 0.0, '01133965502': 0.003, '01..."
1,Alabama,133,Winston,965700,9657,"{'2010': 257.0, '2011': 286.0, '2012': 371.0, ...","{'2010': 2262.0, '2011': 2276.0, '2012': 2331....","{'2010': 224.0, '2011': 249.0, '2012': 299.0, ...",01133965700,{'01133965700': 1.0}
1,Alabama,133,Winston,965800,9658,"{'2010': 317.0, '2011': 275.0, '2012': 290.0, ...","{'2010': 2060.0, '2011': 2163.0, '2012': 2224....","{'2010': 224.0, '2011': 249.0, '2012': 282.0, ...",01133965800,{'01133965800': 0.999}


#### 4.) Obtain geometries  


In [225]:
import pygris 

tract_geoms = pygris.tracts(year=2020, state='DE').set_index('GEOID')
tract_geoms

county_codes = pygris.counties(year=2020, state='DE') # to get the county names -- not included in the above set
county_codes = county_codes[['COUNTYFP', 'NAME']]

Using FIPS code '10' for input 'DE'
Using FIPS code '10' for input 'DE'


In [232]:
df_geoms = df.merge(tract_geoms[['STATEFP', 'COUNTYFP', 'TRACTCE', 'NAME', 'geometry']], 
                    how='left', left_on=['GEOID_TRACT_20'], right_index=True) \
    .rename({'NAME':'TRACT_DEC'}, axis=1) \
             .merge(county_codes, how='left', on='COUNTYFP') \
    .rename({'NAME':'COUNTY_NAME'}, axis=1) \
    
df_geoms = gpd.GeoDataFrame(df_geoms[['STATEFP', 'COUNTYFP', 'COUNTY_NAME','TRACTCE','B19013_001E','geometry']])

df_geoms

Unnamed: 0,STATEFP,COUNTYFP,COUNTY_NAME,TRACTCE,B19013_001E,geometry
0,10,001,Kent,040100,"{'2010': 63292.0, '2011': 70055.0, '2012': 742...","POLYGON ((-75.76002 39.29682, -75.75626 39.297..."
1,10,001,Kent,040201,"{'2010': 56231.68, '2011': 58961.56, '2012': 5...","POLYGON ((-75.66581 39.29057, -75.66547 39.290..."
2,10,001,Kent,040204,"{'2010': 62520.47, '2011': 67979.56, '2012': 7...","POLYGON ((-75.63471 39.27722, -75.63469 39.277..."
3,10,003,New Castle,016901,"{'2010': 55466.35, '2011': 60117.56, '2012': 5...","POLYGON ((-75.76010 39.29715, -75.75999 39.297..."
4,10,001,Kent,040205,"{'2010': 62183.25, '2011': 67625.91, '2012': 7...","POLYGON ((-75.61400 39.27423, -75.61211 39.275..."
...,...,...,...,...,...,...
258,10,005,Sussex,051702,"{'2010': 56332.32, '2011': 61039.23, '2012': 6...","POLYGON ((-75.55813 38.45574, -75.55812 38.455..."
259,10,005,Sussex,051801,"{'2010': 44882.0, '2011': 45027.0, '2012': 489...","POLYGON ((-75.70157 38.56074, -75.70041 38.560..."
260,10,005,Sussex,051802,"{'2010': 40513.54, '2011': 41602.42, '2012': 3...","POLYGON ((-75.59122 38.53835, -75.59110 38.538..."
261,10,005,Sussex,051900,"{'2010': 45815.42, '2011': 45049.49, '2012': 4...","POLYGON ((-75.69854 38.52205, -75.69571 38.521..."


#### 5.) Upload to Azure 


In [233]:
## Save locally first to upload 
fp = os.path.join('data', 'DE_2010-2020_med-hh-income_tracts.json')
df_geoms.to_file(fp)

azure_manager.upload_blob(fp)

Blob DE_2010-2020_med-hh-income_tracts.json uploaded successfully.


In [37]:
fp = os.path.join('data', 'DE_2010-2020_med-hh-income_tracts.json')
df = gpd.read_file(fp)

In [47]:
df.memory_usage(deep=True).sum() / 1000 

159.59

In [240]:
blob_client = azure_manager.container_client.get_blob_client(os.path.basename(fp))

print(blob_client.url)

https://pipelinemapping.blob.core.windows.net/std-geoms/DE_2010-2020_med-hh-income_tracts.json
