# Pull together the big dataset: `is_org` parcel counts by community and census tract, 2006–2022

In [25]:
import pandas as pd
import geopandas as gpd
from class_type_crosswalk import class_type_dict
from munci_comm_crosswalk import geo_dict

In [21]:
# Accumulators for the summary tables: one entry per (year, class type) pair.
df_list = [] # by communities
df_tract_list = [] # by 2020 census tract

class_types_list = ['single-family', '2-6 unit properties', 'other residential', 'condo', '7+ unit properties']


def summarize_is_org(parcels, geography_col, class_type, year):
    """Count org / non-org parcels per geography for one class type and year.

    Parameters
    ----------
    parcels : pd.DataFrame
        Parcel rows already filtered to a single class type. Must contain
        ``geography_col``, ``is_org`` and ``pin``.
    geography_col : str
        Column to group by; it becomes the index of the result.
    class_type : str
        Label stored in the ``class_category`` column.
    year : int
        Label stored in the ``year`` column.

    Returns
    -------
    pd.DataFrame
        One row per geography with the columns ``non_org``, ``is_org``,
        ``pct_is_org``, ``total_parcels``, ``year`` and ``class_category``.
    """
    counts = pd.pivot_table(parcels,
                            index=geography_col,
                            columns='is_org',
                            values='pin',
                            aggfunc='count')

    # A subset in which one flag value never appears yields a pivot without
    # that column, and the lookups below would raise KeyError. Force both flag
    # columns to exist, then fill NaN with 0 so the totals add up.
    # (Assumes is_org is coded 0/1, as the 0/1 lookups already require.)
    counts = counts.reindex(columns=[0, 1], fill_value=0).fillna(0)

    total_parcels = counts[0] + counts[1]
    counts['pct_is_org'] = counts[1] / total_parcels
    counts['total_parcels'] = total_parcels
    counts['year'] = year
    counts['class_category'] = class_type

    return counts.rename(columns={0: "non_org", 1: "is_org"})


for year in range(2006, 2023):
    filepath = f"source/mansueto/dfc_addpno_pm_pin_geo_desc_met_munci_tract_category_{year}_06_22.parquet"
    # Read only the columns the summaries use, to keep each yearly frame small.
    parcels = pd.read_parquet(filepath, columns=['pin', 'is_org', 'class', 'tract_GEOID', 'munci_comm'])

    # map classes
    parcels['class_type'] = parcels['class'].map(class_type_dict)

    # build one community-level and one tract-level summary per class type
    for class_type in class_types_list:
        class_parcels = parcels[parcels['class_type'] == class_type]
        df_list.append(summarize_is_org(class_parcels, 'munci_comm', class_type, year))
        df_tract_list.append(summarize_is_org(class_parcels, 'tract_GEOID', class_type, year))

    # Drop the yearly frame before the next read so two years are never held at once.
    del parcels

In [22]:
# Sanity check: both lists should hold one summary per (year, class type) pair.
for summaries in (df_list, df_tract_list):
    print(len(summaries))

85
85


In [23]:
# Spot-check a single tract-level summary table (the 17th one appended).
sample_tract_summary = df_tract_list[16]
sample_tract_summary.head()

is_org,non_org,is_org,pct_is_org,total_parcels,year,class_category
tract_GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
17031010100,46.0,8.0,0.148148,54.0,2009,2-6 unit properties
17031010201,156.0,10.0,0.060241,166.0,2009,2-6 unit properties
17031010202,25.0,1.0,0.038462,26.0,2009,2-6 unit properties
17031010300,29.0,5.0,0.147059,34.0,2009,2-6 unit properties
17031010400,32.0,5.0,0.135135,37.0,2009,2-6 unit properties


In [40]:
# Stack the per-year, per-class summaries row-wise into one long table
# for each geography level.
df = pd.concat(df_list, axis='index')
df_tracts = pd.concat(df_tract_list, axis='index')

In [41]:
# map place for communities
# Move munci_comm out of the index only if it is still there, so re-running
# this cell does not add a stray 'index' column that would end up in the CSV.
if 'munci_comm' not in df.columns:
    df = df.reset_index()
df['place'] = df['munci_comm'].map(geo_dict)

In [42]:
# map place for tracts
# Move tract_GEOID out of the index only if it is still there, so re-running
# this cell does not add a stray 'index' column that would end up in the CSV.
if 'tract_GEOID' not in df_tracts.columns:
    df_tracts = df_tracts.reset_index()

# load chi tract map
chi_tracts = gpd.read_file('../geographies/Chicago_2020_tracts.geojson')

# make a dict out of GEOID and Community Area
chi_tract_dict = pd.Series(chi_tracts['Community Area'].values, index=chi_tracts['GEOID']).to_dict()

# Tracts missing from the Chicago lookup map to NaN; label those as suburban Cook.
df_tracts['place'] = df_tracts['tract_GEOID'].map(chi_tract_dict).fillna('Suburban Cook')

In [46]:
# Row count per place, smallest first; dropna=False keeps any NaN place visible.
rows_per_place = df_tracts.groupby('place', dropna=False).size()
rows_per_place.sort_values()

place
Burnside            68
Riverdale          108
Fuller Park        130
Hegewisch          136
Avalon Park        146
                 ...  
Austin            1864
Logan Square      2279
West Town         2535
Lake View         2826
Suburban Cook    39420
Length: 78, dtype: int64

In [47]:
# Write both long tables to disk, leaving out the positional index.
exports = (
    (df, 'output/is_org_all_years_by_community.csv'),
    (df_tracts, 'output/is_org_all_years_by_tract.csv'),
)
for frame, csv_path in exports:
    frame.to_csv(csv_path, index=False)

# Explore census tract data

In [48]:
# First five rows of the tract-level table, as a starting point for exploration.
df_tracts.head(n=5)

is_org,tract_GEOID,non_org,is_org.1,pct_is_org,total_parcels,year,class_category,place
0,17031010100,35.0,2.0,0.054054,37.0,2006,single-family,Rogers Park
1,17031010201,110.0,1.0,0.009009,111.0,2006,single-family,Rogers Park
2,17031010202,50.0,6.0,0.107143,56.0,2006,single-family,Rogers Park
3,17031010300,108.0,3.0,0.027027,111.0,2006,single-family,Rogers Park
4,17031010400,29.0,0.0,0.0,29.0,2006,single-family,Rogers Park
