# Load data

In [11]:
import pandas as pd

df06 = pd.read_parquet('source/mansueto/dfc_addpno_pm_pin_geo_desc_met_munci_tract_category_2006_06_22.parquet', columns=['pin','property_address','mailing_name','mailing_street','property_street','is_org','class','exe_homeowner','class_desc','is_same_address','is_homeowner','tax_year','tract_GEOID','munci_comm'])

In [22]:
df22 = pd.read_parquet('source/mansueto/dfc_addpno_pm_pin_geo_desc_met_munci_tract_category_2022_06_22.parquet', columns=['pin','property_address','mailing_name','mailing_street','property_street','is_org','class','exe_homeowner','class_desc','is_same_address','is_homeowner','tax_year','tract_GEOID','munci_comm'])

In [5]:
df06.head()

Unnamed: 0,pin,property_address,mailing_name,mailing_street,property_street,is_org,class,exe_homeowner,class_desc,is_same_address,is_homeowner,tax_year
0,1011000370000,"203 DUNDEE AVE, ,",MARTIN O DONNELL,203 DUNDEE AVE,203 DUNDEE AVE,0.0,397,0,Special rental structure,1.0,0.0,2006
1,1011000500000,"121 STATION ST, ,",EXEMPT,,121 STATION ST,0.0,0,0,,,,2006
2,1011000930000,"138 STATION ST, ,",BARRINGTON,200 HOUGH ST,138 STATION ST,0.0,202,0,"One story residence, any age, up to 999 sq. ft.",0.0,0.0,2006
3,1011000990000,"118 LAKE ST, ,",DAVID MAHONEY,118 LAKE ST,118 LAKE ST,0.0,204,5000,"One story residence, any age, 1,801 sq. ft. an...",1.0,1.0,2006
4,1011050030000,"207 COOK ST, ,",M J JENSEN,,207 COOK ST,0.0,590,0,Commercial minor improvement,,,2006


In [23]:
df22.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1846867 entries, 0 to 1846866
Data columns (total 14 columns):
 #   Column            Dtype  
---  ------            -----  
 0   pin               object 
 1   property_address  object 
 2   mailing_name      object 
 3   mailing_street    object 
 4   property_street   object 
 5   is_org            float64
 6   class             object 
 7   exe_homeowner     int64  
 8   class_desc        object 
 9   is_same_address   float64
 10  is_homeowner      float64
 11  tax_year          int32  
 12  tract_GEOID       object 
 13  munci_comm        object 
dtypes: float64(3), int32(1), int64(1), object(9)
memory usage: 190.2+ MB


# Load and map class crosswalk

In [27]:
from class_type_crosswalk import class_type_dict

In [28]:
class_type_dict

{'201': 'single-family',
 '202': 'single-family',
 '203': 'single-family',
 '204': 'single-family',
 '205': 'single-family',
 '206': 'single-family',
 '207': 'single-family',
 '208': 'single-family',
 '209': 'single-family',
 '210': 'single-family',
 '211': '2-6 unit properties',
 '212': '2-6 unit properties',
 '213': 'other residential',
 '218': 'other residential',
 '219': 'other residential',
 '224': 'other residential',
 '225': 'other residential',
 '234': 'other residential',
 '236': 'other residential',
 '239': 'other residential',
 '240': 'other residential',
 '241': 'other residential',
 '278': 'other residential',
 '288': 'other residential',
 '290': 'other residential',
 '295': 'other residential',
 '297': 'other residential',
 '299': 'condo',
 '313': '7+ unit properties',
 '314': '7+ unit properties',
 '315': '7+ unit properties',
 '318': '7+ unit properties',
 '390': 'residential',
 '391': '7+ unit properties',
 '396': '7+ unit properties',
 '397': 'other residential',
 '39

In [17]:
df06['class_type'] = df06['class'].map(class_type_dict)

In [18]:
df06.head()

Unnamed: 0,pin,property_address,mailing_name,mailing_street,property_street,is_org,class,exe_homeowner,class_desc,is_same_address,is_homeowner,tax_year,tract_GEOID,munci_comm,class_type
0,1011000370000,"203 DUNDEE AVE, ,",MARTIN O DONNELL,203 DUNDEE AVE,203 DUNDEE AVE,0.0,397,0,Special rental structure,1.0,0.0,2006,17031804204,VILLAGE OF BARRINGTON,other residential
1,1011000500000,"121 STATION ST, ,",EXEMPT,,121 STATION ST,0.0,0,0,,,,2006,17031804204,VILLAGE OF BARRINGTON,
2,1011000930000,"138 STATION ST, ,",BARRINGTON,200 HOUGH ST,138 STATION ST,0.0,202,0,"One story residence, any age, up to 999 sq. ft.",0.0,0.0,2006,17031804204,VILLAGE OF BARRINGTON,single-family
3,1011000990000,"118 LAKE ST, ,",DAVID MAHONEY,118 LAKE ST,118 LAKE ST,0.0,204,5000,"One story residence, any age, 1,801 sq. ft. an...",1.0,1.0,2006,17031804204,VILLAGE OF BARRINGTON,single-family
4,1011050030000,"207 COOK ST, ,",M J JENSEN,,207 COOK ST,0.0,590,0,Commercial minor improvement,,,2006,17031804202,VILLAGE OF BARRINGTON,


In [20]:
# NaN means commercial or non-relevant class type
df06.groupby('class_type', dropna=False).size()

class_type
2-6 unit properties    172043
7+ unit properties      14709
condo                  364543
other residential      269980
residential               961
single-family          680793
NaN                    257245
dtype: int64

In [24]:
df22['class_type'] = df22['class'].map(class_type_dict)

In [25]:
# NaN means commercial or non-relevant class type
df22.groupby('class_type', dropna=False).size()

class_type
2-6 unit properties    164184
7+ unit properties      16614
condo                  451215
other residential      271872
residential              1300
single-family          692456
NaN                    249226
dtype: int64

# Check out 2006 and 2022

Look at select years to figure out what i want to pull out when i loop through all years.

In [21]:
pd.pivot_table(df06,
              index='class_type',
              columns='is_org',
              values='pin',
              aggfunc='count')

is_org,0.0,1.0
class_type,Unnamed: 1_level_1,Unnamed: 2_level_1
2-6 unit properties,159592,9923
7+ unit properties,9564,4747
condo,315803,43597
other residential,257999,9586
residential,594,334
single-family,654794,20170


In [26]:
pd.pivot_table(df22,
              index='class_type',
              columns='is_org',
              values='pin',
              aggfunc='count')

is_org,0.0,1.0
class_type,Unnamed: 1_level_1,Unnamed: 2_level_1
2-6 unit properties,137262,25660
7+ unit properties,6209,10256
condo,391936,56582
other residential,252901,17294
residential,572,709
single-family,644819,43840


In [32]:
# look at just 2-6 unit properties by community and save to csv

# 2006
g = pd.pivot_table(df06[df06['class_type'] == '2-6 unit properties'],
              index='munci_comm',
              columns='is_org',
              values='pin',
              aggfunc='count')
g['pct_is_org'] = g[1]/(g[0] + g[1])

# 2022
f = pd.pivot_table(df22[df22['class_type'] == '2-6 unit properties'],
              index='munci_comm',
              columns='is_org',
              values='pin',
              aggfunc='count')

f['pct_is_org'] = f[1]/(f[0] + f[1])

g.to_csv('output/2-6_unit_is_org_by_community_06.csv')
f.to_csv('output/2-6_unit_is_org_by_community_22.csv')

g

is_org,0.0,1.0,pct_is_org
munci_comm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ALBANY PARK,2408.0,76.0,0.030596
ARCHER HEIGHTS,780.0,21.0,0.026217
ARMOUR SQUARE,738.0,43.0,0.055058
ASHBURN,246.0,28.0,0.102190
AUBURN GRESHAM,2450.0,113.0,0.044089
...,...,...,...
WEST RIDGE,2701.0,104.0,0.037077
WEST TOWN,6696.0,745.0,0.100121
WHEELING TWP,3.0,,
WOODLAWN,1569.0,120.0,0.071048


In [34]:
# merge 20006 vs 2022 2-6 unit properties
m = pd.merge(g, f, on='munci_comm', how='outer', indicator=True)
m['_merge'].value_counts()
m.head()

is_org,0.0_x,1.0_x,pct_is_org_x,0.0_y,1.0_y,pct_is_org_y,_merge
munci_comm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ALBANY PARK,2408.0,76.0,0.030596,2190.0,267.0,0.108669,both
ARCHER HEIGHTS,780.0,21.0,0.026217,780.0,49.0,0.059107,both
ARMOUR SQUARE,738.0,43.0,0.055058,658.0,122.0,0.15641,both
ASHBURN,246.0,28.0,0.10219,226.0,59.0,0.207018,both
AUBURN GRESHAM,2450.0,113.0,0.044089,2150.0,384.0,0.151539,both


In [38]:
# add change cols
m['change_num_is_org'] = m['1.0_y'] - m['1.0_x']
m['pct_change'] = m['change_num_is_org']/m['1.0_x']

In [39]:
# top ten greatest absolute increases in is_org buildings for 2-6
m.sort_values('change_num_is_org', ascending=False).head(10)

is_org,0.0_x,1.0_x,pct_is_org_x,0.0_y,1.0_y,pct_is_org_y,_merge,change_num_is_org,pct_change
munci_comm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
WEST TOWN,6696.0,745.0,0.100121,4741.0,1671.0,0.260605,both,926.0,1.242953
LOGAN SQUARE,6113.0,379.0,0.05838,4885.0,1022.0,0.173015,both,643.0,1.69657
AUSTIN,6777.0,321.0,0.045224,6215.0,793.0,0.113156,both,472.0,1.470405
LAKE VIEW,3694.0,556.0,0.130824,2342.0,1022.0,0.303805,both,466.0,0.838129
NORTH LAWNDALE,3344.0,225.0,0.063043,2761.0,691.0,0.200174,both,466.0,2.071111
LOWER WEST SIDE,3229.0,186.0,0.054466,2836.0,631.0,0.182002,both,445.0,2.392473
GREATER GRAND CROSSING,2491.0,114.0,0.043762,2021.0,486.0,0.193857,both,372.0,3.263158
HUMBOLDT PARK,4572.0,201.0,0.042112,4148.0,569.0,0.120628,both,368.0,1.830846
AVONDALE,3464.0,128.0,0.035635,3026.0,478.0,0.136416,both,350.0,2.734375
SOUTH SHORE,2109.0,154.0,0.068051,1768.0,477.0,0.212472,both,323.0,2.097403


In [40]:
# top ten greatest pct change increases in is_org buildings for 2-6
m.sort_values('pct_change', ascending=False).head(10)

is_org,0.0_x,1.0_x,pct_is_org_x,0.0_y,1.0_y,pct_is_org_y,_merge,change_num_is_org,pct_change
munci_comm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
VILLAGE OF EAST HAZELCREST,67.0,1.0,0.014706,38.0,24.0,0.387097,both,23.0,23.0
VILLAGE OF SAUK VILLAGE,17.0,1.0,0.055556,6.0,11.0,0.647059,both,10.0,10.0
CITY OF HICKORY HILLS,103.0,3.0,0.028302,81.0,29.0,0.263636,both,26.0,8.666667
VILLAGE OF LINCOLNWOOD,83.0,1.0,0.011905,87.0,8.0,0.084211,both,7.0,7.0
VILLAGE OF SCHAUMBURG,45.0,3.0,0.0625,28.0,20.0,0.416667,both,17.0,5.666667
CITY OF NORTH LAKE,38.0,2.0,0.05,34.0,13.0,0.276596,both,11.0,5.5
OHARE,413.0,13.0,0.030516,343.0,72.0,0.173494,both,59.0,4.538462
CITY OF CALUMET CITY,1030.0,32.0,0.030132,828.0,174.0,0.173653,both,142.0,4.4375
HERMOSA,1478.0,21.0,0.014009,1410.0,114.0,0.074803,both,93.0,4.428571
CHATHAM,1485.0,70.0,0.045016,1221.0,348.0,0.221797,both,278.0,3.971429


In [42]:
# export to csv
m.to_csv('output/2-6_unit_is_org_by_community_06-22.csv')

In [46]:
# look at just 7+ unit properties by community and save to csv

# 2006
g = pd.pivot_table(df06[df06['class_type'] == '7+ unit properties'],
              index='munci_comm',
              columns='is_org',
              values='pin',
              aggfunc='count')

g['pct_is_org'] = g[1]/(g[0] + g[1])
g['total_06'] = g[0] + g[1]

# 2022
f = pd.pivot_table(df22[df22['class_type'] == '7+ unit properties'],
              index='munci_comm',
              columns='is_org',
              values='pin',
              aggfunc='count')

f['pct_is_org'] = f[1]/(f[0] + f[1])
f['total_22'] = f[0] + f[1]

# merge 2006 vs 2022 7+ unit properties
m = pd.merge(g, f, on='munci_comm', how='outer', indicator=True)
m['_merge'].value_counts()

# add change cols
m['change_num_is_org'] = m['1.0_y'] - m['1.0_x']
m['pct_change_is_org'] = m['change_num_is_org']/m['1.0_x']

m['change_num_7_plus'] = m['total_22'] - m['total_06']
m['pct_change_7_plus'] = m['change_num_7_plus']/m['total_06']

In [47]:
# top ten greatest absolute increases in is_org buildings for 7+
m.sort_values('change_num_is_org', ascending=False).head(10)

is_org,0.0_x,1.0_x,pct_is_org_x,total_06,0.0_y,1.0_y,pct_is_org_y,total_22,_merge,change_num_is_org,pct_change_is_org,change_num_7_plus,pct_change_7_plus
munci_comm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
WEST TOWN,179.0,108.0,0.376307,287.0,136.0,397.0,0.744841,533.0,both,289.0,2.675926,246.0,0.857143
SOUTH SHORE,287.0,221.0,0.435039,508.0,161.0,417.0,0.721453,578.0,both,196.0,0.886878,70.0,0.137795
LAKE VIEW,288.0,338.0,0.539936,626.0,201.0,529.0,0.724658,730.0,both,191.0,0.565089,104.0,0.166134
LOGAN SQUARE,205.0,79.0,0.278169,284.0,147.0,260.0,0.638821,407.0,both,181.0,2.291139,123.0,0.433099
NEAR WEST SIDE,62.0,116.0,0.651685,178.0,65.0,292.0,0.817927,357.0,both,176.0,1.517241,179.0,1.005618
ROGERS PARK,326.0,190.0,0.368217,516.0,201.0,357.0,0.639785,558.0,both,167.0,0.878947,42.0,0.081395
AUSTIN,273.0,141.0,0.34058,414.0,154.0,308.0,0.666667,462.0,both,167.0,1.184397,48.0,0.115942
UPTOWN,215.0,126.0,0.369501,341.0,152.0,274.0,0.643192,426.0,both,148.0,1.174603,85.0,0.249267
AUBURN GRESHAM,200.0,72.0,0.264706,272.0,77.0,217.0,0.738095,294.0,both,145.0,2.013889,22.0,0.080882
CHATHAM,230.0,112.0,0.327485,342.0,130.0,243.0,0.651475,373.0,both,131.0,1.169643,31.0,0.090643


In [49]:
# top ten greatest pct change increases in is_org buildings for 7+
m.sort_values('pct_change_is_org', ascending=False).head(10)

is_org,0.0_x,1.0_x,pct_is_org_x,total_06,0.0_y,1.0_y,pct_is_org_y,total_22,_merge,change_num_is_org,pct_change_is_org,change_num_7_plus,pct_change_7_plus
munci_comm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
CITY OF ELGIN,23.0,2.0,0.08,25.0,3.0,27.0,0.9,30.0,both,25.0,12.5,5.0,0.2
VILLAGE OF MATTESON,8.0,3.0,0.272727,11.0,12.0,36.0,0.75,48.0,both,33.0,11.0,37.0,3.363636
LOWER WEST SIDE,95.0,7.0,0.068627,102.0,72.0,74.0,0.506849,146.0,both,67.0,9.571429,44.0,0.431373
MCKINLEY PARK,18.0,1.0,0.052632,19.0,15.0,10.0,0.4,25.0,both,9.0,9.0,6.0,0.315789
VILLAGE OF MT PROSPECT,47.0,8.0,0.145455,55.0,11.0,60.0,0.84507,71.0,both,52.0,6.5,16.0,0.290909
GARFIELD RIDGE,31.0,5.0,0.138889,36.0,20.0,32.0,0.615385,52.0,both,27.0,5.4,16.0,0.444444
VILLAGE OF BERKELEY,9.0,1.0,0.1,10.0,3.0,6.0,0.666667,9.0,both,5.0,5.0,-1.0,-0.1
VILLAGE OF ELMWOOD PARK,36.0,5.0,0.121951,41.0,21.0,30.0,0.588235,51.0,both,25.0,5.0,10.0,0.243902
VILLAGE OF BRIDGEVIEW,31.0,3.0,0.088235,34.0,20.0,18.0,0.473684,38.0,both,15.0,5.0,4.0,0.117647
CALUMET HEIGHTS,14.0,2.0,0.125,16.0,12.0,12.0,0.5,24.0,both,10.0,5.0,8.0,0.5


In [50]:
# top ten greatest pct change increases in 7+
m.sort_values('pct_change_7_plus', ascending=False).head(10)

is_org,0.0_x,1.0_x,pct_is_org_x,total_06,0.0_y,1.0_y,pct_is_org_y,total_22,_merge,change_num_is_org,pct_change_is_org,change_num_7_plus,pct_change_7_plus
munci_comm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
VILLAGE OF MATTESON,8.0,3.0,0.272727,11.0,12.0,36.0,0.75,48.0,both,33.0,11.0,37.0,3.363636
LOOP,4.0,21.0,0.84,25.0,40.0,57.0,0.587629,97.0,both,36.0,1.714286,72.0,2.88
NEAR SOUTH SIDE,6.0,16.0,0.727273,22.0,28.0,37.0,0.569231,65.0,both,21.0,1.3125,43.0,1.954545
MOUNT GREENWOOD,3.0,1.0,0.25,4.0,7.0,3.0,0.3,10.0,both,2.0,2.0,6.0,1.5
NEAR WEST SIDE,62.0,116.0,0.651685,178.0,65.0,292.0,0.817927,357.0,both,176.0,1.517241,179.0,1.005618
NEAR NORTH SIDE,68.0,102.0,0.6,170.0,110.0,226.0,0.672619,336.0,both,124.0,1.215686,166.0,0.976471
WEST TOWN,179.0,108.0,0.376307,287.0,136.0,397.0,0.744841,533.0,both,289.0,2.675926,246.0,0.857143
OAKLAND,10.0,45.0,0.818182,55.0,25.0,65.0,0.722222,90.0,both,20.0,0.444444,35.0,0.636364
WASHINGTON PARK,42.0,38.0,0.475,80.0,32.0,95.0,0.748031,127.0,both,57.0,1.5,47.0,0.5875
VILLAGE OF WINNETKA,5.0,14.0,0.736842,19.0,6.0,24.0,0.8,30.0,both,10.0,0.714286,11.0,0.578947


In [52]:
m.to_csv('output/7_plus_is_org_by_community_06-22.csv')