In [1]:
import pandas as pd
import geopandas as gpd

## Load and clean data

### Load and clean *completed* ccso evictions data

Note: This is corrected data they gavem that includes the correct date entered, which is the date that the eviction was ordered by the court.

In [343]:
completed = pd.read_excel('source/Completed_Evictions_-_2019-01-01_to_2023-04-30.xlsx')

In [344]:
completed.head()

Unnamed: 0,Case Number,Order Entered Date,Received Date,Date of Eviction,Address,City,State,ZIP Code,Plaintiff Name,Defendant Name
0,20181713132,2018-11-09 00:00:00,2019-01-04,2019-01-11,6537 S ST.LAWRENCE AVE 1,CHICAGO,IL,60637,NTGLQ INVESTORS LP,UNKNOWN OCCUPANTS
1,20186008589,2018-12-14 00:00:00,2019-01-08,2019-01-16,521 W 136TH ST NA,RIVERDALE,IL,60827,VILLAGE OF RIVERDALE,"LEWANDOWSKI, GREGORY"
2,20181716824,2018-12-07 00:00:00,2019-01-02,2019-01-17,3793 S ARCHER AVE 1ST FLR LEFT 4 ROOM,CHICAGO,IL,60632,"CAO, XIAOMIN","SANTIAGO, HECTOR"
3,20182004874,2019-01-11 00:00:00,2019-01-28,2019-02-07,1834 NA GRANT NA 2,EVANSTON,IL,60201,"SALOMON, BILHA","NARCISI, LOUIS O"
4,20181717885,2019-01-14 00:00:00,2019-01-10,2019-02-13,1148 N LAWNDALE NA 3RD FLR 6,CHICAGO,IL,60651,"ESCOBAR, VICENTE","MUNIER, JERRY"


In [345]:
# convert Date of Eviction to datetime
completed['Date of Eviction'] = pd.to_datetime(completed['Date of Eviction'])

# create a year col
completed['enforced_year'] = completed['Date of Eviction'].dt.year
    
# create a month col
completed['enforced_month'] = completed['Date of Eviction'].dt.month
    
# create a month_year col
completed['enforced_month_year'] = completed['Date of Eviction'].dt.strftime('%m/%Y')

In [346]:
# check that my data matches up with Matt's data and that they used completed evictions
# Matt's data is from 1/1/2019 to 3/31/2022 and has 10,850 rows
mask = completed['Date of Eviction'] < '2022-04-01'
check = completed.loc[mask]
len(check)

10712

In [347]:
completed.head()

Unnamed: 0,Case Number,Order Entered Date,Received Date,Date of Eviction,Address,City,State,ZIP Code,Plaintiff Name,Defendant Name,enforced_year,enforced_month,enforced_month_year
0,20181713132,2018-11-09 00:00:00,2019-01-04,2019-01-11,6537 S ST.LAWRENCE AVE 1,CHICAGO,IL,60637,NTGLQ INVESTORS LP,UNKNOWN OCCUPANTS,2019,1,01/2019
1,20186008589,2018-12-14 00:00:00,2019-01-08,2019-01-16,521 W 136TH ST NA,RIVERDALE,IL,60827,VILLAGE OF RIVERDALE,"LEWANDOWSKI, GREGORY",2019,1,01/2019
2,20181716824,2018-12-07 00:00:00,2019-01-02,2019-01-17,3793 S ARCHER AVE 1ST FLR LEFT 4 ROOM,CHICAGO,IL,60632,"CAO, XIAOMIN","SANTIAGO, HECTOR",2019,1,01/2019
3,20182004874,2019-01-11 00:00:00,2019-01-28,2019-02-07,1834 NA GRANT NA 2,EVANSTON,IL,60201,"SALOMON, BILHA","NARCISI, LOUIS O",2019,2,02/2019
4,20181717885,2019-01-14 00:00:00,2019-01-10,2019-02-13,1148 N LAWNDALE NA 3RD FLR 6,CHICAGO,IL,60651,"ESCOBAR, VICENTE","MUNIER, JERRY",2019,2,02/2019


In [353]:
# total number of rows
len(completed)

17006

### Create pre and post moratorium dataframes of completed evictions

In [349]:
# create a df for evictions completed prior to and after oct 3 2021, when the moratorium ended
# https://www.wbez.org/stories/the-eviction-moratorium-in-illinois-has-ended/326f1ffb-2262-4c5a-afa4-047bf65d56c1
mask = (completed['Date of Eviction'] > '2021-10-03')
completed_post = completed.loc[mask]
len(completed_post)

8119

In [350]:
# check start date
completed_post.sort_values('Date of Eviction', ascending=True).head(2)

Unnamed: 0,Case Number,Order Entered Date,Received Date,Date of Eviction,Address,City,State,ZIP Code,Plaintiff Name,Defendant Name,enforced_year,enforced_month,enforced_month_year
8887,20211700624,2021-09-13 00:00:00,2021-09-23,2021-10-04,701 S WELLS ST UNIT #2004,CHICAGO,IL,60607,"PURDY, JULIE","SMITH, ASHA GLADNEY",2021,10,10/2021
8888,2021701729,2021-09-09 00:00:00,2021-09-22,2021-10-04,111 W WACKER DR 1614,CHICAGO,IL,60601,BOZZUTO MANAGEMEN COMPANY,"AFESUKU, TAMARAMIEMENE",2021,10,10/2021


In [351]:
# check end date
completed_post.sort_values('Date of Eviction', ascending=False).head(2)

Unnamed: 0,Case Number,Order Entered Date,Received Date,Date of Eviction,Address,City,State,ZIP Code,Plaintiff Name,Defendant Name,enforced_year,enforced_month,enforced_month_year
17005,20221702207,2022-10-11 00:00:00,2022-11-16,2023-04-28,4934 W VAN BUREN ST NA,CHICAGO,IL,60644,"MHOON, JUARIE","WOODS, JOE",2023,4,04/2023
16991,20221718804,2023-03-09 00:00:00,2023-03-13,2023-04-28,1156 S MASON AVE D2,CHICAGO,IL,60644,ROOSEVELT & MASON PROPERTY LLC,"WESLEY, CHEQUIA",2023,4,04/2023


In [352]:
# create a pre moratorium dataframe
mask = (completed['Date of Eviction'] <= '2021-10-03')
completed_pre = completed.loc[mask]
len(completed_pre)

8887

## Analysis for story

### Takeaway 1: Racial disparities

### Takeaway 2: Are there areas (e.g., Zip Codes) where evictions are higher post-moratorium?

# ALL CCSO EVICTIONS DATA BELOW

### Load and clean ccso evictions data

In [None]:
df = pd.read_excel('source/CCSO_BIU_FOIA_Qin_04052023_DRAFT_v1.0.xlsx')

In [10]:
df.head()

Unnamed: 0,Date_entered,CaseNum,DistrictNum,Received_date,DefStreetNum,DefStreetDir,DefStreetName,DefStreetType,DefUnitNum,DefCity,DefState,DefZipCode,PltName,DefName,EVICTIonCompleteDate
0,2019-01-02 10:38:09.943,20181716392,628,2019-01-02 12:00:00,11761,S,LOWE,AVE,,CHICAGO,IL,60628,"BILLUPS, BRIAN","WILLIAMS, DURRELL",NaT
1,2019-01-02 10:53:37.793,20181715954,619,2019-01-02 12:00:00,8313,S,INGLESIDE,,,CHICAGO,IL,60619,"QUADRI, SYED","FOX, ALTRICE",2019-02-14
2,2019-01-02 12:37:19.590,20181702931,624,2019-01-02 12:00:00,4147,W,ARTHINGTON,,APT 4,CHICAGO,IL,60624,"WILSON, LARRY","CARDINE, LISA",NaT
3,2019-01-02 13:39:00.490,20181712456,620,2019-01-02 12:00:00,7718,S,MARSHFIELD,AVE,,CHICAGO,IL,60620,REVERSE MORTGAGE SOLUTIONS INC,"BROCKETT, COURTNEY",NaT
4,2019-01-02 14:08:27.893,20181719438,619,2019-01-02 12:00:00,300,E,95TH,ST,,CHICAGO,IL,60619,SKN INVESTMENT INC,ALL UNKNOWN OCCUPANTS,2019-03-26


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32148 entries, 0 to 32147
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Date_entered          32148 non-null  datetime64[ns]
 1   CaseNum               32148 non-null  object        
 2   DistrictNum           32073 non-null  object        
 3   Received_date         32148 non-null  datetime64[ns]
 4   DefStreetNum          32148 non-null  object        
 5   DefStreetDir          28087 non-null  object        
 6   DefStreetName         32148 non-null  object        
 7   DefStreetType         28361 non-null  object        
 8   DefUnitNum            25205 non-null  object        
 9   DefCity               32147 non-null  object        
 10  DefState              32148 non-null  object        
 11  DefZipCode            32148 non-null  int64         
 12  PltName               32148 non-null  object        
 13  DefName         

In [103]:
# convert Date_entered to datetime
df['Date_entered'] = pd.to_datetime(df['Date_entered'])

# create a year col
df['entered_year'] = df['Date_entered'].dt.year
    
# create a month col
df['entered_month'] = df['Date_entered'].dt.month
    
# create a month_year col
df['entered_month_year'] = df['Date_entered'].dt.strftime('%m/%Y')
    
# create a day col
df['entered_day'] = df['Date_entered'].dt.strftime('%Y-%m-%d')

# convert received date to datetime
df['Received_date'] = pd.to_datetime(df['Received_date'])

# create a year col
df['received_year'] = df['Received_date'].dt.year
    
# create a month col
df['received_month'] = df['Received_date'].dt.month

# create a month_year col
df['received_month_year'] = df['Received_date'].dt.strftime('%m/%Y')

# create a new Evictiction Complete Date col that is a datetime object
df['eviction_completed'] = pd.to_datetime(df['EVICTIonCompleteDate'])

# replace NaT with 'not completed'
df['eviction_completed'].fillna('not completed', inplace=True)

# replace dates with 'completed'
df.loc[df['eviction_completed'] != 'not completed', 'eviction_completed'] = 'completed'

# change Evictiction Complete Date col to a datetime object
df['EVICTIonCompleteDate'] = pd.to_datetime(df['EVICTIonCompleteDate'])

# create a year eviction completed col
df['completed_year'] = df['EVICTIonCompleteDate'].dt.year

# create a month eviction completed col
df['completed_month'] = df['EVICTIonCompleteDate'].dt.month

# create a month_year col
df['completed_month_year'] = df['EVICTIonCompleteDate'].dt.strftime('%m/%Y')

In [195]:
df.head()

Unnamed: 0,Date_entered,CaseNum,DistrictNum,Received_date,DefStreetNum,DefStreetDir,DefStreetName,DefStreetType,DefUnitNum,DefCity,...,entered_month_year,entered_day,eviction_complete_date,eviction_completed,completed_year,completed_month,received_year,received_month,received_month_year,completed_month_year
0,2019-01-02 10:38:09.943,20181716392,628,2019-01-02 12:00:00,11761,S,LOWE,AVE,,CHICAGO,...,01/2019,2019-01-02,NaT,not completed,,,2019,1,01/2019,
1,2019-01-02 10:53:37.793,20181715954,619,2019-01-02 12:00:00,8313,S,INGLESIDE,,,CHICAGO,...,01/2019,2019-01-02,2019-02-14,completed,2019.0,2.0,2019,1,01/2019,02/2019
2,2019-01-02 12:37:19.590,20181702931,624,2019-01-02 12:00:00,4147,W,ARTHINGTON,,APT 4,CHICAGO,...,01/2019,2019-01-02,NaT,not completed,,,2019,1,01/2019,
3,2019-01-02 13:39:00.490,20181712456,620,2019-01-02 12:00:00,7718,S,MARSHFIELD,AVE,,CHICAGO,...,01/2019,2019-01-02,NaT,not completed,,,2019,1,01/2019,
4,2019-01-02 14:08:27.893,20181719438,619,2019-01-02 12:00:00,300,E,95TH,ST,,CHICAGO,...,01/2019,2019-01-02,2019-03-26,completed,2019.0,3.0,2019,1,01/2019,03/2019


### Sense check the data

In [108]:
# check completed == 17,100
df.groupby('eviction_completed').size()

eviction_completed
completed        17100
not completed    15048
dtype: int64

In [160]:
# first date
df.sort_values('entered_day').head(1)

Unnamed: 0,Date_entered,CaseNum,DistrictNum,Received_date,DefStreetNum,DefStreetDir,DefStreetName,DefStreetType,DefUnitNum,DefCity,...,DefZipCode,PltName,DefName,EVICTIonCompleteDate,entered_year,entered_month,entered_month_year,entered_day,eviction_complete_date,eviction_completed
0,2019-01-02 10:38:09.943,20181716392,628,2019-01-02 12:00:00,11761,S,LOWE,AVE,,CHICAGO,...,60628,"BILLUPS, BRIAN","WILLIAMS, DURRELL",NaT,2019,1,01/2019,2019-01-02,NaT,not completed


In [161]:
# last date
df.sort_values('entered_day').tail(1)

Unnamed: 0,Date_entered,CaseNum,DistrictNum,Received_date,DefStreetNum,DefStreetDir,DefStreetName,DefStreetType,DefUnitNum,DefCity,...,DefZipCode,PltName,DefName,EVICTIonCompleteDate,entered_year,entered_month,entered_month_year,entered_day,eviction_complete_date,eviction_completed
32147,2023-04-06 10:36:49.587,20221720184,660,2023-04-06 10:36:50.953,1055,W,GRANVILLE,Ave,409,CHICAGO,...,60660,BCHGRANVILLE,"CURRAN, GEORGE",NaT,2023,4,04/2023,2023-04-06,NaT,not completed


In [327]:
# check that my data matches up with Matt's data and that they used completed evictions
# Matt's data is from 1/1/2019 to 3/31/2022 and has 10,850 rows
# close enough: using recieved date would have given me 21,512 rows
mask = df['EVICTIonCompleteDate'] < '2022-04-01'
check = df.loc[mask]
len(check)

11173

### Create pre, post moratorium dataframes

In [329]:
# create a df for evictions completed prior to and after oct 3 2021, when the moratorium ended
# https://www.wbez.org/stories/the-eviction-moratorium-in-illinois-has-ended/326f1ffb-2262-4c5a-afa4-047bf65d56c1
mask = (df['EVICTIonCompleteDate'] > '2021-10-03')
df_post = df.loc[mask]

In [332]:
df_post.sort_values('EVICTIonCompleteDate', ascending=True).head()

Unnamed: 0,Date_entered,CaseNum,DistrictNum,Received_date,DefStreetNum,DefStreetDir,DefStreetName,DefStreetType,DefUnitNum,DefCity,...,entered_month_year,entered_day,eviction_complete_date,eviction_completed,completed_year,completed_month,received_year,received_month,received_month_year,completed_month_year
23436,2021-09-17 10:00:25.433,20211701593,642,2021-09-17 12:00:00,713,N,MILWAUKEE,,501,CHCIAGO,...,09/2021,2021-09-17,2021-10-04,completed,2021.0,10.0,2021,9,09/2021,10/2021
28261,2021-09-23 15:57:18.003,20211700624,607,2021-09-23 12:00:00,701,S,WELLS,ST,UNIT #2004,CHICAGO,...,09/2021,2021-09-23,2021-10-04,completed,2021.0,10.0,2021,9,09/2021,10/2021
28259,2021-09-22 10:42:20.117,2021701729,601,2021-09-22 12:00:00,111,W,WACKER,DR,1614,CHICAGO,...,09/2021,2021-09-22,2021-10-04,completed,2021.0,10.0,2021,9,09/2021,10/2021
7657,2021-09-17 10:00:25.433,20211702162,653,2021-09-17 12:00:00,1126,E,47TH,ST,APT L4,CHICAGO,...,09/2021,2021-09-17,2021-10-05,completed,2021.0,10.0,2021,9,09/2021,10/2021
7665,2021-09-22 09:58:12.427,20191714699,609,2021-09-22 12:00:00,1256,W,49TH,PL,1R,CHICAGO,...,09/2021,2021-09-22,2021-10-05,completed,2021.0,10.0,2021,9,09/2021,10/2021


In [333]:
df_post_moratorium.sort_values('EVICTIonCompleteDate', ascending=False).head(2)

Unnamed: 0,Date_entered,CaseNum,DistrictNum,Received_date,DefStreetNum,DefStreetDir,DefStreetName,DefStreetType,DefUnitNum,DefCity,...,entered_month_year,entered_day,eviction_complete_date,eviction_completed,completed_year,completed_month,received_year,received_month,received_month_year,completed_month_year
29987,2022-12-16 10:13:28.540,20221712752,601,2022-12-16 12:00:00.000,201,N,GARLAND,CT,3810.0,CHICAGO,...,12/2022,2022-12-16,2023-04-06,completed,2023.0,4.0,2022,12,12/2022,04/2023
13409,2023-02-15 09:41:41.797,20221712427,639,2023-02-15 09:41:48.127,1721,N,Major,Ave,,Chicago,...,02/2023,2023-02-15,2023-04-06,completed,2023.0,4.0,2023,2,02/2023,04/2023


In [319]:
mask = (df['Received_date'] <= '2021-10-03')
df_pre_moratorium = df.loc[mask]

In [320]:
df_pre_moratorium.sort_values('Received_date', ascending=True).head(2)

Unnamed: 0,Date_entered,CaseNum,DistrictNum,Received_date,DefStreetNum,DefStreetDir,DefStreetName,DefStreetType,DefUnitNum,DefCity,...,entered_month_year,entered_day,eviction_complete_date,eviction_completed,completed_year,completed_month,received_year,received_month,received_month_year,completed_month_year
0,2019-01-02 10:38:09.943,20181716392,628,2019-01-02 12:00:00,11761,S,LOWE,AVE,,CHICAGO,...,01/2019,2019-01-02,NaT,not completed,,,2019,1,01/2019,
123,2019-01-02 14:10:25.803,20185007966,415,2019-01-02 12:00:00,10518,S,RIDGELAND,AVE,APT 11,CHICAGO RIDGE,...,01/2019,2019-01-02,NaT,not completed,,,2019,1,01/2019,


In [321]:
df_pre_moratorium.sort_values('Received_date', ascending=False).head(2)

Unnamed: 0,Date_entered,CaseNum,DistrictNum,Received_date,DefStreetNum,DefStreetDir,DefStreetName,DefStreetType,DefUnitNum,DefCity,...,entered_month_year,entered_day,eviction_complete_date,eviction_completed,completed_year,completed_month,received_year,received_month,received_month_year,completed_month_year
30091,2021-10-01 16:35:05.010,2036393,56,2021-10-01 12:00:00,1905,,GROVE,DR,449,MT PROSPECT,...,10/2021,2021-10-01,2021-10-13,completed,2021.0,10.0,2021,10,10/2021,10/2021
28269,2021-10-01 16:39:36.953,20201702618,634,2021-10-01 12:00:00,3938,N,CENTRAL,AVE,2ND FLR,CHICAGO,...,10/2021,2021-10-01,2021-10-13,completed,2021.0,10.0,2021,10,10/2021,10/2021


### Load census renters data

Source: https://censusreporter.org/data/map/?table=B25003&geo_ids=05000US17031,860%7C05000US17031&primary_geo_id=05000US17031#column%7CB25003002,sumlev%7C860

In [17]:
renters = pd.read_csv('source/acs2021_5yr_B25003_86000US60074.csv')

In [18]:
renters.head()

Unnamed: 0,geoid,name,B25003001,"B25003001, Error",B25003002,"B25003002, Error",B25003003,"B25003003, Error"
0,05000US17031,"Cook County, IL",2044658,3765,1175143,6780,869515,6667
1,86000US60004,60004,20494,489,15763,617,4731,401
2,86000US60005,60005,12405,560,7906,450,4499,505
3,86000US60007,60007,12745,507,9553,490,3192,376
4,86000US60008,60008,8731,363,6217,341,2514,309


In [19]:
# delete the first row which is the cook county total
renters = renters[1:]

In [20]:
# rename columns
columns = ['geoid', 'zip', 'total', 'total_moe', 'owner_occupied', 'owner_occupied_moe', 'renter_occupied', 'renter_occupied_moe']
renters.columns = columns

In [21]:
renters.head()

Unnamed: 0,geoid,zip,total,total_moe,owner_occupied,owner_occupied_moe,renter_occupied,renter_occupied_moe
1,86000US60004,60004,20494,489,15763,617,4731,401
2,86000US60005,60005,12405,560,7906,450,4499,505
3,86000US60007,60007,12745,507,9553,490,3192,376
4,86000US60008,60008,8731,363,6217,341,2514,309
5,86000US60010,60010,16999,575,14906,520,2093,462


## Exploratory analysis of all CCSO evictions

In [188]:
# how many total evictions were enforced by the sheriff's office each year?
enforced = df.groupby('completed_year').size().reset_index(name = 'enforced_count')
enforced

Unnamed: 0,completed_year,enforced_count
0,2000.0,32
1,2019.0,6204
2,2020.0,1720
3,2021.0,2112
4,2022.0,5022
5,2023.0,2010


In [189]:
# how many total evictions were recieved by the sheriff's office each year?
received = df.groupby('received_year').size().reset_index(name = 'received_count')
received

Unnamed: 0,received_year,received_count
0,2019,13835
1,2020,3153
2,2021,2740
3,2022,8998
4,2023,3422


In [191]:
# what is the rate evictions enforced by the sheriff's office per year?
rate = enforced.merge(received, left_on='completed_year', right_on='received_year')
rate['rate'] = rate['enforced_count']/rate['received_count']
rate

Unnamed: 0,completed_year,enforced_count,received_year,received_count,rate
0,2019.0,6204,2019,13835,0.448428
1,2020.0,1720,2020,3153,0.545512
2,2021.0,2112,2021,2740,0.770803
3,2022.0,5022,2022,8998,0.558124
4,2023.0,2010,2023,3422,0.587376


## Enforcement rate analysis (DON'T USE)

In [199]:
enforced_month = df.groupby('completed_month_year').size().reset_index(name = 'enforced_count')
enforced_month.sort_values('completed_month_year').tail()

Unnamed: 0,completed_month_year,enforced_count
46,11/2022,466
47,12/2019,527
48,12/2020,57
49,12/2021,255
50,12/2022,371


In [201]:
received_month = df.groupby('received_month_year').size().reset_index(name = 'received_count')
received_month.sort_values('received_month_year').tail()

Unnamed: 0,received_month_year,received_count
45,11/2022,908
46,12/2019,1093
47,12/2020,89
48,12/2021,414
49,12/2022,969


In [203]:
# how has the eviction enforcement rate changed month by month?
rate_month = enforced_month.merge(received_month, left_on='completed_month_year', right_on='received_month_year')
rate_month['rate'] = rate_month['enforced_count']/rate_month['received_count']
rate_month.head()

Unnamed: 0,completed_month_year,enforced_count,received_month_year,received_count,rate
0,01/2019,3,01/2019,1093,0.002745
1,01/2020,722,01/2020,1166,0.619211
2,01/2021,51,01/2021,97,0.525773
3,01/2022,260,01/2022,506,0.513834
4,01/2023,572,01/2023,975,0.586667


## Zipcodes analysis

In [22]:
# convert zip to an int
renters['zip'] = renters['zip'].astype(int)

In [26]:
# 176 zip codes
len(renters)

176

### How many rows are missing zipcode data?

In [28]:
df['DefZipCode'].isna().sum()

0

### What are the top zipcodes and top zipcodes by year?

In [204]:
# group received evictions by zip code
dfz = df.groupby('DefZipCode').size().reset_index(name='received_count')

In [205]:
# top 10 zip codes for evictions received overall
dfz.sort_values('received_count', ascending=False).head(10)

Unnamed: 0,DefZipCode,received_count
172,60649,2041
144,60619,1569
145,60620,1239
160,60637,1123
167,60644,1033
152,60628,870
140,60615,864
142,60617,807
149,60624,753
153,60629,743


In [207]:
# group enforced evictions by zip code
dfz_enforced = df[df['eviction_completed'] == 'completed'].groupby('DefZipCode').size().reset_index(name='enforced_count')
dfz_enforced['enforced_count'].sum()

17100

In [208]:
# top 10 zip codes for evictions enforced overall, same top 10 zip codes
dfz_enforced.sort_values('enforced_count', ascending=False).head(10)

Unnamed: 0,DefZipCode,enforced_count
164,60649,1098
137,60619,830
138,60620,642
153,60637,569
160,60644,539
145,60628,452
135,60617,451
133,60615,416
142,60624,401
146,60629,394


In [209]:
# zip codes by year for evictions received
dfz_year = pd.pivot_table(df, values='CaseNum', index='DefZipCode', columns='received_year', aggfunc='count').reset_index()

In [210]:
dfz_year.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 198 entries, 0 to 197
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   DefZipCode  198 non-null    int64  
 1   2019        179 non-null    float64
 2   2020        154 non-null    float64
 3   2021        160 non-null    float64
 4   2022        179 non-null    float64
 5   2023        164 non-null    float64
dtypes: float64(5), int64(1)
memory usage: 9.4 KB


In [211]:
# top 10 zip codes in 2023
dfz_year.sort_values(2023, ascending=False).head(10)

received_year,DefZipCode,2019,2020,2021,2022,2023
172,60649,999.0,185.0,138.0,522.0,197.0
144,60619,685.0,175.0,114.0,404.0,191.0
160,60637,496.0,127.0,86.0,282.0,132.0
140,60615,352.0,78.0,64.0,246.0,124.0
145,60620,601.0,122.0,65.0,327.0,124.0
167,60644,511.0,98.0,62.0,249.0,113.0
152,60628,371.0,86.0,58.0,255.0,100.0
146,60621,256.0,85.0,47.0,175.0,90.0
159,60636,228.0,56.0,49.0,182.0,84.0
142,60617,360.0,93.0,64.0,209.0,81.0


In [212]:
# top 10 zip codes in 2022
dfz_year.sort_values(2022, ascending=False).head(10)

received_year,DefZipCode,2019,2020,2021,2022,2023
172,60649,999.0,185.0,138.0,522.0,197.0
144,60619,685.0,175.0,114.0,404.0,191.0
145,60620,601.0,122.0,65.0,327.0,124.0
160,60637,496.0,127.0,86.0,282.0,132.0
152,60628,371.0,86.0,58.0,255.0,100.0
167,60644,511.0,98.0,62.0,249.0,113.0
140,60615,352.0,78.0,64.0,246.0,124.0
74,60411,265.0,59.0,37.0,229.0,66.0
142,60617,360.0,93.0,64.0,209.0,81.0
153,60629,350.0,83.0,37.0,204.0,69.0


In [22]:
# export dfz_year to csv
# dfz_year.to_csv('processed/evictions_by_zip_year.csv')

In [213]:
# zip codes by year for evictions enforced
dfz_year_enforced = pd.pivot_table(df[df['eviction_completed'] == 'completed'], values='CaseNum', index='DefZipCode', columns='received_year', aggfunc='count').reset_index()

In [214]:
# top 10 zip codes in 2023
dfz_year_enforced.sort_values(2023, ascending=False).head(10)

received_year,DefZipCode,2019,2020,2021,2022,2023
164,60649,555.0,55.0,109.0,317.0,62.0
137,60619,369.0,49.0,75.0,279.0,58.0
153,60637,260.0,33.0,61.0,173.0,42.0
138,60620,314.0,29.0,52.0,209.0,38.0
160,60644,264.0,26.0,50.0,163.0,36.0
145,60628,186.0,29.0,41.0,162.0,34.0
134,60616,115.0,12.0,38.0,89.0,32.0
135,60617,203.0,33.0,52.0,138.0,25.0
68,60411,143.0,22.0,29.0,153.0,25.0
142,60624,186.0,25.0,49.0,118.0,23.0


### Are there zipcodes where the rate of enforcement is higher?

In [222]:
# rename cols
dfz_year_enforced.rename(columns = {2019: '2019_enforced', 2020: '2020_enforced', 2021: '2021_enforced', 2022: '2022_enforced', 2023: '2023_enforced'}, inplace=True)
dfz_year.rename(columns = {2019: '2019_received', 2020: '2020_received', 2021: '2021_received', 2022: '2022_received', 2023: '2023_received'}, inplace=True)

In [223]:
# merge recieved and enforced
dfz_year = dfz_year.merge(dfz_year_enforced, on='DefZipCode')

In [225]:
dfz_year['2023_enforced'].sum()

999.0

In [227]:
dfz_year_enforced['2023_enforced'].sum()

999.0

In [228]:
dfz_year.tail()

received_year,DefZipCode,2019_received,2020_received,2021_received,2022_received,2023_received,2019_enforced,2020_enforced,2021_enforced,2022_enforced,2023_enforced
179,60714,33.0,5.0,7.0,17.0,6.0,18.0,3.0,6.0,13.0,3.0
180,60803,52.0,8.0,3.0,29.0,15.0,19.0,2.0,2.0,12.0,3.0
181,60804,110.0,24.0,29.0,94.0,34.0,56.0,6.0,21.0,62.0,15.0
182,60805,20.0,8.0,4.0,20.0,4.0,9.0,3.0,1.0,9.0,1.0
183,60827,237.0,48.0,31.0,115.0,31.0,126.0,12.0,26.0,66.0,9.0


In [230]:
# calculate enforcement rate each year
dfz_year['enforcement_rate_2019'] = dfz_year['2019_enforced']/dfz_year['2019_received']
dfz_year['enforcement_rate_2020'] = dfz_year['2020_enforced']/dfz_year['2020_received']
dfz_year['enforcement_rate_2021'] = dfz_year['2021_enforced']/dfz_year['2021_received']
dfz_year['enforcement_rate_2022'] = dfz_year['2022_enforced']/dfz_year['2022_received']
dfz_year['enforcement_rate_2023'] = dfz_year['2023_enforced']/dfz_year['2023_received']

In [231]:
# calculate enforcement rate for the entire dataset
dfz_year['total_received'] = dfz_year['2019_received'] + dfz_year['2020_received'] + dfz_year['2021_received'] + dfz_year['2022_received'] + dfz_year['2023_received']
dfz_year['total_enforced'] = dfz_year['2019_enforced'] + dfz_year['2020_enforced'] + dfz_year['2021_enforced'] + dfz_year['2022_enforced'] + dfz_year['2023_enforced']
dfz_year['enforcement_rate'] = dfz_year['total_enforced']/dfz_year['total_received']

In [235]:
# top 15 zips with the highest enforcement rate
dfz_year[['DefZipCode', 'total_received', 'total_enforced', 'enforcement_rate']].sort_values('enforcement_rate', ascending=False).head(15)

received_year,DefZipCode,total_received,total_enforced,enforcement_rate
131,60613,166.0,111.0,0.668675
168,60654,156.0,104.0,0.666667
51,60173,93.0,61.0,0.655914
158,60642,66.0,43.0,0.651515
119,60601,92.0,59.0,0.641304
123,60605,245.0,157.0,0.640816
25,60089,22.0,14.0,0.636364
87,60457,49.0,31.0,0.632653
179,60714,68.0,43.0,0.632353
7,60008,70.0,44.0,0.628571


### Merge renters and evictions by zip dfz_year

In [236]:
# conduct an anti-join to see how many zip codes are not matched
unmatched = (dfz_year.merge(renters, left_on='DefZipCode', right_on='zip', how='outer', indicator=True)
     .query('_merge != "both"')
     .drop('_merge', 1))

  .drop('_merge', 1))


In [237]:
len(unmatched)

20

In [239]:
# only 23 evictions received were associated with zips not matched in the census renters data or evictions data
unmatched['2019_received'].sum() + unmatched['2020_received'].sum() + unmatched['2021_received'].sum() + unmatched['2022_received'].sum() + unmatched['2023_received'].sum()

23.0

In [240]:
unmatched

Unnamed: 0,DefZipCode,2019_received,2020_received,2021_received,2022_received,2023_received,2019_enforced,2020_enforced,2021_enforced,2022_enforced,...,total_enforced,enforcement_rate,geoid,zip,total,total_moe,owner_occupied,owner_occupied_moe,renter_occupied,renter_occupied_moe
0,90.0,,,1.0,,,,,1.0,,...,,,,,,,,,,
1,471.0,,,,1.0,,,,,1.0,...,,,,,,,,,,
2,6030.0,,,,1.0,,,,,1.0,...,,,,,,,,,,
3,50409.0,1.0,,,,,1.0,,,,...,,,,,,,,,,
15,60038.0,1.0,,,,,1.0,,,,...,,,,,,,,,,
32,60108.0,,,,,1.0,,,,,...,,,,,,,,,,
42,60161.0,1.0,1.0,,3.0,,1.0,,,2.0,...,,,,,,,,,,
48,60170.0,,,,1.0,,,,,1.0,...,,,,,,,,,,
57,60196.0,1.0,,,,,1.0,,,,...,,,,,,,,,,
70,60418.0,,,,2.0,2.0,,,,1.0,...,,,,,,,,,,


In [241]:
# merge renters and dfz_year
evictions = dfz_year.merge(renters, left_on='DefZipCode', right_on='zip', how='inner')

In [242]:
del evictions['geoid']

In [243]:
del evictions['zip']

In [244]:
evictions.rename(columns = {'DefZipCode': 'Zipcode'}, inplace=True)

In [245]:
evictions.head()

Unnamed: 0,Zipcode,2019_received,2020_received,2021_received,2022_received,2023_received,2019_enforced,2020_enforced,2021_enforced,2022_enforced,...,enforcement_rate_2023,total_received,total_enforced,enforcement_rate,total,total_moe,owner_occupied,owner_occupied_moe,renter_occupied,renter_occupied_moe
0,60004,21.0,11.0,2.0,21.0,10.0,11.0,5.0,2.0,16.0,...,0.4,65.0,38.0,0.584615,20494,489,15763,617,4731,401
1,60005,29.0,7.0,11.0,32.0,12.0,12.0,4.0,10.0,22.0,...,0.25,91.0,51.0,0.56044,12405,560,7906,450,4499,505
2,60007,22.0,8.0,9.0,24.0,8.0,10.0,2.0,9.0,15.0,...,0.375,71.0,39.0,0.549296,12745,507,9553,490,3192,376
3,60008,29.0,4.0,10.0,18.0,9.0,19.0,1.0,8.0,10.0,...,0.666667,70.0,44.0,0.628571,8731,363,6217,341,2514,309
4,60010,5.0,4.0,,6.0,4.0,4.0,2.0,,3.0,...,0.5,,,,16999,575,14906,520,2093,462


In [246]:
# calculate evictions received per 1000 renter occupied units by year
evictions['evic_per_1000_units_2023'] = (evictions['2023_received']/evictions['renter_occupied'])*1000
evictions['evic_per_1000_units_2022'] = (evictions['2022_received']/evictions['renter_occupied'])*1000
evictions['evic_per_1000_units_2021'] = (evictions['2021_received']/evictions['renter_occupied'])*1000
evictions['evic_per_1000_units_2020'] = (evictions['2020_received']/evictions['renter_occupied'])*1000
evictions['evic_per_1000_units_2019'] = (evictions['2019_received']/evictions['renter_occupied'])*1000

In [248]:
# calculate evictions enforced per 1000 renter occupied units by year
evictions['enforced_per_1000_units_2023'] = (evictions['2023_enforced']/evictions['renter_occupied'])*1000
evictions['enforced_per_1000_units_2022'] = (evictions['2022_enforced']/evictions['renter_occupied'])*1000
evictions['enforced_per_1000_units_2021'] = (evictions['2021_enforced']/evictions['renter_occupied'])*1000
evictions['enforced_per_1000_units_2020'] = (evictions['2020_enforced']/evictions['renter_occupied'])*1000
evictions['enforced_per_1000_units_2019'] = (evictions['2019_enforced']/evictions['renter_occupied'])*1000

In [249]:
evictions.head()

Unnamed: 0,Zipcode,2019_received,2020_received,2021_received,2022_received,2023_received,2019_enforced,2020_enforced,2021_enforced,2022_enforced,...,evic_per_1000_units_2023,evic_per_1000_units_2022,evic_per_1000_units_2021,evic_per_1000_units_2020,evic_per_1000_units_2019,enforced_per_1000_units_2023,enforced_per_1000_units_2022,enforced_per_1000_units_2021,enforced_per_1000_units_2020,enforced_per_1000_units_2019
0,60004,21.0,11.0,2.0,21.0,10.0,11.0,5.0,2.0,16.0,...,2.113718,4.438808,0.422744,2.32509,4.438808,0.845487,3.381949,0.422744,1.056859,2.32509
1,60005,29.0,7.0,11.0,32.0,12.0,12.0,4.0,10.0,22.0,...,2.667259,7.112692,2.444988,1.555901,6.445877,0.666815,4.889976,2.222716,0.889086,2.667259
2,60007,22.0,8.0,9.0,24.0,8.0,10.0,2.0,9.0,15.0,...,2.506266,7.518797,2.819549,2.506266,6.892231,0.93985,4.699248,2.819549,0.626566,3.132832
3,60008,29.0,4.0,10.0,18.0,9.0,19.0,1.0,8.0,10.0,...,3.579952,7.159905,3.977725,1.59109,11.535402,2.386635,3.977725,3.18218,0.397772,7.557677
4,60010,5.0,4.0,,6.0,4.0,4.0,2.0,,3.0,...,1.911132,2.866699,,1.911132,2.388915,0.955566,1.433349,,0.955566,1.911132


In [250]:
evictions[evictions['Zipcode'] == 60411]

Unnamed: 0,Zipcode,2019_received,2020_received,2021_received,2022_received,2023_received,2019_enforced,2020_enforced,2021_enforced,2022_enforced,...,evic_per_1000_units_2023,evic_per_1000_units_2022,evic_per_1000_units_2021,evic_per_1000_units_2020,evic_per_1000_units_2019,enforced_per_1000_units_2023,enforced_per_1000_units_2022,enforced_per_1000_units_2021,enforced_per_1000_units_2020,enforced_per_1000_units_2019
59,60411,265.0,59.0,37.0,229.0,66.0,143.0,22.0,29.0,153.0,...,10.538081,36.563947,5.907712,9.420406,42.311991,3.991697,24.429187,4.630369,3.512694,22.832508


### What are the top zip codes by evictions per 1000 units?

In [251]:
# 2023
evictions[['Zipcode','evic_per_1000_units_2023']].sort_values('evic_per_1000_units_2023', ascending=False).head(10)

Unnamed: 0,Zipcode,evic_per_1000_units_2023
37,60163,21.73913
107,60602,20.100503
139,60636,13.716525
124,60619,12.184231
62,60422,12.121212
71,60445,11.591963
151,60649,11.094216
126,60621,10.971596
79,60461,10.909091
59,60411,10.538081


In [252]:
# 2022
evictions[['Zipcode','evic_per_1000_units_2022']].sort_values('evic_per_1000_units_2022', ascending=False).head(10)

Unnamed: 0,Zipcode,evic_per_1000_units_2022
63,60425,56.367432
37,60163,50.724638
79,60461,43.636364
59,60411,36.563947
71,60445,35.548686
107,60602,35.175879
61,60419,33.942559
108,60603,32.258065
62,60422,30.30303
139,60636,29.719138


In [253]:
# 2021
evictions[['Zipcode','evic_per_1000_units_2021']].sort_values('evic_per_1000_units_2021', ascending=False).head(10)

Unnamed: 0,Zipcode,evic_per_1000_units_2021
107,60602,40.201005
108,60603,24.193548
62,60422,18.181818
37,60163,14.492754
109,60604,14.388489
93,60478,13.322884
63,60425,12.526096
61,60419,11.749347
88,60472,11.182109
71,60445,10.819165


In [254]:
# 2020
evictions[['Zipcode','evic_per_1000_units_2020']].sort_values('evic_per_1000_units_2020', ascending=False).head(10)

Unnamed: 0,Zipcode,evic_per_1000_units_2020
63,60425,20.876827
79,60461,18.181818
109,60604,17.985612
93,60478,14.890282
91,60476,14.705882
61,60419,12.184508
71,60445,11.591963
124,60619,11.163562
96,60487,10.59322
151,60649,10.418427


In [255]:
# 2019
evictions[['Zipcode','evic_per_1000_units_2019']].sort_values('evic_per_1000_units_2019', ascending=False).head(10)

Unnamed: 0,Zipcode,evic_per_1000_units_2019
63,60425,73.068894
37,60163,72.463768
62,60422,69.69697
93,60478,66.61442
79,60461,61.818182
151,60649,56.259503
89,60473,53.932584
70,60443,52.604435
61,60419,50.478677
107,60602,50.251256


## Merge evictions with other census data

### By race

In [256]:
race = pd.read_csv('processed/race_by_zip_2021_5y_ACS.csv')

In [257]:
del race['Unnamed: 0']

In [258]:
race.head()

Unnamed: 0,Zipcode,Total,Latino,White,Black,Native American,Asian,Pacific Islander,Other,TOM,Pct Latino,Pct White,Pct Black,Pct Native American,Pct Asian,Pct Other,Pct TOM,Majority
0,60004,52344,5180,38917,1457,37,5319,0,47,1387,0.098961,0.743485,0.027835,0.000707,0.101616,0.000898,0.026498,White
1,60005,29622,4987,20588,760,13,2903,0,102,269,0.168355,0.695024,0.025657,0.000439,0.098001,0.003443,0.009081,White
2,60007,33048,3798,23858,776,0,3970,0,32,614,0.114924,0.72192,0.023481,0.0,0.120128,0.000968,0.018579,White
3,60008,23191,6844,13193,811,0,1611,0,118,614,0.295114,0.568884,0.03497,0.0,0.069467,0.005088,0.026476,White
4,60010,47130,1558,37295,421,0,6480,119,251,1006,0.033058,0.791322,0.008933,0.0,0.137492,0.005326,0.021345,White


In [259]:
len(race)

178

In [260]:
len(evictions)

170

In [261]:
evictions.head()

Unnamed: 0,Zipcode,2019_received,2020_received,2021_received,2022_received,2023_received,2019_enforced,2020_enforced,2021_enforced,2022_enforced,...,evic_per_1000_units_2023,evic_per_1000_units_2022,evic_per_1000_units_2021,evic_per_1000_units_2020,evic_per_1000_units_2019,enforced_per_1000_units_2023,enforced_per_1000_units_2022,enforced_per_1000_units_2021,enforced_per_1000_units_2020,enforced_per_1000_units_2019
0,60004,21.0,11.0,2.0,21.0,10.0,11.0,5.0,2.0,16.0,...,2.113718,4.438808,0.422744,2.32509,4.438808,0.845487,3.381949,0.422744,1.056859,2.32509
1,60005,29.0,7.0,11.0,32.0,12.0,12.0,4.0,10.0,22.0,...,2.667259,7.112692,2.444988,1.555901,6.445877,0.666815,4.889976,2.222716,0.889086,2.667259
2,60007,22.0,8.0,9.0,24.0,8.0,10.0,2.0,9.0,15.0,...,2.506266,7.518797,2.819549,2.506266,6.892231,0.93985,4.699248,2.819549,0.626566,3.132832
3,60008,29.0,4.0,10.0,18.0,9.0,19.0,1.0,8.0,10.0,...,3.579952,7.159905,3.977725,1.59109,11.535402,2.386635,3.977725,3.18218,0.397772,7.557677
4,60010,5.0,4.0,,6.0,4.0,4.0,2.0,,3.0,...,1.911132,2.866699,,1.911132,2.388915,0.955566,1.433349,,0.955566,1.911132


In [262]:
race_evictions = evictions.merge(race, on='Zipcode')

In [296]:
# add percentage change in evictions enforced
race_evictions['pct_change_19_22'] = (race_evictions['2022_enforced'] - race_evictions['2019_enforced'])/race_evictions['2019_enforced']
race_evictions['pct_change_20_22'] = (race_evictions['2022_enforced'] - race_evictions['2020_enforced'])/race_evictions['2020_enforced']

In [297]:
race_evictions.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 170 entries, 0 to 169
Data columns (total 54 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Zipcode                       170 non-null    int64  
 1   2019_received                 168 non-null    float64
 2   2020_received                 152 non-null    float64
 3   2021_received                 157 non-null    float64
 4   2022_received                 168 non-null    float64
 5   2023_received                 158 non-null    float64
 6   2019_enforced                 168 non-null    float64
 7   2020_enforced                 133 non-null    float64
 8   2021_enforced                 153 non-null    float64
 9   2022_enforced                 165 non-null    float64
 10  2023_enforced                 133 non-null    float64
 11  enforcement_rate_2019         168 non-null    float64
 12  enforcement_rate_2020         133 non-null    float64
 13  enfor

In [298]:
# Export race_evictions as a master dataset
race_evictions.to_csv('processed/race_evictions.csv')

In [270]:
# group by racial majority
g = race_evictions.groupby('Majority')[['2019_received','2020_received','2021_received','2022_received','2023_received','renter_occupied']].sum()
g['evic_per_1000_units_2019'] = (g['2019_received']/g['renter_occupied'])*1000
g['evic_per_1000_units_2020'] = (g['2020_received']/g['renter_occupied'])*1000
g['evic_per_1000_units_2021'] = (g['2021_received']/g['renter_occupied'])*1000
g['evic_per_1000_units_2022'] = (g['2022_received']/g['renter_occupied'])*1000
g['evic_per_1000_units_2023'] = (g['2023_received']/g['renter_occupied'])*1000
g

Unnamed: 0_level_0,2019_received,2020_received,2021_received,2022_received,2023_received,renter_occupied,evic_per_1000_units_2019,evic_per_1000_units_2020,evic_per_1000_units_2021,evic_per_1000_units_2022,evic_per_1000_units_2023
Majority,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Asian,13.0,1.0,3.0,8.0,2.0,1567,8.296107,0.638162,1.914486,5.105297,1.276324
Black,7802.0,1738.0,1254.0,4584.0,1846.0,220486,35.385467,7.882587,5.687436,20.790436,8.372414
Latino,1803.0,372.0,338.0,1182.0,398.0,122177,14.757278,3.044763,2.766478,9.674489,3.257569
No Majority,1853.0,423.0,454.0,1345.0,518.0,196223,9.443337,2.155711,2.313694,6.854446,2.639854
White,2353.0,617.0,687.0,1865.0,651.0,343983,6.840454,1.793693,1.997192,5.42178,1.892535


### Why is the number of evictions so high in majority black zips in 2019?

In [271]:
# top 5 eviction zipcodes are all majority Black
race_evictions.sort_values('2023_received', ascending=False).head()

Unnamed: 0,Zipcode,2019_received,2020_received,2021_received,2022_received,2023_received,2019_enforced,2020_enforced,2021_enforced,2022_enforced,...,Other,TOM,Pct Latino,Pct White,Pct Black,Pct Native American,Pct Asian,Pct Other,Pct TOM,Majority
151,60649,999.0,185.0,138.0,522.0,197.0,555.0,55.0,109.0,317.0,...,0,592,0.028771,0.028465,0.924142,0.0,0.006003,0.0,0.012088,Black
124,60619,685.0,175.0,114.0,404.0,191.0,369.0,49.0,75.0,279.0,...,317,1172,0.01643,0.009562,0.94789,0.0,0.002646,0.004994,0.018462,Black
140,60637,496.0,127.0,86.0,282.0,132.0,260.0,33.0,61.0,173.0,...,666,1143,0.02781,0.148928,0.734176,0.001999,0.050551,0.013451,0.023084,Black
120,60615,352.0,78.0,64.0,246.0,124.0,180.0,19.0,43.0,153.0,...,192,1939,0.052793,0.282639,0.531951,0.000311,0.081284,0.004597,0.046424,Black
125,60620,601.0,122.0,65.0,327.0,124.0,314.0,29.0,52.0,209.0,...,237,717,0.027928,0.01116,0.943726,0.00049,0.002941,0.003417,0.010338,Black


In [273]:
# same in 2019
race_evictions[race_evictions['Majority'] == 'Black'].sort_values('2019_received', ascending=False).head()

Unnamed: 0,Zipcode,2019_received,2020_received,2021_received,2022_received,2023_received,2019_enforced,2020_enforced,2021_enforced,2022_enforced,...,Other,TOM,Pct Latino,Pct White,Pct Black,Pct Native American,Pct Asian,Pct Other,Pct TOM,Majority
151,60649,999.0,185.0,138.0,522.0,197.0,555.0,55.0,109.0,317.0,...,0,592,0.028771,0.028465,0.924142,0.0,0.006003,0.0,0.012088,Black
124,60619,685.0,175.0,114.0,404.0,191.0,369.0,49.0,75.0,279.0,...,317,1172,0.01643,0.009562,0.94789,0.0,0.002646,0.004994,0.018462,Black
125,60620,601.0,122.0,65.0,327.0,124.0,314.0,29.0,52.0,209.0,...,237,717,0.027928,0.01116,0.943726,0.00049,0.002941,0.003417,0.010338,Black
147,60644,511.0,98.0,62.0,249.0,113.0,264.0,26.0,50.0,163.0,...,70,830,0.091014,0.038022,0.84535,0.0,0.006362,0.00147,0.017426,Black
140,60637,496.0,127.0,86.0,282.0,132.0,260.0,33.0,61.0,173.0,...,666,1143,0.02781,0.148928,0.734176,0.001999,0.050551,0.013451,0.023084,Black


In [71]:
# merge race with df
race_df = df.merge(race, left_on='DefZipCode', right_on='Zipcode')

In [72]:
# examine the raw data for majority black and 2019
black_2019 = race_df[(race_df['Majority'] == 'Black') & (race_df['entered_year'] == 2019)]

In [73]:
len(black_2019)

7802

In [74]:
# Pangea was the highest evictor in 2019 majority Black zip codes
black_2019.groupby('PltName').size().reset_index(name='num evics').sort_values('num evics', ascending=False)

Unnamed: 0,PltName,num evics
2468,PANGEA VENTURES LLC,501
2467,PANGEA VENTURES LLC,281
3589,WPD MANAGEMENT LLC,160
1581,ICARUS INVESTMENT GROUP,120
1040,EAST LAKE MANAGEMENT GROUP INC,72
...,...,...
1396,"GUAJARDO, AARON",1
1397,"GUDEMAN, TOM",1
1399,"GUESH, DOLOMON",1
1400,"GUESH, SOLOMON",1


In [75]:
# group by for just 60649
race_evictions[race_evictions['Zipcode'] == 60649][['evic_per_1000_units_2019','evic_per_1000_units_2020','evic_per_1000_units_2021','evic_per_1000_units_2022','evic_per_1000_units_2023']]

Unnamed: 0,evic_per_1000_units_2019,evic_per_1000_units_2020,evic_per_1000_units_2021,evic_per_1000_units_2022,evic_per_1000_units_2023
152,56.259503,10.418427,7.771583,29.396858,11.094216


### What do evictions look like on a monthly basis by majority race zip code?

Probably just good to check if the ccso's eviction data follows a similar pattern to the court's data.

In [274]:
dfz_month = pd.pivot_table(df, values='CaseNum', index='received_month_year', columns='DefZipCode', aggfunc='count').reset_index()

In [275]:
dfz_month['received_month_year'] = pd.to_datetime(dfz_month['received_month_year'])

In [276]:
dfz_month = dfz_month.sort_values('received_month_year', ascending=True)

In [277]:
dfz_month['received_month_year'] = dfz_month['received_month_year'].dt.strftime('%m/%Y')

In [278]:
dfz_month = dfz_month.transpose()

In [279]:
dfz_month.columns = dfz_month.iloc[0]
dfz_month = dfz_month[1:]

In [280]:
dfz_month = dfz_month.reset_index()

In [281]:
# match to race data
dfz_month = dfz_month.merge(race, left_on = 'DefZipCode', right_on = 'Zipcode')

In [282]:
dfz_month

Unnamed: 0,DefZipCode,01/2019,02/2019,03/2019,04/2019,05/2019,06/2019,07/2019,08/2019,09/2019,...,Other,TOM,Pct Latino,Pct White,Pct Black,Pct Native American,Pct Asian,Pct Other,Pct TOM,Majority
0,60004,,1.0,2.0,1.0,2.0,3.0,1.0,4.0,,...,47,1387,0.098961,0.743485,0.027835,0.000707,0.101616,0.000898,0.026498,White
1,60005,3.0,3.0,2.0,1.0,3.0,,2.0,3.0,2.0,...,102,269,0.168355,0.695024,0.025657,0.000439,0.098001,0.003443,0.009081,White
2,60007,,2.0,2.0,4.0,3.0,2.0,1.0,3.0,1.0,...,32,614,0.114924,0.721920,0.023481,0.000000,0.120128,0.000968,0.018579,White
3,60008,,3.0,1.0,2.0,4.0,4.0,2.0,1.0,,...,118,614,0.295114,0.568884,0.034970,0.000000,0.069467,0.005088,0.026476,White
4,60010,,,1.0,,,,1.0,1.0,1.0,...,251,1006,0.033058,0.791322,0.008933,0.000000,0.137492,0.005326,0.021345,White
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,60714,2.0,4.0,,4.0,4.0,1.0,1.0,7.0,4.0,...,35,626,0.124135,0.654512,0.022142,0.000673,0.177358,0.001122,0.020059,White
168,60803,5.0,5.0,6.0,10.0,2.0,5.0,3.0,2.0,2.0,...,94,393,0.245018,0.505710,0.207578,0.001791,0.018093,0.004210,0.017600,White
169,60804,6.0,5.0,10.0,11.0,13.0,12.0,10.0,4.0,11.0,...,210,218,0.876874,0.084812,0.028432,0.000742,0.004099,0.002473,0.002568,Latino
170,60805,2.0,3.0,,2.0,2.0,2.0,1.0,1.0,1.0,...,150,476,0.146651,0.553812,0.244971,0.001811,0.021273,0.007544,0.023939,White


In [283]:
dfz_month.columns.to_list()

['DefZipCode',
 '01/2019',
 '02/2019',
 '03/2019',
 '04/2019',
 '05/2019',
 '06/2019',
 '07/2019',
 '08/2019',
 '09/2019',
 '10/2019',
 '11/2019',
 '12/2019',
 '01/2020',
 '02/2020',
 '03/2020',
 '05/2020',
 '06/2020',
 '08/2020',
 '09/2020',
 '10/2020',
 '11/2020',
 '12/2020',
 '01/2021',
 '02/2021',
 '03/2021',
 '04/2021',
 '05/2021',
 '06/2021',
 '07/2021',
 '08/2021',
 '09/2021',
 '10/2021',
 '11/2021',
 '12/2021',
 '01/2022',
 '02/2022',
 '03/2022',
 '04/2022',
 '05/2022',
 '06/2022',
 '07/2022',
 '08/2022',
 '09/2022',
 '10/2022',
 '11/2022',
 '12/2022',
 '01/2023',
 '02/2023',
 '03/2023',
 '04/2023',
 'Zipcode',
 'Total',
 'Latino',
 'White',
 'Black',
 'Native American',
 'Asian',
 'Pacific Islander',
 'Other',
 'TOM',
 'Pct Latino',
 'Pct White',
 'Pct Black',
 'Pct Native American',
 'Pct Asian',
 'Pct Other',
 'Pct TOM',
 'Majority']

In [284]:
g = dfz_month.groupby('Majority')[['01/2019','02/2019','03/2019','04/2019','05/2019','06/2019','07/2019','08/2019','09/2019','10/2019','11/2019','12/2019','01/2020','02/2020','03/2020','05/2020','06/2020','08/2020',
 '09/2020','10/2020','11/2020','12/2020','01/2021','02/2021','03/2021','04/2021','05/2021','06/2021','07/2021','08/2021','09/2021','10/2021','11/2021','12/2021','01/2022','02/2022','03/2022','04/2022','05/2022','06/2022',
                                '07/2022','08/2022','09/2022','10/2022','11/2022','12/2022','01/2023','02/2023','03/2023','04/2023']].sum()

In [285]:
# this is what goes into Flourish
g

Unnamed: 0_level_0,01/2019,02/2019,03/2019,04/2019,05/2019,06/2019,07/2019,08/2019,09/2019,10/2019,...,07/2022,08/2022,09/2022,10/2022,11/2022,12/2022,01/2023,02/2023,03/2023,04/2023
Majority,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Asian,0.0,2.0,0.0,2.0,0.0,2.0,2.0,1.0,2.0,1.0,...,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
Black,637.0,622.0,568.0,633.0,625.0,605.0,674.0,706.0,686.0,775.0,...,318.0,488.0,494.0,443.0,496.0,526.0,518.0,517.0,719.0,92.0
Latino,145.0,139.0,152.0,145.0,164.0,145.0,166.0,136.0,149.0,181.0,...,84.0,110.0,125.0,111.0,110.0,112.0,104.0,114.0,168.0,12.0
No Majority,131.0,156.0,169.0,185.0,162.0,138.0,151.0,157.0,176.0,167.0,...,91.0,139.0,122.0,138.0,140.0,149.0,146.0,146.0,195.0,31.0
White,179.0,228.0,179.0,211.0,222.0,189.0,167.0,196.0,212.0,214.0,...,134.0,193.0,209.0,185.0,161.0,180.0,206.0,183.0,237.0,27.0


In [286]:
# export the grouped data
g.to_csv('processed/ccso_evictions_month_majority_race.csv')

In [None]:
# do the same but for evictions enforced

In [288]:
dfz_month_enforced = pd.pivot_table(df[df['eviction_completed'] == 'completed'], values='CaseNum', index='completed_month_year', columns='DefZipCode', aggfunc='count').reset_index()
dfz_month_enforced['completed_month_year'] = pd.to_datetime(dfz_month_enforced['completed_month_year'])
dfz_month_enforced = dfz_month_enforced.sort_values('completed_month_year', ascending=True)
dfz_month_enforced['completed_month_year'] = dfz_month_enforced['completed_month_year'].dt.strftime('%m/%Y')
dfz_month_enforced = dfz_month_enforced.transpose()
dfz_month_enforced.columns = dfz_month_enforced.iloc[0]
dfz_month_enforced = dfz_month_enforced[1:]
dfz_month_enforced = dfz_month_enforced.reset_index()
# match to race data
dfz_month_enforced = dfz_month_enforced.merge(race, left_on = 'DefZipCode', right_on = 'Zipcode')

In [291]:
dfz_month_enforced.columns

Index(['DefZipCode', '01/2000', '01/2019', '02/2019', '03/2019', '04/2019',
       '05/2019', '06/2019', '07/2019', '08/2019', '09/2019', '10/2019',
       '11/2019', '12/2019', '01/2020', '02/2020', '03/2020', '06/2020',
       '07/2020', '08/2020', '09/2020', '10/2020', '11/2020', '12/2020',
       '01/2021', '02/2021', '03/2021', '04/2021', '05/2021', '06/2021',
       '07/2021', '08/2021', '09/2021', '10/2021', '11/2021', '12/2021',
       '01/2022', '02/2022', '03/2022', '04/2022', '05/2022', '06/2022',
       '07/2022', '08/2022', '09/2022', '10/2022', '11/2022', '12/2022',
       '01/2023', '02/2023', '03/2023', '04/2023', 'Zipcode', 'Total',
       'Latino', 'White', 'Black', 'Native American', 'Asian',
       'Pacific Islander', 'Other', 'TOM', 'Pct Latino', 'Pct White',
       'Pct Black', 'Pct Native American', 'Pct Asian', 'Pct Other', 'Pct TOM',
       'Majority'],
      dtype='object')

In [292]:
e = dfz_month_enforced.groupby('Majority')[['01/2019','02/2019','03/2019','04/2019','05/2019','06/2019','07/2019','08/2019','09/2019','10/2019','11/2019','12/2019','01/2020','02/2020','03/2020','06/2020','08/2020',
 '09/2020','10/2020','11/2020','12/2020','01/2021','02/2021','03/2021','04/2021','05/2021','06/2021','07/2021','08/2021','09/2021','10/2021','11/2021','12/2021','01/2022','02/2022','03/2022','04/2022','05/2022','06/2022',
                                '07/2022','08/2022','09/2022','10/2022','11/2022','12/2022','01/2023','02/2023','03/2023','04/2023']].sum()

In [293]:
# this is what goes into Flourish
e

Unnamed: 0_level_0,01/2019,02/2019,03/2019,04/2019,05/2019,06/2019,07/2019,08/2019,09/2019,10/2019,...,07/2022,08/2022,09/2022,10/2022,11/2022,12/2022,01/2023,02/2023,03/2023,04/2023
Majority,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Asian,0.0,0.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Black,2.0,49.0,206.0,437.0,366.0,306.0,328.0,351.0,335.0,360.0,...,170.0,292.0,211.0,241.0,211.0,227.0,254.0,432.0,353.0,42.0
Latino,1.0,4.0,43.0,66.0,124.0,100.0,61.0,74.0,69.0,98.0,...,57.0,58.0,55.0,65.0,70.0,35.0,70.0,63.0,76.0,17.0
No Majority,0.0,11.0,65.0,96.0,98.0,124.0,70.0,65.0,86.0,127.0,...,75.0,82.0,71.0,80.0,82.0,50.0,110.0,90.0,88.0,11.0
White,0.0,17.0,81.0,118.0,137.0,165.0,88.0,64.0,85.0,143.0,...,89.0,91.0,121.0,122.0,103.0,58.0,137.0,117.0,116.0,32.0


In [295]:
# why is jan so low in 2019?
e[['01/2019','01/2020','01/2021', '01/2022','01/2023']]

Unnamed: 0_level_0,01/2019,01/2020,01/2021,01/2022,01/2023
Majority,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Asian,0.0,0.0,0.0,0.0,0.0
Black,2.0,401.0,17.0,135.0,254.0
Latino,1.0,91.0,5.0,19.0,70.0
No Majority,0.0,89.0,12.0,44.0,110.0
White,0.0,140.0,17.0,62.0,137.0


In [294]:
# export the grouped data
e.to_csv('processed/ccso_evictions_enforced_month_majority_race.csv')

## Plaintiffs analysis

### How many records have a missing plaintiff?

In [None]:
# just two based on manual filtering in google sheets

### Most successful plaintiffs

In [111]:
df['CaseNum'].count()

32148

In [265]:
# group by plaintiffs and count of successful and not successful evictions
plaintiffs = pd.pivot_table(df, values='CaseNum', index='PltName', columns='eviction_completed', aggfunc='count').reset_index().sort_values('completed', ascending=False)
plaintiffs['completion_rate'] = plaintiffs['completed']/(plaintiffs['completed'] + plaintiffs['not completed'])

In [268]:
# top 15 most successful evictors
plaintiffs.sort_values('completion_rate', ascending=False).head(15)

eviction_completed,PltName,completed,not completed,completion_rate
7817,KAMBRI REALTY LLC,20.0,1.0,0.952381
10359,"NEGRON, EDWARD",9.0,1.0,0.9
3080,CA PROPERTIES LLC,8.0,1.0,0.888889
5136,ESA MANAGEMENT LLC,8.0,1.0,0.888889
14495,TPG ASSET MANAGEMENT,7.0,1.0,0.875
6909,HOLSTEN MGMT CORP,7.0,1.0,0.875
1931,AXONAS LLC,7.0,1.0,0.875
4275,D&K REAL ESTATE SERVICE CORP,7.0,1.0,0.875
8829,LONDON TOWNE HOUSES COOPERATIVE INC,21.0,3.0,0.875
1420,ALPINE RE LLC,7.0,1.0,0.875


In [118]:
# avg completion rate is 51%
plaintiffs['completion_rate'].describe()

count    2414.000000
mean        0.510933
std         0.138587
min         0.076923
25%         0.500000
50%         0.500000
75%         0.588725
max         0.952381
Name: completion_rate, dtype: float64

In [267]:
# see top fifteen most prolific evictors
plaintiffs.sort_values('completed', ascending=False).head(15)

eviction_completed,PltName,completed,not completed,completion_rate
11021,PANGEA VENTURES LLC,308.0,415.0,0.426003
11022,PANGEA VENTURES LLC,266.0,359.0,0.4256
15803,WPD MANAGEMENT LLC,103.0,123.0,0.455752
4512,DEUTSCHE BANK NATIONAL TRUST COMPANY,93.0,98.0,0.486911
7182,ICARUS INVESTMENT GROUP,83.0,80.0,0.509202
4901,EAST LAKE MANAGEMENT GROUP INC,77.0,53.0,0.592308
5368,FEDERAL NATIONAL MORTGAGE ASSOCIATION,76.0,71.0,0.517007
14162,THE HABITAT COMPANY LLC,75.0,43.0,0.635593
14365,TLC MANAGEMENT CO,71.0,68.0,0.510791
3571,CHICAGO HOUSING AUTHORITY,71.0,40.0,0.63964


In [302]:
plaintiffs['PltName'] = plaintiffs['PltName'].astype(str)

In [303]:
# searching for Harbor Point Venture LLC
plaintiffs.loc[plaintiffs['PltName'].str.contains('harbor', case=False)]

eviction_completed,PltName,completed,not completed,completion_rate
6566,HARBOR POINT VENTURE LLC,8.0,14.0,0.363636
6567,HARBOR POINT VENTURE LLC,7.0,2.0,0.777778
6568,"HARBOR POINT VENTURE, LLC",3.0,1.0,0.75
12714,SAFE HARBOR REALTY LLC,2.0,2.0,0.5
150,155 HARBOR DRIVE CONDO ASSOC,2.0,,
15255,WATERTON NORTH HARBOR TOWER,2.0,2.0,0.5
11091,PARK HARBOR CONDOMINIUM ASSOCIATION,1.0,,
5417,FIELD HARBOR PARKING CONDOMINIUM ASSOCIATION,1.0,,
6569,HARBORSIDE I CONDOMINIUM ASSOCIATION,1.0,,
151,155 HARBOR DRIVE CONDOMINIUM ASSOCIATION,,1.0,


In [304]:
# searching for Pangea
plaintiffs.loc[plaintiffs['PltName'].str.contains('pangea', case=False)]

eviction_completed,PltName,completed,not completed,completion_rate
11021,PANGEA VENTURES LLC,308.0,415.0,0.426003
11022,PANGEA VENTURES LLC,266.0,359.0,0.4256
11023,"PANGEA VENTURES, LLC",36.0,45.0,0.444444
14964,"VENTURES, PANGEA",8.0,2.0,0.8
11020,PANGEA VENTURE LLC,5.0,6.0,0.454545
11024,"PANGEA VENTURS, LLC",1.0,,


## Gentrification analysis (Zillow data)

Source: https://www.zillow.com/research/data/

Data is very incomplete in that it's missing many zipcodes for certain months

In [128]:
# load Zillow zipcode data
zillow = pd.read_csv('source/Zip_zori_sm_month_04_2023.csv')

In [131]:
# 107 zipcodes in Cook County
len(zillow[zillow['CountyName'] == 'Cook County'])

107

In [132]:
# filter data just for cook county
zillow = zillow[zillow['CountyName'] == 'Cook County'].copy()

In [None]:
# where did average rents rise the most? 

In [148]:
# calculate 2022 to 2023 april percent change
zillow['apr_22_23_pct_change'] = (zillow['2023-04-30']-zillow['2022-04-30'])/zillow['2022-04-30']

# count number of NaNs
zillow['apr_22_23_pct_change'].isna().sum()

45

In [154]:
# calculate 2021 to 2023 april percent change
zillow['apr_21_23_pct_change'] = (zillow['2023-04-30']-zillow['2021-04-30'])/zillow['2021-04-30']

# count number of NaNs
zillow['apr_21_23_pct_change'].isna().sum()

52

In [155]:
# calculate 2020 to 2023 april percent change
zillow['apr_20_23_pct_change'] = (zillow['2023-04-30']-zillow['2020-04-30'])/zillow['2020-04-30']

# count number of NaNs
zillow['apr_20_23_pct_change'].isna().sum()

59

In [156]:
# calculate 2019 to 2023 april percent change
zillow['apr_19_23_pct_change'] = (zillow['2023-04-30']-zillow['2019-04-30'])/zillow['2019-04-30']

# count number of NaNs
zillow['apr_19_23_pct_change'].isna().sum()

60

In [157]:
# look
zillow[['RegionName', 'apr_22_23_pct_change','apr_21_23_pct_change','apr_20_23_pct_change','apr_19_23_pct_change','2019-04-30','2020-04-30','2021-04-30','2022-04-30','2023-04-30']].sort_values('apr_22_23_pct_change', ascending=True).head()

Unnamed: 0,RegionName,apr_22_23_pct_change,apr_21_23_pct_change,apr_20_23_pct_change,apr_19_23_pct_change,2019-04-30,2020-04-30,2021-04-30,2022-04-30,2023-04-30
176,60620,0.006208,0.103453,0.200843,0.226041,925.124443,944.536497,1027.90092,1127.242334,1134.240278
363,60619,0.021671,0.118389,0.235771,0.275222,881.02919,909.155621,1004.577191,1099.676967,1123.508088
1717,60025,0.023818,,,,,,,2281.849895,2336.198413
73,60617,0.03189,0.133372,0.19315,0.22605,941.195195,967.147925,1018.158905,1118.290111,1153.952381
6006,60661,0.036932,0.194469,0.113342,0.126674,2079.119755,2104.01553,1961.113934,2259.058614,2342.48961


In [158]:
# left merge zillow data to race_evictions - keep all rows in evictions data
race_evictions_zillow = race_evictions.merge(zillow, left_on='Zipcode', right_on='RegionName', how='left')

In [159]:
# export 
race_evictions_zillow.to_csv('processed/race_evictions_zillow.csv')

In [144]:
# check for completeness of zillow data 
test = race_evictions_zillow.sort_values(2022, ascending=False)[['Zipcode',2022,'2019-01-31','2020-01-31','2021-01-31','2023-04-30']].head(20)
test

Unnamed: 0,Zipcode,2022,2019-01-31,2020-01-31,2021-01-31,2023-04-30
152,60649,522.0,923.293321,955.585043,1027.110725,1170.173333
125,60619,404.0,870.499738,910.132263,992.58773,1123.508088
126,60620,327.0,924.209622,960.442792,1017.547039,1134.240278
141,60637,282.0,1291.822339,1372.984475,1449.874467,1647.899083
133,60628,255.0,973.761673,997.712744,1049.489001,1301.468254
148,60644,249.0,,,959.932345,1152.240741
121,60615,246.0,1534.03408,1636.582417,1641.673866,1869.9431
60,60411,229.0,,,,1411.666667
123,60617,209.0,941.172167,952.902144,1018.687532,1153.952381
134,60629,204.0,,,,1191.333333


In [145]:
test['19_23_pct_change'] = (test['2023-04-30'] - test['2019-01-31'])/test['2019-01-31']

In [146]:
test

Unnamed: 0,Zipcode,2022,2019-01-31,2020-01-31,2021-01-31,2023-04-30,19_23_pct_change
152,60649,522.0,923.293321,955.585043,1027.110725,1170.173333,0.267391
125,60619,404.0,870.499738,910.132263,992.58773,1123.508088,0.290647
126,60620,327.0,924.209622,960.442792,1017.547039,1134.240278,0.227254
141,60637,282.0,1291.822339,1372.984475,1449.874467,1647.899083,0.275639
133,60628,255.0,973.761673,997.712744,1049.489001,1301.468254,0.336537
148,60644,249.0,,,959.932345,1152.240741,
121,60615,246.0,1534.03408,1636.582417,1641.673866,1869.9431,0.218971
60,60411,229.0,,,,1411.666667,
123,60617,209.0,941.172167,952.902144,1018.687532,1153.952381,0.22608
134,60629,204.0,,,,1191.333333,


In [136]:
race_evictions.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 171 entries, 0 to 170
Data columns (total 34 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Zipcode                   171 non-null    int64  
 1   2019                      169 non-null    float64
 2   2020                      152 non-null    float64
 3   2021                      157 non-null    float64
 4   2022                      168 non-null    float64
 5   2023                      158 non-null    float64
 6   total                     171 non-null    int64  
 7   total_moe                 171 non-null    int64  
 8   owner_occupied            171 non-null    int64  
 9   owner_occupied_moe        171 non-null    int64  
 10  renter_occupied           171 non-null    int64  
 11  renter_occupied_moe       171 non-null    int64  
 12  evic_per_1000_units_2023  158 non-null    float64
 13  evic_per_1000_units_2022  168 non-null    float64
 14  evic_per_1

## Mapping

In [121]:
# load cook county zipcode geojson
cook_map = gpd.read_file('../geographies/cook_zctas.geojson')

In [122]:
# convert datatype
cook_map['ZCTA'] = cook_map['ZCTA'].astype(int)

In [126]:
# join with dfz_year
evictions_zip_map = cook_map.merge(dfz_year, left_on = 'ZCTA', right_on = 'DefZipCode', how='inner')

In [127]:
# export evictions map
evictions_zip_map.to_file('processed/evictions_zip_map_year.geojson', driver='GeoJSON')

AttributeError: 'int' object has no attribute 'encode'