In [179]:
import pandas as pd
import numpy as np
import csv

In [180]:
# Load the raw export downloaded from the Chicago Health Atlas
raw_data = pd.read_csv(filepath_or_buffer='Y_Data_2.csv')


In [181]:
# Clean data: pull the raw columns we need under short names, add new columns for analysis
clean_df = pd.DataFrame().assign(**{
    new_name: raw_data[source_name]
    for new_name, source_name in {
        'Zipcode': 'Name',
        'College_12': 'EDE_2008-2012',
        'College_16': 'EDE_2012-2016',
        'College_20': 'EDE_2016-2020',
        'Income_12': 'INC_2008-2012',
        'Income_16': 'INC_2012-2016',
        'Income_20': 'INC_2016-2020',
        'PCTW_12': 'PCT-W_2008-2012',
        'PCTW_16': 'PCT-W_2012-2016',
        'PCTW_20': 'PCT-W_2016-2020',
    }.items()
})

# Analysis columns start out holding the string 'NAN'; later cells overwrite them
clean_df = clean_df.assign(**dict.fromkeys(
    [
        '2016 Eligible',
        '2016 Gentrified',
        '2020 Eligible',
        '2020 Gentrified',
        'College_Diff16',
        'College_Diff20',
        'W_Diff16',
        'W_Diff20',
    ],
    'NAN',
))

# Remove the row labelled 0
clean_df = clean_df.drop(index=0)


In [182]:
# Convert the measurement columns to float values (originally string values)
clean_df = clean_df.astype(dict.fromkeys(
    [
        'College_12',
        'College_16',
        'College_20',
        'Income_12',
        'Income_16',
        'Income_20',
        'PCTW_12',
        'PCTW_16',
        'PCTW_20',
    ],
    float,
))

Chicago median income data was taken from the Chicago Health Atlas for the years 2012, 2016 and 2020. 
We use the following threshold: if the median income of a zipcode is at least 50% above the Chicago median,
it is ineligible to be gentrified; otherwise it is eligible.

Chicago median income in 2012: 56,129; 50% above is 84,194

Chicago median income in 2016: 56,853; 50% above is 85,280

Chicago median income in 2020: 61,784; 50% above is 92,676




In [183]:
#function to determine if zipcode is eligible to be gentrified
# 1-True it is eligible, 0- False ineligible 
def gent_eligible(df, threshold_16=85280.0, threshold_20=92676.0):
    '''
    Flag each row as eligible (1) or ineligible (0) to be gentrified in 2016 and 2020.

    A row is eligible for a given year when its median income for that year is
    strictly below the cutoff. The default cutoffs are 50% above Chicago's
    median income: 85,280 for 2016 and 92,676 for 2020.

    Inputs:
        df: DataFrame with numeric 'Income_16' and 'Income_20' columns
        threshold_16: income cutoff compared against 'Income_16'
        threshold_20: income cutoff compared against 'Income_20'

    Returns:
        (income_16_lst, income_20_lst): two lists of 0/1 flags, one entry per
        row of df, in row order. A missing income (NaN) never compares as
        below the cutoff, so it is flagged 0.
    '''
    def flags_below(column, cutoff):
        # 1 when the income is under the cutoff, 0 otherwise (including NaN)
        return [1 if income < cutoff else 0 for income in df[column].tolist()]

    income_16_lst = flags_below('Income_16', threshold_16)
    income_20_lst = flags_below('Income_20', threshold_20)

    return income_16_lst, income_20_lst
 
    

In [184]:
# Create columns for the eligibility variable (2016 and 2020)
eligible_16, eligible_20 = gent_eligible(clean_df)
clean_df = clean_df.assign(**{
    '2016 Eligible': eligible_16,
    '2020 Eligible': eligible_20,
})

In [185]:
# Percentage-point change, relative to the 2012 value, in
#   - the percent of the population that is college educated, and
#   - the percent of the population that is White.
# A change of at least 10 percentage points is treated as significant.

college_diff16 = clean_df['College_16'].sub(clean_df['College_12'])
w_diff16 = clean_df['PCTW_16'].sub(clean_df['PCTW_12'])

college_diff20 = clean_df['College_20'].sub(clean_df['College_12'])
w_diff20 = clean_df['PCTW_20'].sub(clean_df['PCTW_12'])

clean_df = clean_df.assign(
    College_Diff16=college_diff16.tolist(),
    College_Diff20=college_diff20.tolist(),
    W_Diff16=w_diff16.tolist(),
    W_Diff20=w_diff20.tolist(),
)


In [186]:
#function to determine if an eligible zipcode was gentrified (1 - True) or not (0 - False)

def gentrified(df, College_Yr, W_YR, Eligible_Col='2016 Eligible', threshold=10.0):
    '''
    Return a 0/1 flag per row saying whether an eligible zipcode was gentrified.

    A row is flagged 1 when it is eligible (its Eligible_Col value equals 1)
    and at least one of its two change columns is >= threshold. Every other
    row, including ineligible rows and rows whose changes are missing (NaN),
    is flagged 0.

    Inputs:
        df: DataFrame holding the eligibility column and both change columns
        College_Yr: name of the column with the change in percent college educated
        W_YR: name of the column with the change in percent White
        Eligible_Col: name of the 0/1 eligibility column to gate on
        threshold: minimum change, in percentage points, counted as significant

    Returns:
        List of 0/1 flags, one per row of df, in row order.
    '''
    # Walk the three columns by position so the result does not depend on
    # what labels the DataFrame index happens to carry.
    rows = zip(df[Eligible_Col].tolist(), df[College_Yr].tolist(), df[W_YR].tolist())

    gent_lst = []
    for is_eligible, college_change, white_change in rows:
        significant = college_change >= threshold or white_change >= threshold
        gent_lst.append(1 if is_eligible == 1 and significant else 0)

    return gent_lst
    

In [187]:
#Create columns for gentrified variable
gent_lst_16 = gentrified(clean_df, 'College_Diff16', 'W_Diff16')

# As called here, gentrified() reads eligibility from the '2016 Eligible' column.
# For the 2020 outcome, pass a copy of the frame whose '2016 Eligible' column
# carries the 2020 eligibility flags, so 2020 gentrification is gated on 2020
# eligibility rather than on 2016 eligibility. clean_df itself is not modified.
gent_lst_20 = gentrified(
    clean_df.assign(**{'2016 Eligible': clean_df['2020 Eligible']}),
    'College_Diff20',
    'W_Diff20',
)

clean_df['2016 Gentrified'] = gent_lst_16
clean_df['2020 Gentrified'] = gent_lst_20

In [188]:
# Build the final dataframe: zipcode plus the gentrification (y) variables only
fin_df = pd.DataFrame().assign(**{
    out_name: clean_df[source_name]
    for out_name, source_name in {
        'Zipcode': 'Zipcode',
        'Eligible2016': '2016 Eligible',
        'Gentrified2016': '2016 Gentrified',
        'Eligible2020': '2020 Eligible',
        'Gentrified2020': '2020 Gentrified',
    }.items()
})

In [189]:
# Write the full working table and the final y-variable table out as CSV files
clean_df.to_csv(path_or_buf="Zipcode_Complete_Data_Yvalues.csv")
fin_df.to_csv(path_or_buf="Zipcode_Yvalues.csv")