# Predicting Impact of South Bend's Housing Program

During the "1,000 Homes in 1,000 Days" initiative, the City of South Bend demolished or repaired over 1,000 homes that had been left behind by absentee landlords. The effects were not equally distributed across the city. This notebook combines data on the initiative with Census demographic data from the American Community Survey to analyze the profile of the areas affected by the initiative.

## Import & Clean Data

In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# All input CSVs are expected in the current working directory.
mydir = os.getcwd()
parcels = pd.read_csv(os.path.join(mydir, 'Abandoned_Property_Parcels.csv'))

# ZIP codes to analyze, in ascending order.
# 46618, 46623, 46625 are included in data, but don't exist.
zips = sorted([46637, 46628, 46635, 46616, 46617, 46615, 46601, 46619, 46613, 46614])

# One row per ZIP code; column 0 holds the ZIP, the remaining columns are
# filled in by later cells.
sbdata = np.zeros((10, 10))
sbdata[:, 0] = zips

# Each ACS table is read, its row labelled 0 is dropped, and the remainder is
# ordered by the GEO.id2 column.
# DP05: age, sex, race
asr = (
    pd.read_csv(os.path.join(mydir, 'ACS_15_5YR_DP05.csv'))
    .drop([0])
    .sort_values(['GEO.id2'])
)
# DP02: marital female, education, disability
med = (
    pd.read_csv(os.path.join(mydir, 'ACS_15_5YR_DP02.csv'))
    .drop([0])
    .sort_values(['GEO.id2'])
)
# DP03: unemployment, median family income
um = (
    pd.read_csv(os.path.join(mydir, 'ACS_15_5YR_DP03.csv'))
    .drop([0])
    .sort_values(['GEO.id2'])
)

## Structure Data

In [57]:
# Count the parcels that fall in each tracked ZIP code (column 1 of sbdata).
#
# Zip_Code values are scaled by 1000 before lookup, exactly as the original
# loop did (presumably the CSV stores 46601 as 46.601 -- TODO confirm against
# the file). The value is rounded AFTER scaling and converted to int once, so
# floating-point error in the multiplication cannot make a valid ZIP miss the
# membership test.
zip_row = {zip_code: row for row, zip_code in enumerate(zips)}
for raw_zip in parcels['Zip_Code']:
    if pd.isna(raw_zip):
        continue  # parcels without a ZIP code cannot be attributed to a row
    row = zip_row.get(int(round(raw_zip * 1000)))
    if row is not None:
        sbdata[row, 1] += 1

def _fill_acs_columns(table, frame, column_map):
    """Copy ACS values into *table* for every ZIP code in its first column.

    table      -- 2-D float array; column 0 holds the ZIP code of each row.
    frame      -- ACS DataFrame with a 'GEO.id2' column identifying each row.
    column_map -- {table column index: ACS column name} to copy.

    Rows are matched on the value of GEO.id2 rather than by walking index
    labels, so the result does not depend on the row order of the CSV. If a
    ZIP code has no matching row, the mapped columns are set to NaN instead
    of silently receiving another ZIP code's values.
    """
    geo_ids = pd.to_numeric(frame['GEO.id2'], errors='coerce')
    for row in range(len(table)):
        matches = frame[geo_ids == round(table[row, 0])]
        for col, acs_name in column_map.items():
            if len(matches):
                table[row, col] = float(matches[acs_name].iloc[0])
            else:
                table[row, col] = np.nan


# DP05 (age, sex, race)
_fill_acs_columns(sbdata, asr, {
    2: 'HC03_VC05',   # %female
    3: 'HC03_VC29',   # %65+
    4: 'HC03_VC49',   # %white
})

# DP02 (marital status, education, disability)
_fill_acs_columns(sbdata, med, {
    5: 'HC03_VC45',   # %females now married, never separated
    6: 'HC03_VC96',   # %bachelor's degree or higher
    7: 'HC03_VC106',  # %civilian noninstitutionalized population with disability
})

# DP03 (employment, income)
_fill_acs_columns(sbdata, um, {
    8: 'HC03_VC07',   # %unemployed
    9: 'HC01_VC114',  # $median family income
})


In [58]:
# Wrap the assembled array in a DataFrame indexed by ZIP code.
sbdataf = pd.DataFrame(sbdata, index=zips)
# Column 0 repeats the ZIP codes already used as the index, so drop it.
# `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally.
sbdataf = sbdataf.drop([0], axis=1)
sbdataf = sbdataf.rename(index=int, columns={1:'# houses', 2:'%fem', 3:'%65+', 4:'%white', 5:'%fem_mar', 6:'%bach', 7:'%disability', 8:'%unemployed', 9:'median_fam_income'})
sbdataf

Unnamed: 0,# houses,%fem,%65+,%white,%fem_mar,%bach,%disability,%unemployed,median_fam_income
46601,60.0,48.1,10.0,40.4,18.8,13.3,23.4,8.5,22845.0
46613,237.0,49.2,7.0,60.6,31.1,9.0,16.0,10.0,31952.0
46614,36.0,51.9,18.8,83.7,52.7,28.7,14.2,3.5,61693.0
46615,45.0,53.6,11.6,73.2,37.4,28.8,12.8,5.2,49579.0
46616,227.0,48.1,10.1,66.8,34.2,28.5,13.7,11.4,38693.0
46617,50.0,51.7,15.2,77.4,40.7,54.5,11.1,3.9,66157.0
46619,203.0,51.9,13.7,64.2,38.1,9.7,14.8,7.6,40317.0
46628,586.0,51.1,13.8,59.7,41.9,22.2,15.4,9.0,48115.0
46635,1.0,53.5,23.7,83.3,53.3,36.0,17.2,3.5,67995.0
46637,1.0,50.3,15.5,83.6,47.2,31.7,11.4,3.6,61323.0
