In [1]:
import pandas as pd

## Family Income Data (2019 ACS):

In [2]:
# Load the 2019 ACS income extract. Every column is read as text (dtype=str)
# so pandas does not infer a different dtype for each chunk of the file; the
# numeric casts are done explicitly in the next cell.
df1 = pd.read_csv('CensusIncome.csv', dtype=str)
# Select and rename columns of interest. Chaining drop/rename builds a new
# frame instead of renaming a slice of df1 in place.
# NOTE(review): row 0 is dropped on the assumption that it holds the second,
# descriptive header line of the Census export -- confirm against the CSV.
df_income = (
    df1[['NAME', 'S1901_C01_013E', 'S1901_C02_013E']]
    .drop(index=0)
    .rename(columns={'NAME': 'ZCTA',
                     'S1901_C01_013E': 'Household_income',
                     'S1901_C02_013E': 'Family_income'})
)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
# For each income column: discard rows whose text contains "N" or "-"
# (presumably Census placeholder annotations -- TODO confirm), then convert
# the remaining text to float. Columns are handled in the same order as before.
for income_col in ("Household_income", "Family_income"):
    for marker in ("N", "-"):
        flagged = df_income[income_col].str.contains(marker)
        df_income = df_income[~flagged]
    df_income[income_col] = df_income[income_col].astype(str).astype(float)

# Summary statistics of the cleaned family income column.
df_income.loc[:, "Family_income"].describe()

count     31435.000000
mean      90489.174201
std       41463.700939
min        6461.000000
25%       66537.000000
50%       80938.000000
75%      101635.000000
max      594571.000000
Name: Family_income, dtype: float64

In [4]:
# Summary statistics of the cleaned household income column.
df_income.loc[:, "Household_income"].describe()

count     31435.000000
mean      77907.536949
std       34622.791868
min        7293.000000
25%       57577.500000
50%       70006.000000
75%       87982.000000
max      534451.000000
Name: Household_income, dtype: float64

## Income by Type Data:

In [5]:
# Load the income-by-type extract. Every column is read as text (dtype=str)
# so pandas does not infer a different dtype for each chunk of the file; the
# numeric casts are done explicitly in the next cell.
df2 = pd.read_csv('Census_income_type.csv', dtype=str)
# Select and rename the columns of interest. Chaining drop/rename builds a new
# frame instead of renaming a slice of df2 in place.
# NOTE(review): row 0 is dropped on the assumption that it holds the second,
# descriptive header line of the Census export -- confirm against the CSV.
df_income_type = (
    df2[['NAME', 'S1902_C01_001E', 'S1902_C01_002E', 'S1902_C01_003E',
         'S1902_C01_004E', 'S1902_C01_006E', 'S1902_C01_008E']]
    .drop(index=0)
    .rename(columns={'NAME': 'ZCTA',
                     'S1902_C01_001E': 'AllHouses',
                     'S1902_C01_002E': 'AllWithIncome',
                     'S1902_C01_003E': 'Wages_Salary',
                     'S1902_C01_004E': 'SelfEmployment',
                     'S1902_C01_006E': 'SocialSecurity',
                     'S1902_C01_008E': 'FoodStamps'})
)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [6]:
# Convert each of the six renamed data columns to float, one column at a time
# (via str first, exactly as the per-column statements did).
for type_col in ("AllHouses", "AllWithIncome", "Wages_Salary",
                 "SelfEmployment", "SocialSecurity", "FoodStamps"):
    df_income_type[type_col] = df_income_type[type_col].astype(str).astype(float)

In [7]:
# Calculate the share of houses with members on food stamps and add it as a
# new column. The value is a fraction (FoodStamps / AllHouses), not multiplied
# by 100.
stamps_pcnt = df_income_type.FoodStamps / df_income_type.AllHouses
# assign() appends (or overwrites) StampsPcnt on a new frame, so re-running
# this cell does not create suffixed duplicate columns the way a repeated
# index-on-index merge would.
df_income_type = df_income_type.assign(StampsPcnt=stamps_pcnt)

In [8]:
# Summary statistics of the food-stamp share across all rows of df_income_type.
df_income_type.loc[:, 'StampsPcnt'].describe()

count    32519.000000
mean         0.125320
std          0.112081
min          0.000000
25%          0.050554
50%          0.100610
75%          0.168464
max          1.000000
Name: StampsPcnt, dtype: float64

## Housing Density Data:

In [9]:
# Load the household-size extract. Every column is read as text (dtype=str)
# so pandas does not infer a different dtype for each chunk of the file; the
# numeric casts are done explicitly in the next cell.
df3 = pd.read_csv('CensusHouseholdSize.csv', dtype=str)
# Select and rename the columns of interest. Chaining drop/rename builds a new
# frame instead of renaming a slice of df3 in place.
# NOTE(review): row 0 is dropped on the assumption that it holds the second,
# descriptive header line of the Census export -- confirm against the CSV.
df_housing = (
    df3[['NAME', 'S2501_C02_005E', 'S2501_C02_008E',
         'S2501_C04_005E', 'S2501_C06_005E']]
    .drop(index=0)
    .rename(columns={'NAME': 'ZCTA',
                     'S2501_C02_005E': 'Total_Prcnt_4_persons',
                     'S2501_C02_008E': 'Prcnt_High_Occ',
                     'S2501_C04_005E': 'Owner_Prcnt_4_persons',
                     'S2501_C06_005E': 'Renter_Prcnt_4_persons'})
)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [10]:
# Walk the four percentage columns in their original order. Three of them
# first lose every row whose text contains "-"; Prcnt_High_Occ is converted
# without a filter of its own. Each column is then cast to float.
for pct_col, filter_first in (
    ("Total_Prcnt_4_persons", True),
    ("Prcnt_High_Occ", False),
    ("Owner_Prcnt_4_persons", True),
    ("Renter_Prcnt_4_persons", True),
):
    if filter_first:
        has_dash = df_housing[pct_col].str.contains("-")
        df_housing = df_housing[~has_dash]
    df_housing[pct_col] = df_housing[pct_col].astype(str).astype(float)

## Merge All Dataframes:

In [11]:
# Combine the three cleaned frames on their shared ZCTA column using the
# default (inner) join, so only ZCTAs present in all three remain.
df_data = (
    df_income
    .merge(df_income_type, on='ZCTA')
    .merge(df_housing, on='ZCTA')
)

In [12]:
# Print the means for each column.
# numeric_only=True skips the text ZCTA column explicitly; without it pandas
# warns about dropping nuisance columns and newer versions raise TypeError.
df_data.mean(axis=0, numeric_only=True)

  df_data.mean(axis=0)


Household_income          77860.081399
Family_income             90534.355079
AllHouses                  3975.993339
AllWithIncome              3078.623992
Wages_Salary               2966.027394
SelfEmployment              430.019982
SocialSecurity             1245.270872
FoodStamps                  503.320632
StampsPcnt                    0.125552
Total_Prcnt_4_persons        20.940977
Prcnt_High_Occ                0.790730
Owner_Prcnt_4_persons        20.883622
Renter_Prcnt_4_persons       21.378650
dtype: float64

In [13]:
# Control zip codes. The "514" in the original note corresponds to the number
# of df_data rows that matched when the notebook was last run, not necessarily
# len(control_zip_list).
control_zip_list = ['10301','10302','10303','10304','10305','10306','10307','10308','10309',
                    '10310','10311','10312','10314','10451','10452','10453','10454','10455',
                    '10456','10457','10458','10459','10460','10461','10462','10463','10464',
                    '10465','10466','10467','10468','10469','10470','10471','10472','10473',
                    '10474','10475','10501','10502','10504','10505','10506','10507','10509',
                    '10510','10511','10512','10514','10516','10518','10520','10522','10523',
                    '10524','10526','10527','10528','10530','10532','10533','10535','10536',
                    '10537','10538','10541','10543','10546','10547','10548','10549','10550',
                    '10552','10553','10560','10562','10566','10567','10570','10573','10576',
                    '10577','10578','10579','10580','10583','10588','10589','10590','10591',
                    '10594','10595','10597','10598','10601','10603','10604','10605','10606',
                    '10607','10701','10703','10704','10705','10706','10707','10708','10709',
                    '10710','10801','10803','10804','10805','10901','10911','10913','10916',
                    '10917','10918','10919','10920','10921','10923','10924','10925','10926',
                    '10927','10928','10930','10931','10940','10941','10950','10952','10954',
                    '10956','10958','10960','10962','10963','10964','10965','10968','10969',
                    '10970','10973','10974','10975','10976','10977','10980','10983','10984',
                    '10985','10986','10987','10989','10990','10992','10993','10994','10996',
                    '10998','11001','11003','11004','11005','11010','11020','11021','11023',
                    '11024','11030','11040','11042','11050','11096','11101','11102','11103',
                    '11104','11105','11106','11109','11201','11203','11204','11205','11206',
                    '11207','11208','11209','11210','11211','11212','11213','11214','11215',
                    '11216','11217','11218','11219','11220','11221','11222','11223','11224',
                    '11225','11226','11228','11229','11230','11231','11232','11233','11234',
                    '11235','11236','11237','11238','11239','11240','11241','11242','11243',
                    '11249','11252','11256','11351','11354','11355','11356','11357','11358',
                    '11359','11360','11361','11362','11363','11364','11365','11366','11367',
                    '11368','11369','11370','11371','11372','11373','11374','11375','11377',
                    '11378','11379','11385','11411','11412','11413','11414','11415','11416',
                    '11417','11418','11419','11420','11421','11422','11423','11426','11427',
                    '11428','11429','11430','11432','11433','11434','11435','11436','11501',
                    '11507','11509','11510','11514','11516','11518','11520','11530','11542',
                    '11545','11548','11550','11552','11553','11554','11556','11557','11558',
                    '11559','11560','11561','11563','11565','11566','11568','11570','11572',
                    '11575','11576','11577','11579','11580','11581','11590','11596','11598',
                    '11599','11691','11692','11693','11694','11697','11709','11710','11714',
                    '11732','11735','11737','11753','11756','11758','11762','11765','11771',
                    '11783','11791','11793','11797','11801','11803','11804','12518','12520',
                    '12543','12549','12550','12553','12563','12575','12577','12586','12729',
                    '12746','12771','12780','07001','07008','07016','07023','07027','07033',
                    '07036','07060','07062','07063','07064','07065','07066','07067','07076',
                    '07077','07080','07081','07083','07088','07090','07092','07095','07201',
                    '07202','07203','07204','07205','07206','07208','07701','07702','07703',
                    '07704','07711','07712','07716','07717','07718','07719','07720','07721',
                    '07722','07723','07724','07726','07727','07728','07730','07731','07732',
                    '07733','07734','07735','07737','07738','07739','07740','07746','07747',
                    '07748','07750','07751','07753','07755','07756','07757','07758','07760',
                    '07762','07764','07799','07901','07922','07974','08005','08008','08037',
                    '08050','08087','08092','08201','08203','08205','08215','08221','08225',
                    '08232','08234','08241','08244','08310','08317','08319','08326','08330',
                    '08340','08341','08346','08350','08401','08402','08403','08406','08501',
                    '08510','08512','08514','08520','08525','08527','08533','08534','08535',
                    '08536','08540','08542','08550','08560','08608','08609','08610','08611',
                    '08618','08619','08620','08628','08629','08638','08648','08690','08691',
                    '08701','08721','08722','08723','08724','08730','08731','08733','08734',
                    '08735','08736','08738','08741','08742','08750','08751','08752','08753',
                    '08755','08757','08758','08759','08810','08812','08816','08817','08820',
                    '08824','08828','08830','08831','08832','08837','08840','08846','08850',
                    '08852','08854','08857','08859','08861','08863','08872','08879','08882',
                    '08884','08899','08901','08902','08904']
# Keep every row whose ZCTA label contains any listed zip code; the codes are
# joined with "|" into a single regex alternation for str.contains.
control_zip_pattern = '|'.join(control_zip_list)
df_control = df_data[df_data.ZCTA.str.contains(control_zip_pattern)]

In [14]:
# Column means over all merged rows, shown next to the control-group means in
# the following cell. numeric_only=True skips the text ZCTA column explicitly;
# without it pandas warns and newer versions raise TypeError.
df_data.mean(axis=0, numeric_only=True)

  df_data.mean(axis=0)


Household_income          77860.081399
Family_income             90534.355079
AllHouses                  3975.993339
AllWithIncome              3078.623992
Wages_Salary               2966.027394
SelfEmployment              430.019982
SocialSecurity             1245.270872
FoodStamps                  503.320632
StampsPcnt                    0.125552
Total_Prcnt_4_persons        20.940977
Prcnt_High_Occ                0.790730
Owner_Prcnt_4_persons        20.883622
Renter_Prcnt_4_persons       21.378650
dtype: float64

In [15]:
# Column means over the control-group rows only. numeric_only=True skips the
# text ZCTA column explicitly; without it pandas warns and newer versions
# raise TypeError.
df_control.mean(axis=0, numeric_only=True)

  df_control.mean(axis=0)


Household_income          126466.898833
Family_income             145024.933852
AllHouses                   9034.359922
AllWithIncome               7105.782101
Wages_Salary                6888.235409
SelfEmployment               915.618677
SocialSecurity              2722.221790
FoodStamps                  1392.484436
StampsPcnt                     0.100098
Total_Prcnt_4_persons         27.567510
Prcnt_High_Occ                 1.378599
Owner_Prcnt_4_persons         29.822957
Renter_Prcnt_4_persons        23.714397
dtype: float64

In [16]:
# Summary statistics of family income over all merged rows.
df_data.loc[:, 'Family_income'].describe()

count     30627.000000
mean      90534.355079
std       41094.303378
min       11716.000000
25%       66751.000000
50%       81042.000000
75%      101575.500000
max      594571.000000
Name: Family_income, dtype: float64

In [17]:
# Summary statistics of family income over the control-group rows.
df_control.loc[:, 'Family_income'].describe()

count       514.000000
mean     145024.933852
std       67016.722883
min       44362.000000
25%       99844.250000
50%      130173.000000
75%      173036.500000
max      515591.000000
Name: Family_income, dtype: float64

In [18]:
# Summary statistics of household income over all merged rows.
df_data.loc[:, 'Household_income'].describe()

count     30627.000000
mean      77860.081399
std       34176.646169
min       10663.000000
25%       57703.500000
50%       70030.000000
75%       87829.000000
max      511713.000000
Name: Household_income, dtype: float64

In [19]:
# Summary statistics of the food-stamp share over all merged rows.
df_data.loc[:, 'StampsPcnt'].describe()

count    30627.000000
mean         0.125552
std          0.100047
min          0.000000
25%          0.055354
50%          0.103279
75%          0.168831
max          0.911111
Name: StampsPcnt, dtype: float64

In [20]:
# Summary statistics of the food-stamp share over the control-group rows.
df_control.loc[:, 'StampsPcnt'].describe()

count    514.000000
mean       0.100098
std        0.104028
min        0.000000
25%        0.031544
50%        0.060357
75%        0.135254
max        0.537791
Name: StampsPcnt, dtype: float64

In [21]:
# Select the rows whose ZCTA label contains zip code 11691 and show their
# per-column means. numeric_only=True skips the text ZCTA column explicitly;
# without it pandas warns and newer versions raise TypeError.
toms = df_data[df_data.ZCTA.str.contains('11691')]
toms.mean(axis=0, numeric_only=True)

  toms.mean(axis=0)


Household_income          72476.000000
Family_income             82148.000000
AllHouses                 20712.000000
AllWithIncome             15623.000000
Wages_Salary              15163.000000
SelfEmployment             1938.000000
SocialSecurity             5140.000000
FoodStamps                 7576.000000
StampsPcnt                    0.365778
Total_Prcnt_4_persons        33.000000
Prcnt_High_Occ                3.500000
Owner_Prcnt_4_persons        44.900000
Renter_Prcnt_4_persons       29.500000
dtype: float64