In [45]:
import pandas as pd

In [46]:
# Load the NY income/demographics extract, keeping only the columns that the
# rest of the notebook uses (they are given readable names in the rename step).
income_csv_url = 'https://raw.githubusercontent.com/tawney-kirkland/Metis_Project_1/main/data/ny_income.csv'

income_columns = [
    # geography identifiers
    'GeoType', 'GeogName', 'GeoID', 'Borough',
    # population, labor-force and household totals
    'Pop16plE', 'F16plE', 'LFE', 'HH2E',
    # household-income brackets plus median and mean income
    'HHI75t99E', 'HI100t149E', 'HI150t199E', 'HHI200plE', 'MdHHIncE', 'MnHHIncE',
    # industry columns
    'InfoE', 'FIREE', 'PrfSMgAWME', 'EdHlthCSAE', 'PubAdminE',
]

demog_df = pd.read_csv(income_csv_url, usecols=income_columns)

In [47]:
# Map the raw column codes from the income file to the readable names that
# every later cell refers to.
column_renames = {
    'Pop16plE': 'above_16_pop',
    'F16plE': 'fem_above_16_pop',
    'LFE': 'labor_force',
    'HH2E': 'total_households',
    'HHI75t99E': 'income_75_99k',
    'HI100t149E': 'income_100_149k',
    'HI150t199E': 'income_150_199k',
    'HHI200plE': 'income_200k_over',
    'MdHHIncE': 'income_median',
    'MnHHIncE': 'income_mean',
    'InfoE': 'information_occ',
    'FIREE': 'finServ_occ',
    'PrfSMgAWME': 'profesServ_occ',
    'EdHlthCSAE': 'edhealth_occ',
    'PubAdminE': 'pubAdmin_occ',
}

# Rebind the result instead of using inplace=True: rename() returns a new
# frame, which keeps the step chainable and avoids mutating an object that
# another cell may already have displayed. Keys missing from the frame are
# ignored by default, so re-running this cell is harmless.
demog_df = demog_df.rename(columns=column_renames)

In [48]:
# Preview the first five rows to check the renamed columns.
demog_df.head(n=5)

Unnamed: 0,GeoType,GeogName,GeoID,Borough,above_16_pop,labor_force,fem_above_16_pop,information_occ,finServ_occ,profesServ_occ,edhealth_occ,pubAdmin_occ,total_households,income_75_99k,income_100_149k,income_150_199k,income_200k_over,income_median,income_mean
0,NTA2010,Bath Beach,BK27,Brooklyn,26855,16006,13703,401,1392,1482,4053,571,11428,1352,1758,706,585,58243.0,75950.0
1,NTA2010,Bay Ridge,BK31,Brooklyn,65479,42176,34372,1464,4463,5179,10126,2341,33487,4108,4827,2497,2775,63539.0,86319.0
2,NTA2010,Bedford,BK75,Brooklyn,54904,36993,29640,1688,2303,4217,9005,1062,26059,2269,2847,1195,1034,39970.0,62616.0
3,NTA2010,Bensonhurst East,BK29,Brooklyn,52857,32004,27592,580,2136,2828,7677,756,22187,2558,2416,1144,844,48097.0,65715.0
4,NTA2010,Bensonhurst West,BK28,Brooklyn,74470,45842,38579,958,3573,4223,10021,1161,29909,3428,3743,1649,1231,51035.0,70198.0


In [49]:
# Female share of the 16-and-over population in each area.
# Stored as a fraction (0-1), not multiplied by 100, despite the "_percent" suffix.
women_16_plus = demog_df['fem_above_16_pop']
everyone_16_plus = demog_df['above_16_pop']
demog_df['fem_pop_percent'] = women_16_plus / everyone_16_plus

In [50]:
# For each industry count column, store its share of the area's labor force.
# Values are fractions (0-1); keys are the new columns, values the count columns.
industry_share_sources = {
    'inform_occ_percent': 'information_occ',
    'finServ_occ_percent': 'finServ_occ',
    'profesServ_occ_percent': 'profesServ_occ',
    'edhealth_occ_percent': 'edhealth_occ',
    'pubAdmin_occ_percent': 'pubAdmin_occ',
}

for share_column, count_column in industry_share_sources.items():
    demog_df[share_column] = demog_df[count_column] / demog_df['labor_force']

In [51]:
# Share of each area's households that fall in each income bracket
# (fractions of total_households, 0-1).
bracket_share_sources = [
    ('percent_75_99k', 'income_75_99k'),
    ('percent_100_149k', 'income_100_149k'),
    ('percent_150_199k', 'income_150_199k'),
    ('percent_200_over', 'income_200k_over'),
]
for share_column, count_column in bracket_share_sources:
    demog_df[share_column] = demog_df[count_column] / demog_df['total_households']

# Cumulative shares of households above 100k and above 150k, used to pick an
# income threshold. Plain addition is kept so a missing bracket share
# propagates as NaN rather than being treated as zero.
share_100_149k = demog_df['percent_100_149k']
share_150_199k = demog_df['percent_150_199k']
share_200k_over = demog_df['percent_200_over']
demog_df['percent_above_100k'] = share_100_149k + share_150_199k + share_200k_over
demog_df['percent_above_150k'] = share_150_199k + share_200k_over

In [52]:
# Top 20 areas ranked by the share of households earning more than 150k.
# NOTE(review): the saved output for this cell lists 10 rows although 20 are
# requested - possibly stale; re-run to refresh.
(
    demog_df
    .sort_values(by='percent_above_150k', ascending=False)
    .head(n=20)
)

Unnamed: 0,GeoType,GeogName,GeoID,Borough,above_16_pop,labor_force,fem_above_16_pop,information_occ,finServ_occ,profesServ_occ,...,finServ_occ_percent,profesServ_occ_percent,edhealth_occ_percent,pubAdmin_occ_percent,percent_75_99k,percent_100_149k,percent_150_199k,percent_200_over,percent_above_100k,percent_above_150k
74,NTA2010,Upper East Side-Carnegie Hill,MN40,Manhattan,49438,31325,28282,1470,8763,7009,...,0.279745,0.223751,0.178739,0.019633,0.099536,0.13551,0.099709,0.411832,0.647051,0.511541
51,NTA2010,Battery Park City-Lower Manhattan,MN25,Manhattan,39203,31640,20738,2126,8663,9261,...,0.273799,0.292699,0.136726,0.015992,0.095135,0.200417,0.112712,0.330842,0.64397,0.443554
71,NTA2010,SoHo-TriBeCa-Civic Center-Little Italy,MN24,Manhattan,36989,26650,18788,1797,6431,6570,...,0.241313,0.246529,0.134221,0.006529,0.076447,0.139036,0.080609,0.355228,0.574873,0.435838
63,NTA2010,Lincoln Square,MN14,Manhattan,51778,36884,28597,2926,8214,7593,...,0.222698,0.205862,0.215405,0.019385,0.085592,0.14742,0.125034,0.304088,0.576543,0.429123
78,NTA2010,West Village,MN23,Manhattan,61658,44859,30982,3740,8992,10814,...,0.20045,0.241066,0.181658,0.015382,0.09134,0.150192,0.12372,0.290522,0.564434,0.414242
73,NTA2010,Turtle Bay-East Midtown,MN19,Manhattan,45690,34323,25152,2251,7994,8484,...,0.232905,0.247181,0.143869,0.037438,0.106102,0.170053,0.109482,0.294574,0.574109,0.404056
67,NTA2010,Midtown-Midtown South,MN17,Manhattan,26120,18054,13277,1147,5187,4371,...,0.287305,0.242107,0.11233,0.014069,0.071387,0.148514,0.098601,0.304508,0.551622,0.403108
59,NTA2010,Gramercy,MN21,Manhattan,26343,19817,14560,2192,4137,4395,...,0.20876,0.221779,0.155725,0.011152,0.123032,0.186843,0.116468,0.265264,0.568575,0.381732
7,NTA2010,Brooklyn Heights-Cobble Hill,BK09,Brooklyn,21110,14418,10684,1335,2440,3999,...,0.169233,0.277362,0.19011,0.026772,0.108052,0.180506,0.119171,0.262285,0.561962,0.381456
61,NTA2010,Hudson Yards-Chelsea-Flat Iron-Union Square,MN13,Manhattan,69699,53890,33336,4492,11052,12029,...,0.205084,0.223214,0.143756,0.023808,0.083254,0.159695,0.099658,0.275401,0.534755,0.375059


In [54]:
# Build a slimmer frame for ranking: drop the raw industry counts, the raw
# income-bracket counts, mean/median income and the 75-99k share, keeping the
# remaining share columns alongside the identifiers and totals.
columns_to_drop = [
    # raw industry counts
    'information_occ', 'finServ_occ', 'profesServ_occ', 'edhealth_occ', 'pubAdmin_occ',
    # raw income-bracket counts
    'income_75_99k', 'income_100_149k', 'income_150_199k', 'income_200k_over',
    # income summaries and the 75-99k share
    'income_mean', 'income_median', 'percent_75_99k',
]
demog_short = demog_df.drop(columns=columns_to_drop)

In [55]:
# Top 10 areas by share of households earning more than 150k.
(
    demog_short
    .sort_values(by='percent_above_150k', ascending=False)
    .head(n=10)
)

Unnamed: 0,GeoType,GeogName,GeoID,Borough,above_16_pop,labor_force,fem_above_16_pop,total_households,fem_pop_percent,inform_occ_percent,finServ_occ_percent,profesServ_occ_percent,edhealth_occ_percent,pubAdmin_occ_percent,percent_100_149k,percent_150_199k,percent_200_over,percent_above_100k,percent_above_150k
74,NTA2010,Upper East Side-Carnegie Hill,MN40,Manhattan,49438,31325,28282,28854,0.57207,0.046927,0.279745,0.223751,0.178739,0.019633,0.13551,0.099709,0.411832,0.647051,0.511541
51,NTA2010,Battery Park City-Lower Manhattan,MN25,Manhattan,39203,31640,20738,22074,0.52899,0.067193,0.273799,0.292699,0.136726,0.015992,0.200417,0.112712,0.330842,0.64397,0.443554
71,NTA2010,SoHo-TriBeCa-Civic Center-Little Italy,MN24,Manhattan,36989,26650,18788,19700,0.507935,0.06743,0.241313,0.246529,0.134221,0.006529,0.139036,0.080609,0.355228,0.574873,0.435838
63,NTA2010,Lincoln Square,MN14,Manhattan,51778,36884,28597,32655,0.5523,0.07933,0.222698,0.205862,0.215405,0.019385,0.14742,0.125034,0.304088,0.576543,0.429123
78,NTA2010,West Village,MN23,Manhattan,61658,44859,30982,36906,0.502481,0.083372,0.20045,0.241066,0.181658,0.015382,0.150192,0.12372,0.290522,0.564434,0.414242
73,NTA2010,Turtle Bay-East Midtown,MN19,Manhattan,45690,34323,25152,28991,0.550492,0.065583,0.232905,0.247181,0.143869,0.037438,0.170053,0.109482,0.294574,0.574109,0.404056
67,NTA2010,Midtown-Midtown South,MN17,Manhattan,26120,18054,13277,15507,0.508308,0.063532,0.287305,0.242107,0.11233,0.014069,0.148514,0.098601,0.304508,0.551622,0.403108
59,NTA2010,Gramercy,MN21,Manhattan,26343,19817,14560,14167,0.552708,0.110612,0.20876,0.221779,0.155725,0.011152,0.186843,0.116468,0.265264,0.568575,0.381732
7,NTA2010,Brooklyn Heights-Cobble Hill,BK09,Brooklyn,21110,14418,10684,11152,0.506111,0.092593,0.169233,0.277362,0.19011,0.026772,0.180506,0.119171,0.262285,0.561962,0.381456
61,NTA2010,Hudson Yards-Chelsea-Flat Iron-Union Square,MN13,Manhattan,69699,53890,33336,42124,0.478285,0.083355,0.205084,0.223214,0.143756,0.023808,0.159695,0.099658,0.275401,0.534755,0.375059


In [57]:
# Top 10 areas by share of the labor force in the information industry column.
(
    demog_short
    .sort_values(by='inform_occ_percent', ascending=False)
    .head(n=10)
)

Unnamed: 0,GeoType,GeogName,GeoID,Borough,above_16_pop,labor_force,fem_above_16_pop,total_households,fem_pop_percent,inform_occ_percent,finServ_occ_percent,profesServ_occ_percent,edhealth_occ_percent,pubAdmin_occ_percent,percent_100_149k,percent_150_199k,percent_200_over,percent_above_100k,percent_above_150k
59,NTA2010,Gramercy,MN21,Manhattan,26343,19817,14560,14167,0.552708,0.110612,0.20876,0.221779,0.155725,0.011152,0.186843,0.116468,0.265264,0.568575,0.381732
39,NTA2010,Prospect Heights,BK64,Brooklyn,17456,13431,9125,9331,0.522743,0.109374,0.082942,0.205346,0.233713,0.038716,0.190226,0.106634,0.155503,0.452363,0.262137
37,NTA2010,Park Slope-Gowanus,BK37,Brooklyn,60537,47570,31695,31147,0.523564,0.096973,0.098739,0.226676,0.224385,0.032457,0.188686,0.132051,0.239285,0.560022,0.371336
34,NTA2010,North Side-South Side,BK73,Brooklyn,45544,34166,22712,23134,0.498683,0.096148,0.084997,0.229117,0.166452,0.021278,0.153583,0.094752,0.131235,0.379571,0.225988
12,NTA2010,Carroll Gardens-Columbia Street-Red Hook,BK33,Brooklyn,34479,24935,18286,18111,0.530352,0.095127,0.095208,0.216002,0.19615,0.028314,0.149964,0.110596,0.207443,0.468003,0.318039
7,NTA2010,Brooklyn Heights-Cobble Hill,BK09,Brooklyn,21110,14418,10684,11152,0.506111,0.092593,0.169233,0.277362,0.19011,0.026772,0.180506,0.119171,0.262285,0.561962,0.381456
50,NTA2010,Windsor Terrace,BK40,Brooklyn,17522,12907,9424,9135,0.537838,0.090571,0.072054,0.207872,0.240102,0.05005,0.217406,0.087794,0.14844,0.45364,0.236234
29,NTA2010,Greenpoint,BK76,Brooklyn,30264,22836,15766,15869,0.520949,0.089946,0.060606,0.21869,0.149632,0.023121,0.187157,0.074422,0.085576,0.347155,0.159997
58,NTA2010,East Village,MN22,Manhattan,41068,28568,20260,22352,0.493328,0.089611,0.143552,0.242159,0.148383,0.021318,0.157301,0.085227,0.136766,0.379295,0.221994
22,NTA2010,East Williamsburg,BK90,Brooklyn,31807,21940,16707,15964,0.525262,0.087922,0.058204,0.1768,0.170374,0.017685,0.140566,0.064019,0.068654,0.27324,0.132674
