In [1]:
import pandas as pd
import numpy as np
import censusdata as cd
import seaborn as sns
import matplotlib.pyplot as plt
import pickle

%config InlineBackend.figure_format = 'svg'
%matplotlib inline 
sns.set(color_codes=True, style='whitegrid')

pd.options.mode.chained_assignment = None
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)
pd.set_option('display.precision', 2)

In [6]:
# Read Eliza's age and disability tables (CSV) for the comparison.
age_eliza, disability_eliza = (
    pd.read_csv(csv_name) for csv_name in ('Ordered Age.csv', 'Ordered Disability.csv')
)

# Read the frame stored in 'cleaned_data_combined_df.pickle'.
# NOTE: pickle.load can execute arbitrary code; only load pickles from a trusted source.
with open('cleaned_data_combined_df.pickle', mode='rb') as pickle_handle:
    combined = pickle.load(pickle_handle)

In [7]:
# Preview the first five rows of the pickled frame.
combined.iloc[:5]

Unnamed: 0,zip_code,actual_population,percent_disabled,percent_under_age_5,percent_over_age_60,percent_age_under_5_over_60,Sample population with a cognitive difficulty,Sample percent with a cognitive difficulty,Sample population with a vision difficulty,Sample percent with a vision difficulty,Sample population with an independent living difficulty,Sample percent with an independent living difficulty,Sample population with a self-care difficulty,Sample percent with a self-care difficulty,Sample population with an ambulatory difficulty,Sample percent with an ambulatory difficulty,Sample total sample population,Sample population under 5 years,Sample percent population under 5 years,Sample population 60 years and over,Sample percent population 60 years and over,actual_population_disabled,actual_population_under_age_5,actual_population_over_age_60,actual_population_under_age_5_over_60
0,10001,22924.0,15.16,2.06,21.05,23.11,745.0,3.3,436.0,1.9,570.0,2.7,521.0,2.3,1203.0,5.4,22924.0,472.0,2.1,4825.0,21.0,3475.0,472.0,4825.0,5297.0
1,10002,74993.0,32.35,3.52,28.25,31.77,4579.0,6.3,3050.0,4.1,5634.0,8.7,3473.0,4.8,7524.0,10.4,74993.0,2637.0,3.5,21188.0,28.3,24260.0,2637.0,21188.0,23825.0
2,10003,54682.0,10.28,3.23,16.92,20.15,1426.0,2.7,953.0,1.7,1177.0,2.3,439.0,0.8,1629.0,3.1,54682.0,1767.0,3.2,9253.0,16.9,5624.0,1767.0,9253.0,11020.0
3,10004,3028.0,5.58,4.13,13.71,17.83,46.0,1.6,59.0,1.9,29.0,1.1,0.0,0.0,35.0,1.2,3028.0,125.0,4.1,415.0,13.7,169.0,125.0,415.0,540.0
4,10005,8831.0,1.74,5.91,1.66,7.58,100.0,1.2,6.0,0.1,3.0,0.0,1.0,0.0,44.0,0.5,8831.0,522.0,5.9,147.0,1.7,154.0,522.0,147.0,669.0


In [11]:
# Zip codes ordered by disabled population, largest first.
(
    combined
    .loc[:, ['zip_code', 'actual_population_disabled']]
    .sort_values(by='actual_population_disabled', ascending=False)
)

Unnamed: 0,zip_code,actual_population_disabled
1,10002,24260.0
26,10029,24241.0
29,10032,18119.0
22,10025,17545.0
30,10033,15222.0
7,10009,13695.0
28,10031,12668.0
24,10027,11386.0
32,10035,10825.0
20,10023,9738.0


In [16]:
# Attach the disability columns to the age table; rows are matched on both the
# zip-code label and the total population, keeping every row of the age table.
combined_eliza = pd.merge(
    age_eliza,
    disability_eliza,
    on=['Zip Code', 'Total Population'],
    how='left',
)

In [19]:
# Keep only the trailing five characters of each label, i.e. the zip code itself
# (e.g. 'ZCTA5 10025' -> '10025').
# The vectorized .str accessor passes missing values through as NaN instead of
# raising, and .str.strip() guards against stray surrounding whitespace.
# .tolist() keeps the result a plain Python list, as before.
pure_zip_codes = combined_eliza['Zip Code'].str.strip().str[-5:].tolist()

In [22]:
# Show the merged frame's column names for inspection.
# The result is deliberately not stored in `update_cols`: the following cell
# defines `update_cols` explicitly, so an assignment here would be a dead store.
combined_eliza.columns.to_list()

In [24]:
# Explicit column selection and order for the merged Eliza frame.
# The names must match the CSV headers exactly, including the source's own
# spelling ("Under 5 an Over 60"); do not "correct" them here.
update_cols = [
    'Zip Code',
    'Estimated Disability Population',
    'Estimated Age of Population:Under 5 and Over 60',
    'Total Population',
    'Estimated Percentage of Population by ZipCode: Under 5 an Over 60',
    'Estimated Percentage of Population by Total: Under 5 an Over 60',
    'Percentage of Population with Disability by Zipcode',
    'Percentage of Population with Disability by Total'
]

In [25]:
# Reorder/select the columns listed in `update_cols`.
# .copy() makes the result an independent frame, so the later column assignment
# does not rely on SettingWithCopyWarning being silenced globally.
combined_eliza = combined_eliza[update_cols].copy()

In [27]:
# Replace the zip-code labels with the bare five-character codes.
combined_eliza = combined_eliza.assign(**{'Zip Code': pure_zip_codes})

In [31]:
# Rename the key column to 'zip_code' so it matches the key used by `combined`.
# Reassigning (rather than inplace=True) keeps the cell free of hidden in-place
# mutation and lets it chain with other frame methods.
combined_eliza = combined_eliza.rename(columns={'Zip Code': 'zip_code'})

In [32]:
# Preview the first five rows after the column selection and rename.
combined_eliza.iloc[:5]

Unnamed: 0,zip_code,Estimated Disability Population,Estimated Age of Population:Under 5 and Over 60,Total Population,Estimated Percentage of Population by ZipCode: Under 5 an Over 60,Estimated Percentage of Population by Total: Under 5 an Over 60,Percentage of Population with Disability by Zipcode,Percentage of Population with Disability by Total
0,10025,17545,28931,92805,0.31,0.02,0.19,0.0109
1,10002,24260,23825,74993,0.32,0.01,0.32,0.0151
2,10023,9738,21116,62435,0.34,0.01,0.16,0.00605
3,10024,8657,19019,59001,0.32,0.01,0.15,0.00538
4,10029,24241,18966,79597,0.24,0.01,0.3,0.0151


In [33]:
# Persist the tidied Eliza frame as a pickle.
with open('combined_eliza_df.pickle', mode='wb') as pickle_out:
    pickle.dump(combined_eliza, pickle_out)

In [34]:
# Join Eliza's columns onto `combined` by zip code, keeping every row of `combined`.
combined_binh_eliza = pd.merge(combined, combined_eliza, on='zip_code', how='left')

In [37]:
# Show every row that has at least one missing value.
combined_binh_eliza.loc[lambda frame: frame.isna().any(axis=1)]

Unnamed: 0,zip_code,actual_population,percent_disabled,percent_under_age_5,percent_over_age_60,percent_age_under_5_over_60,Sample population with a cognitive difficulty,Sample percent with a cognitive difficulty,Sample population with a vision difficulty,Sample percent with a vision difficulty,Sample population with an independent living difficulty,Sample percent with an independent living difficulty,Sample population with a self-care difficulty,Sample percent with a self-care difficulty,Sample population with an ambulatory difficulty,Sample percent with an ambulatory difficulty,Sample total sample population,Sample population under 5 years,Sample percent population under 5 years,Sample population 60 years and over,Sample percent population 60 years and over,actual_population_disabled,actual_population_under_age_5,actual_population_over_age_60,actual_population_under_age_5_over_60,Estimated Disability Population,Estimated Age of Population:Under 5 and Over 60,Total Population,Estimated Percentage of Population by ZipCode: Under 5 an Over 60,Estimated Percentage of Population by Total: Under 5 an Over 60,Percentage of Population with Disability by Zipcode,Percentage of Population with Disability by Total
17,10020,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
41,10128,59256.0,,,,,,,,,,,,,,,,,,,,,,,,7932.0,0.0,59256.0,0.0,0.0,0.13,0.00493


In [39]:
# Difference between the two disabled-population figures (ours minus Eliza's).
combined_binh_eliza['disabled_population_difference'] = (
    combined_binh_eliza['actual_population_disabled']
    .sub(combined_binh_eliza['Estimated Disability Population'])
)

In [44]:
# Difference between the two under-5/over-60 population figures (ours minus Eliza's).
combined_binh_eliza['relevant_age_population_difference'] = (
    combined_binh_eliza['actual_population_under_age_5_over_60']
    .sub(combined_binh_eliza['Estimated Age of Population:Under 5 and Over 60'])
)

In [47]:
# Persist the merged comparison frame as a pickle.
with open('combined_binh_eliza_df.pickle', mode='wb') as pickle_out:
    pickle.dump(combined_binh_eliza, pickle_out)

In [50]:
# Summary statistics for the numeric columns (quartiles spelled out explicitly).
combined_binh_eliza.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,actual_population,percent_disabled,percent_under_age_5,percent_over_age_60,percent_age_under_5_over_60,Sample population with a cognitive difficulty,Sample percent with a cognitive difficulty,Sample population with a vision difficulty,Sample percent with a vision difficulty,Sample population with an independent living difficulty,Sample percent with an independent living difficulty,Sample population with a self-care difficulty,Sample percent with a self-care difficulty,Sample population with an ambulatory difficulty,Sample percent with an ambulatory difficulty,Sample total sample population,Sample population under 5 years,Sample percent population under 5 years,Sample population 60 years and over,Sample percent population 60 years and over,actual_population_disabled,actual_population_under_age_5,actual_population_over_age_60,actual_population_under_age_5_over_60,Estimated Disability Population,Estimated Age of Population:Under 5 and Over 60,Total Population,Estimated Percentage of Population by ZipCode: Under 5 an Over 60,Estimated Percentage of Population by Total: Under 5 an Over 60,Percentage of Population with Disability by Zipcode,Percentage of Population with Disability by Total,disabled_population_difference,relevant_age_population_difference
count,42.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,41.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,41.0,41.0
mean,38318.12,16.12,5.02,19.83,24.85,1440.1,3.46,782.93,1.81,1575.54,4.19,1003.95,2.37,2352.49,5.68,37807.44,1811.83,5.02,8053.98,19.82,7155.0,1811.83,8053.98,9865.8,7173.5,9630.9,38318.12,0.24,0.00598,0.16,0.00446,-1.3e-13,-2.14e-13
std,22119.7,8.06,1.67,6.81,6.43,1289.52,1.8,656.44,0.84,1401.97,2.42,882.14,1.34,1959.72,2.87,22142.4,1085.85,1.67,5576.72,6.81,6117.81,1085.85,5576.72,6451.79,6043.94,6551.93,22119.7,0.07,0.00407,0.08,0.00376,7.78e-13,1.33e-12
min,3028.0,0.0,2.06,1.66,7.58,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3028.0,125.0,2.1,126.0,1.7,0.0,125.0,126.0,377.0,0.0,0.0,3028.0,0.0,0.0,0.0,0.0,-3.64e-12,-3.64e-12
25%,22831.0,10.28,3.74,16.74,21.91,534.0,2.3,384.0,1.3,570.0,2.3,339.0,1.4,980.0,4.0,22800.0,899.0,3.7,4430.0,16.7,2893.0,899.0,4430.0,5890.0,2898.25,5445.25,22831.0,0.22,0.00338,0.11,0.0018,0.0,0.0
50%,34736.5,15.27,4.83,19.36,23.83,1143.0,3.1,599.0,1.9,1201.0,3.8,772.0,2.5,1749.0,5.4,33730.0,1767.0,4.8,6958.0,19.4,5624.0,1767.0,6958.0,8515.0,5629.0,8510.5,34736.5,0.24,0.00529,0.15,0.0035,0.0,0.0
75%,57114.25,21.46,6.08,24.02,28.8,2057.0,4.3,1068.0,2.3,2299.0,6.0,1491.0,3.2,3195.0,7.7,54682.0,2637.0,6.1,11276.0,24.0,9692.0,2637.0,11276.0,13260.0,9675.0,13244.75,57114.25,0.29,0.00823,0.21,0.00601,0.0,0.0
max,92805.0,32.35,9.45,35.32,38.83,5663.0,7.5,3050.0,4.1,5634.0,8.9,3473.0,5.2,7649.0,11.7,92805.0,3911.0,9.4,25020.0,35.3,24260.0,3911.0,25020.0,28931.0,24260.0,28931.0,92805.0,0.39,0.018,0.32,0.0151,9.09e-13,3.64e-12


In [48]:
# Export the merged comparison frame to CSV (the row index is written as the first column).
combined_binh_eliza.to_csv(path_or_buf='combined_binh_eliza_df.csv')

In [49]:
# List the working directory via the shell (presumably to confirm the exported files exist).
!ls

ACS2018_Table_Shells.xlsx
ACSST5Y2018.S1810_data_with_overlays_2020-06-29T172506.csv
ACSST5Y2018.S1810_metadata_2020-06-29T172506.csv
ACSST5Y2018.S1810_table_title_2020-06-29T172506.txt
Analyzing Population by Manhattan Zip Code.ipynb
Checking Eliza df.ipynb
Cleaning Manhattan Zip Code Disability Data (BH).ipynb
Cleaning Manhattan Zip Code Disability Data.ipynb
Cleaning Population Zip Code Data.ipynb
Comparing My and Eliza Population Data.ipynb
Ordered Age.csv
Ordered Disability.csv
Project 1 Testing.ipynb
[31mProposed Subway Elevator Ranking Metric.gsheet[m[m
Untitled spreadsheet.gsheet
Untitled.ipynb
age_disability_acs_2018_5y.csv
cleaned_data_age_dis_df.pickle
cleaned_data_combined_df.pickle
cleaned_data_manhattan_dis_df.pickle
combined_binh_eliza_df.csv
combined_binh_eliza_df.pickle
combined_eliza_df.pickle
manhattan_populations.pickle
new_york_zip_codes_by_population.csv
new_york_zip_codes_by_population.gsheet
percent_age_under_5_over_60_bar_chart.png

In [28]:
# Preview the first five rows of the Eliza frame.
# NOTE(review): this cell's execution count (28) precedes the rename cell (31), so the
# saved output still shows the 'Zip Code' column; a top-to-bottom run shows 'zip_code'.
combined_eliza.iloc[:5]

Unnamed: 0,Zip Code,Estimated Disability Population,Estimated Age of Population:Under 5 and Over 60,Total Population,Estimated Percentage of Population by ZipCode: Under 5 an Over 60,Estimated Percentage of Population by Total: Under 5 an Over 60,Percentage of Population with Disability by Zipcode,Percentage of Population with Disability by Total
0,10025,17545,28931,92805,0.31,0.02,0.19,0.0109
1,10002,24260,23825,74993,0.32,0.01,0.32,0.0151
2,10023,9738,21116,62435,0.34,0.01,0.16,0.00605
3,10024,8657,19019,59001,0.32,0.01,0.15,0.00538
4,10029,24241,18966,79597,0.24,0.01,0.3,0.0151


In [18]:
# Preview the first five rows of the Eliza frame.
# NOTE(review): this cell's execution count (18) precedes the zip-code cleanup, so the
# saved output still shows the raw 'ZCTA5 ...' labels and the original column order.
combined_eliza.iloc[:5]

Unnamed: 0,Zip Code,Estimated Age of Population:Under 5 and Over 60,Total Population,Estimated Percentage of Population by ZipCode: Under 5 an Over 60,Estimated Percentage of Population by Total: Under 5 an Over 60,Estimated Disability Population,Percentage of Population with Disability by Zipcode,Percentage of Population with Disability by Total
0,ZCTA5 10025,28931,92805,0.31,0.02,17545,0.19,0.0109
1,ZCTA5 10002,23825,74993,0.32,0.01,24260,0.32,0.0151
2,ZCTA5 10023,21116,62435,0.34,0.01,9738,0.16,0.00605
3,ZCTA5 10024,19019,59001,0.32,0.01,8657,0.15,0.00538
4,ZCTA5 10029,18966,79597,0.24,0.01,24241,0.3,0.0151


In [3]:
# Preview the first five rows of Eliza's age table as loaded from CSV.
age_eliza.iloc[:5]

Unnamed: 0,Zip Code,Estimated Age of Population:Under 5 and Over 60,Total Population,Estimated Percentage of Population by ZipCode: Under 5 an Over 60,Estimated Percentage of Population by Total: Under 5 an Over 60
0,ZCTA5 10025,28931,92805,0.31,0.02
1,ZCTA5 10002,23825,74993,0.32,0.01
2,ZCTA5 10023,21116,62435,0.34,0.01
3,ZCTA5 10024,19019,59001,0.32,0.01
4,ZCTA5 10029,18966,79597,0.24,0.01


In [4]:
# Preview the first five rows of Eliza's disability table as loaded from CSV.
disability_eliza.iloc[:5]

Unnamed: 0,Zip Code,Estimated Disability Population,Total Population,Percentage of Population with Disability by Zipcode,Percentage of Population with Disability by Total
0,ZCTA5 10002,24260,74993,0.32,0.0151
1,ZCTA5 10029,24241,79597,0.3,0.0151
2,ZCTA5 10032,18119,64264,0.28,0.0113
3,ZCTA5 10025,17545,92805,0.19,0.0109
4,ZCTA5 10033,15222,59607,0.26,0.00946
