In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from unicodedata import normalize 
import pgeocode

# Orlando, FL Population Datasets

- [Orlando, FL population density datasets](#Orlando,-FL-population-density-datasets)
- [Orlando, FL household income density datasets](#Orlando,-FL-household-income-density-datasets)
- [Merge Orlando, FL datasets](#Merge-Orlando,-FL-datasets)

## Orlando, FL population density datasets

Resources: [Link](https://blog.splitwise.com/2013/09/18/the-2010-us-census-population-by-zip-code-totally-free/)

by July 2021

In [2]:
population_df = pd.read_csv("data/zipcode pop.csv")
population_df.head()

Unnamed: 0,Zip/ZCTA,2010 Population,Land-Sq-Mi,Density Per Sq Mile
0,601,0,64.348,0.0
1,602,0,30.613,0.0
2,603,0,31.616,0.0
3,606,0,42.309,0.0
4,610,0,35.916,0.0


In [3]:
# Rename columns
population_df.rename(columns={"Zip/ZCTA": "zipcode", "2010 Population": "population", "Density Per Sq Mile": "pop_density"}, inplace=True)
# Drop columns
population_df.drop(columns=['Land-Sq-Mi'], inplace=True)
population_df.head()

Unnamed: 0,zipcode,population,pop_density
0,601,0,0.0
1,602,0,0.0
2,603,0,0.0
3,606,0,0.0
4,610,0,0.0


In [4]:
# Filter Orlando, FL zipcode only
zipcode = [32801,32804,32807,32789,32811,32812,32835,32819,32839,32824,32827,32822,32829,32832,32808,32817,32828,32818,32821,32825,32826,32836,32837,32805,32806,32803,32814,32809,32831,32792]
population_df = population_df[population_df['zipcode'].isin(zipcode)]
population_df.reset_index(drop=True, inplace=True)
population_df.sort_values(by=['zipcode'], inplace=True)
population_df

Unnamed: 0,zipcode,population,pop_density
0,32789,24557,3147.929753
1,32792,46914,3816.628702
2,32801,12050,5273.522976
3,32803,19020,2735.903337
4,32804,17312,2364.058446
5,32805,21810,3277.235162
6,32806,24820,3726.167242
7,32807,31465,3892.256309
8,32808,46334,3868.904476
9,32809,26773,2581.027668


## Orlando, FL household income density datasets

Resource: [Link](https://www.psc.isr.umich.edu/dis/census/Features/tract2zip/)

by 2020

In [5]:
household_income = pd.read_excel("data/household_income.xlsx")
household_income.head()

Unnamed: 0,Zip,Median,Mean,Pop
0,1001,56662.5735,66687.7509,16445
1,1002,49853.4177,75062.6343,28069
2,1003,28462.0,35121.0,8491
3,1005,75423.0,82442.0,4798
4,1007,79076.354,85801.975,12962


In [6]:
# Drop columns
household_income.drop(columns=['Mean','Pop'], inplace=True)
# Rename columns
household_income.rename(columns={"Zip": "zipcode", "Median": "income"}, inplace=True)
# Filter Orlando, FL only
zipcode = [32801,32804,32807,32789,32811,32812,32835,32819,32839,32824,32827,32822,32829,32832,32808,32817,32828,32818,32821,32825,32826,32836,32837,32805,32806,32803,32814,32809,32831,32792]
household_income = household_income[household_income['zipcode'].isin(zipcode)]
household_income.reset_index(drop=True, inplace=True)
household_income.sort_values(by='zipcode')
household_income

Unnamed: 0,zipcode,income
0,32789,73938.4426
1,32792,45745.7203
2,32801,27798.8516
3,32803,55839.2906
4,32804,57793.8146
5,32805,27620.3283
6,32806,53832.8962
7,32807,40232.1299
8,32808,36308.1834
9,32809,42157.7748


## Merge Orlando, FL datasets

In [7]:
nomi = pgeocode.Nominatim('us')
df_1 = nomi.query_postal_code(['32801','32804','32789','32811','32835','32819','32839','32824','32827','32822','32829','32832','32808','32817','32828','32818','32821','32825','32826','32836','32837','32805','32806','32803','32814','32809','32831','32792','32812','32807'])

In [8]:
df_coor = df_1[['postal_code','latitude','longitude']]
df_coor.rename(columns={"postal_code":"zipcode"}, inplace=True)
df_coor

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,zipcode,latitude,longitude
0,32801,28.5399,-81.3727
1,32804,28.5754,-81.3955
2,32789,28.5978,-81.3534
3,32811,28.5163,-81.4516
4,32835,28.5289,-81.4787
5,32819,28.4522,-81.4678
6,32839,28.4871,-81.4082
7,32824,28.3932,-81.3622
8,32827,28.4317,-81.343
9,32822,28.4944,-81.2902


In [9]:
df_coor.sort_values(by='zipcode', inplace=True)
df_coor.reset_index(drop=True, inplace=True)
df_coor

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,zipcode,latitude,longitude
0,32789,28.5978,-81.3534
1,32792,28.5974,-81.3036
2,32801,28.5399,-81.3727
3,32803,28.5559,-81.3535
4,32804,28.5754,-81.3955
5,32805,28.5302,-81.4045
6,32806,28.514,-81.357
7,32807,28.5515,-81.3051
8,32808,28.5803,-81.4396
9,32809,28.4637,-81.3948


In [10]:
orlando_population = population_df[['zipcode', 'population','pop_density']]
orlando_population['income'] = household_income['income']
orlando_population

Unnamed: 0,zipcode,population,pop_density,income
0,32789,24557,3147.929753,73938.4426
1,32792,46914,3816.628702,45745.7203
2,32801,12050,5273.522976,27798.8516
3,32803,19020,2735.903337,55839.2906
4,32804,17312,2364.058446,57793.8146
5,32805,21810,3277.235162,27620.3283
6,32806,24820,3726.167242,53832.8962
7,32807,31465,3892.256309,40232.1299
8,32808,46334,3868.904476,36308.1834
9,32809,26773,2581.027668,42157.7748


In [11]:
orlando_population[['latitude','longitude']] = df_coor[['latitude','longitude']]
orlando_population

Unnamed: 0,zipcode,population,pop_density,income,latitude,longitude
0,32789,24557,3147.929753,73938.4426,28.5978,-81.3534
1,32792,46914,3816.628702,45745.7203,28.5974,-81.3036
2,32801,12050,5273.522976,27798.8516,28.5399,-81.3727
3,32803,19020,2735.903337,55839.2906,28.5559,-81.3535
4,32804,17312,2364.058446,57793.8146,28.5754,-81.3955
5,32805,21810,3277.235162,27620.3283,28.5302,-81.4045
6,32806,24820,3726.167242,53832.8962,28.514,-81.357
7,32807,31465,3892.256309,40232.1299,28.5515,-81.3051
8,32808,46334,3868.904476,36308.1834,28.5803,-81.4396
9,32809,26773,2581.027668,42157.7748,28.4637,-81.3948


In [12]:
orlando_population.to_csv("data/orlando_pop.csv", index=False)