In [1]:
import pandas as pd
import requests
from census import Census
# Census API Key
from config import api_key

In [2]:
# Load the per-place internet-access table shipped with the project.
internet_data = pd.read_csv(filepath_or_buffer="assets/data/internet2020.csv")
# Wrap the parsed table in the DataFrame name used by the cells below.
internet_df = pd.DataFrame(data=internet_data)
internet_df

Unnamed: 0,geoid,name,longitude,latitude,pop_with_access
0,100124,"Abbeville, AL",-85.259123,31.564724,2522
1,100460,"Adamsville, AL",-86.971527,33.602315,3892
2,100484,"Addison, AL",-87.178004,34.202681,709
3,100676,"Akron, AL",-87.740899,32.879066,60
4,100820,"Alabaster, AL",-86.847372,33.244399,31440
...,...,...,...,...,...
29966,7828000,"Cruz Bay, VI",-64.779790,18.323748,2832
29967,7837900,"Frederiksted, VI",-64.882274,17.712908,876
29968,7839700,"Frederiksted Southeast, VI",-64.876334,17.703025,2211
29969,7865530,"Red Hook, VI",-64.841226,18.324612,371


In [3]:
# Disabled experiment: split "name" at the first comma into separate
# City / State columns. Every line is commented out, so this cell runs nothing.
# NOTE: if re-enabled, the State part keeps the space that follows the comma
# (e.g. " AL"), so it would need stripping before being used as a key.
# places = internet_df["name"].str.split(",", n = 1, expand = True)
# internet_df["City"] = places[0]
# internet_df["State"] = places[1]
# internet_df

In [4]:
# zipcode Data from: https://simplemaps.com/data/us-zips
zip_data = pd.read_csv("assets/data/uszips.csv", usecols=["zip", "city", "state_id"])
zip_df = pd.DataFrame(zip_data)
zip_df.rename(columns={"state_id": "State", "city": "City", "zip": "Zipcode"}, inplace=True)
zip_df["name"] = zip_df["City"] + ", " + zip_df["State"]
zip_df = zip_df[["Zipcode", "name"]]
zip_df

Unnamed: 0,Zipcode,name
0,601,"Adjuntas, PR"
1,602,"Aguada, PR"
2,603,"Aguadilla, PR"
3,606,"Maricao, PR"
4,610,"Anasco, PR"
...,...,...
33116,99923,"Hyder, AK"
33117,99925,"Klawock, AK"
33118,99926,"Metlakatla, AK"
33119,99927,"Point Baker, AK"


In [5]:
# Attach ZIP codes to each place by matching on the "City, ST" label.
# A label that matches several ZIP rows yields one output row per ZIP code;
# labels without any match are dropped by the inner join.
internet_zip = internet_df.merge(zip_df, how="inner", on="name")
internet_zip

Unnamed: 0,geoid,name,longitude,latitude,pop_with_access,Zipcode
0,100124,"Abbeville, AL",-85.259123,31.564724,2522,36310
1,100460,"Adamsville, AL",-86.971527,33.602315,3892,35005
2,100484,"Addison, AL",-87.178004,34.202681,709,35540
3,100676,"Akron, AL",-87.740899,32.879066,60,35441
4,100820,"Alabaster, AL",-86.847372,33.244399,31440,35007
...,...,...,...,...,...,...
25003,7819900,"Christiansted, VI",-64.706380,17.743930,2395,820
25004,7819900,"Christiansted, VI",-64.706380,17.743930,2395,823
25005,7819900,"Christiansted, VI",-64.706380,17.743930,2395,824
25006,7837900,"Frederiksted, VI",-64.882274,17.712908,876,840


In [6]:
c = Census(api_key, year=2019)

# Run Census search to retrieve data on all ZIP code tabulation areas
# (ACS 5-year estimates for the year configured on the client above: 2019).
# See: https://github.com/datamade/census for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
census_data = c.acs5.get(
    ("B19013_001E", "B01003_001E", "B15003_017E", "B15003_021E", "B15003_022E"),
    {'for': 'zip code tabulation area:*'},
)

# Convert to DataFrame
census_df = pd.DataFrame(census_data)

# Column renaming: ACS variable codes -> readable labels
census_df = census_df.rename(columns={"B19013_001E": "Median Income",
                                      "B01003_001E": "Population",
                                      "B15003_017E": "Population with High School Diploma",
                                      "B15003_021E": "Population with Associate's Degree",
                                      "B15003_022E": "Population with Bachelor's Degree",
                                      "zip code tabulation area": "Zipcode"})

# Final DataFrame: keep only the columns used downstream, in display order
census_df = census_df[["Zipcode", "Population", "Median Income", "Population with High School Diploma",
                       "Population with Associate's Degree", "Population with Bachelor's Degree"]]

# The ACS reports large negative placeholder values (e.g. -666666666) when a
# median cannot be computed for an area. Left in place they would dominate any
# later average of "Median Income", so treat every negative income as missing.
median_income = pd.to_numeric(census_df["Median Income"], errors="coerce")
census_df = census_df.assign(**{"Median Income": median_income.where(median_income >= 0)})

# Visualize
print(len(census_df))
census_df.head()

33120


Unnamed: 0,Zipcode,Population,Median Income,Population with High School Diploma,Population with Associate's Degree,Population with Bachelor's Degree
0,601,17113.0,14361.0,2952.0,1141.0,1729.0
1,602,37751.0,16807.0,6127.0,2971.0,4001.0
2,603,47081.0,16049.0,8788.0,2525.0,5561.0
3,606,6392.0,12119.0,1303.0,204.0,231.0
4,610,26686.0,19898.0,5138.0,2132.0,2799.0


In [7]:
# Cast the ZIP code column to integers with a single conversion; leading zeros
# are not kept (e.g. 601). Presumably this matches the integer ZIP codes read
# from the ZIP CSV so the two tables can be joined - confirm dtypes if the
# inputs change.
# .assign builds a new frame instead of writing into a column-subset of another
# frame, which avoids pandas' SettingWithCopyWarning.
census_df = census_df.assign(Zipcode=census_df["Zipcode"].astype(int))
census_df.head()

Unnamed: 0,Zipcode,Population,Median Income,Population with High School Diploma,Population with Associate's Degree,Population with Bachelor's Degree
0,601,17113.0,14361.0,2952.0,1141.0,1729.0
1,602,37751.0,16807.0,6127.0,2971.0,4001.0
2,603,47081.0,16049.0,8788.0,2525.0,5561.0
3,606,6392.0,12119.0,1303.0,204.0,231.0
4,610,26686.0,19898.0,5138.0,2132.0,2799.0


In [8]:
# Join the place/ZIP rows to the census figures for the same ZIP code, then
# order the result alphabetically by place label.
internet_census = (
    internet_zip
    .merge(census_df, how="inner", on="Zipcode")
    .sort_values(by="name")
)
internet_census

Unnamed: 0,geoid,name,longitude,latitude,pop_with_access,Zipcode,Population,Median Income,Population with High School Diploma,Population with Associate's Degree,Population with Bachelor's Degree
18286,4200104,"Aaronsburg, PA",-80.000926,40.010823,181,16820,1327.0,59531.0,274.0,56.0,72.0
0,100124,"Abbeville, AL",-85.259123,31.564724,2522,36310,6275.0,39983.0,1329.0,433.0,335.0
4277,1300184,"Abbeville, GA",-83.306453,31.992576,2465,31001,4547.0,32105.0,994.0,180.0,206.0
8873,2200100,"Abbeville, LA",-92.127157,29.975395,11765,70510,25623.0,49663.0,5450.0,1216.0,1734.0
11432,2800100,"Abbeville, MS",-89.502517,34.503392,468,38601,2073.0,43886.0,372.0,199.0,200.0
...,...,...,...,...,...,...,...,...,...,...,...
11430,2772310,"Zumbro Falls, MN",-92.427083,44.287819,181,55991,1464.0,81364.0,349.0,139.0,133.0
11431,2772328,"Zumbrota, MN",-92.674842,44.295356,3141,55992,4932.0,70450.0,974.0,492.0,635.0
12774,3082750,"Zurich, MT",-109.030243,48.586011,19,59547,73.0,60938.0,23.0,6.0,5.0
7838,1987690,"Zwingle, IA",-90.687446,42.297197,76,52079,729.0,49917.0,230.0,66.0,65.0


In [9]:
# A place label can appear on more than one row of the internet table; the
# label-based ZIP merge then repeats each ZIP-level census row once per
# occurrence. Keep a single row per (label, ZIP code) pair so the sums below
# do not count the same ZIP code's residents several times.
place_zip_rows = internet_census.drop_duplicates(subset=["name", "Zipcode"])
city_group = place_zip_rows.groupby(["name"])

# Head counts are additive across the ZIP codes matched to a place.
population = city_group["Population"].sum()
# NOTE: this is an unweighted mean of ZIP-level medians, i.e. an approximation
# of the place's income level rather than a true median.
income = city_group["Median Income"].mean()
high_school = city_group["Population with High School Diploma"].sum()
associate = city_group["Population with Associate's Degree"].sum()
bachelor = city_group["Population with Bachelor's Degree"].sum()

# One row per place label, indexed by "name".
census_gr = pd.DataFrame({
    "Population": population,
    "Median Income": income,
    "Population with High School Diploma": high_school,
    "Population with Associate's Degree": associate,
    "Population with Bachelor's Degree": bachelor
})
# Turn the "name" index back into an ordinary column for the merge that follows.
census_gr_reset = census_gr.reset_index()
census_gr_reset

Unnamed: 0,name,Population,Median Income,Population with High School Diploma,Population with Associate's Degree,Population with Bachelor's Degree
0,"Aaronsburg, PA",1327.0,59531.0,274.0,56.0,72.0
1,"Abbeville, AL",6275.0,39983.0,1329.0,433.0,335.0
2,"Abbeville, GA",4547.0,32105.0,994.0,180.0,206.0
3,"Abbeville, LA",25623.0,49663.0,5450.0,1216.0,1734.0
4,"Abbeville, MS",2073.0,43886.0,372.0,199.0,200.0
...,...,...,...,...,...,...
19834,"Zumbro Falls, MN",1464.0,81364.0,349.0,139.0,133.0
19835,"Zumbrota, MN",4932.0,70450.0,974.0,492.0,635.0
19836,"Zurich, MT",73.0,60938.0,23.0,6.0,5.0
19837,"Zwingle, IA",729.0,49917.0,230.0,66.0,65.0


In [10]:
# Keep only each place's label, internet-access count and coordinates.
city_lat_lon = internet_df.loc[:, ["name", "pop_with_access", "latitude", "longitude"]]
city_lat_lon

Unnamed: 0,name,pop_with_access,latitude,longitude
0,"Abbeville, AL",2522,31.564724,-85.259123
1,"Adamsville, AL",3892,33.602315,-86.971527
2,"Addison, AL",709,34.202681,-87.178004
3,"Akron, AL",60,32.879066,-87.740899
4,"Alabaster, AL",31440,33.244399,-86.847372
...,...,...,...,...
29966,"Cruz Bay, VI",2832,18.323748,-64.779790
29967,"Frederiksted, VI",876,17.712908,-64.882274
29968,"Frederiksted Southeast, VI",2211,17.703025,-64.876334
29969,"Red Hook, VI",371,18.324612,-64.841226


In [11]:
# Combine the per-place census aggregates with the access counts and
# coordinates, matching rows on the place label.
internet_final = census_gr_reset.merge(city_lat_lon, how="inner", on="name")
internet_final

Unnamed: 0,name,Population,Median Income,Population with High School Diploma,Population with Associate's Degree,Population with Bachelor's Degree,pop_with_access,latitude,longitude
0,"Aaronsburg, PA",1327.0,59531.0,274.0,56.0,72.0,181,40.010823,-80.000926
1,"Abbeville, AL",6275.0,39983.0,1329.0,433.0,335.0,2522,31.564724,-85.259123
2,"Abbeville, GA",4547.0,32105.0,994.0,180.0,206.0,2465,31.992576,-83.306453
3,"Abbeville, LA",25623.0,49663.0,5450.0,1216.0,1734.0,11765,29.975395,-92.127157
4,"Abbeville, MS",2073.0,43886.0,372.0,199.0,200.0,468,34.503392,-89.502517
...,...,...,...,...,...,...,...,...,...
19958,"Zumbro Falls, MN",1464.0,81364.0,349.0,139.0,133.0,181,44.287819,-92.427083
19959,"Zumbrota, MN",4932.0,70450.0,974.0,492.0,635.0,3141,44.295356,-92.674842
19960,"Zurich, MT",73.0,60938.0,23.0,6.0,5.0,19,48.586011,-109.030243
19961,"Zwingle, IA",729.0,49917.0,230.0,66.0,65.0,76,42.297197,-90.687446


In [14]:
# Switch to snake_case column names (listed in the frame's column order).
internet_final.rename(
    {
        "name": "city",
        "Population": "population",
        "Median Income": "median_income",
        "Population with High School Diploma": "pop_with_hs_diploma",
        "Population with Associate's Degree": "pop_with_associates",
        "Population with Bachelor's Degree": "pop_with_bachelors",
        "pop_with_access": "pop_with_high_speed_internet",
    },
    axis="columns",
    inplace=True,
)
internet_final


Unnamed: 0,city,population,median_income,pop_with_hs_diploma,pop_with_associates,pop_with_bachelors,pop_with_high_speed_internet,latitude,longitude
0,"Aaronsburg, PA",1327.0,59531.0,274.0,56.0,72.0,181,40.010823,-80.000926
1,"Abbeville, AL",6275.0,39983.0,1329.0,433.0,335.0,2522,31.564724,-85.259123
2,"Abbeville, GA",4547.0,32105.0,994.0,180.0,206.0,2465,31.992576,-83.306453
3,"Abbeville, LA",25623.0,49663.0,5450.0,1216.0,1734.0,11765,29.975395,-92.127157
4,"Abbeville, MS",2073.0,43886.0,372.0,199.0,200.0,468,34.503392,-89.502517
...,...,...,...,...,...,...,...,...,...
19958,"Zumbro Falls, MN",1464.0,81364.0,349.0,139.0,133.0,181,44.287819,-92.427083
19959,"Zumbrota, MN",4932.0,70450.0,974.0,492.0,635.0,3141,44.295356,-92.674842
19960,"Zurich, MT",73.0,60938.0,23.0,6.0,5.0,19,48.586011,-109.030243
19961,"Zwingle, IA",729.0,49917.0,230.0,66.0,65.0,76,42.297197,-90.687446


In [15]:
# Write the combined table to disk; the row index is left out of the file.
internet_final.to_csv(
    "assets/data/internet_census_combined.csv",
    index=False,
    encoding="utf-8",
)