In [24]:
# Dependencies
import pandas as pd
import numpy as np
import requests
import json
import geopandas as gpd
import matplotlib.pyplot as plt
import hvplot.pandas

# Import the API key
from config2 import api_key
from config2 import weather_api_key

In [25]:
#Create Dataframe with Zillow Region Data
zillow_regions = pd.read_csv("Zillow Regions.csv")
zillow_regions.head()

Unnamed: 0,region_id,region_type,region
0,1286,county,Orange County;CA;Los Angeles-Long Beach-Anahei...
1,3175,county,Philadelphia County;PA;Philadelphia-Camden-Wil...
2,3017,county,Sacramento County;CA;Sacramento-Roseville-Fols...
3,401,county,"Bronx County;NY;New York-Newark-Jersey City, N..."
4,3165,county,Hillsborough County;FL;Tampa-St. Petersburg-Cl...


In [26]:
#create the new dataframes
city_regions = zillow_regions.loc[zillow_regions["region_type"] == "city"]
county_regions = zillow_regions.loc[zillow_regions["region_type"] == "county"]
neigh_regions = zillow_regions.loc[zillow_regions["region_type"] == "neigh"]
zip_regions = zillow_regions.loc[zillow_regions["region_type"] == "zip"]
metro_regions = zillow_regions.loc[zillow_regions["region_type"] == "metro"]

In [27]:
#clean the data by splitting region
split_region = city_regions['region'].str.split(';', n=4, expand=True)
split_region.columns = ['city', 'state', 'metro_area', 'county']

In [28]:
# Concatenating the split columns with the original DataFrame
city_regions = pd.concat([city_regions, split_region], axis=1)

# Dropping the original 'region' column
city_regions.drop(columns=['region'], inplace=True)

In [29]:
city_regions.head()

Unnamed: 0,region_id,region_type,city,state,metro_area,county
398,53060,city,Lubbock,TX,"Lubbock, TX",Lubbock County
399,12065,city,Irving,TX,"Dallas-Fort Worth-Arlington, TX",Dallas County
400,18298,city,Glendale,AZ,"Phoenix-Mesa-Chandler, AZ",Maricopa County
401,53856,city,Pensacola,FL,"Pensacola-Ferry Pass-Brent, FL",Escambia County
402,23482,city,Anchorage,AK,"Anchorage, AK",Anchorage Borough


County Dataframe

In [30]:
county_regions.head()

Unnamed: 0,region_id,region_type,region
0,1286,county,Orange County;CA;Los Angeles-Long Beach-Anahei...
1,3175,county,Philadelphia County;PA;Philadelphia-Camden-Wil...
2,3017,county,Sacramento County;CA;Sacramento-Roseville-Fols...
3,401,county,"Bronx County;NY;New York-Newark-Jersey City, N..."
4,3165,county,Hillsborough County;FL;Tampa-St. Petersburg-Cl...


In [31]:
#clean the data by splitting region
split_region = county_regions['region'].str.split(';', n=3, expand=True)
split_region.columns = ['county', 'state', 'metro_area']

In [32]:
# Concatenating the split columns with the original DataFrame
county_regions = pd.concat([county_regions, split_region], axis=1)

# Dropping the original 'region' column
county_regions.drop(columns=['region'], inplace=True)

In [33]:
#check the dataframe
county_regions.head()

Unnamed: 0,region_id,region_type,county,state,metro_area
0,1286,county,Orange County,CA,"Los Angeles-Long Beach-Anaheim, CA"
1,3175,county,Philadelphia County,PA,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD"
2,3017,county,Sacramento County,CA,"Sacramento-Roseville-Folsom, CA"
3,401,county,Bronx County,NY,"New York-Newark-Jersey City, NY-NJ-PA"
4,3165,county,Hillsborough County,FL,"Tampa-St. Petersburg-Clearwater, FL"


Neighborhood Dataframe

In [34]:
#investigate data frame
neigh_regions.head()

Unnamed: 0,region_id,region_type,region
167,274772,neigh,Northeast Dallas; TX; Dallas-Fort Worth-Arling...
183,273698,neigh,"Far North; TX; Dallas-Fort Worth-Arlington, TX..."
243,275473,neigh,Southeast Dallas; TX; Dallas-Fort Worth-Arling...
436,196538,neigh,Murray Hill; NY; New York-Newark-Jersey City; ...
969,274742,neigh,North Scottsdale; AZ; Phoenix-Mesa-Scottsdale;...


In [35]:
#split the "region" column
split_region = neigh_regions['region'].str.split(';', n=5, expand=True)
split_region.columns = ['neighborhood', 'state', 'metro_area', 'city', 'county']

In [36]:
# Concatenating the split columns with the original DataFrame
neigh_regions = pd.concat([neigh_regions, split_region], axis=1)

# Dropping the original 'region' column
neigh_regions.drop(columns=['region'], inplace=True)

In [37]:
#view the final dataframe
neigh_regions.head()

Unnamed: 0,region_id,region_type,neighborhood,state,metro_area,city,county
167,274772,neigh,Northeast Dallas,TX,"Dallas-Fort Worth-Arlington, TX",Dallas County,Dallas
183,273698,neigh,Far North,TX,"Dallas-Fort Worth-Arlington, TX",Dallas County,Dallas
243,275473,neigh,Southeast Dallas,TX,"Dallas-Fort Worth-Arlington, TX",Dallas County,Dallas
436,196538,neigh,Murray Hill,NY,New York-Newark-Jersey City,Queens County,New York
969,274742,neigh,North Scottsdale,AZ,Phoenix-Mesa-Scottsdale,Maricopa County,Scottsdale


Zip Code Dataframe

In [38]:
#investigate the dataframe
zip_regions.head()

Unnamed: 0,region_id,region_type,region
650,58011,zip,00612; MI; Crawford County; Frederic
1282,58051,zip,00693; PA; Pittsburgh; Westmoreland County; Gr...
1797,70935,zip,"30165;GA;Rome, GA;Rome;Floyd County"
1963,58109,zip,00795; AL; Auburn-Opelika; Lee County; Juana Diaz
3442,58129,zip,00907; AR; Forrest City; Saint Francis County;...


In [39]:
#split the "region" column
split_region = zip_regions['region'].str.split(';', n=5, expand=True)
split_region.columns = ['zipcode', 'state', 'metro_area', 'city', 'county', 'blank']

In [40]:
# Concatenating the split columns with the original DataFrame
zip_regions = pd.concat([zip_regions, split_region], axis=1)

# Dropping the original 'region' column
zip_regions.drop(columns=['region'], inplace=True)
zip_regions.drop(columns=['blank'], inplace=True)

In [41]:
#view the final dataframe
zip_regions.head()

Unnamed: 0,region_id,region_type,zipcode,state,metro_area,city,county
650,58011,zip,612,MI,Crawford County,Frederic,
1282,58051,zip,693,PA,Pittsburgh,Westmoreland County,Greensburg
1797,70935,zip,30165,GA,"Rome, GA",Rome,Floyd County
1963,58109,zip,795,AL,Auburn-Opelika,Lee County,Juana Diaz
3442,58129,zip,907,AR,Forrest City,Saint Francis County,Widener


Metro Region Dataframe

In [42]:
#investigate metro region
metro_regions.head()

Unnamed: 0,region_id,region_type,region
5,394653,metro,"Greenville, SC"
6,394312,metro,"Albuquerque, NM"
7,394357,metro,"Bakersfield, CA"
8,394308,metro,"Albany, NY"
9,394753,metro,"Knoxville, TN"


In [43]:
split_region = metro_regions['region'].str.split(',', n=1, expand=True)
split_region.columns = ['city', 'state']

In [44]:
# Concatenating the split columns with the original DataFrame
metro_regions = pd.concat([metro_regions, split_region], axis=1)

# Dropping the original 'region' column
metro_regions.drop(columns=['region'], inplace=True)

In [45]:
#view dataframe
metro_regions

Unnamed: 0,region_id,region_type,city,state
5,394653,metro,Greenville,SC
6,394312,metro,Albuquerque,NM
7,394357,metro,Bakersfield,CA
8,394308,metro,Albany,NY
9,394753,metro,Knoxville,TN
...,...,...,...,...
89149,753924,metro,Urban Honolulu,HI
89150,395169,metro,Tulsa,OK
89151,394619,metro,Fresno,CA
89152,395238,metro,Worcester,MA


Creating CSV Files

In [46]:
metro_regions.to_csv("Resources/metro regions.csv", index=False)
zip_regions.to_csv("Resources/zip regions.csv", index=False)
neigh_regions.to_csv("Resources/neigh regions.csv", index=False)
city_regions.to_csv("Resources/city regions.csv", index=False)
county_regions.to_csv("Resources/county regions.csv", index=False)