In [230]:
# Dependencies
import pandas as pd
import numpy as np
import requests
import json

# Import the API key
from config1 import api_key

In [231]:
#Create Dataframe with Zillow Indicator Data
zillow_indicators = pd.read_csv("Zillow Indicators.csv")
zillow_indicators.head()

Unnamed: 0,indicator_id,indicator,category
0,MRAM,"Mean Days to Pending (Raw, All Homes, Monthly)",Inventory and sales
1,SSSW,"Median Sale Price (Smooth, SFR only, Weekly View)",Inventory and sales
2,LSSM,"Median List Price (Smooth, SFR Only, Monthly)",Inventory and sales
3,CSAW,"Share of Listings With a Price Cut (Smooth, Al...",Inventory and sales
4,ISSM,"For-Sale Inventory (Smooth, SFR only, Monthly)",Inventory and sales


In [232]:
#Create Dataframe with Zillow Region Data
zillow_regions = pd.read_csv("Zillow Regions.csv")
zillow_regions.head()

Unnamed: 0,region_id,region_type,region
0,1286,county,Orange County;CA;Los Angeles-Long Beach-Anahei...
1,3175,county,Philadelphia County;PA;Philadelphia-Camden-Wil...
2,3017,county,Sacramento County;CA;Sacramento-Roseville-Fols...
3,401,county,"Bronx County;NY;New York-Newark-Jersey City, N..."
4,3165,county,Hillsborough County;FL;Tampa-St. Petersburg-Cl...


In [233]:
#Search regions Dataframe for Chicago
p = "Chicago"
chicago_regions = zillow_regions[zillow_regions['region'].str.contains(p)]
chicago_regions.head()

Unnamed: 0,region_id,region_type,region
425,10215,city,"Aurora;IL;Chicago-Naperville-Elgin, IL-IN-WI;K..."
607,41587,city,"Waukegan;IL;Chicago-Naperville-Elgin, IL-IN-WI..."
646,44597,city,"Cicero;IL;Chicago-Naperville-Elgin, IL-IN-WI;C..."
654,41460,city,"Valparaiso;IN;Chicago-Naperville-Elgin, IL-IN-..."
666,50764,city,"Arlington Heights;IL;Chicago-Naperville-Elgin,..."


In [234]:
#find all the region types
chicago_regions["region_type"].unique()

array(['city', 'county', 'neigh', 'zip', 'metro'], dtype=object)

In [235]:
#create the new dataframes
city_regions = chicago_regions.loc[chicago_regions["region_type"] == "city"]
county_regions = chicago_regions.loc[chicago_regions["region_type"] == "county"]
neigh_regions = chicago_regions.loc[chicago_regions["region_type"] == "neigh"]
zip_regions = chicago_regions.loc[chicago_regions["region_type"] == "zip"]
metro_regions = chicago_regions.loc[chicago_regions["region_type"] == "metro"]

City Dataframe Cleaning

In [236]:
city_regions.head()

Unnamed: 0,region_id,region_type,region
425,10215,city,"Aurora;IL;Chicago-Naperville-Elgin, IL-IN-WI;K..."
607,41587,city,"Waukegan;IL;Chicago-Naperville-Elgin, IL-IN-WI..."
646,44597,city,"Cicero;IL;Chicago-Naperville-Elgin, IL-IN-WI;C..."
654,41460,city,"Valparaiso;IN;Chicago-Naperville-Elgin, IL-IN-..."
666,50764,city,"Arlington Heights;IL;Chicago-Naperville-Elgin,..."


In [237]:
#clean the data by splitting region
split_region = city_regions['region'].str.split(';', n=4, expand=True)
split_region.columns = ['city', 'state', 'state_region', 'county']


In [238]:
# Concatenating the split columns with the original DataFrame
city_regions = pd.concat([city_regions, split_region], axis=1)

# Dropping the original 'region' column
city_regions.drop(columns=['region'], inplace=True)


In [239]:
city_regions.info()

<class 'pandas.core.frame.DataFrame'>
Index: 370 entries, 425 to 89052
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   region_id     370 non-null    int64 
 1   region_type   370 non-null    object
 2   city          370 non-null    object
 3   state         370 non-null    object
 4   state_region  370 non-null    object
 5   county        370 non-null    object
dtypes: int64(1), object(5)
memory usage: 20.2+ KB


In [240]:
city_regions.head()

Unnamed: 0,region_id,region_type,city,state,state_region,county
425,10215,city,Aurora,IL,"Chicago-Naperville-Elgin, IL-IN-WI",Kane County
607,41587,city,Waukegan,IL,"Chicago-Naperville-Elgin, IL-IN-WI",Lake County
646,44597,city,Cicero,IL,"Chicago-Naperville-Elgin, IL-IN-WI",Cook County
654,41460,city,Valparaiso,IN,"Chicago-Naperville-Elgin, IL-IN-WI",Porter County
666,50764,city,Arlington Heights,IL,"Chicago-Naperville-Elgin, IL-IN-WI",Cook County


County Dataframe Cleaning

In [241]:
county_regions.head()

Unnamed: 0,region_id,region_type,region
1453,139,county,"Cook County;IL;Chicago-Naperville-Elgin, IL-IN-WI"
6716,1682,county,"DuPage County;IL;Chicago-Naperville-Elgin, IL-..."
13058,220,county,"Lake County;IL;Chicago-Naperville-Elgin, IL-IN-WI"
14819,330,county,"Will County;IL;Chicago-Naperville-Elgin, IL-IN-WI"
18197,197,county,"Kane County;IL;Chicago-Naperville-Elgin, IL-IN-WI"


In [242]:
#clean the data by splitting region
split_region = county_regions['region'].str.split(';', n=3, expand=True)
split_region.columns = ['county', 'state', 'state_region']

In [243]:
# Concatenating the split columns with the original DataFrame
county_regions = pd.concat([county_regions, split_region], axis=1)

# Dropping the original 'region' column
county_regions.drop(columns=['region'], inplace=True)

In [244]:
#check that all columns are full
county_regions.info()

<class 'pandas.core.frame.DataFrame'>
Index: 14 entries, 1453 to 36340
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   region_id     14 non-null     int64 
 1   region_type   14 non-null     object
 2   county        14 non-null     object
 3   state         14 non-null     object
 4   state_region  14 non-null     object
dtypes: int64(1), object(4)
memory usage: 672.0+ bytes


In [245]:
#check the dataframe
county_regions.head()

Unnamed: 0,region_id,region_type,county,state,state_region
1453,139,county,Cook County,IL,"Chicago-Naperville-Elgin, IL-IN-WI"
6716,1682,county,DuPage County,IL,"Chicago-Naperville-Elgin, IL-IN-WI"
13058,220,county,Lake County,IL,"Chicago-Naperville-Elgin, IL-IN-WI"
14819,330,county,Will County,IL,"Chicago-Naperville-Elgin, IL-IN-WI"
18197,197,county,Kane County,IL,"Chicago-Naperville-Elgin, IL-IN-WI"


Neighborhood Dataframe Cleaning

In [246]:
#investigate data frame
neigh_regions.head()

Unnamed: 0,region_id,region_type,region
3705,403295,neigh,Lake View East; IL; Chicago-Naperville-Elgin; ...
5918,403288,neigh,Old Irving Park; IL; Chicago-Naperville-Elgin;...
7216,403286,neigh,Mayfair; IL; Chicago-Naperville-Elgin; Cook Co...
7245,403271,neigh,North Mayfair; IL; Chicago-Naperville-Elgin; C...
8372,403306,neigh,Sheffield Neighbors; IL; Chicago-Naperville-El...


In [247]:
#split the "region" column
split_region = neigh_regions['region'].str.split(';', n=5, expand=True)
split_region.columns = ['neighborhood', 'state', 'state_region', 'county', 'city']

In [248]:
# Concatenating the split columns with the original DataFrame
neigh_regions = pd.concat([neigh_regions, split_region], axis=1)

# Dropping the original 'region' column
neigh_regions.drop(columns=['region'], inplace=True)

In [249]:
#check the dataframe
neigh_regions.info()

<class 'pandas.core.frame.DataFrame'>
Index: 563 entries, 3705 to 84483
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   region_id     563 non-null    int64 
 1   region_type   563 non-null    object
 2   neighborhood  563 non-null    object
 3   state         563 non-null    object
 4   state_region  563 non-null    object
 5   county        563 non-null    object
 6   city          563 non-null    object
dtypes: int64(1), object(6)
memory usage: 35.2+ KB


In [250]:
#view the final dataframe
neigh_regions.head()

Unnamed: 0,region_id,region_type,neighborhood,state,state_region,county,city
3705,403295,neigh,Lake View East,IL,Chicago-Naperville-Elgin,Cook County,Chicago
5918,403288,neigh,Old Irving Park,IL,Chicago-Naperville-Elgin,Cook County,Chicago
7216,403286,neigh,Mayfair,IL,Chicago-Naperville-Elgin,Cook County,Chicago
7245,403271,neigh,North Mayfair,IL,Chicago-Naperville-Elgin,Cook County,Chicago
8372,403306,neigh,Sheffield Neighbors,IL,"Chicago-Naperville-Elgin, IL-IN-WI",Cook County,Chicago


Zipcode Dataframe Cleaning

In [251]:
#investigate the dataframe
zip_regions.head()

Unnamed: 0,region_id,region_type,region
34764,78144,zip,46411; IN; Chicago-Naperville-Elgin; Whiting;L...
45696,84392,zip,60141; IL; Chicago-Naperville-Elgin; Broadview...
45776,84630,zip,"60629;IL;Chicago-Naperville-Elgin, IL-IN-WI;Ch..."
45797,84620,zip,"60618;IL;Chicago-Naperville-Elgin, IL-IN-WI;Ch..."
45805,84639,zip,"60639;IL;Chicago-Naperville-Elgin, IL-IN-WI;Ch..."


In [252]:
#split the "region" column
split_region = zip_regions['region'].str.split(';', n=5, expand=True)
split_region.columns = ['zipcode', 'state', 'state_region', 'city', 'county', 'blank']

In [253]:
# Concatenating the split columns with the original DataFrame
zip_regions = pd.concat([zip_regions, split_region], axis=1)

# Dropping the original 'region' column
zip_regions.drop(columns=['region'], inplace=True)
zip_regions.drop(columns=['blank'], inplace=True)

In [254]:
#view the final dataframe
zip_regions.head()

Unnamed: 0,region_id,region_type,zipcode,state,state_region,city,county
34764,78144,zip,46411,IN,Chicago-Naperville-Elgin,Whiting,Lake County
45696,84392,zip,60141,IL,Chicago-Naperville-Elgin,Broadview,Cook County
45776,84630,zip,60629,IL,"Chicago-Naperville-Elgin, IL-IN-WI",Chicago,Cook County
45797,84620,zip,60618,IL,"Chicago-Naperville-Elgin, IL-IN-WI",Chicago,Cook County
45805,84639,zip,60639,IL,"Chicago-Naperville-Elgin, IL-IN-WI",Chicago,Cook County


In [255]:
#check that the county info is correct
zip_regions["county"].value_counts()

county
Cook County        164
Lake County         54
DuPage County       36
Will County         30
McHenry County      19
Kane County         19
DeKalb County       12
Kenosha County      10
Grundy County        8
Porter County        8
Kendall County       7
Newton County        6
Jasper County        5
 Kane County         3
 Kenosha County      2
 Cook County         1
 DeKalb County       1
 Kendall County      1
 DuPage County       1
 Newton County       1
 Porter County       1
Name: count, dtype: int64

Metro Region DataFrame Cleaning

In [256]:
#investigate metro region
metro_regions.head()

Unnamed: 0,region_id,region_type,region
89098,394463,metro,"Chicago, IL"


In [257]:
#clean data by splitting "region" column
split_region = metro_regions['region'].str.split(',', n=1, expand=True)
split_region.columns = ['city', 'state']

In [258]:
# Concatenating the split columns with the original DataFrame
metro_regions = pd.concat([metro_regions, split_region], axis=1)

# Dropping the original 'region' column
metro_regions.drop(columns=['region'], inplace=True)

In [260]:
#view dataframe
metro_regions

Unnamed: 0,region_id,region_type,city,state
89098,394463,metro,Chicago,IL


In [262]:
# Export categories_df and subcategories_df as CSV files.
metro_regions.to_csv("Resources/metro regions.csv", index=False)
zip_regions.to_csv("Resources/zip regions.csv", index=False)
neigh_regions.to_csv("Resources/neigh regions.csv", index=False)
city_regions.to_csv("Resources/city regions.csv", index=False)
county_regions.to_csv("Resources/county regions.csv", index=False)

In [None]:
base_url = "https://data.nasdaq.com/api/v3/datatables/ZILLOW/DATA?indicator_id=ZSFH&region_id={region}&api_key={api_key}"