In [None]:
# Load packages
import os
import csv
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import hvplot.pandas
import requests
from config2 import geoapify_key
from fredapi import Fred
from config1 import api_key
from pprint import pprint
# Initialize Fred api with api_key
fred = Fred(api_key=api_key)


In [39]:
#Initializing the file output
file_to_output = os.path.join("US_Unemployment.csv") 

In [40]:
# Assigning the series id for US unemployment Data.
series_id = "UNRATE"

    # Collecting the series data from FRED
    # From 2004 to present
    # URL is below for reference
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
us_unemployment = fred.get_series(series_id, observation_start="2004-01-01")

# Converted the data to a DataFrame
us_unemployment_df = pd.DataFrame(us_unemployment, columns=['Unemployment Rate'])

# Indexed the date
us_unemployment_df.index.name = "Date"


In [41]:
# Print the DataFrame
pprint(us_unemployment_df)

# Write to CSV file
us_unemployment_df.to_csv("../Data/US_Unemployment.csv")

            Unemployment Rate
Date                         
2004-01-01                5.7
2004-02-01                5.6
2004-03-01                5.8
2004-04-01                5.6
2004-05-01                5.6
...                       ...
2024-06-01                4.1
2024-07-01                4.3
2024-08-01                4.2
2024-09-01                4.1
2024-10-01                4.1

[250 rows x 1 columns]


In [42]:
#Initializing the file output
file_to_output = os.path.join("Median_CPI.csv") 

In [43]:
# Assigning the series id for Median CPI Data.
series_id = "MEDCPIM158SFRBCLE"

    # Collecting the series data from FRED
    # From 2004 to present
    # URL is below for reference
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
median_cpi = fred.get_series(series_id, observation_start="2004-01-01")

# Converted the data to a DataFrame
median_cpi_df = pd.DataFrame(median_cpi, columns=['Median_CPI'])

# Indexed the date
median_cpi_df.index.name = "Date"



In [44]:
# Print the DataFrame
pprint(median_cpi_df)

# Write to CSV file
median_cpi_df.to_csv("../Data/Median_CPI.csv")

            Median_CPI
Date                  
2004-01-01    2.220075
2004-02-01    2.912655
2004-03-01    2.734402
2004-04-01    2.538382
2004-05-01    3.217495
...                ...
2024-06-01    2.364665
2024-07-01    3.804998
2024-08-01    3.180397
2024-09-01    4.146461
2024-10-01    3.607369

[250 rows x 1 columns]


In [45]:
#Initializing the file output
file_to_output = os.path.join("Build_Permit.csv") 

In [46]:
# list of states
states = {"Alabama": "AL",
         "Alaska": "AK",
         "Arizona": "AZ",
         "Arkansas": "AR",
         "California": "CA",
         "Colorado":"CO",
         "Connecticut": "CT",
         "Delaware": "DE",
         "District of Columbia": "DC",
         "Florida": "FL",
         "Georgia": "GA",
         "Hawaii": "HI",
         "Idaho": "ID",
         "Illinois": "IL", 
         "Indiana": "IN",
         "Iowa": "IA",
         "Kansas": "KS",
         "Kentucky": "KY",
         "Louisiana": "LA",
         "Maine": "ME",
         "Maryland": "MD",
         "Massachusetts":"MA",
         "Michigan": "MI",
         "Minnesota": "MN",
         "Mississippi": "MS",
         "Missouri": "MO",
         "Montana": "MT",
         "Nebraska": "NE",
         "Nevada": "NV",
         "New Hampshire": "NH",
         "New Jersey": "NJ",
         "New Mexico": "NM", 
         "New York": "NY",
         "North Carolina": "NC",
         "North Dakota": "ND",
         "Ohio": "OH",
         "Oklahoma": "OK",
         "Oregon": "OR",
         "Pennsylvania": "PA",
         "Rhode Island": "RI",
         "South Carolina": "SC",
         "South Dakota": "SD",
         "Tennessee": "TN",
         "Texas": "TX",
         "Utah": "UT",
         "Vermont": "VT",
         "Virginia": "VA",
         "Washington": "WA",
         "West Virginia": "WV",
         "Wisconsin": "WI",
         "Wyoming": "WY",
         }

In [47]:
# Created a dictionary to store building permit data on all states
build_permit_data = {}

# Collecting building permit data for each state
for state, abbreviation in states.items():
    # Construct the series ID for each state
    series_id = f"{abbreviation}BPPRIVSA"
    
    # Collecting the series data from FRED
    # From 2004 to present
    # URL is below for reference:
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
    build_permit = fred.get_series(series_id, observation_start="2004-01-01")
    
    # Add the data to the dictionary
    build_permit_data[state] = build_permit

# Convert the data to a DataFrame
build_permit_df = pd.DataFrame(build_permit_data)

# Indexed the date
build_permit_df.index.name = "Date"



In [48]:
# Print the DataFrame
pprint(build_permit_df.head())

# Write to CSV file
build_permit_df.to_csv("../Data/Build_Permit.csv")

                Alabama      Alaska      Arizona     Arkansas    California  \
Date                                                                          
2004-01-01  2362.659688  228.375488  6125.353891  1292.034177  16135.819376   
2004-02-01  2295.240670  190.714917  6048.844553  1052.341451  16223.077407   
2004-03-01  3019.797614  248.156023  7132.259490  1097.886143  17173.783094   
2004-04-01  2425.664445  282.369728  7405.707855  1633.830391  16985.603333   
2004-05-01  2386.274858  329.782850  6719.256073  1553.764383  16074.646047   

               Colorado  Connecticut    Delaware  District of Columbia  \
Date                                                                     
2004-01-01  3688.262428   956.699800  660.943737                  10.0   
2004-02-01  3365.395504   867.594829  739.359216                 105.0   
2004-03-01  4012.430210   994.390833  783.786522                  38.0   
2004-04-01  3927.088989   944.699453  532.186749                 727.0   
20

In [51]:
#Initializing the file output
file_to_output = os.path.join("Higher_Ed.csv") 

In [52]:
# Created a dictionary to store home value data on all states
higher_ed_data = {}

# Collecting higher education data for each state
for state, abbreviation in states.items():
    # Construct the series ID for each state
    series_id = f"GCT1502{abbreviation}"

    # Collecting the series data from FRED
    # From 2006 to present
    # URL is below for reference
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
    higher_ed = fred.get_series(series_id, observation_start="2006-01-01")

    # Add the data to the dictionary
    higher_ed_data[state] = higher_ed

# Convert the data to a DataFrame
higher_ed_df = pd.DataFrame(higher_ed_data)

# Indexed the date
higher_ed_df.index.name = "Date"


In [53]:
# Print the DataFrame
pprint(higher_ed_df)

# Write to CSV file
higher_ed_df.to_csv("../Data/Higher_Ed.csv")

            Alabama  Alaska  Arizona  Arkansas  California  Colorado  \
Date                                                                   
2006-01-01     21.1    26.9     25.5      18.2        29.0      34.3   
2007-01-01     21.4    26.0     25.3      19.3        29.5      35.0   
2008-01-01     22.0    27.3     25.1      18.8        29.6      35.6   
2009-01-01     22.0    26.6     25.6      18.9        29.9      35.9   
2010-01-01     21.9    27.9     25.9      19.5        30.1      36.4   
2011-01-01     22.3    26.4     26.6      20.3        30.3      36.7   
2012-01-01     23.3    28.0     27.3      21.0        30.9      37.5   
2013-01-01     23.5    28.0     27.4      20.6        31.0      37.8   
2014-01-01     23.5    28.0     27.6      21.4        31.7      38.3   
2015-01-01     24.2    29.7     27.7      21.8        32.3      39.2   
2016-01-01     24.7    29.6     28.9      22.4        32.9      39.9   
2017-01-01     25.5    28.8     29.4      23.4        33.6      

In [54]:
#Initializing the file output
file_to_output = os.path.join("State_Population.csv") 

In [55]:
# Created a dictionary to store data on home values for each state
state_population_data = {}

# Collecting data on home values for each state
for state, abbreviation in states.items():
    # Construct the series ID
    series_id = f"{abbreviation}UCSFRCONDOSMSAMID"

    # Collecting the series data from FRED
    # From 2004 to present
    # URL is below for reference
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
    population = fred.get_series(series_id, observation_start="2004-01-01")

    # Add the data to the dictionary
    state_population_data[state] = population

# Convert the data to a DataFrame
state_population_df = pd.DataFrame(state_population_data)

# Indexed the date
state_population_df.index.name = "Date"


In [56]:
# Print the DataFrame
pprint(state_population_df)

# Write to CSV file
state_population_df.to_csv("../Data/State_Population.csv")

                  Alabama         Alaska        Arizona       Arkansas  \
Date                                                                     
2004-01-01  112219.633618  185288.975756  166447.552736  101500.869884   
2004-02-01  112475.256485  185888.909611  167310.459152  101954.719972   
2004-03-01  112761.571360  186314.678930  168360.184614  102476.545194   
2004-04-01  113054.064625  187413.877739  169635.828698  103054.125900   
2004-05-01  113414.762519  188606.956867  171189.400241  103595.816419   
...                   ...            ...            ...            ...   
2024-06-01  227555.818097  360071.934909  431149.390069  207078.541330   
2024-07-01  227630.204334  360252.045076  430780.449052  207508.259309   
2024-08-01  227490.517839  360334.979924  429979.908306  207895.888015   
2024-09-01  227368.087861  360882.671302  429279.829369  207976.605244   
2024-10-01  227508.317173  362098.348756  428710.722473  208078.444321   

               California       Color

In [57]:
#Initializing the file output
file_to_output = os.path.join("Unemployment.csv") 

In [58]:
# Created a dictionary to store unemployment data on all states
unemployment_data = {}

# Collecting unemplyment data for each state
for state, abbreviation in states.items():
    # Created a series ID for each state
    series_id = f"{abbreviation}UR"

    # Collecting the series data from FRED
    # From 2004 to present
    # URL is below for reference
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
    unemployment_rate = fred.get_series(series_id, observation_start="2004-01-01")

    # Add the data to the dictionary
    unemployment_data[state] = unemployment_rate

# Converted the data to a DataFrame
unemployment_df = pd.DataFrame(unemployment_data)

# Indexed the date
unemployment_df.index.name = "Date"

In [59]:
# Print the DataFrame
pprint(unemployment_df)

# Write to CSV file
unemployment_df.to_csv("../Data/Unemployment.csv")

            Alabama  Alaska  Arizona  Arkansas  California  Colorado  \
Date                                                                   
2004-01-01      5.9     7.6      5.4       5.7         6.6       5.8   
2004-02-01      5.9     7.6      5.3       5.7         6.5       5.8   
2004-03-01      5.8     7.6      5.3       5.7         6.5       5.7   
2004-04-01      5.8     7.6      5.2       5.7         6.4       5.6   
2004-05-01      5.7     7.5      5.1       5.7         6.4       5.6   
...             ...     ...      ...       ...         ...       ...   
2024-06-01      2.9     4.5      3.3       3.3         5.2       3.8   
2024-07-01      2.8     4.5      3.4       3.3         5.2       3.9   
2024-08-01      2.8     4.6      3.4       3.3         5.3       4.0   
2024-09-01      2.9     4.5      3.5       3.3         5.3       4.0   
2024-10-01      2.9     4.6      3.6       3.3         5.4       4.1   

            Connecticut  Delaware  District of Columbia  Florid

In [61]:
#Initializing the file output
file_to_output = os.path.join("Vacancy_Rate.csv") 

In [62]:
# list of states
states = {"United States": "US",
         "Alabama": "AL",
         "Alaska": "AK",
         "Arizona": "AZ",
         "Arkansas": "AR",
         "California": "CA",
         "Colorado":"CO",
         "Connecticut": "CT",
         "Delaware": "DE",
         "District of Columbia": "DC",
         "Florida": "FL",
         "Georgia": "GA",
         "Hawaii": "HI",
         "Idaho": "ID",
         "Illinois": "IL", 
         "Indiana": "IN",
         "Iowa": "IA",
         "Kansas": "KS",
         "Kentucky": "KY",
         "Louisiana": "LA",
         "Maine": "ME",
         "Maryland": "MD",
         "Massachusetts":"MA",
         "Michigan": "MI",
         "Minnesota": "MN",
         "Mississippi": "MS",
         "Missouri": "MO",
         "Montana": "MT",
         "Nebraska": "NE",
         "Nevada": "NV",
         "New Hampshire": "NH",
         "New Jersey": "NJ",
         "New Mexico": "NM", 
         "New York": "NY",
         "North Carolina": "NC",
         "North Dakota": "ND",
         "Ohio": "OH",
         "Oklahoma": "OK",
         "Oregon": "OR",
         "Pennsylvania": "PA",
         "Rhode Island": "RI",
         "South Carolina": "SC",
         "South Dakota": "SD",
         "Tennessee": "TN",
         "Texas": "TX",
         "Utah": "UT",
         "Vermont": "VT",
         "Virginia": "VA",
         "Washington": "WA",
         "West Virginia": "WV",
         "Wisconsin": "WI",
         "Wyoming": "WY",
         }

In [63]:
# Created a dictionary to store home vacancy data on all states
vacancy_rate_data = {}

# Collecting home vacancy data for each state
for state, abbreviation in states.items():
    # Construct the series ID for each state
    series_id = f"{abbreviation}HVAC"
    
    # Collecting the series data from FRED
    # From 2004 to present
    # URL is below for reference
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
    vacancy_rate = fred.get_series(series_id, observation_start="2004-01-01")
    
    # Add the data to the dictionary
    vacancy_rate_data[state] = vacancy_rate

# Convert the data to a DataFrame
vacancy_rate_df = pd.DataFrame(vacancy_rate_data)

# Indexed the date
vacancy_rate_df.index.name = "Date"



In [64]:
# Print the DataFrame
pprint(vacancy_rate_df.head())

# Write to CSV file
vacancy_rate_df.to_csv("../Data/Vacancy_Rate.csv")

            United States  Alabama  Alaska  Arizona  Arkansas  California  \
Date                                                                        
2004-01-01            1.7      2.5     1.8      1.6       2.3         0.9   
2005-01-01            1.9      2.0     1.5      1.2       2.3         1.2   
2006-01-01            2.4      2.8     1.7      3.0       1.9         1.9   
2007-01-01            2.7      2.9     2.6      3.8       2.2         2.3   
2008-01-01            2.8      3.3     2.0      3.8       2.9         3.1   

            Colorado  Connecticut  Delaware  District of Columbia  ...  \
Date                                                               ...   
2004-01-01       2.8          0.9       1.4                   2.4  ...   
2005-01-01       2.7          1.3       1.7                   2.0  ...   
2006-01-01       3.0          2.1       2.6                   3.7  ...   
2007-01-01       3.1          2.3       2.8                   3.2  ...   
2008-01-01      

In [65]:
#Initializing the file output
file_to_output = os.path.join("Rental_Vacancy.csv") 

In [66]:
# Created a dictionary to store rental vacancy data on all states
rental_vacancy_data = {}

# Collecting rental vacancy data for each state
for state, abbreviation in states.items():
    # Construct the series ID for each state
    series_id = f"{abbreviation}RVAC"

    # Collecting the series data from FRED
    # From 2004 to present
    # URL is below for reference
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
    rental_data = fred.get_series(series_id, observation_start="2004-01-01")

    # Add the data to the dictionary
    rental_vacancy_data[state] = rental_data

# Convert the data to a DataFrame
rental_vacancy_df = pd.DataFrame(rental_vacancy_data)

# Indexed the date
rental_vacancy_df.index.name = "Date"



In [67]:
# Print the DataFrame
pprint(rental_vacancy_df)

# Write to CSV file
rental_vacancy_df.to_csv("../Data/Rental_Vacancy.csv")

            United States  Alabama  Alaska  Arizona  Arkansas  California  \
Date                                                                        
2004-01-01           10.2     14.8     7.0     11.3      13.5         5.4   
2005-01-01            9.8     13.2     7.9     11.6      13.7         6.0   
2006-01-01            9.7     13.4     8.5      9.3      13.5         5.8   
2007-01-01            9.7     15.3     7.7      9.6      11.8         6.1   
2008-01-01           10.0     15.3     8.7     14.1      13.4         6.4   
2009-01-01           10.6     14.4     7.9     17.7      13.0         7.6   
2010-01-01           10.2     12.1     5.6     14.9      11.4         7.5   
2011-01-01            9.5     14.2     5.8     12.4      12.6         6.1   
2012-01-01            8.7     11.1     4.7     11.4      10.6         5.2   
2013-01-01            8.3     11.8     5.3     10.8      13.2         5.1   
2014-01-01            7.6     14.0     6.8      9.5      14.7         4.5   

In [68]:
#Initializing the file output
file_to_output = os.path.join("Homeownership_Rate.csv") 

In [69]:
# Created a dictionary to store home ownership data on all states
ownership_rate_data = {}

# Collecting home ownership data for each state
for state, abbreviation in states.items():
    # Construct the series ID for each state
    series_id = f"{abbreviation}HOWN"

    # Collecting the series data from FRED
    # From 2004 to present
    # URL is below for reference
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
    ownership_data = fred.get_series(series_id, observation_start="2004-01-01")

    # Add the data to the dictionary
    ownership_rate_data[state] = ownership_data

# Convert the data to a DataFrame
ownership_rate_df = pd.DataFrame(ownership_rate_data)

# Indexed the date
ownership_rate_df.index.name = "Date"


In [70]:
# Print the DataFrame
pprint(ownership_rate_df)

# Write to CSV file
ownership_rate_df.to_csv("../Data/Homeownership_Rate.csv")

            United States  Alabama  Alaska  Arizona  Arkansas  California  \
Date                                                                        
2004-01-01           69.0     78.0    67.2     68.7      69.1        59.7   
2005-01-01           68.9     76.6    66.0     71.1      69.2        59.7   
2006-01-01           68.8     74.2    67.2     71.6      70.8        60.2   
2007-01-01           68.1     73.3    66.6     70.4      69.5        58.3   
2008-01-01           67.8     73.0    66.4     69.1      68.9        57.5   
2009-01-01           67.4     74.1    66.8     68.9      68.5        57.0   
2010-01-01           66.9     73.2    65.7     66.6      67.9        56.1   
2011-01-01           66.1     72.9    64.4     66.0      67.6        55.3   
2012-01-01           65.4     71.9    63.7     65.3      66.0        54.5   
2013-01-01           65.1     72.7    64.6     65.1      65.4        54.3   
2014-01-01           64.5     72.1    64.9     63.5      65.4        54.2   

In [71]:
#Initializing the file output
file_to_output = os.path.join("Home_Value.csv") 

In [72]:
# list of states
states = {"United States": "USA",
         "Alabama": "AL",
         "Alaska": "AK",
         "Arizona": "AZ",
         "Arkansas": "AR",
         "California": "CA",
         "Colorado":"CO",
         "Connecticut": "CT",
         "Delaware": "DE",
         "District of Columbia": "DC",
         "Florida": "FL",
         "Georgia": "GA",
         "Hawaii": "HI",
         "Idaho": "ID",
         "Illinois": "IL", 
         "Indiana": "IN",
         "Iowa": "IA",
         "Kansas": "KS",
         "Kentucky": "KY",
         "Louisiana": "LA",
         "Maine": "ME",
         "Maryland": "MD",
         "Massachusetts":"MA",
         "Michigan": "MI",
         "Minnesota": "MN",
         "Mississippi": "MS",
         "Missouri": "MO",
         "Montana": "MT",
         "Nebraska": "NE",
         "Nevada": "NV",
         "New Hampshire": "NH",
         "New Jersey": "NJ",
         "New Mexico": "NM", 
         "New York": "NY",
         "North Carolina": "NC",
         "North Dakota": "ND",
         "Ohio": "OH",
         "Oklahoma": "OK",
         "Oregon": "OR",
         "Pennsylvania": "PA",
         "Rhode Island": "RI",
         "South Carolina": "SC",
         "South Dakota": "SD",
         "Tennessee": "TN",
         "Texas": "TX",
         "Utah": "UT",
         "Vermont": "VT",
         "Virginia": "VA",
         "Washington": "WA",
         "West Virginia": "WV",
         "Wisconsin": "WI",
         "Wyoming": "WY",
         }

In [73]:
# Created a dictionary to store data on home values for each state
home_value_data = {}

# Collecting data on home values for each state
for state, abbreviation in states.items():
    # Construct the series ID
    series_id = f"{abbreviation}UCSFRCONDOSMSAMID"

    # Collecting the series data from FRED
    # From 2004 to present
    # URL is below for reference
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
    home_value = fred.get_series(series_id, observation_start="2004-01-01")

    # Add the data to the dictionary
    home_value_data[state] = home_value

# Convert the data to a DataFrame
home_value_df = pd.DataFrame(home_value_data)

# Indexed the date
home_value_df.index.name = "Date"


In [74]:
# Print the DataFrame
pprint(home_value_df)

# Write to CSV file
home_value_df.to_csv("../Data/Home_Value.csv")

            United States        Alabama         Alaska        Arizona  \
Date                                                                     
2004-01-01  160797.159759  112219.633618  185288.975756  166447.552736   
2004-02-01  161841.215869  112475.256485  185888.909611  167310.459152   
2004-03-01  162993.075508  112761.571360  186314.678930  168360.184614   
2004-04-01  164280.757755  113054.064625  187413.877739  169635.828698   
2004-05-01  165730.747214  113414.762519  188606.956867  171189.400241   
...                   ...            ...            ...            ...   
2024-06-01  357675.012344  227555.818097  360071.934909  431149.390069   
2024-07-01  357822.347127  227630.204334  360252.045076  430780.449052   
2024-08-01  358064.560426  227490.517839  360334.979924  429979.908306   
2024-09-01  358513.017685  227368.087861  360882.671302  429279.829369   
2024-10-01  359098.757006  227508.317173  362098.348756  428710.722473   

                 Arkansas     Califor

In [76]:
file_to_output = os.path.join("../Data/Housing_Inventory(New_Listings).csv") 

In [77]:
# list of states
states = {"United States": "US",
         "Alabama": "AL",
         "Alaska": "AK",
         "Arizona": "AZ",
         "Arkansas": "AR",
         "California": "CA",
         "Colorado":"CO",
         "Connecticut": "CT",
         "Delaware": "DE",
        #  "District of Columbia": "DC", # Info Not Available
         "Florida": "FL",
         "Georgia": "GA",
         "Hawaii": "HI",
         "Idaho": "ID",
         "Illinois": "IL", 
         "Indiana": "IN",
         "Iowa": "IA",
         "Kansas": "KS",
         "Kentucky": "KY",
         "Louisiana": "LA",
         "Maine": "ME",
         "Maryland": "MD",
         "Massachusetts":"MA",
         "Michigan": "MI",
         "Minnesota": "MN",
         "Mississippi": "MS",
         "Missouri": "MO",
         "Montana": "MT",
         "Nebraska": "NE",
         "Nevada": "NV",
         "New Hampshire": "NH",
         "New Jersey": "NJ",
         "New Mexico": "NM", 
         "New York": "NY",
         "North Carolina": "NC",
         "North Dakota": "ND",
         "Ohio": "OH",
         "Oklahoma": "OK",
         "Oregon": "OR",
         "Pennsylvania": "PA",
         "Rhode Island": "RI",
         "South Carolina": "SC",
         "South Dakota": "SD",
         "Tennessee": "TN",
         "Texas": "TX",
         "Utah": "UT",
         "Vermont": "VT",
         "Virginia": "VA",
         "Washington": "WA",
         "West Virginia": "WV",
         "Wisconsin": "WI",
         "Wyoming": "WY",
         }

In [78]:
# Created a dictionary to store housing inventory data for each state
housing_inventory_data = {}

# Collecting housing inventory data for each state
for state, abbreviation in states.items():
    # Construct the series ID for each state
    series_id = f"NEWLISCOU{abbreviation}"
    
    # Collecting the series data from FRED
    # From July 2016 to present
    # URL is below for reference
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
    housing_inventory = fred.get_series(series_id, observation_start="2016-07-01")
    
    # Add the data to the dictionary
    housing_inventory_data[state] = housing_inventory

# Convert the data to a DataFrame
housing_inventory_df = pd.DataFrame(housing_inventory_data)

# Indexed the date
housing_inventory_df.index.name = "Date"




In [79]:
# Print the DataFrame
pprint(housing_inventory_df)

# Write to CSV file
housing_inventory_df.to_csv("../Data/Housing_Inventory(New_Listings).csv")

            United States  Alabama  Alaska  Arizona  Arkansas  California  \
Date                                                                        
2016-07-01       527576.0   7756.0  1224.0  12196.0    4376.0     47100.0   
2016-08-01       470780.0   6882.0  1082.0  12218.0    3954.0     45164.0   
2016-09-01       452994.0   6674.0   898.0  12886.0    3798.0     41326.0   
2016-10-01       413376.0   6096.0   788.0  13464.0    3776.0     39896.0   
2016-11-01       376704.0   5332.0   600.0  12174.0    3464.0     34064.0   
...                   ...      ...     ...      ...       ...         ...   
2024-06-01       430076.0   7328.0  1140.0   9288.0    4356.0     32560.0   
2024-07-01       405404.0   7064.0  1070.0   8602.0    4512.0     31338.0   
2024-08-01       383552.0   7016.0   916.0   8992.0    4356.0     30632.0   
2024-09-01       399750.0   6708.0   742.0  10164.0    3850.0     31988.0   
2024-10-01       365690.0   6442.0   624.0   9992.0    3630.0     28958.0   

In [80]:
#Initializing the file output
file_to_output = os.path.join("Median_Income.csv") 

In [81]:
# list of states
states = {"United States": "US",
         "Alabama": "USAL",
         "Alaska": "USAK",
         "Arizona": "USAZ",
         "Arkansas": "USAR",
         "California": "USCA",
         "Colorado":"USCO",
         "Connecticut": "USCT",
         "Delaware": "USDE",
         "District of Columbia": "USDC",
         "Florida": "USFL",
         "Georgia": "USGA",
         "Hawaii": "USHI",
         "Idaho": "USID",
         "Illinois": "USIL", 
         "Indiana": "USIN",
         "Iowa": "USIA",
         "Kansas": "USKS",
         "Kentucky": "USKY",
         "Louisiana": "USLA",
         "Maine": "USME",
         "Maryland": "USMD",
         "Massachusetts":"USMA",
         "Michigan": "USMI",
         "Minnesota": "USMN",
         "Mississippi": "USMS",
         "Missouri": "USMO",
         "Montana": "USMT",
         "Nebraska": "USNE",
         "Nevada": "USNV",
         "New Hampshire": "USNH",
         "New Jersey": "USNJ",
         "New Mexico": "USNM", 
         "New York": "USNY",
         "North Carolina": "USNC",
         "North Dakota": "USND",
         "Ohio": "USOH",
         "Oklahoma": "USOK",
         "Oregon": "USOR",
         "Pennsylvania": "USPA",
         "Rhode Island": "USRI",
         "South Carolina": "USSC",
         "South Dakota": "USSD",
         "Tennessee": "USTN",
         "Texas": "USTX",
         "Utah": "USUT",
         "Vermont": "USVT",
         "Virginia": "USVA",
         "Washington": "USWA",
         "West Virginia": "USWV",
         "Wisconsin": "USWI",
         "Wyoming": "USWY",
         }

In [82]:
# Created a dictionary to store income data on all states
state_income_data = {}

# Collecting median income data for each state
for state, abbreviation in states.items():
    # Construct the series ID for each state
    series_id = f"MEHOIN{abbreviation}A646N"

    # Collecting the series data from FRED
    # From 2004 to present
    # URL is below for reference
    # https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={api_key}&file_type=json
    income_data = fred.get_series(series_id, observation_start="2004-01-01")

    # Add the data to the dictionary
    state_income_data[state] = income_data

# Convert the data to a DataFrame
state_income_df = pd.DataFrame(state_income_data)

# Indexed the date
state_income_df.index.name = "Date"



In [84]:
# Print the DataFrame
pprint(state_income_df)

# Write to CSV file
state_income_df.to_csv("../Data/Median_Income.csv")

            United States  Alabama   Alaska  Arizona  Arkansas  California  \
Date                                                                         
2004-01-01        44330.0  36630.0  55060.0  43850.0   34980.0     49220.0   
2005-01-01        46330.0  37150.0  55890.0  45250.0   36660.0     51760.0   
2006-01-01        48200.0  37950.0  56420.0  46660.0   37060.0     55320.0   
2007-01-01        50230.0  42210.0  62990.0  47220.0   40800.0     55730.0   
2008-01-01        50300.0  44480.0  63990.0  46910.0   39590.0     57010.0   
2009-01-01        49780.0  39980.0  61600.0  45740.0   36540.0     56130.0   
2010-01-01        49280.0  40930.0  57850.0  46900.0   38590.0     54280.0   
2011-01-01        50050.0  42590.0  57430.0  48620.0   41300.0     53370.0   
2012-01-01        51020.0  43460.0  63650.0  47040.0   39020.0     57020.0   
2013-01-01        53590.0  47320.0  72470.0  52610.0   39380.0     60790.0   
2014-01-01        53660.0  42280.0  67630.0  49250.0   44920.0  

In [None]:
# All the data above was pulled from FRED API
# Additional csv data was pulled directly from the
# Federal reserve FRED website: 
# https://fred.stlouisfed.org/
# Other resources include: 
# https://www.hudexchange.info/resource/3031/pit-and-hic-data-since-2007/
# and
# https://www.fedprimerate.com/wall_street_journal_prime_rate_history.htm


In [None]:
# Data below was analyzed using Geoapify API. 

In [6]:
# load csv data for Housing Inventory (New Listings)
housing_inventory = Path("../Data/Housing_Inventory(New_Listings).csv")
home_inventory_df = pd.read_csv(housing_inventory)
home_inventory_df



Unnamed: 0,Date,United States,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,...,South Dakota,Tennessee,Texas,Utah,Vermont,Virginia,Washington,West Virginia,Wisconsin,Wyoming
0,2016-07-01,527576.0,7756.0,1224.0,12196.0,4376.0,47100.0,13240.0,5688.0,1848.0,...,1172.0,11528.0,39148.0,5804.0,1456.0,13760.0,13136.0,1988.0,9276.0,1292.0
1,2016-08-01,470780.0,6882.0,1082.0,12218.0,3954.0,45164.0,12522.0,4924.0,1774.0,...,1100.0,11270.0,34468.0,5840.0,1372.0,12936.0,12966.0,2046.0,8210.0,1114.0
2,2016-09-01,452994.0,6674.0,898.0,12886.0,3798.0,41326.0,10848.0,5476.0,2154.0,...,1014.0,10436.0,31578.0,5036.0,1072.0,12786.0,10688.0,1734.0,7196.0,930.0
3,2016-10-01,413376.0,6096.0,788.0,13464.0,3776.0,39896.0,9768.0,4892.0,1592.0,...,860.0,9744.0,30980.0,4692.0,804.0,11100.0,9704.0,1848.0,6596.0,896.0
4,2016-11-01,376704.0,5332.0,600.0,12174.0,3464.0,34064.0,8064.0,3926.0,1378.0,...,726.0,9116.0,27726.0,4300.0,670.0,9626.0,6606.0,1556.0,6658.0,686.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2024-06-01,430076.0,7328.0,1140.0,9288.0,4356.0,32560.0,11180.0,3968.0,1304.0,...,1160.0,11792.0,42392.0,5112.0,1036.0,10944.0,11008.0,1780.0,6504.0,960.0
96,2024-07-01,405404.0,7064.0,1070.0,8602.0,4512.0,31338.0,10120.0,3576.0,1268.0,...,1100.0,10710.0,37580.0,4576.0,966.0,10482.0,10058.0,1676.0,6582.0,940.0
97,2024-08-01,383552.0,7016.0,916.0,8992.0,4356.0,30632.0,9296.0,3132.0,1300.0,...,1044.0,10696.0,35616.0,4532.0,944.0,9532.0,8768.0,1660.0,6544.0,868.0
98,2024-09-01,399750.0,6708.0,742.0,10164.0,3850.0,31988.0,9398.0,3644.0,1394.0,...,1032.0,15472.0,34858.0,4374.0,968.0,10618.0,9542.0,1680.0,6026.0,758.0


In [9]:
# Converted to a dataframe
home_inventory_df = pd.DataFrame(home_inventory_df)

# Formatted 'Date' column to datetime
home_inventory_df['Date'] = pd.to_datetime(home_inventory_df['Date'])

# Set index to 'Date'
home_inventory_df.set_index('Date', inplace=True)

# Calculated the new home listing averages for each state
st_home_inventory_df = home_inventory_df.mean()

# reset index
st_home_inventory_df = st_home_inventory_df.reset_index()

# Select columns 'State' and 'Housint Inventory Avg'
st_home_inventory_df.columns = ['State', 'Housing Inventory Avg']

# Sort values in descending order
st_home_inventory_df = st_home_inventory_df.sort_values(by='Housing Inventory Avg',ascending=False)

# Removing the United States from the dataframe
st_home_inventory_df = st_home_inventory_df.drop(0)

# states with the highest number of new home listings
st_home_inventory_df.head()

Unnamed: 0,State,Housing Inventory Avg
9,Florida,42927.84
43,Texas,36048.86
5,California,35925.14
32,New York,16700.5
10,Georgia,15975.6


In [10]:
# states with the lowest number of new home listings
st_home_inventory_df.tail()

Unnamed: 0,State,Housing Inventory Avg
41,South Dakota,981.54
2,Alaska,931.54
50,Wyoming,880.24
45,Vermont,867.96
34,North Dakota,865.34


In [11]:
# creating a list from the states
States = st_home_inventory_df["State"].tolist()


In [12]:
# created an empty list
latitudes = []
longitudes = []

# Geoapify API key
geoapify_key = geoapify_key

# Looped through each state to obtain lat and lon
for state in States:
    target_url = f"https://api.geoapify.com/v1/geocode/search?state={state}&filter=countrycode:us&format=json&apiKey={geoapify_key}"
    response = requests.get(target_url).json()
    
    try:
        # Extracted latitude and longitude
        lat = response["results"][0]["lat"]
        lon = response["results"][0]["lon"]
        latitudes.append(lat)
        longitudes.append(lon)
    except (IndexError, KeyError):
        # Handled cases where no result is found
        latitudes.append(None)
        longitudes.append(None)

# Created a DataFrame with results
geo_df = pd.DataFrame({
    "State": States,
    "Latitude": latitudes,
    "Longitude": longitudes
})

# Display results
geo_df

Unnamed: 0,State,Latitude,Longitude
0,Florida,27.756767,-81.463983
1,Texas,31.26389,-98.545612
2,California,36.701463,-118.755997
3,New York,43.156168,-75.844995
4,Georgia,32.329381,-83.113737
5,Illinois,40.079661,-89.433729
6,North Carolina,35.672964,-79.039292
7,Pennsylvania,40.969989,-77.727883
8,Ohio,40.225357,-82.68814
9,Michigan,43.621195,-84.682435


In [13]:
# Merged st_home_inventory_df with geo_df
state_visual_data = pd.merge(st_home_inventory_df,geo_df, how="outer", on="State")

# scaled down the housing inventory averages by dividing all figures by 100
# the intended purpose is so you could visual see the scale of housing inventory across the us
# without it the figures were oversized and overclustered. 
state_visual_data['Housing Inventory Avg'] = state_visual_data['Housing Inventory Avg']/100

# Displayed results
state_visual_data

Unnamed: 0,State,Housing Inventory Avg,Latitude,Longitude
0,Alabama,67.7432,33.258882,-86.829534
1,Alaska,9.3154,64.445961,-149.680909
2,Arizona,117.854,34.395342,-111.763275
3,Arkansas,38.4172,35.204888,-92.447911
4,California,359.2514,36.701463,-118.755997
5,Colorado,98.6012,38.725178,-105.607716
6,Connecticut,44.88,41.65002,-72.734216
7,Delaware,15.373,38.692045,-75.401331
8,Florida,429.2784,27.756767,-81.463983
9,Georgia,159.756,32.329381,-83.113737


In [14]:
# Configured the map plot
state_map = state_visual_data.hvplot.points(
    "Longitude",
    "Latitude",
    geo=True,
    tiles="OSM",
    frame_width=1400,
    frame_height=1155,  # Adjust frame size if needed
    size="Housing Inventory Avg",
    scale=2,
    color="State",
    hover_cols=["State", "Housing Inventory Avg", "Latitude", "Longitude"]
)

state_map.opts(
    legend_position='right',
    legend_opts={'title_text_font_size': '10pt', 'label_text_font_size': '8pt'}  # Adjust sizes
)
# Displayed map
state_map