In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
import os
import requests
import time
from pathlib import Path
from scipy import stats

In [2]:
# Locate the cleaned input CSVs relative to the notebook's working directory.
# The paths are assembled with pathlib's "/" operator instead of a backslash
# string literal: "\R" and "\c" are invalid escape sequences in a normal
# string (Python warns about them), and backslash separators only resolve
# on Windows.
RESOURCES_DIR = Path("..") / "Resources"
home_values = RESOURCES_DIR / "cleaned_home_values.csv"
monthly_rent_cost = RESOURCES_DIR / "cleaned_rent_cost.csv"

# Read both datasets into dataframes
home_values_df = pd.read_csv(home_values)
rent_cost_df = pd.read_csv(monthly_rent_cost)

In [3]:
# Preview the first rows of the home values data.
# 'Date' stays a regular column here; the in-place re-indexing below is
# intentionally left disabled:
# home_values_df.set_index('Date', inplace=True)
home_values_df.head()

Unnamed: 0,Date,"New York, NY","Los Angeles, CA","Chicago, IL","Dallas, TX","Houston, TX","Washington, DC","Philadelphia, PA","Miami, FL","Atlanta, GA",...,"Helena, AR","Spencer, IA","Atchison, KS","Fairfield, IA","Sweetwater, TX","Zapata, TX","Ketchikan, AK","Craig, CO","Vernon, TX","Lamesa, TX"
0,2015-01-31,404171.5878,513958.2964,191213.5856,172921.0387,174429.271,371339.4203,202454.3216,209432.0875,167916.9208,...,46394.09635,108748.006,72661.74894,111158.5424,93675.89371,100668.8718,255155.9638,151088.0637,64786.76642,65550.97273
1,2015-02-28,404367.838,515749.3789,191520.7446,174428.1166,176102.5099,372051.2334,202904.1903,211138.2512,168796.228,...,45799.75314,109802.6414,72975.01407,111861.953,94422.70054,100913.8233,256713.2678,151688.4571,64526.92522,65859.05492
2,2015-03-31,404832.0151,516131.215,192050.9421,175918.6479,177750.8996,372930.4566,203415.9047,212695.318,169713.7945,...,45649.45496,111039.5894,73487.92106,112231.5322,95275.96317,100863.2065,257936.1849,152391.2066,64317.33294,66311.33982
3,2015-04-30,405433.0387,516280.392,192570.5118,177289.4804,179226.1953,373417.5969,203907.7575,214274.7371,170737.8647,...,45990.95439,111977.4806,73917.5255,112830.7233,96407.96016,101236.8235,259322.8323,153189.1511,64330.33953,66790.144
4,2015-05-31,406635.1014,516921.471,193432.5615,178724.2699,180628.5009,373588.1045,204498.8401,216005.1963,171866.853,...,46591.73331,112916.7833,74548.7244,112940.7075,97887.04331,101982.7193,260963.3452,153807.7973,64688.41192,67140.75242


In [4]:
# Inspect column dtypes: 'Date' is read as object (string) and the city
# columns shown in the output are float64
home_values_df.dtypes

Date                object
New York, NY       float64
Los Angeles, CA    float64
Chicago, IL        float64
Dallas, TX         float64
                    ...   
Zapata, TX         float64
Ketchikan, AK      float64
Craig, CO          float64
Vernon, TX         float64
Lamesa, TX         float64
Length: 844, dtype: object

In [5]:
# number of rows in home values (each row carries one 'Date' value)
total_dates = home_values_df.shape[0]
total_dates

110

In [6]:
# unique column labels in home values
# NOTE: despite the name, this Index also contains the 'Date' column,
# not only city names
total_cities = home_values_df.columns.unique()
total_cities

Index(['Date', 'New York, NY', 'Los Angeles, CA', 'Chicago, IL', 'Dallas, TX',
       'Houston, TX', 'Washington, DC', 'Philadelphia, PA', 'Miami, FL',
       'Atlanta, GA',
       ...
       'Helena, AR', 'Spencer, IA', 'Atchison, KS', 'Fairfield, IA',
       'Sweetwater, TX', 'Zapata, TX', 'Ketchikan, AK', 'Craig, CO',
       'Vernon, TX', 'Lamesa, TX'],
      dtype='object', length=844)

In [7]:
# count the column labels (city columns plus the 'Date' column)
len(total_cities)

844

In [8]:
# Build a plain list of city names: copy the column labels, then delete the
# 'Date' entry (raises ValueError if 'Date' is not present).
cities_list = list(total_cities)
del cities_list[cities_list.index('Date')]
len(cities_list)

843

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
# describe() on a DataFrame already returns a DataFrame, so it is assigned directly.
home_values_summary = home_values_df.describe()
home_values_summary

Unnamed: 0,"New York, NY","Los Angeles, CA","Chicago, IL","Dallas, TX","Houston, TX","Washington, DC","Philadelphia, PA","Miami, FL","Atlanta, GA","Boston, MA",...,"Helena, AR","Spencer, IA","Atchison, KS","Fairfield, IA","Sweetwater, TX","Zapata, TX","Ketchikan, AK","Craig, CO","Vernon, TX","Lamesa, TX"
count,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,...,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0
mean,504641.396244,693217.750049,240657.905135,269426.633632,232776.567883,446517.100105,258500.446585,315934.470704,257497.144946,500563.475512,...,51553.914902,137592.627812,106394.398734,137720.557563,106256.690384,114954.466691,321526.269618,193033.627989,75570.895681,79969.418607
std,70485.72188,139384.356598,34296.246509,65332.384556,42903.643058,61082.904297,45653.107405,80569.278794,69462.401915,87458.866693,...,3502.190489,20721.139959,25982.332385,20034.985449,6778.529457,11019.548385,44862.381329,41430.789399,10606.657728,8914.169779
min,404171.5878,504210.952,191213.5856,172921.0387,174429.271,371339.4203,202454.3216,209432.0875,167916.9208,369722.2663,...,43735.44237,108748.006,72661.74894,111158.5424,93675.89371,100668.8718,255155.9638,151088.0637,63957.72369,65550.97273
25%,446514.78145,579028.854925,214757.168825,221007.9332,199511.291275,395457.98535,220416.478825,255424.019925,200230.24875,430759.6765,...,49232.751482,122081.517225,85312.108453,121473.518625,100809.5101,105859.799275,278504.122275,158475.635975,64982.3346,73009.112457
50%,490761.58105,666517.8526,232004.8777,251006.12745,217387.82805,424983.57945,241674.33915,288880.06465,236766.37525,477442.01545,...,52358.794875,130139.3621,95342.35589,130185.6979,105350.7575,111669.19425,317210.86225,176976.0076,72906.12455,76993.720015
75%,567591.1219,824774.455375,269349.63335,322567.54375,267929.58255,505839.601975,295974.754825,366699.680325,316832.6188,574956.626675,...,53772.2767,154073.928525,129886.95845,155149.219825,111654.66255,124344.257875,366604.936175,221190.632775,84881.929497,88370.121742
max,634651.2923,938807.2812,305231.3311,382698.4094,307630.1456,549546.6768,347370.6386,477916.5908,376063.3604,664491.2566,...,58560.70096,175048.2282,152461.3727,174917.8385,118673.7981,135409.4334,391224.6076,276980.9914,95048.69878,97240.15249


In [10]:
# Export the summary statistics to CSV.
# The index holds the statistic names (count, mean, std, ...), so it is
# written out under a 'statistic' header; with index=False the rows in the
# file would be unlabeled.
# The path is built with pathlib rather than a backslash string ("\O" and
# "\h" are invalid escape sequences, and backslash separators are
# Windows-only).
home_values_summary.to_csv(
    Path("..") / "Output" / "hv_stats.csv",
    index_label="statistic",
)

In [11]:
# Month-over-month percent change of home values for each city.
# 1. index the rows by 'Date' so only city columns remain as data
home_values_set = home_values_df.set_index(keys='Date')
# 2. force every column to numeric; unparseable entries become NaN
home_values_numeric = home_values_set.apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)
# 3. relative change from the previous row (the first row has no predecessor
#    and is therefore NaN)
home_values_pct_changes = home_values_numeric.pct_change()
home_values_pct_changes

Unnamed: 0_level_0,"New York, NY","Los Angeles, CA","Chicago, IL","Dallas, TX","Houston, TX","Washington, DC","Philadelphia, PA","Miami, FL","Atlanta, GA","Boston, MA",...,"Helena, AR","Spencer, IA","Atchison, KS","Fairfield, IA","Sweetwater, TX","Zapata, TX","Ketchikan, AK","Craig, CO","Vernon, TX","Lamesa, TX"
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-31,,,,,,,,,,,...,,,,,,,,,,
2015-02-28,0.000486,0.003485,0.001606,0.008715,0.009593,0.001917,0.002222,0.008147,0.005237,0.007087,...,-0.012811,0.009698,0.004311,0.006328,0.007972,0.002433,0.006103,0.003974,-0.004011,0.004700
2015-03-31,0.001148,0.000740,0.002768,0.008545,0.009360,0.002363,0.002522,0.007375,0.005436,0.006550,...,-0.003282,0.011265,0.007029,0.003304,0.009037,-0.000502,0.004764,0.004633,-0.003248,0.006867
2015-04-30,0.001485,0.000289,0.002705,0.007792,0.008300,0.001306,0.002418,0.007426,0.006034,0.004920,...,0.007481,0.008446,0.005846,0.005339,0.011881,0.003704,0.005376,0.005236,0.000202,0.007221
2015-05-31,0.002965,0.001242,0.004477,0.008093,0.007824,0.000457,0.002899,0.008076,0.006612,0.004142,...,0.013063,0.008388,0.008539,0.000975,0.015342,0.007368,0.006326,0.004038,0.005566,0.005249
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-31,0.003751,0.012700,0.004445,0.000308,0.000084,0.002088,0.004902,0.006093,0.003381,0.007018,...,-0.004751,0.003554,0.006773,-0.002188,-0.008182,-0.012951,-0.008691,0.004386,-0.011602,-0.011435
2023-11-30,0.003682,0.009438,0.002966,-0.000373,-0.000936,0.001204,0.003934,0.006138,0.002522,0.005212,...,-0.003429,0.002208,0.004541,-0.000221,-0.009943,-0.017784,-0.007284,0.002567,-0.016290,-0.009186
2023-12-31,0.004384,0.005871,0.002052,-0.000878,-0.001359,0.001026,0.003373,0.005648,0.001791,0.004139,...,-0.010993,0.001476,0.002107,0.002020,-0.017549,-0.016797,-0.006488,-0.000171,-0.014734,-0.014961
2024-01-31,0.004556,0.000167,0.002130,0.000058,-0.000020,0.000770,0.003438,0.004991,0.001452,0.002965,...,-0.013514,-0.000992,0.002979,0.003028,-0.018896,-0.014170,-0.000596,-0.002688,-0.008058,-0.008268


In [12]:
# Export the percent changes to CSV.
# The 'Date' index is kept so every row stays tied to its month; with
# index=False the dates would be dropped from the file.
# The path is built with pathlib rather than a backslash string ("\O" and
# "\h" are invalid escape sequences, and backslash separators are
# Windows-only).
home_values_pct_changes.to_csv(Path("..") / "Output" / "hv_pct_changes.csv")

In [13]:
# Summary statistics of the monthly percent changes, per city.
# describe() on a DataFrame already returns a DataFrame, so no extra wrapping is needed.
hv_pct_changes_stats = home_values_pct_changes.describe()
hv_pct_changes_stats

Unnamed: 0,"New York, NY","Los Angeles, CA","Chicago, IL","Dallas, TX","Houston, TX","Washington, DC","Philadelphia, PA","Miami, FL","Atlanta, GA","Boston, MA",...,"Helena, AR","Spencer, IA","Atchison, KS","Fairfield, IA","Sweetwater, TX","Zapata, TX","Ketchikan, AK","Craig, CO","Vernon, TX","Lamesa, TX"
count,109.0,109.0,109.0,109.0,109.0,109.0,109.0,109.0,109.0,109.0,...,109.0,109.0,109.0,109.0,109.0,109.0,109.0,109.0,109.0,109.0
mean,0.004154,0.005551,0.004309,0.00707,0.005093,0.003612,0.004974,0.007613,0.007442,0.005405,...,-0.000467,0.004361,0.006626,0.00386,0.00075,0.000682,0.003547,0.005558,0.002523,0.001218
std,0.003381,0.008743,0.004253,0.006816,0.005306,0.004253,0.004244,0.005542,0.006045,0.004888,...,0.0122,0.004128,0.007528,0.005756,0.007408,0.009494,0.004444,0.004758,0.009063,0.010477
min,-0.004174,-0.012533,-0.005639,-0.006777,-0.005776,-0.008468,-0.003259,-0.001488,-0.004616,-0.008552,...,-0.030621,-0.003688,-0.018109,-0.01338,-0.024001,-0.033539,-0.008691,-0.002895,-0.027258,-0.022358
25%,0.002271,-0.000561,0.001606,0.002221,0.002117,0.001204,0.002536,0.004585,0.004202,0.002965,...,-0.007814,0.001654,0.002796,0.000458,-0.002659,-0.004028,0.000624,0.001799,-0.003248,-0.006272
50%,0.00382,0.005922,0.00402,0.007718,0.00374,0.002887,0.00384,0.006534,0.00651,0.005238,...,4.9e-05,0.003636,0.00591,0.003496,0.001824,0.001833,0.003914,0.004885,0.002732,0.002854
75%,0.005759,0.011984,0.005974,0.010044,0.007335,0.004885,0.005312,0.008461,0.008262,0.007231,...,0.008259,0.006657,0.011402,0.006919,0.00535,0.007349,0.006262,0.00816,0.007406,0.009776
max,0.012118,0.024769,0.016814,0.027335,0.021178,0.015382,0.017196,0.027003,0.024669,0.019766,...,0.027622,0.015229,0.02117,0.023668,0.018238,0.019858,0.01439,0.019557,0.028641,0.020204


In [14]:
# Export the percent-change statistics to CSV.
# The index holds the statistic names (count, mean, std, ...), so it is
# written out under a 'statistic' header; with index=False the rows in the
# file would be unlabeled.
# The path is built with pathlib rather than a backslash string ("\O" and
# "\h" are invalid escape sequences, and backslash separators are
# Windows-only).
hv_pct_changes_stats.to_csv(
    Path("..") / "Output" / "hv_pct_changes_stats.csv",
    index_label="statistic",
)

In [15]:
# For each city, look up the date (index label) of its largest
# month-over-month percent change.
max_dates = {
    city: home_values_pct_changes[city].idxmax()
    for city in cities_list
}

max_dates
# Optional tabular view (left disabled):
# max_dates_df = pd.DataFrame(max_dates, index=['Max Date'])
# max_dates_df



{'New York, NY': '2020-12-31',
 'Los Angeles, CA': '2017-03-31',
 'Chicago, IL': '2022-03-31',
 'Dallas, TX': '2022-03-31',
 'Houston, TX': '2022-03-31',
 'Washington, DC': '2022-03-31',
 'Philadelphia, PA': '2022-03-31',
 'Miami, FL': '2022-04-30',
 'Atlanta, GA': '2022-03-31',
 'Boston, MA': '2022-03-31',
 'Phoenix, AZ': '2021-06-30',
 'San Francisco, CA': '2022-03-31',
 'Riverside, CA': '2021-06-30',
 'Detroit, MI': '2022-03-31',
 'Seattle, WA': '2022-03-31',
 'Minneapolis, MN': '2021-05-31',
 'San Diego, CA': '2022-03-31',
 'Tampa, FL': '2022-04-30',
 'Denver, CO': '2022-03-31',
 'Baltimore, MD': '2021-04-30',
 'St. Louis, MO': '2022-03-31',
 'Orlando, FL': '2022-04-30',
 'Charlotte, NC': '2022-04-30',
 'San Antonio, TX': '2022-03-31',
 'Portland, OR': '2022-03-31',
 'Sacramento, CA': '2021-05-31',
 'Pittsburgh, PA': '2022-03-31',
 'Cincinnati, OH': '2022-03-31',
 'Austin, TX': '2021-05-31',
 'Las Vegas, NV': '2022-04-30',
 'Kansas City, MO': '2022-03-31',
 'Columbus, OH': '2022-03

In [None]:
# Disabled draft (this cell was not executed): rank cities by their mean
# monthly percent change.
# mean_pct_changes = home_values_pct_changes.mean()

# # Get the 10 cities with the highest mean percent changes
# top_cities = mean_pct_changes.nlargest(10)

# # Print the result
# print(top_cities)

In [16]:
# For each city, look up the date (index label) of its smallest
# (most negative) month-over-month percent change.
min_dates = {
    city: home_values_pct_changes[city].idxmin()
    for city in cities_list
}

min_dates

{'New York, NY': '2022-10-31',
 'Los Angeles, CA': '2023-02-28',
 'Chicago, IL': '2022-09-30',
 'Dallas, TX': '2023-01-31',
 'Houston, TX': '2023-01-31',
 'Washington, DC': '2022-09-30',
 'Philadelphia, PA': '2022-09-30',
 'Miami, FL': '2020-06-30',
 'Atlanta, GA': '2023-01-31',
 'Boston, MA': '2022-09-30',
 'Phoenix, AZ': '2022-11-30',
 'San Francisco, CA': '2022-09-30',
 'Riverside, CA': '2016-11-30',
 'Detroit, MI': '2022-09-30',
 'Seattle, WA': '2022-09-30',
 'Minneapolis, MN': '2022-09-30',
 'San Diego, CA': '2022-09-30',
 'Tampa, FL': '2023-01-31',
 'Denver, CO': '2022-09-30',
 'Baltimore, MD': '2022-09-30',
 'St. Louis, MO': '2022-09-30',
 'Orlando, FL': '2023-01-31',
 'Charlotte, NC': '2023-01-31',
 'San Antonio, TX': '2023-12-31',
 'Portland, OR': '2022-09-30',
 'Sacramento, CA': '2022-09-30',
 'Pittsburgh, PA': '2022-09-30',
 'Cincinnati, OH': '2022-09-30',
 'Austin, TX': '2022-09-30',
 'Las Vegas, NV': '2022-11-30',
 'Kansas City, MO': '2022-09-30',
 'Columbus, OH': '2023-01