A small side project (spun off from this already-side-project!) to look at the lowest recorded temperatures in the area.
I am looking at installing a high-efficiency heat pump that only works when temperatures are above -13°F (-25°C).
When temperatures drop below that, the heat pump stops working and the house would have no heat at the worst possible time.
So the obvious question is: how often does it get below (or near) that temperature?

I suspect this information is available online somewhere, but I've got all this tooling right here, so what the heck.

In [None]:
import download_historical_data as dl
import os 
import matplotlib.pyplot as plt
import pandas as pd

# Apply matplotlib's "default" style sheet to every figure drawn in this notebook.
plt.style.use("default")  # alternative "ggplot"

# Data directories, resolved to absolute paths from the current working directory.
# WEATHER_DATA_DIR is where the weather-station JSON files are downloaded to and
# read from; LOW_TEMP_DATA_DIR lives under the analysis directory.
HISTORICAL_DATA_DIR = os.path.abspath("./historical_data")
WEATHER_DATA_DIR = os.path.join(HISTORICAL_DATA_DIR, "weather_station_data")
ANALYSIS_DATA_DIR = os.path.abspath("./analysis_data/")
LOW_TEMP_DATA_DIR = os.path.join(ANALYSIS_DATA_DIR, "low_temps")

# Create every directory up front. `exist_ok=True` makes the cell safe to re-run,
# and os.makedirs creates missing parents as well. The loop variable is named
# `data_dir` (not `dir`) so the built-in dir() stays usable in later cells.
# Note: if one of these paths exists as a regular file, this raises
# FileExistsError instead of silently skipping it.
for data_dir in (HISTORICAL_DATA_DIR, WEATHER_DATA_DIR, ANALYSIS_DATA_DIR, LOW_TEMP_DATA_DIR):
    os.makedirs(data_dir, exist_ok=True)


# Weather stations included in the analysis, keyed by station ID with a
# human-readable location name as the value. Insertion order is preserved.
_STATION_NAMES = {
    "USW00023066": "Grand Junction Walker Field",
    "USC00053553": "Greeley UNC",
    "USC00053005": "Ft Collins",
    "USC00050848": "Boulder",
    "USC00055984": "Northglenn",
    "USC00058995": "Wheat Ridge",
    "USW00023061": "Alamosa",
}

# Plain list of the station IDs, in the order declared above.
WEATHER_STATION_IDS = list(_STATION_NAMES)

In [None]:
# Uncomment to force a re-download of the source data.
# Alternatively, run the download script manually: python download_historical_data.py
# Data files are saved locally, so you only need to re-download to get new/different data.
# NOTE: ELECTRIC_DATA_DIR is not defined in this notebook; define it before uncommenting the EIA line.

#dl.download_eia_historical_data(ELECTRIC_DATA_DIR, eia_respondent="PSCO")
#dl.download_ghcnd_historical_data(WEATHER_DATA_DIR, WEATHER_STATION_IDS)

In [5]:
## Read in the weather DataFrame
# Build the file pattern with os.path.join instead of a hard-coded "\*.json":
# the backslash is only a path separator on Windows (so the pattern matched
# nothing elsewhere), and "\*" is an invalid string escape sequence.
# NOTE(review): assumes read_weather_data accepts a glob-style pattern -- confirm.
weather_json_pattern = os.path.join(WEATHER_DATA_DIR, "*.json")
temp_df = dl.read_weather_data(weather_json_pattern, earliest_date=None)

In [6]:
# Preview the first rows of the weather data: one row per date, with a
# <station id>_tmax / <station id>_tmin column pair for each station.
# NOTE(review): values look like degrees Celsius -- confirm against the loader.
temp_df.head()

Unnamed: 0_level_0,USC00050848_tmax,USC00050848_tmin,USC00053005_tmax,USC00053005_tmin,USC00053553_tmax,USC00053553_tmin,USC00055984_tmax,USC00055984_tmin,USC00058995_tmax,USC00058995_tmin,USW00023061_tmax,USW00023061_tmin,USW00023066_tmax,USW00023066_tmin
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2015-01-01,0.0,-14.4,2.2,-19.4,1.7,-12.2,1.1,-21.1,-0.6,-22.8,-1.6,-22.7,-3.2,-16.6
2015-01-02,5.0,-14.4,3.9,-11.1,2.8,-8.3,-0.6,-15.6,-2.8,-17.8,-4.9,-22.7,-3.2,-16.6
2015-01-03,1.7,-9.4,-2.2,-13.3,-0.6,-13.9,3.9,-11.1,5.0,-12.8,0.6,-22.1,-2.7,-14.3
2015-01-04,-3.9,-17.8,-4.4,-15.6,-8.9,-20.0,3.9,-17.2,-0.6,-19.4,0.0,-21.6,-2.1,-14.9
2015-01-05,13.3,-16.7,15.0,-16.7,-3.3,-18.3,1.1,-17.8,-0.6,-18.3,1.7,-17.7,0.6,-12.1


In [None]:
## Quick look at all that data
# fig, (ax1, ax2) = plt.subplots(1, 2)
# fig.set_figwidth(15)
# fig.set_figheight(4)
# ax1.plot(temp_df)
# _ = ax2.hist(temp_df, bins=10)

In [None]:
# Four-panel overview of electricity demand: time series, histogram, and two
# autocorrelation views (lags 8-400 and lags 0-7).
# NOTE(review): `demand_df` is never defined in this notebook (it looks like a
# carry-over from the demand analysis), so on a fresh kernel this cell used to
# raise NameError. It is now skipped unless `demand_df` has been loaded.
if "demand_df" in globals():
    fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(15, 4))

    ax1.plot(demand_df)
    ax1.set_title("Demand over time")

    ax2.hist(demand_df, bins=50)
    ax2.set_title("Demand distribution")

    # Long-range autocorrelation (skip the first week of lags).
    pd.plotting.autocorrelation_plot(demand_df, ax=ax3).set_xlim([8, 400])
    ax3.set_title("Autocorrelation, lags 8-400")

    # Short-range autocorrelation (first week of lags).
    pd.plotting.autocorrelation_plot(demand_df, ax=ax4).set_xlim([0, 7])
    ax4.set_title("Autocorrelation, lags 0-7")
else:
    print("Skipping demand plots: `demand_df` is not loaded in this notebook.")

In [None]:
# Join demand and temperature on their indexes, then keep only the rows where
# every column has a value. The outer join followed by dropna() is preserved
# from the original; dropna() is chained rather than applied in place so that
# re-running the cell always rebuilds `joined_df` from its inputs.
# NOTE(review): `demand_df` is never defined in this notebook, so on a fresh
# kernel this cell used to raise NameError. It is now skipped unless
# `demand_df` has been loaded.
if "demand_df" in globals():
    joined_df = pd.merge(
        demand_df,
        temp_df,
        how="outer",
        left_index=True,
        right_index=True,
    ).dropna()
else:
    print("Skipping demand/temperature join: `demand_df` is not loaded in this notebook.")

## Graph demand vs. tmin & tmax (harder now that the data covers many weather stations)
# slice = joined_df[["daily_demand", "tmax", "tmin"]]  #["2016-12-01":"2017-03-01"]

# fig, ax1 = plt.subplots(1, 1)
# fig.set_figwidth(15)
# fig.set_figheight(4)
# ax1.set_ylabel("megawatt-hours")
# ax1.plot(slice["daily_demand"], color="tab:green")

# ax2 = ax1.twinx()
# ax2.set_ylabel("deg C")
# ax2.plot(slice["tmax"], color="tab:red")
# ax2.plot(slice["tmin"], color="tab:blue")


In [None]:
# Show the five rows with the highest daily demand, largest first.
joined_df.sort_values("daily_demand", ascending=False).head(5)