In [1]:
import datetime
import math
import os
import site
import sqlite3
import sys

import logzero
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yaml
from logzero import logger
from tqdm import tqdm
from tqdm.notebook import tqdm
from yaml import dump, load, safe_load

In [2]:
import warnings

warnings.filterwarnings("ignore")

In [3]:
import tensorflow
import tensorflow as tf
from tensorflow.keras import Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, RNN, Conv1D, Dense, Lambda, LSTMCell, Reshape
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras.optimizers import Adam

In [4]:
# Show the installed TensorFlow version so the run can be reproduced later.
print(tf.__version__)

2.4.1


In [5]:
# Notebook-wide matplotlib defaults: large figures, large tick/axis labels,
# no grid, and a small margin around the plotted data.
plt.rcParams.update(
    {
        "figure.figsize": (30, 25),
        "ytick.labelsize": 20,
        "xtick.labelsize": 20,
        "axes.grid": False,
        "axes.labelsize": 20,
        "axes.labelpad": 16,
        "axes.xmargin": 0.05,
        "axes.ymargin": 0.05,
    }
)

In [6]:
# Fix the global seeds of both random number sources used in this notebook
# (NumPy and TensorFlow) so repeated runs start from the same state.
np.random.seed(42)
tf.random.set_seed(42)

In [7]:
# Make the project-local helper modules importable. Each directory is added
# only when it is not already on sys.path, so re-running this cell does not
# keep appending duplicate entries.
if "../../sql" not in sys.path:
    sys.path.append("../../sql")
import queries

if "../source" not in sys.path:
    sys.path.append("../source")
import ts_tools

In [8]:
# Send log records to a rotating file (about 1 MB per file, 5 backups) and
# switch off logzero's stderr handler so nothing is echoed into cell output.
log_path = "logs/"
log_file = "ts_lstm.log"

# The log file cannot be opened inside a directory that does not exist, so
# create it up front; exist_ok makes this a no-op on later runs.
os.makedirs(log_path, exist_ok=True)

logzero.logfile(
    os.path.join(log_path, log_file),
    maxBytes=1_000_000,  # same size as the previous float 1e6, as an integer byte count
    backupCount=5,
    disableStderrLogger=True,
)
logger.info(f"{log_path}, {log_file}\n")

In [10]:
# Read the notebook configuration from YAML.
#
# Only the failures this step can actually produce are caught (file access
# errors and YAML syntax errors). The error is logged with its traceback and
# then re-raised: the stderr logger is disabled, so the exception itself is
# the only thing the user sees in the notebook, and the remaining cells
# cannot run without `configs` anyway.
try:
    with open("../configs/config.yml", "r") as config_in:
        configs = safe_load(config_in)
except (OSError, yaml.YAMLError):
    logger.exception("config file open failure.")
    raise

logger.info(f"{configs}\n")

cfg_vars = configs["url_variables"]
logger.info(f"variables: {cfg_vars}\n")

years = configs["request_years"]
logger.info(f"years: {years}\n")

db_path = configs["file_paths"]["db_path"]

# The database file is named after the configured location: "<city>-<state>.db".
location_info = configs["location_info"]
city = location_info["city"]
state = location_info["state"]
db_file = f"{city}-{state}.db"

table_names = configs["table_names"]
db_table1 = table_names["db_table1"]
db_table2 = table_names["db_table2"]

# Settings that control how the series is resampled and split further down.
lstm_cfg = configs["lstm_cfg"]
data_units = lstm_cfg["data_units"]
period = lstm_cfg["period"]
periods_train = lstm_cfg["periods_train"]
periods_val = lstm_cfg["periods_val"]
periods_test = lstm_cfg["periods_test"]

logger.info(f"{db_path}, {db_file}")

# Only the first entry of the configured `num_rows` sequence is used.
nrows = configs["num_rows"][0]
logger.info(f"number of rows: {nrows}\n")

In [11]:
# Open the location-specific SQLite database. os.path.join gives the same path
# as plain concatenation when db_path ends with a separator, and still yields
# a valid path when the configured directory has no trailing separator.
conn = sqlite3.connect(os.path.join(db_path, db_file))
cursor = conn.cursor()

In [12]:
# Run the distinct-zip-code query and keep only the first column of each
# returned row, giving a flat list of zip codes.
zip_rows = cursor.execute(queries.select_distinct_zips).fetchall()
distinct_zipcodes = [row[0] for row in zip_rows]

logger.info(f"distinct zip codes:\n{distinct_zipcodes}")
print(distinct_zipcodes)

['91708']


In [13]:
# Choose which of the distinct zip codes to load and bind it to the query's
# named `:zipcode` placeholder.
zipcode_index = 0
params = {"zipcode": distinct_zipcodes[zipcode_index]}

# Columns prefixed with `--` inside the statement are SQL comments, i.e. those
# fields and filters are currently excluded from the result.
select_nsr_rows = """
SELECT date_time,
-- year, month, day, 
-- zipcode,
-- Clearsky_DHI, DHI,
Clearsky_DNI, DNI,
Clearsky_GHI, GHI,
Temperature,
Relative_Humidity,
Precipitable_Water,
-- Wind_Direction,
Wind_Speed
from nsrdb
where zipcode = :zipcode
-- and not (month = 2 and day = 29)
-- and year = 2000
;
"""

# Load the rows with `date_time` parsed as datetimes and used as the index,
# then order the frame chronologically along that index.
df = pd.read_sql(
    select_nsr_rows,
    conn,
    index_col="date_time",
    params=params,
    parse_dates=["date_time"],
).sort_index(axis=0)

In [14]:
# Daily means of every column. The index is reset and then re-established on
# `date_time`, exactly as before, but in a single chained expression.
df_rsd = (
    df.resample("D")
    .mean()
    .reset_index(drop=False)
    .set_index("date_time")
)
df_rsd.head()

Unnamed: 0_level_0,Clearsky_DNI,DNI,Clearsky_GHI,GHI,Temperature,Relative_Humidity,Precipitable_Water,Wind_Speed
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1998-01-01,293.458333,293.458333,142.541667,142.541667,15.416667,66.250417,1.320042,1.375
1998-01-02,289.833333,55.583333,143.458333,61.666667,12.125,93.799167,1.329208,2.245833
1998-01-03,300.625,18.25,144.291667,42.0,11.208333,93.456667,1.365375,1.779167
1998-01-04,291.833333,65.916667,144.0,47.875,8.25,94.297917,1.054083,2.008333
1998-01-05,344.25,311.333333,154.666667,147.916667,6.041667,63.997917,0.4035,1.620833


In [15]:
# Monthly means of every column. The index is reset and then re-established on
# `date_time`, exactly as before, but in a single chained expression.
df_rsm = (
    df.resample("M")
    .mean()
    .reset_index(drop=False)
    .set_index("date_time")
)
df_rsm.head()

Unnamed: 0_level_0,Clearsky_DNI,DNI,Clearsky_GHI,GHI,Temperature,Relative_Humidity,Precipitable_Water,Wind_Speed
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1998-01-31,312.725806,193.25672,158.55914,116.268817,11.461022,74.901344,1.196043,1.408737
1998-02-28,330.165179,171.066964,201.436012,136.39881,10.00744,86.479226,1.425155,2.150149
1998-03-31,365.642473,219.232527,261.442204,187.193548,12.858871,76.266237,1.270946,1.910887
1998-04-30,384.641667,292.604167,313.873611,268.347222,13.668056,72.995972,1.137753,1.906944
1998-05-31,399.022849,244.490591,345.922043,257.693548,14.939516,79.635323,1.470833,2.055242


In [19]:
# Column labels of the loaded frame; `forecast_on_idx` is the position in this
# list of the series that will be forecast.
columns = df.columns.tolist()
print(columns, "\n")
forecast_on_idx = 3

# Work on a copy of the daily frame when the configured unit is "D"; any other
# value falls through to the monthly frame.
df_rs = (df_rsd if data_units == "D" else df_rsm).copy()

# Each split length is the configured number of periods times the period size.
train_len, val_len, test_len = (
    period * n_periods for n_periods in (periods_train, periods_val, periods_test)
)

print(
    f"period type: {data_units}, period: {period},\n"
    f"periods_train: {periods_train}, periods_val: {periods_val}, "
    f"periods_test: {periods_test}"
)

print(f"train_len: {train_len}, validate_len: {val_len}, test_len: {test_len}")

['Clearsky_DNI', 'DNI', 'Clearsky_GHI', 'GHI', 'Temperature', 'Relative_Humidity', 'Precipitable_Water', 'Wind_Speed'] 

period type: M, period: 12,
periods_train: 16, periods_val: 5, periods_test: 2
train_len: 192, validate_len: 60, test_len: 24


In [22]:
# Pick the single column selected by `forecast_on_idx` from the resampled
# frame and expose its raw values as an array for the model input.
forecast_column = columns[forecast_on_idx]
df_lstm = df_rs[forecast_column]
X = df_lstm.values

print(X)

[116.2688172  136.39880952 187.19354839 268.34722222 257.69354839
 285.70277778 324.48521505 305.71102151 210.8875     194.34139785
 146.23472222 128.39516129 128.78897849 165.81696429 192.63844086
 218.05972222 277.94623656 322.0375     334.63037634 317.6733871
 245.62222222 219.70967742 148.50416667 132.6827957  118.33333333
 126.35344828 212.1061828  266.02638889 300.50672043 342.91527778
 340.89247312 296.14516129 247.58888889 157.14650538 157.23472222
 122.6061828  125.95967742 135.6875     201.71774194 247.64027778
 286.37231183 326.66666667 329.25806452 309.94623656 261.76805556
 188.22983871 135.89583333 118.2016129  140.03763441 191.01190476
 229.94354839 251.7        296.82795699 339.68194444 325.52956989
 300.07123656 244.1125     169.11424731 147.2375     124.29435484
 141.29166667 150.79017857 228.83064516 257.10138889 288.70295699
 269.44861111 315.04704301 306.71908602 264.42083333 190.8077957
 138.76527778 111.2594086  140.00268817 157.90229885 232.80241935
 269.8111111