# Load JSON datasets and train forecasting models

In [2]:
import os
import json
import tzlocal
import numpy as np
import pandas as pd
from Fetcher import Dataset
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR
from statsmodels.tsa.holtwinters import ExponentialSmoothing, SimpleExpSmoothing


# Train one Holt-Winters model per (dataset, town) JSON file found on disk and
# store the 30-day forecasts of all towns in a single pickle ("temp_model.pkl").
#
# NOTE(review): "Las Vega" looks like a typo for "Las Vegas". It is left unchanged
# because the string is used to build the file path and is passed to
# Dataset.get_lat_lon — confirm the names on disk before renaming.
list_town = ["Miami", "New York", "Las Vega", "Chicago", "Seattle", "San Francisco", "Washington","New Orleans", "Palm Springs", "San Diego", "Charleston"]
list_dataset = ["temp", "rainfall", "snowfall", "wind", "solar"]

d = Dataset()
final_df = []
# Only the first dataset ("temp") and the first two towns are processed here.
for dataset in list_dataset[:1]:
    for town in list_town[:2]:
        dataset_path = f"dataset/{dataset}/{town}.json"
        print(dataset_path)
        if not os.path.exists(dataset_path):
            continue

        print(f"{dataset} - {town}")
        # Context manager so the file handle is closed right after parsing.
        with open(dataset_path) as dataset_file:
            res = json.load(dataset_file)
        data = res["data"]

        # Keys are parsed as dates; empty/falsy readings become missing values.
        index = pd.to_datetime(list(data.keys()))
        values = [float(s) if s else None for s in data.values()]

        series = pd.Series(values, index=index)
        df = series.to_frame(name='Value')

        # Drop leap days so every year has 365 rows, matching seasonal_periods=365.
        is_leap_day = (df.index.month == 2) & (df.index.day == 29)
        df = df[~is_leap_day]

        #algorithm
        print("Training...")
        # NOTE(review): newer statsmodels releases rename `damped` to `damped_trend`
        # and expect `use_boxcox` in the constructor — confirm against the
        # installed version before changing these arguments.
        hw_model = ExponentialSmoothing(df["Value"],
                  trend    ="add",
                  seasonal = "add",
                  seasonal_periods=365,
                  damped=False
                  ).fit(use_boxcox="log") # damped=False


        hw_fitted = hw_model.fittedvalues
        hw_resid = hw_model.resid
        days_in_future = 30
        # Adding the mean of the residuals to correct the bias.
        py_hw = hw_model.forecast(days_in_future) + np.mean(hw_resid)

        # Forecast series -> frame; a separate name keeps the observations in `df` intact.
        forecast_df = py_hw.to_frame()

        # get lat and long
        lat, long = d.get_lat_lon(town)

        forecast_df["lat"] = str(lat)
        forecast_df["long"] = str(long)
        forecast_df["town"] = str(town)

        final_df.append(forecast_df)

# pd.concat raises an opaque ValueError on an empty list; fail with a clear message instead.
if not final_df:
    raise ValueError("No dataset files were found; nothing to save to temp_model.pkl")

to_save = pd.concat(final_df)
to_save.to_pickle("temp_model.pkl")

dataset/temp/Miami.json
temp - Miami
Training...




15005    79.862227
15006    81.648479
15007    81.103229
15008    79.948219
15009    79.579049
15010    80.078869
15011    81.774671
15012    81.006023
15013    80.947216
15014    80.993954
15015    82.390774
15016    83.085417
15017    83.255930
15018    83.742356
15019    83.565536
15020    82.915414
15021    82.611477
15022    82.703687
15023    82.093222
15024    82.271099
15025    82.117348
15026    82.253567
15027    82.079928
15028    82.149169
15029    82.077249
15030    82.791809
15031    83.002856
15032    80.778357
15033    81.162077
15034    81.994254
dtype: float64
dataset/temp/New York.json
temp - New York
Training...




15005    79.862227
15006    81.648479
15007    81.103229
15008    79.948219
15009    79.579049
15010    80.078869
15011    81.774671
15012    81.006023
15013    80.947216
15014    80.993954
15015    82.390774
15016    83.085417
15017    83.255930
15018    83.742356
15019    83.565536
15020    82.915414
15021    82.611477
15022    82.703687
15023    82.093222
15024    82.271099
15025    82.117348
15026    82.253567
15027    82.079928
15028    82.149169
15029    82.077249
15030    82.791809
15031    83.002856
15032    80.778357
15033    81.162077
15034    81.994254
dtype: float64


In [91]:
# Load the forecast table that the training cell saved to "temp_model.pkl".
# NOTE(review): unpickling can execute arbitrary code — only load pickle files
# produced by this notebook or another trusted source.
df_open = pd.read_pickle("temp_model.pkl")
# `date` is used by get_prediction to compute the day offset between two dates.
from datetime import date

In [104]:
def get_prediction(df, date_in_future, town="Miami", last_day=None):
    """Return the forecast value for ``town`` on ``date_in_future``.

    Parameters
    ----------
    df : pandas.DataFrame
        Forecast table with one row per forecast day and town. The forecast
        value must be the first column and the town name must be in a
        ``"town"`` column; rows of a town are expected in chronological order.
    date_in_future : str
        Target date formatted as ``YYYY-MM-DD``.
    town : str, optional
        Town whose forecast is looked up. Defaults to ``"Miami"``.
    last_day : str, optional
        Last observed date (``YYYY-MM-DD``). When omitted it is read with
        ``get_last_day_from_dataset()``.

    Returns
    -------
    The forecast value stored for the requested day.

    Raises
    ------
    AssertionError
        If the target date is not between 1 and 30 days after ``last_day``.
    IndexError
        If ``df`` holds fewer forecast rows for ``town`` than requested.
    """
    def _to_date(day):
        # Fixed-position slicing of a YYYY-MM-DD string.
        return date(int(day[:4]), int(day[5:7]), int(day[8:]))

    if last_day is None:
        last_day = get_last_day_from_dataset()          # last day from temperature dataset

    days_in_future = (_to_date(date_in_future) - _to_date(last_day)).days
    print(days_in_future)
    assert 1 <= days_in_future <= 30, "Date value should be within 1 and 30"

    # Use the frame that was passed in (not a notebook global) and keep only the
    # requested town. Boolean indexing returns a new object, so `df` is not mutated.
    town_forecast = df[df["town"] == town]

    # Day 1 is the first forecast row, hence the -1; column position 0 holds the
    # forecast value. `.iloc` replaces DataFrame.get_value, which was removed in pandas 1.0.
    return town_forecast.iloc[days_in_future - 1, 0]

In [105]:
def get_last_day_from_dataset(path="dataset/temp/Miami.json"):
    """Return the last date key stored in a dataset JSON file.

    Parameters
    ----------
    path : str, optional
        JSON file whose ``"data"`` entry maps date strings to readings.
        Defaults to the Miami temperature dataset.

    Returns
    -------
    str
        The last key of the ``"data"`` mapping, in file order.
        Presumably the keys are stored chronologically, making this the most
        recent day — verify against the code that writes the dataset.

    Raises
    ------
    IndexError
        If the ``"data"`` mapping is empty.
    """
    # Context manager so the file handle is closed after parsing.
    with open(path) as dataset_file:
        readings = json.load(dataset_file)["data"]
    return list(readings.keys())[-1]

In [106]:
# Look up the Miami forecast for 2022-02-18; the call also prints how many days
# that date lies after the last day in the dataset.
get_prediction(df_open, "2022-02-18")

9




80.94721606283579

In [95]:
# Show the last date key stored in dataset/temp/Miami.json.
get_last_day_from_dataset()

'2022-02-09'

In [103]:
# Inspect the loaded forecast table (forecast value in column 0, plus lat, long and town).
df_open

Unnamed: 0,0,lat,long,town
15005,79.862227,25.76232961361461,-80.19114735100034,Miami
15006,81.648479,25.76232961361461,-80.19114735100034,Miami
15007,81.103229,25.76232961361461,-80.19114735100034,Miami
15008,79.948219,25.76232961361461,-80.19114735100034,Miami
15009,79.579049,25.76232961361461,-80.19114735100034,Miami
15010,80.078869,25.76232961361461,-80.19114735100034,Miami
15011,81.774671,25.76232961361461,-80.19114735100034,Miami
15012,81.006023,25.76232961361461,-80.19114735100034,Miami
15013,80.947216,25.76232961361461,-80.19114735100034,Miami
15014,80.993954,25.76232961361461,-80.19114735100034,Miami
