In [0]:
!pip install git+https://github.com/MrEthic/FAIC-Python-Toolbox.git

Collecting git+https://github.com/MrEthic/FAIC-Python-Toolbox.git
  Cloning https://github.com/MrEthic/FAIC-Python-Toolbox.git to /tmp/pip-req-build-nlrvr1wo
  Running command git clone -q https://github.com/MrEthic/FAIC-Python-Toolbox.git /tmp/pip-req-build-nlrvr1wo
  Resolved https://github.com/MrEthic/FAIC-Python-Toolbox.git to commit b2095cae0fdb96d432db4473fed458897abb6ea9
Building wheels for collected packages: faic-toolbox
  Building wheel for faic-toolbox (setup.py) ... [?25l- \ done
[?25h  Created wheel for faic-toolbox: filename=faic_toolbox-dev-py3-none-any.whl size=4929 sha256=dd4ad3c1cde83e9315a3f436580cd0fd5b063bea5824b69aaf8d39450b49df0f
  Stored in directory: /tmp/pip-ephem-wheel-cache-yf18ttdu/wheels/69/f9/e0/2ae7397209a182fa3f1c03d7e8c2d83e9c65f8d009ae7e26e4
Failed to build faic-toolbox
Installing collected packages: faic-toolbox
    Running setup.py install for faic-toolbox ... [?25l- done
[33m  DEPRECATION: faic-toolbox was installed using the l

In [0]:
import pandas as pd
import plotly.express as px
import numpy as np
from mlflow.tracking.client import MlflowClient
import mlflow.statsmodels
import mlflow.sklearn
import mlflow.xgboost
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, RegressorMixin
import mlflow.pyfunc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Normalization, Reshape, Lambda, Dropout
import tensorflow.keras.metrics
from xgboost import XGBRegressor

mlflow.statsmodels.autolog(disable=True)
mlflow.sklearn.autolog(disable=True)

## Get Data from October
We wil use the first week for training and the last 3 weeks to test the models

In [0]:
from faic_toolbox import datalake
DATALAKE_API_KEY = dbutils.secrets.get(scope="brewai-iaq-forecast", key="DATALAKE-API-KEY")
datalake.config.datalake = datalake.DatalakeConfig(
    "https://akeqm174e7.execute-api.ap-southeast-2.amazonaws.com/v1",
    DATALAKE_API_KEY
)
model_hist = datalake.pandas.dataframe_from_datalake(layer='silver', source_type='brewai', source_name='models', ts='selected', table='history.csv')
model_hist.time = pd.to_datetime(model_hist.time)
model_hist = model_hist.sort_values('time', ascending=False)
model_hist

Unnamed: 0,time,model_name,artifact_id,reward
0,2022-12-14 17:07:11.100000+11:00,moving_average,4418062098066875,1.0
1,2022-01-01 00:00:01.100000+11:00,simple_lstm,4254964064683953,-1.774894
2,2022-01-01 00:00:01.100000+11:00,moving_average,4418062098066875,1.0
3,2022-01-01 00:00:01.100000+11:00,xgboost,4226517600327584,1.0
4,2022-01-01 00:00:01.100000+11:00,dense_multilayer,988661569730021,1.0


In [0]:
DATALAKE_API_KEY = dbutils.secrets.get(scope="brewai-iaq-forecast", key="DATALAKE-API-KEY")
datalake.config.datalake = datalake.DatalakeConfig(
    "https://akeqm174e7.execute-api.ap-southeast-2.amazonaws.com/v1",
    DATALAKE_API_KEY
)
table_name, ts = ('2022oct.csv', 'exports')
df_api = datalake.pandas.dataframe_from_datalake(layer='bronze', source_type='brewai', source_name='sensors', ts=ts, table=table_name)
df_api.ts = pd.to_datetime(df_api.ts)
df_api.set_index('ts', inplace=True)

In [0]:
df_api = df_api[df_api['devid']=='B84C4503F361D64A']
df_training = df_api['2022-10-1':'2022-10-8']

## Models
- Simple LSTM
- Multilayer Dense
- XGBoost
- Moving Average Regression

In [0]:
mean = df_training['iaq'].mean()
std = df_training['iaq'].std()

def get_loss_weights(n):
    l = np.linspace(0.1,2.4,n)
    return -np.exp(l**0.9)+10

def build_lstm_model(n_cells, lookback, lookahead, mean, std, optimizer, loss):
    lstm_model = Sequential()
    lstm_model.add(Normalization(mean=mean, variance=std**2))
    lstm_model.add(Reshape((lookback, 1), input_shape=(lookback,)))
    lstm_model.add(LSTM(n_cells, return_sequences=False))
    lstm_model.add(Dense(lookahead))
    lstm_model.add(Lambda(lambda x:x*std+mean))
    #lstm_model.add(Normalization(mean=mean, variance=variance, invert=True))
    lstm_model.compile(optimizer=optimizer, loss=loss, loss_weights=get_loss_weights(lookahead))
    return lstm_model

def build_dense_model(n_cells, activation, optimizer, loss):
    model = Sequential()
    model.add(Dense(n_cells, activation=activation))
    model.add(Dropout(0.1))
    model.add(Dense(15))
    model.compile(optimizer=optimizer, loss=loss)
    return model

class CustomRegressor(BaseEstimator, RegressorMixin):
    def fit(self):
        
        return self
    
    def predict(self, data):
        
        y = []
        data = data[0]
        # Generate 10MA
        for i in range(10, len(data)):
            ma_val = sum(data[i-10:i])/10
            y.append(ma_val)
            
        X = range(len(data)-10)
        X = np.reshape(X, (len(data)-10, 1))
        pred_val = []
        timestamp_pred = []
        
        # For n predicted value, linear regression is applied on last n MA values
        for i in range(15):
            reg = LinearRegression().fit(X[-i+1:], y[-i+1:])
            pred_val.append(reg.predict([[len(X)+i+1]])[0])
        
        return np.asarray([pred_val])

In [0]:
# Build all the models
simple_lstm_model = build_lstm_model(100, 60, 15, mean, std, 'adam', 'mae')
dense_multilayer_model = build_dense_model(100, 'relu', 'adam', 'mae')
moving_average_model = CustomRegressor()
xgboost_model = XGBRegressor()

## Training
- step 1: Prepare data (windows)
- step 2: Fit all the models on first week of October

In [0]:
def window_data(lookahead, lookback, df):
    
    X = np.empty((len(df)-lookahead-lookback, lookback))
    Y = np.empty((len(df)-lookahead-lookback, lookahead))
        
    for i in range(len(df)-lookahead-lookback):
        tt = df[i:i+lookback]
        X[i]=tt
        Y[i]=df[lookback+i:lookback+i+lookahead]

    return X, Y

In [0]:
X_train, Y_train = window_data(15, 60, df_training['iaq'])

In [0]:
mlflow.tensorflow.autolog(disable=True)
simple_lstm_model.fit(X_train, Y_train, batch_size=64, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Out[10]: <keras.callbacks.History at 0x7f0380a6a040>

In [0]:
dense_multilayer_model.fit(X_train, Y_train, batch_size=64, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Out[11]: <keras.callbacks.History at 0x7f032d1ea8e0>

In [0]:
mlflow.xgboost.autolog(disable=True)
xgboost_model.fit(X_train, Y_train)

Out[12]: XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
             missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=0,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, ...)

## Testing

In [0]:
df_testing = df_api['2022-10-9':]

In [0]:
X_test, Y_test = window_data(15, 60, df_testing['iaq'])

- Simple LTSM

In [0]:
pred = simple_lstm_model.predict(X_test)



In [0]:
df_results_lstm = pd.DataFrame(columns=['iaq', 'iaq~5', 'iaq~10', 'iaq~15'])

In [0]:
for count, i in enumerate(df_testing['2022-10-09 00:59:00':'2022-10-31 23:44:00'].iaq):
    df_results_lstm = df_results_lstm.append(pd.DataFrame([[i, pred[count][4], pred[count][9], pred[count][14]]],columns=['iaq', 'iaq~5', 'iaq~10', 'iaq~15']))


In [0]:
df_results_lstm.set_index(df_testing['2022-10-09 00:59:00':'2022-10-31 23:44:00'].index, inplace=True)

In [0]:
df_results_lstm['iaq~5'] = df_results_lstm['iaq~5'].shift(5)
df_results_lstm['iaq~10'] = df_results_lstm['iaq~10'].shift(10)
df_results_lstm['iaq~15'] = df_results_lstm['iaq~15'].shift(15)

In [0]:
df_results_lstm.dropna(inplace=True)

In [0]:
def mae(df, n):
    mse = np.abs(df.iaq - df[f"iaq~{n}"]) / df.iaq
    return mse

In [0]:
df_results_lstm.reset_index(inplace=True)

In [0]:
df_gr = df_results_lstm.groupby(pd.Grouper(key='ts', axis=0, freq='H')).agg('mean')
df_gr['iaq']=df_results_lstm.groupby(pd.Grouper(key='ts', axis=0, freq='H'))['iaq'].agg('mean').values
df_gr['mae'] = df_results_lstm.apply(lambda row:(mae(row, 5) + mae(row, 10) + mae(row, 15))/3, axis=1)

In [0]:
df_gr

Unnamed: 0_level_0,iaq~5,iaq~10,iaq~15,iaq,mae
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-10-09 01:00:00+00:00,94.167358,96.612106,98.624893,90.782609,
2022-10-09 02:00:00+00:00,62.869278,65.112000,66.584976,60.533333,
2022-10-09 03:00:00+00:00,64.993782,64.784973,64.332764,67.033333,
2022-10-09 04:00:00+00:00,67.474113,66.452927,66.086174,69.283333,
2022-10-09 05:00:00+00:00,73.156975,73.548073,73.271957,72.700000,
...,...,...,...,...,...
2022-10-31 19:00:00+00:00,178.224091,179.886414,180.783661,175.966667,
2022-10-31 20:00:00+00:00,156.596375,157.608704,158.819290,156.866667,
2022-10-31 21:00:00+00:00,138.894775,144.867676,148.840271,130.101695,
2022-10-31 22:00:00+00:00,61.410217,64.863701,68.504723,58.508475,


- Multilayer Dense

In [0]:
pred_dense = dense_multilayer_model.predict(X_test)



In [0]:
df_results_dense = pd.DataFrame(columns=['iaq', 'iaq~5', 'iaq~10', 'iaq~15'])
for count, i in enumerate(df_testing['2022-10-09 00:59:00':'2022-10-31 23:44:00'].iaq):
    df_results_dense = df_results_dense.append(pd.DataFrame([[i, pred_dense[count][4], pred_dense[count][9], pred_dense[count][14]]],columns=['iaq', 'iaq~5', 'iaq~10', 'iaq~15']))


In [0]:
df_results_dense['ts']=df_testing['2022-10-09 00:59:00':'2022-10-31 23:44:00'].index
df_results_dense.dropna(inplace=True)
df_results_dense['iaq~5'] = df_results_dense['iaq~5'].shift(5)
df_results_dense['iaq~10'] = df_results_dense['iaq~10'].shift(10)
df_results_dense['iaq~15'] = df_results_dense['iaq~15'].shift(15)
df_results_dense.dropna(inplace=True)

In [0]:
df_dense_gr = df_results_dense.groupby(pd.Grouper(key='ts', axis=0, freq='H')).agg('mean')
df_dense_gr['iaq']=df_results_dense.groupby(pd.Grouper(key='ts', axis=0, freq='H'))['iaq'].agg('mean').values
df_dense_gr['mae'] = df_dense_gr.apply(lambda row:(mae(row, 5) + mae(row, 10) + mae(row, 15))/3, axis=1)
df_dense_gr

Unnamed: 0_level_0,iaq~5,iaq~10,iaq~15,iaq,mae
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-10-09 01:00:00+00:00,93.330315,96.234001,100.282738,90.782609,0.064253
2022-10-09 02:00:00+00:00,60.264584,62.762596,67.749161,60.533333,0.053490
2022-10-09 03:00:00+00:00,64.525688,63.053623,61.757729,67.033333,0.058493
2022-10-09 04:00:00+00:00,66.487297,66.798080,67.273140,69.283333,0.035081
2022-10-09 05:00:00+00:00,73.112976,73.824165,73.725594,72.700000,0.011750
...,...,...,...,...,...
2022-10-31 19:00:00+00:00,175.225082,176.400345,179.500687,175.966667,0.008921
2022-10-31 20:00:00+00:00,152.986511,154.772003,159.100891,156.866667,0.017444
2022-10-31 21:00:00+00:00,142.290848,147.921494,151.896698,130.101695,0.132727
2022-10-31 22:00:00+00:00,56.958641,60.997433,70.304085,58.508475,0.090211


- XGBoost

In [0]:
pred_xgb = xgboost_model.predict(X_test)

In [0]:
df_results_xgb = pd.DataFrame(columns=['iaq', 'iaq~5', 'iaq~10', 'iaq~15'])
for count, i in enumerate(df_testing['2022-10-09 00:59:00':'2022-10-31 23:44:00'].iaq):
    df_results_xgb = df_results_xgb.append(pd.DataFrame([[i, pred_xgb[count][4], pred_xgb[count][9], pred_xgb[count][14]]],columns=['iaq', 'iaq~5', 'iaq~10', 'iaq~15']))

In [0]:
df_results_xgb['ts']=df_testing['2022-10-09 00:59:00':'2022-10-31 23:44:00'].index
df_results_xgb.dropna(inplace=True)
df_results_xgb['iaq~5'] = df_results_xgb['iaq~5'].shift(5)
df_results_xgb['iaq~10'] = df_results_xgb['iaq~10'].shift(10)
df_results_xgb['iaq~15'] = df_results_xgb['iaq~15'].shift(15)
df_results_xgb.dropna(inplace=True)

In [0]:
df_xgb_gr = df_results_xgb.groupby(pd.Grouper(key='ts', axis=0, freq='H')).agg('mean')
df_xgb_gr['iaq']=df_results_xgb.groupby(pd.Grouper(key='ts', axis=0, freq='H'))['iaq'].agg('mean').values
df_xgb_gr['mae'] = df_xgb_gr.apply(lambda row:(mae(row, 5) + mae(row, 10) + mae(row, 15))/3, axis=1)
df_xgb_gr

Unnamed: 0_level_0,iaq~5,iaq~10,iaq~15,iaq,mae
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-10-09 01:00:00+00:00,94.512428,97.893394,101.685852,90.782609,0.079839
2022-10-09 02:00:00+00:00,66.123634,68.829308,73.614983,60.533333,0.148502
2022-10-09 03:00:00+00:00,65.454956,65.802284,64.105606,67.033333,0.028529
2022-10-09 04:00:00+00:00,68.340652,67.747337,66.483742,69.283333,0.025395
2022-10-09 05:00:00+00:00,72.799431,73.417107,72.614624,72.700000,0.004135
...,...,...,...,...,...
2022-10-31 19:00:00+00:00,178.701569,180.463394,182.049850,175.966667,0.025222
2022-10-31 20:00:00+00:00,159.856750,160.324509,163.559479,156.866667,0.027923
2022-10-31 21:00:00+00:00,136.759125,144.486664,148.020508,130.101695,0.099822
2022-10-31 22:00:00+00:00,64.943436,69.244324,74.104820,58.508475,0.186680


- Moving Average Regression

In [0]:
pred_ma = []
for i in X_test:
    pred_ma.append(moving_average_model.predict([i])[0])

In [0]:
pred_ma

Out[34]: [array([110.563     , 110.29179352, 111.8       , 111.8       ,
        110.21666667, 109.89      , 110.26      , 110.7047619 ,
        110.86785714, 110.57857143, 110.47777778, 110.21393939,
        109.78909091, 109.20827506, 108.73626374]),
 array([110.41336735, 110.1444946 , 111.7       , 111.2       ,
        111.41666667, 110.37      , 109.92      , 110.07904762,
        110.39642857, 110.52738095, 110.27555556, 110.18      ,
        109.93636364, 109.53986014, 108.99120879]),
 array([110.28636735, 110.00621369, 111.6       , 111.1       ,
        111.        , 111.13      , 110.34      , 109.88      ,
        109.925     , 110.15119048, 110.24888889, 110.01757576,
        109.92090909, 109.68974359, 109.31428571]),
 array([110.13408163, 109.86521489, 111.2       , 109.2       ,
        109.75      , 109.96      , 110.22      , 109.69619048,
        109.33571429, 109.375     , 109.58222222, 109.68787879,
        109.49909091, 109.42389277, 109.22197802]),
 array([109.966

In [0]:
df_results_ma = pd.DataFrame(columns=['iaq', 'iaq~5', 'iaq~10', 'iaq~15'])
for count, i in enumerate(df_testing['2022-10-09 00:59:00':'2022-10-31 23:44:00'].iaq):
    df_results_ma = df_results_ma.append(pd.DataFrame([[i, pred_ma[count][4], pred_ma[count][9], pred_ma[count][14]]],columns=['iaq', 'iaq~5', 'iaq~10', 'iaq~15']))

In [0]:
df_results_ma['ts']=df_testing['2022-10-09 00:59:00':'2022-10-31 23:44:00'].index
df_results_ma.dropna(inplace=True)
df_results_ma['iaq~5'] = df_results_ma['iaq~5'].shift(5)
df_results_ma['iaq~10'] = df_results_ma['iaq~10'].shift(10)
df_results_ma['iaq~15'] = df_results_ma['iaq~15'].shift(15)
df_results_ma.dropna(inplace=True)

In [0]:
df_ma_gr = df_results_ma.groupby(pd.Grouper(key='ts', axis=0, freq='H')).agg('mean')
df_ma_gr['iaq']=df_results_ma.groupby(pd.Grouper(key='ts', axis=0, freq='H'))['iaq'].agg('mean').values
df_ma_gr['mae'] = df_ma_gr.apply(lambda row:(mae(row, 5) + mae(row, 10) + mae(row, 15))/3, axis=1)
df_ma_gr

Unnamed: 0_level_0,iaq~5,iaq~10,iaq~15,iaq,mae
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-10-09 01:00:00+00:00,93.832246,94.311491,95.282728,90.782609,0.040678
2022-10-09 02:00:00+00:00,62.221111,62.572262,60.981722,60.533333,0.022991
2022-10-09 03:00:00+00:00,65.647500,65.646647,66.951099,67.033333,0.014196
2022-10-09 04:00:00+00:00,68.379722,66.371925,64.531355,69.283333,0.041217
2022-10-09 05:00:00+00:00,73.489722,74.143552,73.689505,72.700000,0.014777
...,...,...,...,...,...
2022-10-31 19:00:00+00:00,177.467500,177.467917,177.556923,175.966667,0.008699
2022-10-31 20:00:00+00:00,156.120000,154.615397,153.400549,156.866667,0.013736
2022-10-31 21:00:00+00:00,137.727401,141.115880,145.621084,130.101695,0.087519
2022-10-31 22:00:00+00:00,60.237853,58.113095,54.782082,58.508475,0.033335


## Results

In [0]:
results_df = pd.DataFrame()
results_df['xgb'] = df_xgb_gr['mae']
results_df['dense'] = df_dense_gr['mae']
results_df['lstm'] = df_gr['mae']
results_df['moving_avg'] = df_ma_gr['mae']
results_df.dropna(inplace=True)

In [0]:
# Result dataset was saved in databricks to save time
results_df=spark.sql("SELECT * FROM hive_metastore.default.model_comparison")
results_df = results_df.toPandas()

- MAE evolution during the month

In [0]:
fig = px.line(results_df, width=1000, height=600)
fig.update_layout(
    title="MAE value in October",
    xaxis_title="Date",
    yaxis_title="MAE value",
    legend_title="Models"
)
fig.show()

- MAE bar plot

In [0]:
fig = px.bar(results_df.mean(), width=800, height=600)
fig.update_layout(
    title="MAE mean value in October",
    xaxis_title="Models",
    yaxis_title="MAE",
    showlegend=False
)
fig.show()

- MAE box plot

In [0]:
import plotly.graph_objects as go
layout = go.Layout(
    autosize=False,
    width=1200,
    height=800,
    xaxis_title="MAE",
    xaxis= go.layout.XAxis(type="log")
)
fig = go.Figure(layout=layout)
# Use x instead of y argument for horizontal plot
fig.add_trace(go.Box(x=results_df['xgb'], name="XGBoost"))
fig.add_trace(go.Box(x=results_df['dense'], name="Dense Multilayer"))
fig.add_trace(go.Box(x=results_df['lstm'], name="Simple LSTM"))
fig.add_trace(go.Box(x=results_df['moving_avg'], name="Moving Average Reg"))
fig.show()

## Model Selection
Work in progress

In [0]:
model_hist = pd.DataFrame([['xgb', 0.1],['dense', 1],['lstm', 1],['moving_avg', 1]], columns=['model', 'reward'])

In [0]:
model_count = model_hist.groupby('model')['reward'].nunique()
rewards = {
    model_name: 0
    for model_name in model_hist.model.unique()
}
last_model = model_hist.iloc[0].model
for model in rewards.keys():
    rewards[model] = model_hist[model_hist.model == model].iloc[0].reward

In [0]:
for index, row in results_df.iterrows():
    model_count = model_hist.groupby('model')['reward'].nunique()
    last_reward = 1 - row[last_model]
    rewards[last_model] += last_reward/10
    model_hist.iloc[0, -1] = rewards[last_model]
    t = model_hist.shape[0]
    decision = []
    for model in rewards.keys():
        avg_reward = rewards[model] / model_count[model]
        d = avg_reward + np.sqrt(2 * np.log(t) / model_count[model])
        decision.append((model, d))
    next_model = max(decision, key=lambda x: x[1])[0]
    current = pd.DataFrame([[next_model, rewards[next_model]]], columns=model_hist.columns)
    model_hist = pd.concat([current, model_hist])
    model_hist.reset_index(drop=True, inplace=True)



In [0]:
model_hist.drop(model_hist.tail(4).index, inplace = True)

In [0]:
model_hist.index = results_df[::-1].index

In [0]:
model_hist['model'] = model_hist['model'].replace(['moving_avg'], 'ma')
model_hist

Unnamed: 0_level_0,model,reward
ts,Unnamed: 1_level_1,Unnamed: 2_level_1


In [0]:
df_t = df_results_lstm[df_results_lstm.ts.dt.floor('H').isin(model_hist[model_hist.model=='lstm'].index)]
df_t['model']='lstm'



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [0]:
final_pred = pd.DataFrame(columns=['iaq', 'iaq~5', 'iaq~10', 'iaq~15', 'model'])

df_t = df_results_lstm[df_results_lstm.ts.dt.floor('H').isin(model_hist[model_hist.model=='lstm'].index)]
df_t['model']='lstm'
final_pred = pd.concat([final_pred, df_t])

df_t = df_results_dense[df_results_dense.ts.dt.floor('H').isin(model_hist[model_hist.model=='dense'].index)]
df_t['model']='dense'
final_pred = pd.concat([final_pred, df_t])

df_t = df_results_xgb[df_results_xgb.ts.dt.floor('H').isin(model_hist[model_hist.model=='xgb'].index)]
df_t['model']='xgb'
final_pred = pd.concat([final_pred, df_t])

df_t = df_results_ma[df_results_ma.ts.dt.floor('H').isin(model_hist[model_hist.model=='ma'].index)]
df_t['model']='ma'
final_pred = pd.concat([final_pred, df_t])

final_pred.set_index('ts', inplace=True)
final_pred.sort_index()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

Unnamed: 0_level_0,iaq,iaq~5,iaq~10,iaq~15,model
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-10-09 01:14:07+00:00,100,107.059868,105.230385,106.639664,dense
2022-10-09 01:15:07+00:00,99,105.730934,105.362946,108.346519,dense
2022-10-09 01:16:07+00:00,101,106.419464,105.562256,107.395447,dense
2022-10-09 01:17:07+00:00,97,104.696793,103.970062,105.368561,dense
2022-10-09 01:18:07+00:00,98,104.703606,103.731796,107.201004,dense
...,...,...,...,...,...
2022-10-31 23:39:21+00:00,52,53.680489,55.197239,54.574409,xgb
2022-10-31 23:40:21+00:00,51,52.628426,55.425987,54.253563,xgb
2022-10-31 23:41:21+00:00,52,53.052547,54.316677,54.907326,xgb
2022-10-31 23:42:21+00:00,51,52.139561,54.512917,57.859520,xgb


In [0]:
fig = px.scatter(final_pred, y='iaq~5', color='model', width=1000, height=600)
fig.add_scatter(final_pred.iaq, mode='line')
fig.update_layout(
    title="MAE value in October",
    xaxis_title="Date",
    yaxis_title="MAE value",
    legend_title="Models"
)
fig.update_traces(marker=dict(size=4))
fig.show()

In [0]:
cp = model_hist.copy()
cp.reset_index(inplace=True)
cp['end'] = cp.ts.shift(1)

Unnamed: 0,ts,model,reward,end
0,2022-10-31 23:00:00+00:00,xgb,53.396180,NaT
1,2022-10-31 22:00:00+00:00,xgb,53.396180,2022-10-31 23:00:00+00:00
2,2022-10-31 21:00:00+00:00,xgb,53.301976,2022-10-31 22:00:00+00:00
3,2022-10-31 20:00:00+00:00,ma,53.216449,2022-10-31 21:00:00+00:00
4,2022-10-31 19:00:00+00:00,ma,53.117223,2022-10-31 20:00:00+00:00
...,...,...,...,...
534,2022-10-09 05:00:00+00:00,ma,0.682835,2022-10-09 06:00:00+00:00
535,2022-10-09 04:00:00+00:00,lstm,0.583218,2022-10-09 05:00:00+00:00
536,2022-10-09 03:00:00+00:00,lstm,0.484186,2022-10-09 04:00:00+00:00
537,2022-10-09 02:00:00+00:00,dense,0.384661,2022-10-09 03:00:00+00:00


In [0]:
fig = px.timeline(cp, x_start="ts", x_end="end", y="model", color="model")
fig.show()