In [10]:
import numpy as np 
import pandas as pd 

from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit #for data preprocessing and cross-validating
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import HistGradientBoostingRegressor

from statistics import mean
from hyperopt import Trials, hp, fmin, tpe, STATUS_OK, space_eval #for hyperparameter tuning and minimizing

from cyclic_boosting.pipelines import pipeline_CBClassifier

from datetime import date
from datetime import datetime
from datetime import timedelta

import tensorflow as tf
import keras
import keras.layers as layers
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical
from keras.optimizers import SGD 
from keras.callbacks import EarlyStopping
from keras.losses import MeanSquaredError
from keras.layers import LSTM

import itertools

from sklearn.discriminant_analysis import StandardScaler


import seaborn as sns 

from scipy.stats import boxcox 
from scipy.special import inv_boxcox

from termcolor import colored

import joblib
from tqdm import tqdm 


In [11]:
# Load the pre-trained models from disk. Later cells index this object as
# modelDict[building_id][value_type_id - 1].
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load 'modelDict.pkl' if it comes from a trusted source.
modelDict = joblib.load('modelDict.pkl')


In [32]:
# Load the raw inputs; every path is relative to the notebook's working directory.
samples = pd.read_csv('preprocessed_lstm.csv')
building_id = pd.read_csv('devices.csv')  # merged on 'device_id' in the next cell
# test.csv has no header row, so the column names are supplied at read time.
test = pd.read_csv(
    'test.csv',
    header=None,
    names=['device_id', 'date', 'value_type_id'],
)
reading_types = pd.read_csv('reading_types.csv')

# Work on an independent copy: a plain `predictions = test` only creates an
# alias, so in-place edits of `predictions` would silently modify `test` too.
predictions = test.copy()

In [33]:
# Name the columns on the raw `test` frame and rebuild `predictions` from it on
# every run. Renaming `predictions` in place only worked once: after the merge
# it has more than three columns, so re-running the cell raised a
# length-mismatch error.
test.columns = ['device_id', 'date', 'value_type_id']
samples['date'] = pd.to_datetime(samples['date'])

# Attach the columns of `building_id` (devices.csv) to every requested reading.
# NOTE(review): how='inner' drops test rows whose device_id is missing from
# devices.csv -- confirm that this is intended for the final output.
predictions = pd.merge(test, building_id, on='device_id', how='inner')
predictions['date'] = pd.to_datetime(predictions['date'])
# Floor to the start of the hour; 'h' replaces the alias 'H', which is
# deprecated since pandas 2.2.
predictions['floored_date'] = predictions['date'].dt.floor('h')

In [23]:
predictions.info()
# tqdm takes the expected number of iterations as `total`; it has no `len`
# keyword and raises on unknown keyword arguments.
# NOTE(review): none of the visible cells call pregressBar.update(), so the
# bar never advances as written.
pregressBar = tqdm(total=len(predictions))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048576 entries, 0 to 1048575
Data columns (total 5 columns):
 #   Column         Non-Null Count    Dtype         
---  ------         --------------    -----         
 0   device_id      1048576 non-null  int64         
 1   date           1048576 non-null  datetime64[ns]
 2   value_type_id  1048576 non-null  int64         
 3   building_id    1048576 non-null  int64         
 4   floored_date   1048576 non-null  datetime64[ns]
dtypes: datetime64[ns](2), int64(3)
memory usage: 40.0 MB


In [34]:
def get_predictions(row, verbose=True):
    """Predict one reading for a single row of the ``predictions`` frame.

    The function reads the module-level ``samples``, ``modelDict`` and (when
    ``verbose`` is true) ``reading_types`` objects.

    Parameters
    ----------
    row : pandas.Series
        Must provide ``'value_type_id'``, ``'floored_date'`` and
        ``'building_id'``.
    verbose : bool, default True
        When true, print the reading type, the first input row and the
        prediction, as the function always did. Pass ``False`` when applying
        the function to many rows to avoid flooding the cell output.

    Returns
    -------
    float
        The first model output, mapped back through the scaler, or ``nan``
        when ``samples`` holds no row for the building in the hour before
        ``row['floored_date']``.
    """
    # Local names deliberately avoid `type` and `date`, which would shadow the
    # builtin and the `datetime.date` import.
    value_type = row['value_type_id']
    target_hour = row['floored_date']
    building = row['building_id']
    if verbose:
        print(reading_types.at[value_type - 1, 'reading_type_name'], target_hour, building, '=' * 80)

    # Samples of this building inside [target_hour - 1h, target_hour).
    window_start = target_hour - timedelta(hours=1)
    in_window = (
        (samples['building_id'] == building)
        & (samples['date'] >= window_start)
        & (samples['date'] < target_hour)
    )
    inputs = samples.loc[in_window]

    if verbose:
        print(inputs.head(1))

    # Without any sample the scaler cannot be fitted (it raises on zero rows);
    # report a missing value instead of aborting the whole apply().
    if inputs.empty:
        return np.nan

    # Models are stored per building; the list position is value_type_id - 1.
    model = modelDict[building][value_type - 1]

    # Only the column named after the value type is fed to the model, so there
    # is no need to drop the other columns first.
    inputs = inputs[[str(value_type)]]

    # NOTE(review): the scaler is fitted on the inference window itself. For a
    # one-row window StandardScaler removes the mean and leaves the scale at 1,
    # so the model appears to always receive 0 and the input value is simply
    # added back by inverse_transform. Confirm whether the scaler fitted
    # during training should be loaded and used here instead.
    scaler = StandardScaler()
    scaler = scaler.fit(inputs)
    inputs_scaled = scaler.transform(inputs)

    prediction = model.predict(inputs_scaled)

    # `prediction` is indexed as a 2-D array; keep only its first element.
    pred = scaler.inverse_transform(prediction)[0, 0]

    if verbose:
        print(pred)

    return pred





In [35]:
# Number of rows to predict while iterating on the notebook.
# NOTE: `predictions` is overwritten here, so everything downstream (including
# the exported CSV) only contains these rows.
N_PREVIEW_ROWS = 5

# .copy() turns the head() slice into an independent frame; assigning a new
# column to the bare slice triggers pandas' SettingWithCopyWarning.
predictions = predictions.head(N_PREVIEW_ROWS).copy()
predictions['value'] = predictions.apply(get_predictions, axis=1)

       Unnamed: 0                date  building_id      1    2     3    4  \
56009        3069 2023-05-12 13:00:00            1  480.0  0.2  35.5  0.0   

         5    6         7  ...    10         11        12  day type   Fall  \
56009  0.0  0.0  9.214286  ...  20.2  18.866667  53.20625         1  False   

       Spring  Summer  Winter  trimester_day  working_hour  
56009    True   False   False             72             5  

[1 rows x 22 columns]
480.27908
       Unnamed: 0                date  building_id      1    2     3    4  \
56009        3069 2023-05-12 13:00:00            1  480.0  0.2  35.5  0.0   

         5    6         7  ...    10         11        12  day type   Fall  \
56009  0.0  0.0  9.214286  ...  20.2  18.866667  53.20625         1  False   

       Spring  Summer  Winter  trimester_day  working_hour  
56009    True   False   False             72             5  

[1 rows x 22 columns]
0.37800542
       Unnamed: 0                date  building_id      1    2   

In [17]:
# Remove the two helper columns that are not part of the exported file.
predictions = predictions.drop(columns=['floored_date', 'building_id'])

# Write the result without a header row; the row index is still written as the
# first column because to_csv's `index` option is left at its default.
predictions.to_csv(path_or_buf='predictions.csv', header=False)