# - Test Time

In [1]:
import pandas as pd
import numpy as np
import pickle
import warnings
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
warnings.filterwarnings("ignore")

In [2]:
# Notebook-wide preprocessing settings:
#   window - number of rows in the rolling window used for mean imputation
#   shift  - row offset handed to DataFrame.shift (-1 pulls the next row up)
nb_variables = dict(window = 4, shift = -1)

In [3]:
# ---- Testing ---- #
# Read the test features, keep only the rows indexed by 'sj', and re-index
# them by week start date so the frame is ordered chronologically.
x_test = (
    pd.read_csv('../../dengue_features_test.csv', index_col = 0)
    .loc['sj']
    .set_index('week_start_date')
)
x_test.index = pd.to_datetime(x_test.index)
x_test = x_test.sort_index()

In [4]:
# Share of missing cells before imputation.
perc = np.round(100 * x_test.isna().sum().sum() / (len(x_test) * len(x_test.columns)), 2)
print(f'- Percentage of missing values across the entire dataset {perc}% \n')

# Fill each gap with the trailing rolling mean of its own column.
# Every column of x_test is later passed to the scaler and the model, so all of
# them are features: none is a label and none may be skipped here.
# The rolling mean is computed once for the whole frame; min_periods = 1 lets a
# window with a single valid value still produce a mean.
rolling_mean = x_test.rolling(window = nb_variables['window'], min_periods = 1).mean()
x_test = x_test.fillna(rolling_mean)

# Gaps that remain are runs where the whole window was missing.
new_perc = np.round(100 * x_test.isna().sum().sum() / (len(x_test) * len(x_test.columns)), 2)
print(f'- New percentage of missing values across the entire dataset {new_perc}%')

- Percentage of missing values across the entire dataset 1.54% 

- New percentage of missing values across the entire dataset 0.03%


In [5]:
# Whatever the rolling mean could not fill is replaced by 0, then the
# missing-value share is recomputed to confirm nothing is left.
x_test = x_test.fillna(0)
remaining_nans = x_test.isna().sum().sum()
total_cells = len(x_test) * len(x_test.columns)
new_perc = np.round(100 * remaining_nans / total_cells, 2)
print(f'- Percentage of missing values across the entire dataset {new_perc}%')

- Percentage of missing values across the entire dataset 0.0%


# - Shift the test data like the training data: pair each week's features with the next week's features (suffix `_tp1`)

In [6]:
# Shifted copy of the features: with shift = -1 each row holds the values of
# the following row. The trailing rows emptied by the shift are sliced off.
shift_steps = nb_variables['shift']
df_tp1 = x_test.shift(shift_steps).iloc[:shift_steps, :]

In [7]:
# Tag every shifted column with a `_tp1` suffix so the names do not clash with
# the unshifted columns once both frames are concatenated. add_suffix returns a
# new frame, which avoids one in-place rename per column on a frame that was
# derived from an iloc slice.
df_tp1 = df_tp1.add_suffix('_tp1')

In [8]:
# Place the current-row features next to their `_tp1` counterparts.
# x_test is trimmed with the same configured shift that produced df_tp1, so the
# two frames keep matching row counts if nb_variables['shift'] changes
# (a negative shift is assumed, as configured).
new_df = pd.concat([x_test.iloc[:nb_variables['shift'], :], df_tp1], axis = 1)

In [9]:
# Sanity check: (rows, columns) of the combined current + `_tp1` feature frame.
new_df.shape

(259, 44)

In [11]:
# Not referenced by name below; presumably the pickled object is a fitted
# StandardScaler, which needs scikit-learn importable to be unpickled.
from sklearn.preprocessing import StandardScaler

# NOTE(security): pickle.load can execute arbitrary code - only load scaler
# files that come from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open('../models/sj_scaler', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Apply the stored scaling to the test features. By default scikit-learn
# transformers return a NumPy array, so `new_df` stops being a DataFrame here.
new_df = scaler.transform(new_df)

# - Load the Model

In [12]:
# Load the saved network and score the scaled test features.
model = load_model('../models/sj_mlp.keras')
raw_output = model.predict(new_df)

# Round to whole numbers, cast to int32, then floor negative values at zero.
predictions = np.maximum(raw_output.round().astype('int32'), 0)
predictions.shape



2023-01-27 18:01:00.414078: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


(259, 1)

- Append one extra prediction, the rounded mean of the last 4 predictions, so that there is one prediction per test row (the shift step dropped the final row)

In [13]:
# Rounded mean of the last four predictions, used as the stand-in value for
# the row that was dropped when the features were shifted.
tail_mean = predictions[-4:].mean()
value = np.round(tail_mean).astype('int32')

# np.append flattens its inputs, so the result is a 1-D array one element longer.
# Note: running this cell again appends yet another value.
predictions = np.append(predictions, value)
predictions.shape

(260,)

In [14]:
# Skeleton of the submission: the 'sj' rows of the test file, reduced to the
# two identifying columns. The rows keep the file's original order.
submission = (
    pd.read_csv('../../dengue_features_test.csv', index_col = 0)
    .loc['sj'][['year', 'weekofyear']]
)
submission.head()

Unnamed: 0_level_0,year,weekofyear
city,Unnamed: 1_level_1,Unnamed: 2_level_1
sj,2008,18
sj,2008,19
sj,2008,20
sj,2008,21
sj,2008,22


In [15]:
# One prediction is required per submission row; fail loudly on a mismatch
# instead of letting pandas misalign or pad the values.
assert len(predictions) == len(submission), (
    f'{len(predictions)} predictions for {len(submission)} submission rows'
)

# Keep `predictions` as a Series that shares the submission index.
predictions = pd.Series(np.asarray(predictions), index = submission.index)

# Attach the values by position. The index label 'sj' is repeated on every row,
# so label-based alignment carries no information here. assign() overwrites an
# existing `total_cases` column, which makes this cell safe to re-run.
submission = submission.assign(total_cases = predictions.to_numpy())

In [16]:
# Preview the first rows of the submission frame, now including `total_cases`.
submission.head()

Unnamed: 0_level_0,year,weekofyear,total_cases
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
sj,2008,18,0
sj,2008,19,6
sj,2008,20,0
sj,2008,21,10
sj,2008,22,4


In [17]:
# Write the submission to a CSV file in the current working directory; the
# frame's index is written as the first column.
output_path = 'sj_MLP_predictions.csv'
submission.to_csv(output_path)