In [1]:
import pandas as pd
from torch.utils.data import DataLoader
import numpy as np
import tft_model
import ts_dataset
import expt_settings.configs
import importlib
from data_formatters import utils
import torch
import os
import pickle

In [2]:
ExperimentConfig = expt_settings.configs.ExperimentConfig
config = ExperimentConfig('gas_production', 'outputs')
data_formatter = config.make_data_formatter()
data_csv_path = config.data_csv_path
test_csv_path = os.path.join(config.data_folder, 'GasProductionTFTTest.csv')
train_csv_path = os.path.join(config.data_folder, 'GasProductionTFTTrain.csv')
valid_csv_path = os.path.join(config.data_folder, 'GasProductionTFTValid.csv')

raw_data = pd.read_csv(data_csv_path, index_col=0)
train = pd.read_csv(train_csv_path)
valid = pd.read_csv(valid_csv_path)
test = pd.read_csv(test_csv_path)
# Sets up default params
data_formatter.set_scalers(train, set_real=True)
# Use all data for label encoding  to handle labels not present in training.
data_formatter.set_scalers(raw_data, set_real=False)
test_transformed = data_formatter.transform_inputs(test)

In [3]:
fixed_params = data_formatter.get_experiment_params()
params = data_formatter.get_default_model_params

fixed_params.update(params)
fixed_params['batch_first'] = True
fixed_params['name'] = 'test'
fixed_params['device'] = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
fixed_params['minibatch_size'] = 256
fixed_params['quantiles'] = [0.5]

In [14]:
model = tft_model.TFT(fixed_params).to(fixed_params['device'])
model.load_state_dict(config.model_folder + '/gas_production_best_model_loss.pth')
model.eval()

{'total_time_steps': 35, 'num_encoder_steps': 28, 'num_epochs': 100, 'early_stopping_patience': 5, 'multiprocessing_workers': 5, 'column_definition': [('WellNo', <DataTypes.CATEGORICAL: 1>, <InputTypes.ID: 4>), ('Date', <DataTypes.DATE: 2>, <InputTypes.TIME: 5>), ('Daily_104m3', <DataTypes.REAL_VALUED: 0>, <InputTypes.TARGET: 0>), ('WellHeadPressure', <DataTypes.REAL_VALUED: 0>, <InputTypes.OBSERVED_INPUT: 1>), ('CasingHeadPressure', <DataTypes.REAL_VALUED: 0>, <InputTypes.OBSERVED_INPUT: 1>), ('WellHeadTemperature', <DataTypes.REAL_VALUED: 0>, <InputTypes.OBSERVED_INPUT: 1>), ('Daily_h', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('Elapsed', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('ElapsedProduction', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('Allocation', <DataTypes.REAL_VALUED: 0>, <InputTypes.STATIC_INPUT: 3>), ('Cluster', <DataTypes.CATEGORICAL: 1>, <InputTypes.STATIC_INPUT: 3>)], 'input_size': 9, 'output_size': 1, 'category_count

TypeError: Expected state_dict to be dict-like, got <class 'str'>.

In [None]:
test_ds = ts_dataset.TSDataset(fixed_params, test_transformed, num_samples=-1)
test_loader = DataLoader(
            test_ds,
            batch_size=fixed_params['minibatch_size'],
            num_workers=4,
            shuffle=False
        )

In [None]:
dfs = []
for idx, batch in enumerate(test_loader):
    with torch.no_grad():
        output, all_inputs, attention_components = model(batch['inputs'])
        flat_prediction = pd.DataFrame(
          output.detach().cpu().numpy()[:, :, 0],
          columns=[
              't+{}'.format(i)
              for i in range(18)
          ])
        cols = list(flat_prediction.columns)
#         flat_prediction['forecast_time'] = batch['time'][:, 54 - 1, 0]
        flat_prediction['identifier'] = batch['identifier'][0][0].detach().cpu().numpy()
        dfs.append(flat_prediction)

In [None]:
all_predictions = pd.concat(dfs)

In [None]:
all_predictions_unnormalized = data_formatter.format_predictions(all_predictions)

In [None]:
all_predictions_unnormalized

In [None]:
def symmetric_mean_absolute_percentage_error(forecast, actual):
    # Symmetric Mean Absolute Percentage Error (SMAPE)
    return np.mean(2 * np.abs(forecast - actual) / (np.abs(actual) + np.abs(forecast)))

In [None]:
actuals = pd.read_csv('/home/arda/Desktop/thesis/datasets/m4/Test/Monthly-test.csv')

In [None]:
np.concatenate(actuals.drop(columns=['V1']).values).shape

In [None]:
np.concatenate(all_predictions_unnormalized.drop(columns=['identifier']).values).shape

In [None]:
symmetric_mean_absolute_percentage_error(np.concatenate(all_predictions_unnormalized.drop(columns=['identifier']).values) ,np.concatenate(actuals.drop(columns=['V1']).values))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

ind = np.random.choice(128)
print(ind)
plt.plot(output[ind,:,1].detach().cpu().numpy(), label='pred')
# plt.plot(output[ind,:,1].detach().cpu().numpy(), label='pred_5')
# plt.plot(output[ind,:,2].detach().cpu().numpy(), label='pred_9')

plt.plot(batch['outputs'][ind,:,0], label='true')
plt.legend()