# predicting timing of data

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# IPython automagic `cd`: the relative "../data/..." paths used below are resolved from this folder.
cd drive/MyDrive/'Colab Notebooks'/eecs545/twinning

In [None]:
import sklearn as sk
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn 
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

# Seed every random number generator the notebook draws from. NumPy's global
# state is what sklearn's train_test_split falls back to when random_state is
# not given; PyTorch's generator controls weight initialisation, Dropout masks
# and DataLoader shuffling, so it has to be seeded as well for repeatable runs.
np.random.seed(1234)
torch.manual_seed(1234)

%matplotlib inline
def split(X, Y):
    """Partition paired samples into train, validation and test subsets.

    The data is divided in two stages: 70% goes to training, and the
    remaining hold-out pool is cut in half, giving a test part and a
    validation part.

    Args:
        X: input samples.
        Y: targets aligned with ``X``.

    Returns:
        ``(Xtrain, Xvalid, Xtest, Ytrain, Yvalid, Ytest)`` in that order.
    """
    # Stage 1: training portion versus hold-out pool.
    X_fit, X_holdout, Y_fit, Y_holdout = train_test_split(
        X, Y, train_size=0.7
    )
    # Stage 2: first half of the pool is the test set, second half validation.
    X_eval, X_val, Y_eval, Y_val = train_test_split(
        X_holdout, Y_holdout, train_size=0.5
    )
    return X_fit, X_val, X_eval, Y_fit, Y_val, Y_eval

def MAPE(true, pred):
    """Return one scaled absolute error per sample.

    For every row the score is ``mean(|pred_row - true_row|) / mean(true_row)``,
    i.e. the mean absolute error of the row normalised by the mean of its
    target values.

    Args:
        true: iterable of target rows (NumPy arrays or tensors).
        pred: iterable of predicted rows, aligned row-for-row with ``true``.

    Returns:
        A list of Python floats, one per row.
    """
    # Score against the ``true`` argument, not the notebook-global ``y_test``,
    # so the function is correct for whatever targets the caller passes in.
    return [
        (abs(pred_row - true_row).mean() / true_row.mean()).item()
        for true_row, pred_row in zip(true, pred)
    ]

# Number of input features: the in_features of the MLP's first Linear layer.
input_size = 6
# Length of the predicted vector: the out_features of the MLP's last Linear layer.
output_size = 500

In [None]:
class MLP(nn.Module):
  '''
    Multilayer Perceptron for regression.

    Three hidden stages (Linear -> BatchNorm1d -> Dropout(0.5) -> ReLU) of
    widths 64, 32 and 32, followed by a linear output layer.

    Args:
      in_features: number of input features. When omitted, the module-level
        ``input_size`` is used.
      out_features: number of regression outputs. When omitted, the
        module-level ``output_size`` is used.
  '''
  def __init__(self, in_features=None, out_features=None):
    super().__init__()
    # Defaults are resolved at call time so that a plain ``MLP()`` keeps using
    # the notebook-level sizes, while other shapes can be requested explicitly.
    if in_features is None:
      in_features = input_size
    if out_features is None:
      out_features = output_size
    self.layers = nn.Sequential(
      nn.Linear(in_features, 64),
      nn.BatchNorm1d(64),
      nn.Dropout(0.5),
      nn.ReLU(),
      nn.Linear(64, 32),
      nn.BatchNorm1d(32),
      nn.Dropout(0.5),
      nn.ReLU(),
      nn.Linear(32, 32),
      nn.BatchNorm1d(32),
      nn.Dropout(0.5),
      nn.ReLU(),
      nn.Linear(32, out_features)
    )


  def forward(self, x):
    '''
      Forward pass: feed ``x`` through the layer stack and return the result.
    '''
    return self.layers(x)

def train_model(dataloader, model, loss_fn, optimizer, epochs):
    """Run one optimisation pass over every batch of ``dataloader``.

    Args:
        dataloader: yields ``(inputs, targets)`` batches and exposes
            ``.dataset`` (its length is used in the progress line).
        model: module to optimise; it is put into training mode.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar
            tensor.
        optimizer: optimiser stepped once per batch.
        epochs: value tested with ``% 10`` to decide whether this pass logs;
            the training loop passes the current epoch index here.
    """
    total_samples = len(dataloader.dataset)
    model.train()
    for step, (inputs, targets) in enumerate(dataloader):
        # Forward pass and loss for this batch.
        batch_loss = loss_fn(model(inputs), targets)

        # Clear stale gradients, back-propagate, update the weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Progress is reported on every 10th batch of every 10th pass.
        if epochs % 10 == 0 and step % 10 == 0:
            loss_value = batch_loss.item()
            seen = step * len(inputs)
            print(f"Train loss: {loss_value:>7f}  [{seen:>5d}/{total_samples:>5d}]")

def test_model(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and report the scaled average loss.

    The model is switched to evaluation mode and run without gradient
    tracking. The per-batch losses are averaged over the number of batches
    and then divided by the module-level ``output_size``.

    Args:
        dataloader: yields ``(inputs, targets)`` batches.
        model: module to evaluate; left in evaluation mode afterwards.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar
            tensor.

    Returns:
        The value that is printed, so callers can record it if they wish.
    """
    num_batches = len(dataloader)
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for X, y in dataloader:
            test_loss += loss_fn(model(X), y).item()
    test_loss /= num_batches
    # NOTE(review): if loss_fn already averages over output elements (the
    # default for nn.MSELoss), this second division shrinks the figure by a
    # further factor of output_size. Confirm this scaling is intended.
    test_loss /= output_size
    print(f"Val Error: \n Avg loss: {test_loss:>8f} \n")
    return test_loss

def predict_model(X, model):
    """Return ``model(X)`` computed in evaluation mode without gradients.

    Dropout and BatchNorm behave differently in training mode, and
    ``train_model`` leaves the network in training mode, so the module is
    switched to evaluation mode for the forward pass.

    Args:
        X: input tensor accepted by ``model``.
        model: the ``nn.Module`` to run.

    Returns:
        The model output, detached from the autograd graph.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            return model(X)
    finally:
        # Hand the module back in the mode the caller left it in.
        model.train(was_training)

## Prepare data set

In [None]:
# Load the scalar results table and keep only its first six columns as the
# model inputs.
data = pd.read_csv("../data/transient_pred/scalar_res.csv")
data = data.iloc[:, 0:6]
# Standardise every input column.
sscaler = StandardScaler()
# Alternative scaler kept for experimentation:
# sscaler = MinMaxScaler()
# NOTE(review): the scaler is fitted on all rows before any train/validation/test
# split is made, so held-out rows contribute to the scaling statistics.
# Confirm this is acceptable for the reported scores.
X = sscaler.fit_transform(data)

In [None]:
data.shape

In [None]:
pred_Data = pd.read_csv("../data/transient_pred/P1_log.csv")
pred_Data.shape

In [None]:
# Names of the target quantities. The training loop reads each one from
# "../data/transient_pred/<name>.csv".
valiables = ["h1_log",
             "h2_log",
             "Eturb_log",
             "omega_log",
             "P1_log",
             "P2_log",
            "Pout_log",
             "rho1_log",
             "rho2_log",
             "T1_log",
             "T2_log",
             "Tboil_log",
             "x1_log",
             "x2_log"
             ]

In [None]:
# Train one MLP per target variable and collect its test-set metrics.
total_mse = []   # per variable: sqrt(MSE / output_size) on the test split
total_mape = []  # per variable: mean of the per-sample MAPE scores
score = []       # per variable: list of per-sample MAPE scores
for val in valiables:
    path = "../data/transient_pred/" +  val + ".csv"
    pred_Data = pd.read_csv(path)
    # Keep as many rows as there are input rows and one column per output.
    Y = pred_Data.iloc[0:len(data), 0:output_size].values
    Xtrain, Xvalid, Xtest, Ytrain, Yvalid, Ytest = split(X, Y)
    X_train = torch.from_numpy(Xtrain.astype(np.float32))
    y_train = torch.from_numpy(Ytrain.astype(np.float32))
    X_valid = torch.from_numpy(Xvalid.astype(np.float32))
    y_valid = torch.from_numpy(Yvalid.astype(np.float32))
    X_test = torch.from_numpy(Xtest.astype(np.float32))
    y_test = torch.from_numpy(Ytest.astype(np.float32))

    train = TensorDataset(X_train,y_train)
    valid = TensorDataset(X_valid,y_valid)
    test = TensorDataset(X_test,y_test)

    trainloader = DataLoader(train, batch_size=300, shuffle=True, num_workers=1)
    validloader = DataLoader(valid, batch_size=300, shuffle=False, num_workers=1)
    testloader = DataLoader(test, batch_size=300, shuffle=False, num_workers=1)

    # A fresh network and optimiser for every variable.
    mlp = MLP()
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-2)
    epochs = 50
    for t in range(epochs):
        if (t % 10 ==0):
            print(f"Epoch {t+1}\n-------------------------------")
        train_model(trainloader, mlp, loss_function, optimizer, t)
        if (t % 10 ==0):
            test_model(validloader, mlp, loss_function)

    # Validation only runs on every 10th epoch, so the last epoch leaves the
    # network in training mode. Switch to evaluation mode before scoring so
    # that Dropout is disabled and BatchNorm uses its running statistics.
    mlp.eval()
    # One gradient-free forward pass feeds both metrics.
    pred = predict_model(X_test, mlp)
    # NOTE(review): nn.MSELoss already averages over all elements; the extra
    # division by output_size is kept to leave the reported scale unchanged.
    total_mse.append(((loss_function(pred, y_test) / output_size) ** 0.5).item())
    sample_scores = MAPE(y_test, pred)
    total_mape.append(np.mean(sample_scores))
    score.append(sample_scores)
    print(val + " Done!")

In [None]:
total_mse

In [None]:
total_mape

In [None]:
np.array(score)

In [None]:
# Overlay target and prediction for the sample at index 10 of the test split.
# y_test and pred hold whatever the most recent run of the training loop left
# behind.
import matplotlib.pyplot as plt
t = range(output_size)
plt.plot(t, y_test[10])
plt.plot(t, pred[10])

In [None]:
# Box plot of the per-sample MAPE scores for the first 11 variables.
# The font size is set before the figure is built: rcParams only affect text
# created afterwards, so setting it last had no effect on the first run.
plt.rcParams['font.size'] = '15'
fig, ax = plt.subplots(figsize=(15, 8))
medianprops = dict(linewidth=3, color='red')
meanprops = dict(marker='X', markerfacecolor='black', markersize=9,markeredgecolor='none')
bp = ax.boxplot(score[0:11], labels = valiables[0:11], medianprops=medianprops, showmeans=True, meanprops=meanprops)
# Clip the y axis to [0, 2]; anything larger falls outside the visible range.
ax.set_ylim([0, 2])
# plt.title('Prediction Score by Variables(MAPE)')
ax.set_xlabel('Variable Name')
ax.set_ylabel('Score(MAPE)')
plt.tight_layout()
ax.grid()

In [None]:
np.mean( score[11])