## Ablation study on the KS dataset: contraction length used for training PFNN
A comparison of PFNN models trained with different contraction lengths, evaluated on short-term predictions after contraction.

In [None]:
cd ..

In [None]:
import numpy as np
import pandas as pd
import torch
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from model.utilities import *
from model.koopman_base import *
import sys
sys.path.append('./model')
from sklearn.decomposition import PCA

import numpy.random as random
from scipy.stats import gaussian_kde


# Global matplotlib text settings: 12 pt Times New Roman for every figure.
font = dict(size=12, family='Times New Roman')
matplotlib.rc('font', **font)

In [None]:
# Seed both RNGs so the random episode subsampling below is reproducible.
torch.manual_seed(0)
np.random.seed(0)

# Main
n_train = 1000 # leading episodes that form the training pool
n_test = 100 # trailing episodes held out for testing

sub = 4 # spatial subsample
S = 512 # presumably the raw spatial resolution of the data -- TODO confirm
s = S//sub # spatial size after subsampling; used as the row width in the reshapes below

T_in = 500 # first time index kept from each trajectory; earlier steps are skipped (presumably to let the trajectory reach the attractor)
T = 200 # number of time steps to extract from each trajectory in data
T_out = T_in + T
step = 1 # Seconds to learn solution operator

# Load data
predloader = MatReader('../lake/data/KS.mat')
data_raw = predloader.read_field('u')
# torch.as_tensor accepts arrays and tensors alike and, unlike torch.tensor,
# does not emit the "To copy construct from a tensor ..." UserWarning when
# read_field already returns a tensor. Nothing below mutates the data in
# place, so sharing memory with data_raw is safe.
data_tensor = torch.as_tensor(data_raw, dtype=torch.float)[...,::sub]

# randomly sample half episodes from the train data episodes
episode_samples = int(0.5*n_train)
data_sampled_train = data_tensor[torch.randperm(data_tensor[:n_train].size(0))[:episode_samples],:,:]
data_test = data_tensor[-n_test:,:,:]

# Flatten (episode, time, space) windows into rows of length s.
train_sample = data_sampled_train[:,T_in:T_out,:].reshape(-1, s)
# test_a holds the state one time step before the matching row of test_u,
# i.e. (input, target) pairs for a one-step map.
test_a = data_test[:,T_in-1:T_out-1,:].reshape(-1, s)
test_u = data_test[:,T_in:T_out,:].reshape(-1, s)
batch_size = 100
test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(test_a, test_u), batch_size=batch_size, shuffle=False)

  data_tensor = torch.tensor(data_raw, dtype=torch.float)[...,::sub]


In [None]:
# All checkpoints are mapped onto the CPU when loaded.
device = torch.device('cpu')

# Checkpoint locations, one per ablation variant. The values look like
# placeholders that must be replaced with real paths before running.
PFNN_step_100_path = 'fill_PFNN_C_lsi_1_model_path'
PFNN_step_300_path = 'fill_PFNN_C_lsi_3_model_path'
PFNN_step_500_path = 'fill_PFNN_C_lsi_5_model_path'
PFNN_step_700_path = 'fill_PFNN_C_lsi_7_model_path'
PFNN_step_900_path = 'fill_PFNN_C_lsi_9_model_path'

# Load the five models in the same order as the paths above.
(
    model_step_100,
    model_step_300,
    model_step_500,
    model_step_700,
    model_step_900,
) = [
    torch.load(checkpoint_path, map_location=device)
    for checkpoint_path in (
        PFNN_step_100_path,
        PFNN_step_300_path,
        PFNN_step_500_path,
        PFNN_step_700_path,
        PFNN_step_900_path,
    )
]

In [None]:
# generating T steps of data
def episode_l2_loss(pred, truth, n = 100):
    """Return the mean squared error over the first ``n`` entries.

    Both ``pred`` and ``truth`` are truncated to their first ``n`` entries
    along the leading axis before the element-wise difference is taken.
    The result is the mean of the squared differences (no square root),
    returned as a zero-dimensional tensor.
    """
    residual = pred[:n] - truth[:n]
    return torch.mean(torch.square(residual))

def episode_loss_collection(regressive_steps, loss_fn, test_u, pred_1, pred_2, pred_3, pred_4, pred_5):
    """Score five predictions against one reference and collect the losses.

    ``loss_fn`` is called once per prediction as
    ``loss_fn(pred, test_u, n=regressive_steps)`` and must return an object
    exposing ``.item()``.

    Returns a dict mapping 'step_100', 'step_300', 'step_500', 'step_700'
    and 'step_900' to the scalar loss of ``pred_1`` ... ``pred_5``
    respectively.
    """
    labels = ('step_100', 'step_300', 'step_500', 'step_700', 'step_900')
    predictions = (pred_1, pred_2, pred_3, pred_4, pred_5)

    # Evaluate every loss first, then convert to Python scalars.
    losses = [loss_fn(prediction, test_u, n=regressive_steps) for prediction in predictions]
    return {label: loss.item() for label, loss in zip(labels, losses)}

In [None]:
# For every prediction horizon in steps_n_list, run each of the five models
# once per test index and record its loss over that horizon.
steps_n_list = np.array([100])
columns = ['step_100', 'step_300', 'step_500', 'step_700', 'step_900']
error_df_list = {}
# Same order as `columns`.
ablation_models = (model_step_100, model_step_300, model_step_500, model_step_700, model_step_900)
for steps_n in steps_n_list:
    print('steps_n:', steps_n, 'started.')
    error_df = pd.DataFrame(columns=columns)
    for init_id in tqdm(np.arange(n_test)):
        rollouts = [
            long_prediction(net, test_a, init_id, 1, s, s, T=steps_n)
            for net in ablation_models
        ]
        # Reference data starting at row init_id*T of test_u; the loss
        # function only looks at its first steps_n rows.
        reference = test_u[int(init_id*T):]
        error_df.loc[init_id] = episode_loss_collection(
            steps_n, episode_l2_loss, reference, *rollouts
        )
    # One frame per horizon: rows are test indices, columns are models.
    error_df_list[f'step_{steps_n}'] = error_df

steps_n: 100 started.


100%|██████████| 100/100 [00:05<00:00, 17.25it/s]


In [None]:
# Summarise each horizon: take the square root of the stored per-episode
# losses, then the mean and standard deviation down each model column.
error_mean_df = pd.DataFrame(columns=columns)
error_std_df = pd.DataFrame(columns=columns)
for key, episode_losses in error_df_list.items():
    episode_rmse = np.sqrt(episode_losses)
    error_mean_df.loc[key] = episode_rmse.mean()
    error_std_df.loc[key] = episode_rmse.std()

In [None]:
# Show the mean of the square-rooted per-episode losses, one column per model.
error_mean_df

Unnamed: 0,step_100,step_300,step_500,step_700,step_900
step_100,37432.71029,2.991272,1.052192,0.963702,0.91051


In [None]:
# Value range (max - min) of each consecutive T-row window of test_u,
# one window per test episode.
range_list = np.array([
    (window.max() - window.min()).item()
    for window in (test_u[T*i:T*(i+1)] for i in range(n_test))
])
# Column-replicated copy of the ranges, shape (n_test, 6).
range_list_rep = range_list[:,None].repeat(6, axis=1)
range_mean = range_list.mean()
range_max = range_list.max()
print('range_mean:', range_mean, 'range_max:', range_max)

range_mean: 6.3963081169128415 range_max: 6.994479179382324


In [None]:
# Express the error statistics as a percentage of the mean data range.
# Multiply by 100 before dividing, matching the original operation order.
_mean_range = range_mean.item()
error_mean_percent_df = error_mean_df.mul(100).div(_mean_range)
error_std_percent_df = error_std_df.mul(100).div(_mean_range)

### NRMSE in percent (for 100-step predictions): ablation results for models trained with different relaxation-time lengths (steps)

In [None]:
# Show the mean error as a percentage of the mean data range (100 * error_mean_df / range_mean).
error_mean_percent_df

Unnamed: 0,step_100,step_300,step_500,step_700,step_900
step_100,585223.688498,46.765596,16.449982,15.066542,14.234923


In [None]:
# Show the error standard deviation as a percentage of the mean data range (100 * error_std_df / range_mean).
error_std_percent_df

Unnamed: 0,step_100,step_300,step_500,step_700,step_900
step_100,1910694.0,12.980957,1.865617,2.228398,2.236051
