In [14]:
# IMPORT PACKAGES
# NOTE(review): the working directory is switched to an absolute, machine-specific
# project path before the project-local `Modeling.Utilities` import below and
# before any relative "Data/..." path is used later in the notebook. Presumably
# both rely on this directory being the project root — confirm, and adjust the
# path when running on another machine.
import numpy as np
import os
os.chdir("/projects/ml4science/LakeBeD-US/LakeBeD-US-CSE-Benchmark/")
import pandas as pd
import pickle
from pypots.imputation import SAITS
import random
import torch
from Modeling.Utilities import utilities

In [15]:
# Read the Mendota CSV (path is relative to the current working directory) and
# convert its "datetime" column from text to pandas timestamps.
# NOTE(review): the rendered table shows an "Unnamed: 0" column, i.e. the CSV
# carries a saved index. Consider `index_col=0` once it is confirmed that
# utilities.prepare_data does not depend on that column.
data = pd.read_csv("Data/2-Data-Harmonization/Mendota_2006_2023.csv")
data = data.assign(datetime=pd.to_datetime(data["datetime"]))

# Bare last expression: show the (truncated) frame as the cell output.
data

Unnamed: 0,datetime,chla_rfu,do,par,phyco,temp
0,2006-06-28,,,,,22.22
1,2006-06-29,,,,,22.34
2,2006-06-30,,,,,22.84
3,2006-07-01,,,,,22.40
4,2006-07-02,,,,,22.40
...,...,...,...,...,...,...
6349,2023-11-15,1.21,9.23,0.0,0.42,9.33
6350,2023-11-16,1.52,9.51,0.0,0.46,9.36
6351,2023-11-17,1.51,9.50,0.0,0.45,9.32
6352,2023-11-18,1.67,9.58,0.0,0.46,9.15


In [16]:
# Length of the input window; reused below as SAITS' `n_steps`.
lookback_window = 21

# Hand the full table to the project helper, which returns the three data
# splits plus the standardization parameters. The three fractions sum to 1.0.
# NOTE(review): the exact structure of the returned objects is defined in
# Modeling.Utilities — later cells index them with "windowed_data" and
# "windowed_data_imputed".
(
    training_data,
    validation_data,
    testing_data,
    standardization_parameters,
) = utilities.prepare_data(
    data=data,
    input_features=["chla_rfu", "par", "phyco", "do", "temp"],
    target_features=["do", "temp"],
    training_fraction=0.8,
    validation_fraction=0.1,
    testing_fraction=0.1,
    lookback_window=lookback_window,
    horizon_window=14,
)

In [17]:
# Fix one seed for every random number generator used in this notebook so that
# model initialisation and training are repeatable.
seed = 42

random.seed(seed)        # Python's built-in RNG
np.random.seed(seed)     # NumPy's legacy global RNG
torch.manual_seed(seed)  # PyTorch default generator

if torch.cuda.is_available():
    # Seeds the generator of every visible GPU, the current device included,
    # so a separate per-device call is not needed.
    torch.cuda.manual_seed_all(seed)

In [18]:
# Build the SAITS imputation model.
# - n_steps is tied to the lookback window so the model matches the window length.
# - n_features must equal the number of input features handed to
#   utilities.prepare_data (five are listed there); update both together.
# - d_model equals n_heads * d_k (4 * 64 = 256).
# - The device falls back to the CPU when no CUDA GPU is available, so this cell
#   no longer fails on CPU-only machines (the seeding cell already guards on
#   torch.cuda.is_available()).
# - Model files and TensorBoard logs are written under `saving_path`.
saits = SAITS(
    n_steps=lookback_window,
    n_features=5,
    n_layers=2,
    d_model=256,
    n_heads=4,
    d_k=64,
    d_v=64,
    d_ffn=128,
    dropout=0.1,
    epochs=50,
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    batch_size=32,
    saving_path="Data/3-Data-Imputation",
    verbose=True,
)

2024-11-01 23:36:57 [INFO]: Using the given device: cuda:0
2024-11-01 23:36:57 [INFO]: Model files will be saved to Data/3-Data-Imputation/20241101_T233657
2024-11-01 23:36:57 [INFO]: Tensorboard file will be saved to Data/3-Data-Imputation/20241101_T233657/tensorboard
2024-11-01 23:36:58 [INFO]: SAITS initialized with the given hyperparameters, the number of trainable parameters: 1,324,719


In [19]:
# Train SAITS on the training windows only. No validation set is passed, so
# training runs for the configured number of epochs.
saits.fit(train_set={"X": training_data["windowed_data"]["X"]})

2024-11-01 23:37:01 [INFO]: Epoch 001 - training loss: 0.7069
2024-11-01 23:37:04 [INFO]: Epoch 002 - training loss: 0.4987
2024-11-01 23:37:06 [INFO]: Epoch 003 - training loss: 0.4383
2024-11-01 23:37:09 [INFO]: Epoch 004 - training loss: 0.4031
2024-11-01 23:37:12 [INFO]: Epoch 005 - training loss: 0.3730
2024-11-01 23:37:15 [INFO]: Epoch 006 - training loss: 0.3531
2024-11-01 23:37:18 [INFO]: Epoch 007 - training loss: 0.3415
2024-11-01 23:37:21 [INFO]: Epoch 008 - training loss: 0.3355
2024-11-01 23:37:24 [INFO]: Epoch 009 - training loss: 0.3295
2024-11-01 23:37:27 [INFO]: Epoch 010 - training loss: 0.3209
2024-11-01 23:37:29 [INFO]: Epoch 011 - training loss: 0.3177
2024-11-01 23:37:31 [INFO]: Epoch 012 - training loss: 0.3210
2024-11-01 23:37:34 [INFO]: Epoch 013 - training loss: 0.3079
2024-11-01 23:37:37 [INFO]: Epoch 014 - training loss: 0.3056
2024-11-01 23:37:39 [INFO]: Epoch 015 - training loss: 0.3006
2024-11-01 23:37:42 [INFO]: Epoch 016 - training loss: 0.2999
2024-11-

In [20]:
# Fill the gaps in each split with the trained model and store the result next
# to the raw windows. Order matches the original: training, validation, testing.
# NOTE: the "windowed_data_imputed" entry must already exist in each split
# (presumably created by utilities.prepare_data); only its "X" item is written.
for split in (training_data, validation_data, testing_data):
    split["windowed_data_imputed"]["X"] = saits.impute({"X": split["windowed_data"]["X"]})

In [21]:
# Persist the three splits and the standardization parameters as pickle files,
# one file per object, in the same order as before.
for pickle_path, payload in (
    ("Data/3-Data-Imputation/Training_Data.pickle", training_data),
    ("Data/3-Data-Imputation/Validation_Data.pickle", validation_data),
    ("Data/3-Data-Imputation/Testing_Data.pickle", testing_data),
    ("Data/3-Data-Imputation/Standardization_Parameters.pickle", standardization_parameters),
):
    with open(pickle_path, "wb") as f:
        pickle.dump(payload, f)