In [None]:
# Standard Library
import argparse
import random

# Third-Party Libraries
# numpy / torch / torch.nn are bound explicitly because the cells below use
# `np`, `torch` and `nn` directly; previously those names were only available
# as a side effect of the star imports further down.
import networkx as nx
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset

# Custom Modules
# Kept after the third-party imports (and in their original order) so that any
# name they export still takes precedence exactly as before.
from MatrixVectorizer import *
from dataloaders import NoisyDataset
from model import *
from preprocessing import *
from train import *
from evaluation import *

In [None]:
# Reproducibility: seed Python's `random`, NumPy and PyTorch with one shared value.
random_seed = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(random_seed)

# All models and tensors in this notebook are placed on the CPU.
device = torch.device("cpu")

In [None]:
# Load the training CSVs as numpy arrays; every row is later anti-vectorized
# into one matrix.
lr_data_path = './data/lr_train.csv'
hr_data_path = './data/hr_train.csv'

lr_train_data = pd.read_csv(lr_data_path, delimiter=',').to_numpy()
hr_train_data = pd.read_csv(hr_data_path, delimiter=',').to_numpy()

# Clamp negative entries to 0 and replace NaN with 0.
# This is done out of place on purpose: `to_numpy()` may return a view of the
# DataFrame's buffer (read-only when pandas copy-on-write is active), so
# writing into it in place can fail. `np.maximum` propagates NaN, which
# `np.nan_to_num` then maps to 0, giving the same values as masking + in-place
# `nan_to_num`.
lr_train_data = np.nan_to_num(np.maximum(lr_train_data, 0))
hr_train_data = np.nan_to_num(np.maximum(hr_train_data, 0))

# Anti-vectorize every row of both arrays (second argument 160 for LR, 268 for
# HR — presumably the matrix size; confirm in MatrixVectorizer).
# The per-row results are packed into a single ndarray before building the
# tensor: constructing a tensor from a Python list of ndarrays is the slow path
# that PyTorch warns about.
lr_train_data_vectorized = torch.tensor(
    np.array([MatrixVectorizer.anti_vectorize(row, 160) for row in lr_train_data]),
    dtype=torch.float32)
hr_train_data_vectorized = torch.tensor(
    np.array([MatrixVectorizer.anti_vectorize(row, 268) for row in hr_train_data]),
    dtype=torch.float32)

# `splits` is iterated by the final-training cell as
# (lr_train, hr_train, lr_val, hr_val) tuples.
splits, (lr_test_data, hr_test_data) = load_random_files(return_matrix=True)


In [None]:
# Dataset over the complete anti-vectorized training set (noise_level=0.5),
# served one sample per batch in shuffled order.
train_data = NoisyDataset(
    lr_train_data_vectorized,
    hr_train_data_vectorized,
    noise_level=0.5,
)
train_data_loader = torch.utils.data.DataLoader(dataset=train_data, shuffle=True, batch_size=1)

In [None]:
# Hyperparameters of the final run. Trailing comments record the ranges that
# were explored during the grid search.
num_splt = 3
epochs = 200
lr = 0.00005          # grid: [0.0001, 0.0005, 0.00001, 0.00005]
lmbda = 17            # expected sweet spot: roughly 15-20
lamdba_topo = 0.0005  # expected sweet spot: roughly 0.0001-0.001
lr_dim = 160
hr_dim = 320          # equals 268 + 2 * padding; presumably the padded HR size — confirm in model
hidden_dim = 320      # grid: smaller and larger values in [160-512]
padding = 26
dropout = 0.2         # grid: [0., 0.1, 0.2, 0.3]

# Bundle the settings into the namespace object that the model and the
# training helpers receive. Attribute names are part of that contract and are
# kept verbatim (including `lamdba_topo` and `p` for the dropout rate).
args = argparse.Namespace(
    epochs=epochs,
    lr=lr,
    lmbda=lmbda,
    lamdba_topo=lamdba_topo,
    lr_dim=lr_dim,
    hr_dim=hr_dim,
    hidden_dim=hidden_dim,
    padding=padding,
    p=dropout,
)


# Final Model & Kaggle Submission

In [None]:
# Final training: fit and score one fresh GSRNet per (train, validation) split.
ks = [0.9, 0.7, 0.6, 0.5]
criterion = nn.L1Loss()
scores = []

# The loop uses fold-specific names so it no longer rebinds `lr_train_data`,
# `hr_train_data`, `train_data` and `train_data_loader`, which earlier cells
# define for the full training set.
#
# NOTE(review): `final_model` is rebound on every iteration, so after the loop
# it is the model fitted on the last split only. The cells below pickle and
# submit that model — confirm this is intended rather than retraining on the
# full training set.
for lr_fold_train, hr_fold_train, lr_fold_val, hr_fold_val in splits:
    final_model = GSRNet(ks, args)
    final_model.to(device)
    optimizer = optim.Adam(final_model.parameters(), lr=args.lr)

    # noise_level=0.5 for the training pairs, 0 for the validation pairs
    # (presumably 0 disables the noise — confirm in NoisyDataset).
    fold_train_data = NoisyDataset(lr_fold_train, hr_fold_train, noise_level=0.5)
    fold_train_loader = torch.utils.data.DataLoader(fold_train_data, batch_size=1, shuffle=True)
    fold_val_data = NoisyDataset(lr_fold_val, hr_fold_val, noise_level=0)
    fold_val_loader = torch.utils.data.DataLoader(fold_val_data, batch_size=1)

    train(final_model, fold_train_loader, optimizer, criterion, args)

    # Only the predictions feed the per-fold score; the returned validation
    # loss is not used further in this notebook.
    preds, val_loss = validate(final_model, fold_val_loader, criterion, args)
    scores.append(evaluate_all(hr_fold_val, preds))

print(scores)

In [None]:
import pickle

# Persist the model object currently bound to `final_model`.
# The file is opened in a `with` block so the handle is flushed and closed
# deterministically; passing a bare `open(...)` to `pickle.dump` leaves the
# handle open until garbage collection.
filename = 'final-model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(final_model, model_file)

In [None]:
# Pass the model left in `final_model` by the training cell, together with the
# hyperparameter namespace `args`, to generate_submission_csv.
# NOTE(review): presumably this writes the Kaggle submission CSV; the function
# comes from one of the star-imported project modules — confirm its output
# path and expected inputs there.
generate_submission_csv(final_model, args)