### Training the model 

Train a model that makes good eddy predictions. You can train your own model on Colab.

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell runs, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
#First you need to download the files, and put them on the data folder. 
!git clone https://github.com/guillaume117/EddyForecasting.git
%cd EddyForecasting
!pip install gdown
!pip install netCDF4

#%load_ext autoreload
#%autoreload 2



import gdown
import os
from  utils.util import addGitignore
path = 'data'
addGitignore(path)

if not os.path.exists(path):
    os.makedirs(path)
else:
    pass
pass


file_url_train = "https://drive.google.com/uc?export=download&id=1RxEA59SUTbniBJrIO4Twa7w7de-QlIiH"
file_url_test = "https://drive.google.com/uc?export=download&id=1hWUzgu5UrjQNF4T4GVfX_g7jPD_HXJfg"
file_url_target = "https://drive.google.com/uc?export=download&id=1zNT1WLF3Fbsu3LEaEQxPhqPgUO7eQbDs"

gdown.download(file_url_train, output=f'{path}/OSSE_U_V_SLA_SST_2015_TRAIN_for_FORECAST.nc', quiet=False)
gdown.download(file_url_test, output=f'{path}/OSSE_U_V_SLA_SST_2015_TEST_for_FORECAST.nc', quiet=False)
gdown.download(file_url_target, output=f'{path}/eddies_TRAIN_for_FORECAST.nc', quiet=False)



In [None]:

import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from CNN.unet import UNet
from utils.train import TrainDataset
from utils.generate import GenerateDataset
from utils.device import self_device
import torch.nn as nn


# GenerateDataset is a custom class adapted to the specific needs of the project.
# It generates the dataset from the raw data and records the datasets in a
# specific folder, dataset/running_instance.

# Name of the running instance; it is used to save the dataset and is passed
# to the trainer below.
running_instance = 'my_instance'
# Number of dates to forecast. Defined once because the generated dataset, the
# model and the trainer must all be given the same value.
num_dates = 10

dataset = GenerateDataset()
dataset.processingData(num_date=num_dates,
                    type ='Train', #Train or Test, Test will be only used for scoring
                    running_instance=running_instance, #The name of the running instance, will be used to save the dataset
                    generate=True, #If True, the dataset will be generated, if False, the dataset will be loaded
                    validation_fraction=0.2 #The fraction of the dataset that will be used for validation
                    )

# Keep only the file paths; the generator object itself is no longer needed.
train_path = dataset.getTrainPath()
val_path = dataset.getValPath()
nan_path = dataset.getNanMaskLabelPath()
nan_deconv_path = dataset.getNanMaskLabelDeconvPath()
del dataset

# The dataset is saved in the dataset/running_instance folder and can be
# loaded back with torch.load.
train_dataset = torch.load(train_path)
val_dataset = torch.load(val_path)

# The NaN masks mark the NaN values of the dataset and are used when computing
# the loss: the loss is not computed for the NaN values.
nan_masked_label = torch.load(nan_path)

# The "deconv" NaN mask is not necessarily well named: it is the NaN mask with
# the original shape of the dataset. It is used later to score the model.
nan_masked_label_deconv = torch.load(nan_deconv_path)

# self_device() selects the device used for training (per the original notes:
# a CUDA or Metal GPU, otherwise the CPU). It is passed to the trainer because,
# when Metal MPS is used, tensors must be converted to float32.
device = self_device()
print(f'GPU device type = {device}')

model = UNet(n_class=3, #3 classes, 0: no eddies layer, 1: cyclone, 2: anticyclone
             num_dates=num_dates, #The number of dates to be forecasted, must be the same as the number of dates in the dataset
             verbose=False
             )
# If you want to load a trained model, you can use
#weights_path = 'UNet_trained/XXx.pth'
#model.load_state_dict(torch.load(weights_path))
# In that case, take care not to generate another dataset, since it would be
# different from the one used to train the model.

model.to(device)
num_epochs = 100
learning_rate = 0.01
batch_size = 10

# NOTE(review): learning_rate is handed to TrainDataset but not to Adam, so the
# optimizer is built with Adam's default learning rate. Confirm that
# TrainDataset applies learning_rate to the optimizer; otherwise pass
# lr=learning_rate here.
optimizer = optim.Adam(model.parameters(), betas=(0.9, 0.99))
scheduler = lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.98)
criterion = nn.MSELoss()

TD = TrainDataset(model=model,
                  criterion=criterion,
                  optimizer=optimizer,
                  scheduler=scheduler,
                  num_epochs=num_epochs,
                  learning_rate=learning_rate,
                  batch_size=batch_size,
                  device=device,
                  # Only the first batch_size entries of the mask are passed.
                  nan_mask_label=nan_masked_label[:batch_size,:,:,:],
                  running_instance=running_instance,
                  num_dates=num_dates)

model = TD.train_model(train_dataset, val_dataset)

In [None]:
""" This part of the code is needed to load the validation dataset and get the input and labels"""
from torch.utils.data import DataLoader

# Reload the validation split that was saved on disk.
val_dataset = torch.load(val_path)

# One single batch spanning the whole validation set, kept in dataset order.
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=len(val_dataset),
    shuffle=False,
    drop_last=False,
)
# Take that first (and only) batch out of the loader.
input, labels = next(iter(val_loader))


In [None]:
""" This is the code to load the trained model and the weights and to make the prediction. 
The prediction is then reshaped to the original shape of the image.
The prediction is then converted to a one hot encoding and then to a label.
The label is then reshaped to the original shape of the image.
The label and the prediction are then saved to a list.

    """
from tqdm import tqdm
from torchvision import transforms

N_CLASS = 3        # classes stacked in the channel axis of the network output
N_DATES = 10       # dates stacked in the channel axis for each class
LAND_VALUE = 999   # value written where the NaN mask is set

list_following_predicted = []
list_following_ground_truth = []

model = UNet(n_class=N_CLASS, num_dates=N_DATES, verbose=False)
# Weights file produced by a previous training run; adapt it to your own run.
weights_path = 'UNet_trained/UNet_Train_1_Epoch_98_valacc_91.76040649414062.pth'
# map_location='cpu' lets weights saved from a GPU run be loaded on a machine
# without that GPU; the model is run on the CPU below anyway.
model.load_state_dict(torch.load(weights_path, map_location='cpu'))
model.to('cpu')
model.eval()

# Built once instead of at every loop iteration.
trans_back = transforms.Resize((357, 717))
land_mask = nan_masked_label_deconv[0, :, :, :]


def _to_label_map(stacked):
    """Convert a (1, N_CLASS * N_DATES, H, W) tensor into a label map array.

    The tensor is resized to 357 x 717, viewed as (1, N_CLASS, N_DATES, H, W),
    and reduced over the class axis by keeping the index of the largest value.
    Positions where ``land_mask`` is set are then overwritten with LAND_VALUE.

    Returns the result with the leading batch axis squeezed out, as a NumPy
    array.
    """
    resized = trans_back(stacked)
    per_class = resized.view(resized.size(0), N_CLASS, N_DATES,
                             resized.size(2), resized.size(3))
    # The argmax index is exactly the value the former one-hot encoding followed
    # by 0*c0 + 1*c1 + 2*c2 produced; the cast keeps the dtype of the input.
    label_map = torch.argmax(per_class, dim=1).to(per_class.dtype)
    label_map = torch.where(land_mask, torch.tensor(LAND_VALUE), label_map)
    return label_map.squeeze(0).detach().numpy()


# Inference only: no_grad avoids building an autograd graph for every sample.
with torch.no_grad():
    for i in tqdm(range(len(input))):
        prediction = model(input[i].unsqueeze(0))
        label = labels[i].unsqueeze(0)

        list_following_ground_truth.append(_to_label_map(label))
        list_following_predicted.append(_to_label_map(prediction))

### Eddies animation 

<div class ='alert alert-success'>

The next two cells create a video animation of the prediction versus the ground truth.
</div>

In [None]:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

"""
This is the code to create a video of the prediction and the ground truth
It takes the prediction and the ground truth and reshapes them to the original shape of the image.
It then creates a video of the prediction and the ground truth.
"""


# Index of the validation sample shown in the video.
sample_index = 10

array_list_following_predicted = np.array(list_following_predicted)
# We don't want to saturate the image with the land (=999), so land is set to 0.5.
array_list_following_predicted = np.where(array_list_following_predicted >= 100, 0.5, array_list_following_predicted)
array_list_following_ground_truth = np.array(list_following_ground_truth)
array_list_following_ground_truth = np.where(array_list_following_ground_truth >= 100, 0.5, array_list_following_ground_truth)

# One frame per entry along the first axis of the selected sample.
predicted_frames = array_list_following_predicted[sample_index]
ground_truth_frames = array_list_following_ground_truth[sample_index]

fig, ax = plt.subplots(1, 2, figsize=(40, 32))
ax[0].set_title("Prediction")
ax[1].set_title("Ground truth")

# The two images are created once and only their data is swapped per frame;
# calling imshow in every frame would stack a new image artist each time.
# vmin/vmax pin the colour scale to the label range 0..2 so that a colour
# means the same thing in every frame.
images = [
    ax[0].imshow(predicted_frames[0], origin="lower", vmin=0, vmax=2),
    ax[1].imshow(ground_truth_frames[0], origin="lower", vmin=0, vmax=2),
]


def init():
    """Show the first frame on both panels and return the image artists."""
    images[0].set_data(predicted_frames[0])
    images[1].set_data(ground_truth_frames[0])
    return images


def update(frame):
    """Show frame number ``frame`` on both panels and return the image artists."""
    images[0].set_data(predicted_frames[frame])
    images[1].set_data(ground_truth_frames[frame])
    return images


# The frame count comes from the same sample that is displayed.
animation = FuncAnimation(fig, update, frames=len(predicted_frames), init_func=init)

output_video = 'predicted_video_eddies_predict_vs_ground_truth.mp4'

animation.save(output_video, fps=1, extra_args=['-vcodec', 'libx264'])

### Warning: do not delete this cell

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn
import matplotlib.pyplot as plt2

def confusion_matrix_(gT_label, pred_label):
    """Compute the row-normalised confusion matrix for the classes 0, 1 and 2.

    Args:
        gT_label: ground-truth label maps (array-like, any shape).
        pred_label: predicted label maps, same shape as ``gT_label``.

    Returns:
        The 3 x 3 matrix returned by ``sklearn.metrics.confusion_matrix`` with
        ``normalize='true'``: rows are ground-truth classes, columns are
        predicted classes.

    Positions whose ground truth or prediction is not one of the class labels
    (for instance land markers such as 999 or 0.5) are left out. Removing them
    up front matters because scikit-learn rejects float targets that hold
    non-integer values.
    """
    class_labels = [0, 1, 2]
    gT_label = np.asarray(gT_label).ravel()
    pred_label = np.asarray(pred_label).ravel()

    # Keep only the positions where both maps hold a real class label.
    valid = np.isin(gT_label, class_labels) & np.isin(pred_label, class_labels)
    gT_label = gT_label[valid].astype(int)
    pred_label = pred_label[valid].astype(int)

    cm = confusion_matrix(gT_label, pred_label, labels=class_labels, normalize='true')
    return cm


# Score on the raw label maps: in the array_list_* display arrays the land
# pixels were replaced by 0.5, which is not a class label. In the raw maps the
# land value (999) is outside labels=[0,1,2] and is ignored by the matrix.
cm = confusion_matrix_(list_following_ground_truth, list_following_predicted)

# Values are shown as percentages of each ground-truth class.
ax = seaborn.heatmap(100*cm, annot=True, cmap="crest", linewidths=0.05)
# confusion_matrix puts the ground truth on the rows (heatmap y-axis) and the
# predictions on the columns (heatmap x-axis).
ax.set(xlabel="Prediction", ylabel="Ground Truth")
ax.xaxis.tick_top()
plt2.show()

print(100*cm)