In [1]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')


Mounted at /content/drive


# Part 1: Get the LUCAS Data

In [2]:
import dask.dataframe as dd
import glob
import pandas as pd
import torch

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [3]:

# Define the folder path
folder_path = '/content/drive/MyDrive/MLTRANS/FilteredLucas/'

# List Excel files in the folder
excel_files = glob.glob(folder_path + '*.xlsx')

# Create a list of tuples with (file name, Dask DataFrame) pairs
dfs = [(file,dd.from_pandas(pd.read_excel(file), npartitions=1)) for file in excel_files]

# You now have a list of tuples where each tuple contains the file name and its associated Dask DataFrame.


In [4]:
fileNpp, NppDf = dfs[0]
fileLst, LstDf = dfs[1]
fileLAI, LAIDf = dfs[2]
fileEvapo, EvapoDf = dfs[3]
fileElevation, ElevationDf = dfs[4]

In [5]:
NppDf

Unnamed: 0_level_0,id,x,y,survey_date,point_ids,OC
npartitions=1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,int64,int64,int64,datetime64[ns],int64,float64
16728,...,...,...,...,...,...


# Part 2: Get the VAEs

In [6]:
import numpy as np
import os
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import dask.dataframe as dd
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
import torch.optim as optim
import random

In [7]:
class ResDown(nn.Module):
    """
    Residual down sampling block for the encoder
    """

    def __init__(self, channel_in, channel_out, kernel_size=3):
        super(ResDown, self).__init__()
        self.conv1 = nn.Conv2d(channel_in, channel_out // 2, kernel_size, 2, kernel_size // 2)
        self.bn1 = nn.BatchNorm2d(channel_out // 2, eps=1e-4)
        self.conv2 = nn.Conv2d(channel_out // 2, channel_out, kernel_size, 1, kernel_size // 2)
        self.bn2 = nn.BatchNorm2d(channel_out, eps=1e-4)

        self.conv3 = nn.Conv2d(channel_in, channel_out, kernel_size, 2, kernel_size // 2)

        self.act_fnc = nn.ELU()

    def forward(self, x):
        skip = self.conv3(x)
        x = self.act_fnc(self.bn1(self.conv1(x)))
        x = self.conv2(x)
        return self.act_fnc(self.bn2(x + skip))


class ResUp(nn.Module):
    """
    Residual up sampling block for the decoder
    """

    def __init__(self, channel_in, channel_out, kernel_size=3, scale_factor=2):
        super(ResUp, self).__init__()

        self.conv1 = nn.Conv2d(channel_in, channel_in // 2, kernel_size, 1, kernel_size // 2)
        self.bn1 = nn.BatchNorm2d(channel_in // 2, eps=1e-4)
        self.conv2 = nn.Conv2d(channel_in // 2, channel_out, kernel_size, 1, kernel_size // 2)
        self.bn2 = nn.BatchNorm2d(channel_out, eps=1e-4)

        self.conv3 = nn.Conv2d(channel_in, channel_out, kernel_size, 1, kernel_size // 2)

        self.up_nn = nn.Upsample(scale_factor=scale_factor, mode="nearest")

        self.act_fnc = nn.ELU()

    def forward(self, x):
        x = self.up_nn(x)
        skip = self.conv3(x)
        x = self.act_fnc(self.bn1(self.conv1(x)))
        x = self.conv2(x)

        return self.act_fnc(self.bn2(x + skip))


In [8]:

# Define the encoder network
class EvapoEncoder(nn.Module):
    def __init__(self, latent_dim):
        super(EvapoEncoder, self).__init__()
        self.conv_in = nn.Conv2d(1, 8, 7, 1, padding=1)
        # self.dropout = nn.Dropout(p=0.3)
        self.res_down_block1 = ResDown(8, 16)
        self.res_down_block2 = ResDown(16, 32)
        self.res_down_block3 = ResDown(32,64)
        self.res_down_block4 = ResDown(64, 256)


        self.flatten = nn.Flatten()
        self.conv_mu = nn.Linear(1024, latent_dim,)
        self.conv_log_var = nn.Linear(1024, latent_dim)
        self.act_fnc = nn.ELU()


    def forward(self, x):
        x = self.act_fnc(self.conv_in(x))
        # x = self.dropout(x)
        x = self.res_down_block1(x)  # 32
        x = self.res_down_block2(x)  # 16
        x = self.res_down_block3(x)  # 8
        x = self.res_down_block4(x)  # 8

        x =  self.flatten(x)
        mu = self.conv_mu(x)  # 1
        logvar = self.conv_log_var(x)  # 1

        return mu, logvar

# Define the decoder network
class EvapoDecoder(nn.Module):
    def __init__(self, latent_dim):
        super(EvapoDecoder, self).__init__()
        self.latent_dim = latent_dim
        # self.linear = nn.Linear(self.latent_dim,self.latent_dim)
        # self.act_fnc1 = nn.ELU()
        self.conv_t_up = nn.ConvTranspose2d(latent_dim, 128, 4, 2,padding=0)
        self.act_fnc2 = nn.ELU()
        self.act_fnc = nn.ELU()

                # Adding dropout layer
        # self.dropout = nn.Dropout(p=0.3)
        self.res_up_block1 = ResUp(128, 64)
        self.res_up_block2 = ResUp(64, 32)
        self.res_up_block3 = ResUp(32,16)
        self.res_up_block4 = ResUp(16,8)
        self.res_up_block5 = ResUp(8,4)
        self.res_down_block1 = ResDown(4, 16)
        self.res_down_block2 = ResDown(16, 32)


        self.conv_out1 = nn.Conv2d(32, 1, 4, stride=1, padding=2)


    def forward(self, x):
        # x = self.act_fnc1(self.linear(x))
        x = x.view(x.shape[0], self.latent_dim, 1, 1)  # Reshape into a 5x5 tensor
        x = self.act_fnc2(self.conv_t_up(x))  # 4
        # x = self.dropout(x)
        x = self.res_up_block1(x)  # 8
        x = self.res_up_block2(x)  # 16
        x = self.res_up_block3(x)  # 32
        x = self.res_up_block4(x)  # 32
        x =  self.res_up_block5(x)
        x = self.res_down_block1(x)  # 32
        x = self.res_down_block2(x)  # 16
        x = self.conv_out1(x)

        return  x

# Combine the encoder and decoder to form the VAE
class EvapoVAE(nn.Module):
    def __init__(self, latent_dim):
        super(EvapoVAE, self).__init__()
        self.latent_dim = latent_dim
        self.encoder = EvapoEncoder(self.latent_dim)
        self.decoder = EvapoDecoder(self.latent_dim)

    def encode(self, x):
        return self.encoder(x)

    def decode(self, z):
        return self.decoder(z)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        reconstructed_x = self.decode(z)
        return reconstructed_x, mu, logvar



In [9]:

# Define the encoder network
class ElevationEncoder(nn.Module):
    def __init__(self, latent_dim):
        super(ElevationEncoder, self).__init__()
        self.latent_dim = latent_dim
        self.conv_in = nn.Conv2d(1, 4, 7, 1, 3)
        self.res_down_block1 = ResDown(4, 8)
        self.res_down_block2 = ResDown(8, 16)
        self.res_down_block3 = ResDown(16,32)
        self.res_down_block4 = ResDown(32, 64)
        self.conv_mu = nn.Conv2d(64, latent_dim, 5, 1)
        self.conv_log_var = nn.Conv2d(64, latent_dim, 5, 1)
        self.act_fnc = nn.ELU()


    def forward(self, x):
        x = self.act_fnc(self.conv_in(x))
        x = self.res_down_block1(x)  # 32
        x = self.res_down_block2(x)  # 16
        x = self.res_down_block3(x)  # 8
        x = self.res_down_block4(x)
        mu = self.conv_mu(x)  # 1
        logvar = self.conv_log_var(x)  # 1

        return mu, logvar

# Define the decoder network
class ElevationDecoder(nn.Module):
    def __init__(self, latent_dim):
        super(ElevationDecoder, self).__init__()
        self.latent_dim = latent_dim
        self.conv_t_up = nn.ConvTranspose2d(latent_dim, 128, 4, 1)
        self.res_up_block1 = ResUp(128, 32)
        self.res_up_block2 = ResUp(32, 16)
        self.res_up_block3 = ResUp(16,8)
        self.res_up_block4 = ResUp(8,4)
        self.res_up_block5 = ResUp(4,2)

        self.res_down_block1 = ResDown(2,16)


        self.conv_out1 = nn.Conv2d(16, 1, 2, stride=1, padding=1)
        # self.conv_out3 = nn.Conv2d(2, 1, 3, 1, 1)

        self.act_fnc = nn.ELU()
        self.act_fnc2 = nn.ELU()


    def forward(self, x):
        x = x.view(x.shape[0], self.latent_dim, 1, 1)
        x = self.act_fnc(self.conv_t_up(x))  # 4
        x = self.res_up_block1(x)  # 8
        x = self.res_up_block2(x)  # 16
        x = self.res_up_block3(x)  # 32
        x = self.res_up_block4(x)  # 32
        x = self.res_up_block5(x)  # 32
        x = self.res_down_block1(x)  # 32

        x = self.conv_out1(x)
        return x

# Combine the encoder and decoder to form the VAE
class ElevationVAE(nn.Module):
    def __init__(self, latent_dim):
        super(ElevationVAE, self).__init__()
        self.latent_dim = latent_dim
        self.encoder = ElevationEncoder(latent_dim)
        self.decoder = ElevationDecoder(latent_dim)

    def encode(self, x):
        return self.encoder(x)

    def decode(self, z):
        return self.decoder(z)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        reconstructed_x = self.decode(z)
        return reconstructed_x, mu, logvar

        return reconstructed_x, mu, logvar



In [10]:
#@title
# Define the encoder network
class LAIEncoder(nn.Module):
    def __init__(self, latent_dim):
        super(LAIEncoder, self).__init__()
        self.conv_in = nn.Conv2d(1, 16, 7, 1, 3)

        self.res_down_block1 = ResDown(16,32)
        self.res_down_block2 = ResDown(32, 64)
        self.res_down_block3 = ResDown(64, 128)
        self.res_down_block4 = ResDown(128, 128)

        self.flatten = nn.Flatten()
        self.conv_mu = nn.Linear(1152, latent_dim,)
        self.conv_log_var = nn.Linear(1152, latent_dim)
        self.act_fnc = nn.ELU()


    def forward(self, x):
        x = self.act_fnc(self.conv_in(x))

        x = self.res_down_block1(x)  # 8
        x = self.res_down_block2(x)  # 8
        x = self.res_down_block3(x)  # 8
        x = self.res_down_block4(x)  # 8
        x = self.flatten(x)
        mu = self.conv_mu(x)  # 1
        logvar = self.conv_log_var(x)  # 1

        return mu, logvar

# Define the decoder network
class LAIDecoder(nn.Module):
    def __init__(self, latent_dim):
        super(LAIDecoder, self).__init__()
        self.latent_dim = latent_dim
        self.act_fnc1 = nn.ELU()
        self.conv_t_up = nn.ConvTranspose2d(latent_dim, 128, 4, 1)
        self.res_up_block1 = ResUp(128, 64)
        self.res_up_block2 = ResUp(64, 32)
        self.res_up_block3 = ResUp(32, 16)
        # self.res_up_block4 = ResUp(16,8)
        # self.res_down_block1 = ResDown(8,4)
        self.conv_out = nn.Conv2d(16, 1, 4, stride=1, padding=2)


    def forward(self, x):
        x = x.view(x.shape[0], self.latent_dim, 1, 1)  # Reshape into a 5x5 tensor
        x = self.act_fnc1(self.conv_t_up(x))  # 4
        x = self.res_up_block1(x)  # 8
        x = self.res_up_block2(x)  # 16
        x = self.res_up_block3(x)  # 32
        # x = self.res_up_block4(x)  # 32
        # x = self.res_down_block1(x)  # 32
        x = self.conv_out(x)

        # x = torch.tanh(self.conv_out3(x))
        return x

# Combine the encoder and decoder to form the VAE
class LAIVAE(nn.Module):
    def __init__(self, latent_dim):
        super(LAIVAE, self).__init__()
        self.latent_dim = latent_dim
        self.encoder = LAIEncoder(latent_dim)
        self.decoder = LAIDecoder(latent_dim)

    def encode(self, x):
        return self.encoder(x)

    def decode(self, z):
        return self.decoder(z)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        reconstructed_x  = self.decode(z)
        return reconstructed_x, mu, logvar



In [11]:

# Define the encoder network
class NppEncoder(nn.Module):
    def __init__(self, latent_dim):
        super(NppEncoder, self).__init__()
        self.latent_dim = latent_dim
        self.conv_in = nn.Conv2d(1, 4, 5, 1, 3)
        self.res_down_block1 = ResDown(4, 8)
        self.res_down_block2 = ResDown(8, 16)
        self.res_down_block3 = ResDown(16,32)
        self.res_down_block4 = ResDown(32, 64)
        self.conv_mu = nn.Conv2d(64, latent_dim, 3, 1)
        self.conv_log_var = nn.Conv2d(64, latent_dim, 3, 1)
        self.act_fnc = nn.ELU()


    def forward(self, x):
        x = self.act_fnc(self.conv_in(x))
        x = self.res_down_block1(x)  # 32
        x = self.res_down_block2(x)  # 16
        x = self.res_down_block3(x)  # 8
        x = self.res_down_block4(x)  # 8
        mu = self.conv_mu(x)  # 1
        logvar = self.conv_log_var(x)  # 1

        return mu, logvar

# Define the decoder network
class NppDecoder(nn.Module):
    def __init__(self, latent_dim):
        super(NppDecoder, self).__init__()
        self.latent_dim = latent_dim
        self.conv_t_up = nn.ConvTranspose2d(latent_dim, 256, 4, 2)
        self.res_up_block1 = ResUp(256, 64)
        self.res_up_block2 = ResUp(64, 32)
        self.res_up_block3 = ResUp(32,16)
        self.res_up_block4 = ResUp(16,8)
        self.res_down_block1 = ResDown(8,16)


        self.conv_out1 = nn.Conv2d(16, 1, 4, stride=1, padding=2)


        self.act_fnc = nn.ELU()
        self.act_fnc2 = nn.ELU()


    def forward(self, x):
        x = x.view(x.shape[0], self.latent_dim, 1, 1)
        x = self.act_fnc(self.conv_t_up(x))  # 4
        x = self.res_up_block1(x)  # 8
        x = self.res_up_block2(x)  # 16
        x = self.res_up_block3(x)  # 32
        x = self.res_up_block4(x)  # 32
        x = self.res_down_block1(x)


        x = self.conv_out1(x)

        return x

# Combine the encoder and decoder to form the VAE
class NppVAE(nn.Module):
    def __init__(self, latent_dim):
        super(NppVAE, self).__init__()
        self.latent_dim = latent_dim
        self.encoder = NppEncoder(latent_dim)
        self.decoder = NppDecoder(latent_dim)

    def encode(self, x):
        return self.encoder(x)

    def decode(self, z):
        return self.decoder(z)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        reconstructed_x = self.decode(z)
        return reconstructed_x, mu, logvar


In [12]:

# Define the encoder network
class LSTEncoder(nn.Module):
    def __init__(self, latent_dim):
        super(LSTEncoder, self).__init__()
        self.conv_in = nn.Conv2d(1, 16, 7, 1, 3)

        self.res_down_block1 = ResDown(16,32)
        self.res_down_block2 = ResDown(32, 64)
        self.res_down_block3 = ResDown(64, 128)

        self.conv_mu = nn.Conv2d(128, latent_dim, 3, 1)
        self.conv_log_var = nn.Conv2d(128, latent_dim, 3, 1)
        self.act_fnc = nn.ELU()
        self.latent_dim = latent_dim


    def forward(self, x):
        x = self.act_fnc(self.conv_in(x))
        x = self.res_down_block1(x)  # 32
        x = self.res_down_block2(x)  # 16
        x = self.res_down_block3(x)  # 8
        mu = self.conv_mu(x)  # 1
        logvar = self.conv_log_var(x)  # 1

        return mu, logvar

# Define the decoder network
class LSTDecoder(nn.Module):
    def __init__(self, latent_dim):
        super(LSTDecoder, self).__init__()
        self.latent_dim = latent_dim
        self.conv_t_up = nn.ConvTranspose2d(latent_dim, 256, 3, 1)
        self.res_up_block1 = ResUp(256, 128)
        self.res_up_block2 = ResUp(128, 64)
        self.res_up_block3 = ResUp(64,32)
        self.conv_out1 = nn.Conv2d(32, 1, 4, stride=1, padding=1)



        self.conv_out2 = nn.Conv2d(1, 16, 3, stride=1, padding=1)

        self.res_up_block4 = ResUp(16,32)
        self.res_up_block5 = ResUp(32,64)
        self.res_down_block1 = ResDown(64, 32)

        self.conv_out3 = nn.Conv2d(32, 1, 3, stride=2, padding=1)
        # self.conv_out3 = nn.Conv2d(2, 1, 3, 1, 1)

        self.act_fnc1 = nn.ELU()
        self.act_fnc2 = nn.ELU()


    def forward(self, x):
        x = x.view(x.shape[0], self.latent_dim, 1, 1)
        x = self.act_fnc1(self.conv_t_up(x))  # 4
        x = self.res_up_block1(x)  # 8
        x = self.res_up_block2(x)  # 16
        x = self.res_up_block3(x)
        x = self.conv_out1(x)
        x_final = self.conv_out2(x)
        x_final = self.res_up_block4(x_final)  # 32
        x_final = self.res_up_block5(x_final)  # 32
        x_final = self.res_down_block1(x_final)
        # x = self.act_fnc2(self.conv_out1(x))
        x_final = self.conv_out3(x_final)
        # x = torch.tanh(self.conv_out3(x))
        return x_final, x

# Combine the encoder and decoder to form the VAE
class LSTVAE(nn.Module):
    def __init__(self, latent_dim):
        super(LSTVAE, self).__init__()
        self.latent_dim = latent_dim
        self.encoder = LSTEncoder(latent_dim)
        self.decoder = LSTDecoder(latent_dim)

    def encode(self, x):
        return self.encoder(x)

    def decode(self, z):
        return self.decoder(z)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        reconstructed_x,  x_pre = self.decode(z)
        return reconstructed_x, x_pre, mu, logvar


In [13]:
# VaeElevation = ElevationVAE(latent_dimElevation)
# VaeLST = LSTVAE(latent_dimLST)
# VaeLAI = LAIVAE(latent_dimLAI)
# VaeEvapo = EvapoVAE(latent_dimEvapo)
# VaeNPP = NppVAE(latent_dimNPP)

In [14]:
VaeElevation = torch.load('/content/drive/MyDrive/Colab Notebooks/vaeElevation5Epoch.pt',map_location=torch.device('cpu'))
VaeLAI = torch.load('/content/drive/MyDrive/Colab Notebooks/vaeLAI3_ME_Epoch.pt',map_location=torch.device('cpu'))
VaeNPP = torch.load('/content/drive/MyDrive/Colab Notebooks/vaeNPP5Epoch.pt',map_location=torch.device('cpu'))
VaeEvapo = torch.load('/content/drive/MyDrive/Colab Notebooks/vaeEvapo5Epoch.pt',map_location=torch.device('cpu'))
VaeLST = torch.load('/content/drive/MyDrive/Colab Notebooks/vaeLST3Epoch.pt',map_location=torch.device('cpu'))

  VaeElevation = torch.load('/content/drive/MyDrive/Colab Notebooks/vaeElevation5Epoch.pt',map_location=torch.device('cpu'))
  VaeLAI = torch.load('/content/drive/MyDrive/Colab Notebooks/vaeLAI3_ME_Epoch.pt',map_location=torch.device('cpu'))
  VaeNPP = torch.load('/content/drive/MyDrive/Colab Notebooks/vaeNPP5Epoch.pt',map_location=torch.device('cpu'))
  VaeEvapo = torch.load('/content/drive/MyDrive/Colab Notebooks/vaeEvapo5Epoch.pt',map_location=torch.device('cpu'))
  VaeLST = torch.load('/content/drive/MyDrive/Colab Notebooks/vaeLST3Epoch.pt',map_location=torch.device('cpu'))


In [15]:
# Put the model in evaluation mode
VaeElevation.eval()
VaeLAI.eval()
VaeNPP.eval()
VaeEvapo.eval()
VaeLST.eval()
None  # This ensures that the output is suppressed


# Data Loader

In [16]:
import zipfile

# Replace the zip_file_path with the path to the zip file in your Google Drive
zip_file_pathEvapo = '/content/drive/MyDrive/dataEvapotranspirationTensor.zip'
zip_file_pathElevation = '/content/drive/MyDrive/dataElevationTensor.zip'
zip_file_pathLAI = '/content/drive/MyDrive/dataLAITensor.zip'
zip_file_pathLST = '/content/drive/MyDrive/dataLSTTensor.zip'
zip_file_pathMODIS2015 = '/content/drive/MyDrive/dataNPPTensor.zip'
zip_file_paths = [zip_file_pathEvapo,zip_file_pathElevation,zip_file_pathLAI,zip_file_pathLST,zip_file_pathMODIS2015]

# Replace the destination_folder with the path of the folder where you want to extract the contents
destination_folderEvapo = '/content/dataEvapotranspiration'
destination_folderElevation = '/content/dataElevation'
destination_folderLAI = '/content/dataLAI/'
destination_folderLST = '/content/dataLST'
destination_folderMODIS = '/content/dataNPP'
destination_folders = [destination_folderEvapo,destination_folderElevation,destination_folderLAI,destination_folderLST,destination_folderMODIS]

In [17]:
# Unzip the file

for i in range(len(destination_folders)):
  with zipfile.ZipFile(zip_file_paths[i], 'r') as zip_ref:
      zip_ref.extractall(destination_folders[i])

In [18]:
def re_scale(data, new_min, new_max):
    # Calculate mean and center the data around zero
    mean = data.mean()
    min_value = data.min()
    max_value = data.max()
    centered_data = data - mean

    scaled_data = (centered_data - min_value) / (max_value - min_value)  # Scale between 0 and 1
    scaled_data = scaled_data * (new_max - new_min) + new_min  # Scale to new range


    return scaled_data

# Making of the Dataset and DataLoader

In [19]:

class CustomRasterDataset(torch.utils.data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, dataFrame, file_path, file_extension,windowSize,re_scale = False,new_min = -1,new_max = 1):
        'Initialization'
        self.re_scale = re_scale
        self.new_min  = new_min
        self.new_max = new_max
        self.dataFrame = dataFrame
        self.file_path = file_path
        self.file_extension = file_extension
        self.windowSize = windowSize
        self.offset = self.windowSize // 2


    def __len__(self):
        'Denotes the total number of samples'
        return len(self.dataFrame)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID =  str(self.dataFrame.iloc[index]['id']) # str(ID).rstrip('.0')

        x = self.dataFrame.iloc[index]['x']
        y = self.dataFrame.iloc[index]['y']
        OC = self.dataFrame.iloc[index]['OC']
        point_ids = self.dataFrame.iloc[index]['point_ids']
        # Load data and get label
        fullArray = torch.load(self.file_path+ID+self.file_extension)
        # Determine the window for the square
        left = x - self.offset
        right = x + (self.offset + 1)
        top = y - self.offset
        bottom = y + (self.offset + 1)
        X = fullArray[left:right,top:bottom].clone().detach()  # Access value in gpu_dictElevation
        # if self.re_scale:
          # X =  re_scale(X,self.new_min,self.new_max)
        return X.unsqueeze(0), point_ids, OC


In [20]:

class CustomRasterDatasetSmall(torch.utils.data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, dataFrame, file_path, file_extension,windowSize,re_scale = False,new_min = -1,new_max = 1):
        'Initialization'
        self.re_scale = re_scale
        self.new_min  = new_min
        self.new_max = new_max
        self.dataFrame = dataFrame
        self.file_path = file_path
        self.file_extension = file_extension
        self.windowSize = windowSize
        self.offset = self.windowSize // 2


    def __len__(self):
        'Denotes the total number of samples'
        return len(self.dataFrame)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID =  str(self.dataFrame.iloc[index]['id']) # str(ID).rstrip('.0')

        x = self.dataFrame.iloc[index]['x']
        y = self.dataFrame.iloc[index]['y']
        OC = self.dataFrame.iloc[index]['OC']
        point_ids = self.dataFrame.iloc[index]['point_ids']
        # Load data and get label
        fullArray = torch.load(self.file_path+ID+self.file_extension)
        # Determine the window for the square
        left = x - self.offset
        right = x + (self.offset + 1)
        top = y - self.offset
        bottom = y + (self.offset + 1)
        X = fullArray[left:right,top:bottom].clone().detach()  # Access value in gpu_dictElevation
        # if self.re_scale:
          # X =  re_scale(X,self.new_min,self.new_max)
        return X.unsqueeze(0)


In [21]:
batch_sizeEvapo = 1
batch_sizeNPP = 1
batch_sizeElevation = 1
batch_sizeLAI = 1
batch_sizeLST = 1

In [22]:
windowSizeEvapo= 33
windowSizeElevation = 65
windowSizeLAI = 33
windowSizeNPP = 33
windowSizeLST = 23

In [23]:
file_pathElevation = '/content/dataElevation/'
file_pathEvapo = '/content/dataEvapotranspiration/'
file_pathLAI = '/content/dataLAI/'
file_pathLST = '/content/dataLST/'
file_pathNPP = '/content/dataNPP/'

In [24]:
file_extension = '.pt'
num_workers = 2

In [25]:
NppDfComp = NppDf.compute()
LstDfComp =  LstDf.compute()
LAIDfComp =  LAIDf.compute()
EvapoDfComp =  EvapoDf.compute()
ElevationDfComp =  ElevationDf.compute()

In [26]:
# Create the dataset instance
datasetNPP = CustomRasterDataset(NppDfComp, file_pathNPP, file_extension,windowSizeNPP)
datasetLST = CustomRasterDataset(LstDfComp, file_pathLST, file_extension,windowSizeLST)
datasetElevation = CustomRasterDataset(ElevationDfComp, file_pathElevation, file_extension,windowSizeElevation)
datasetLAI = CustomRasterDataset(LAIDfComp, file_pathLAI, file_extension,windowSizeLAI)
datasetEvapo = CustomRasterDataset(EvapoDfComp, file_pathEvapo, file_extension,windowSizeEvapo)

In [27]:
# Create the dataset instance
dimension = 1
datasetNPPSmall = CustomRasterDatasetSmall(NppDfComp, file_pathNPP, file_extension, dimension)
datasetLSTSmall = CustomRasterDatasetSmall(LstDfComp, file_pathLST, file_extension,dimension)
datasetElevationSmall = CustomRasterDatasetSmall(ElevationDfComp, file_pathElevation, file_extension, dimension)
datasetLAISmall = CustomRasterDatasetSmall(LAIDfComp, file_pathLAI, file_extension,dimension)
datasetEvapoSmall = CustomRasterDatasetSmall(EvapoDfComp, file_pathEvapo, file_extension,dimension)

In [28]:
dataLoaderEvapo = DataLoader(datasetEvapo, batch_size=batch_sizeEvapo, num_workers=num_workers, shuffle=False)
dataLoaderNPP = DataLoader(datasetNPP, batch_size=batch_sizeNPP, num_workers=num_workers, shuffle=False)
dataLoaderLST = DataLoader(datasetLST, batch_size=batch_sizeLST, num_workers=num_workers, shuffle=False)
dataLoaderLAI = DataLoader(datasetLAI, batch_size=batch_sizeLAI, num_workers=num_workers, shuffle=False)
dataLoaderElevation = DataLoader(datasetElevation, batch_size=batch_sizeElevation, num_workers=num_workers, shuffle=False)


In [29]:
dataLoaderEvapoSmall = DataLoader(datasetEvapoSmall, batch_size=batch_sizeEvapo, num_workers=num_workers, shuffle=False)
dataLoaderNPPSmall = DataLoader(datasetNPPSmall, batch_size=batch_sizeNPP, num_workers=num_workers, shuffle=False)
dataLoaderLSTSmall = DataLoader(datasetLSTSmall, batch_size=batch_sizeLST, num_workers=num_workers, shuffle=False)
dataLoaderLAISmall = DataLoader(datasetLAISmall, batch_size=batch_sizeLAI, num_workers=num_workers, shuffle=False)
dataLoaderElevationSmall = DataLoader(datasetElevationSmall, batch_size=batch_sizeElevation, num_workers=num_workers, shuffle=False)


In [30]:
lg = next(iter(dataLoaderNPP))

  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)


In [31]:
lg[1]

tensor([28382290])

# getting the Latent Space

In [32]:
# Check if a GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [33]:
# Move and set models to evaluation mode
VaeElevation.to(device)
VaeLAI.to(device)
VaeNPP.to(device)
VaeEvapo.to(device)
VaeLST.to(device)


LSTVAE(
  (encoder): LSTEncoder(
    (conv_in): Conv2d(1, 16, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (res_down_block1): ResDown(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (bn1): BatchNorm2d(16, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(32, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (act_fnc): ELU(alpha=1.0)
    )
    (res_down_block2): ResDown(
      (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (bn1): BatchNorm2d(32, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
    

In [34]:
latent_dimLST = 40
latent_dimNPP = 40
latent_dimLAI = 20
latent_dimElevation = 10
latent_dimEvapo = 50

In [35]:
# Define your VAE classes and their associated DataLoaders
# Assuming VaeElevation, VaeLAI, VaeNPP, VaeEvapo, and VaeLST are your VAE models
# and dataLoaderElevation, dataLoaderLAI, dataLoaderNPP, dataLoaderEvapo, and dataLoaderLST are your DataLoaders

# Define a function to save the results
def save_results(results, file_path):
    torch.save(results, file_path)

# Initialize an optimizer if needed
# optimizer = optim.Adam(your_model.parameters())

# Define the number of epochs
num_epochs = 1  # Adjust as needed

for epoch in range(num_epochs):
    all_results = []

    for  batch_npp in dataLoaderNPP:       # Move tensors or batches to the GPU

        batch_nppTensor = batch_npp[0]

        npp_encoded_mu, npp_encoded_logvar  = VaeNPP.encode(batch_nppTensor.to(device))
        npp_encoded = VaeNPP.reparameterize(npp_encoded_mu, npp_encoded_logvar)


        # Store the results along with the second and third tensors
        result = {
            "npp_encoded": npp_encoded,
            "point_ids": batch_npp[1],  # Adjust for your specific use case
            "OC": batch_npp[2],   # Adjust for your specific use case
        }
        all_results.append(result)

    # Save the results for this epoch to a .pt file
    save_results(all_results, f'epoch_vae_npp_results.pt')

  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)


In [36]:
# Assuming VaeElevation, VaeLAI, VaeNPP, VaeEvapo, and VaeLST are your VAE models
# and dataLoaderElevation, dataLoaderLAI, dataLoaderNPP, dataLoaderEvapo, and dataLoaderLST are your DataLoaders

# We then define a function to save the results

# Initialize an optimizer if needed
# optimizer = optim.Adam(your_model.parameters())

# Define the number of epochs
num_epochs = 1  # Adjust as needed

for epoch in range(num_epochs):
    all_results = []

    for  batch_elevation in dataLoaderElevation:       # Move tensors or batches to the GPU
        batch_elevationTensor = batch_elevation[0]

        # Assuming VAE models have .encode() and .reparametrize() methods
        elevation_encoded_mu, elevation_encoded_logvar = VaeElevation.encode(batch_elevationTensor.to(device))
        elevation_encoded = VaeElevation.reparameterize(elevation_encoded_mu, elevation_encoded_logvar)

        # Store the results along with the second and third tensors
        result = {
            "elevation_encoded": elevation_encoded,
            "point_ids": batch_elevation[1],  # Adjust for your specific use case
            "OC": batch_elevation[2],   # Adjust for your specific use case
        }
        all_results.append(result)

    # Save the results for this epoch to a .pt file
    save_results(all_results, f'epoch_vae_elevation_results.pt')

  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)


In [37]:
# Define your VAE classes and their associated DataLoaders
# Assuming VaeElevation, VaeLAI, VaeNPP, VaeEvapo, and VaeLST are your VAE models
# and dataLoaderElevation, dataLoaderLAI, dataLoaderNPP, dataLoaderEvapo, and dataLoaderLST are your DataLoaders

# Define the number of epochs
num_epochs = 1  # Adjust as needed

for epoch in range(num_epochs):
    all_results = []

    for  batch_lai in dataLoaderLAI:       # Move tensors or batches to the GPU
        # batch_elevationTensor = batch_elevation[0]
        batch_laiTensor = batch_lai[0]

        # Assuming VAE models have .encode() and .reparametrize() methods
        lai_encoded_mu, lai_encoded_logvar  = VaeLAI.encode(batch_laiTensor.to(device))
        lai_encoded = VaeLAI.reparameterize(lai_encoded_mu, lai_encoded_logvar)

        # Store the results along with the second and third tensors
        result = {
            # "elevation_encoded": elevation_encoded,
            "lai_encoded": lai_encoded.reshape(latent_dimLAI),
            "point_ids": batch_lai[1],  # Adjust for your specific use case
            "OC": batch_lai[2],   # Adjust for your specific use case
        }
        all_results.append(result)

    # Save the results for this epoch to a .pt file
    save_results(all_results, f'epoch_vae_lai_results.pt')

  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)


In [38]:
# Define your VAE classes and their associated DataLoaders
# Assuming VaeElevation, VaeLAI, VaeNPP, VaeEvapo, and VaeLST are your VAE models
# and dataLoaderElevation, dataLoaderLAI, dataLoaderNPP, dataLoaderEvapo, and dataLoaderLST are your DataLoaders

# Define a function to save the results

# Initialize an optimizer if needed

# Define the number of epochs
num_epochs = 1  # Adjust as needed

for epoch in range(num_epochs):
    all_results = []

    for  batch_evapo in dataLoaderEvapo:       # Move tensors or batches to the GPU
        batch_evapoTensor = batch_evapo[0]

        # Assuming VAE models have .encode() and .reparametrize() methods

        evapo_encoded_mu, evapo_encoded_logvar = VaeEvapo.encode(batch_evapoTensor.to(device))
        evapo_encoded = VaeEvapo.reparameterize(evapo_encoded_mu, evapo_encoded_logvar)

        # Store the results along with the second and third tensors
        result = {

            "evapo_encoded": evapo_encoded.reshape(latent_dimEvapo),
            "point_ids": batch_evapo[1],  # Adjust for your specific use case
            "OC": batch_evapo[2],   # Adjust for your specific use case
        }
        all_results.append(result)

    # Save the results for this epoch to a .pt file
    save_results(all_results, f'epoch_vae_evapo_results.pt')

  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)


In [39]:
# Define your VAE classes and their associated DataLoaders
# Assuming VaeElevation, VaeLAI, VaeNPP, VaeEvapo, and VaeLST are your VAE models
# and dataLoaderElevation, dataLoaderLAI, dataLoaderNPP, dataLoaderEvapo, and dataLoaderLST are your DataLoaders

# Define a function to save the results
def save_results(results, file_path):
    torch.save(results, file_path)

# Define the number of epochs
num_epochs = 1  # Adjust as needed

for epoch in range(num_epochs):
    all_results = []

    for  batch_lst in dataLoaderLST:       # Move tensors or batches to the GPU

        batch_lstTensor = batch_lst[0]

        # Assuming VAE models have .encode() and .reparametrize() methods

        lst_encoded_mu, lst_encoded_logvar  = VaeLST.encode(batch_lstTensor.to(device))
        lst_encoded = VaeLST.reparameterize(lst_encoded_mu, lst_encoded_logvar)

        # Store the results along with the second and third tensors
        result = {
            "lst_encoded": lst_encoded.reshape(10),
            "point_ids": batch_lst[1],  # Adjust for your specific use case
            "OC": batch_lst[2],   # Adjust for your specific use case
        }
        all_results.append(result)

    # Save the results for this epoch to a .pt file
    save_results(all_results, f'epoch_vae_lst_results.pt')

  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)


# Getting the 5 bands

In [40]:
import torch

def concatenate_tensors_from_dataloader(dataloader):
    concatenated_tensor = None

    for batch in dataloader:
        if concatenated_tensor is None:
            concatenated_tensor = batch
        else:
            concatenated_tensor = torch.cat((concatenated_tensor, batch), dim=0)

    return concatenated_tensor

# Example usage:
# Assuming you have a DataLoader called 'dataloader'
# concatenated_data = concatenate_tensors_from_dataloader(dataloader)


In [41]:
length = 16729
lst = concatenate_tensors_from_dataloader(dataLoaderLSTSmall).reshape((dimension,length))
elevation = concatenate_tensors_from_dataloader(dataLoaderElevationSmall).reshape((dimension,length))
npp = concatenate_tensors_from_dataloader(dataLoaderNPPSmall).reshape((dimension,length))
evapo = concatenate_tensors_from_dataloader(dataLoaderEvapoSmall).reshape((dimension,length))
lai = concatenate_tensors_from_dataloader(dataLoaderEvapoSmall).reshape((dimension,length))

  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+ID+self.file_extension)
  fullArray = torch.load(self.file_path+

In [42]:
# Create a tensor of ones of length 187
ones_tensor = torch.ones(length).reshape((1,length))


# Finish it

##Concatenating the 5 bands

In [43]:
import torch
import numpy as np


# Initialize an empty tensor
appended_tensor = None

# Define the file path
file_path = '/content/epoch_vae_npp_results.pt'

# Load the file using torch.load
results = torch.load(file_path)

# Initialize a list to store evapo_encoded tensors
evapo_tensors = torch.tensor([])


# Iterate through the results and collect the evapo_encoded tensors
for result in results:
    evapo_encoded = result.get("npp_encoded")

    # Check if it's the first iteration
    if appended_tensor is None:
        appended_tensor = torch.tensor(evapo_encoded).reshape((40,1))
    else:
        # Append 'tensor_to_append' to 'appended_tensor' along dimension 0
        appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((40,1))),dim=1)

appended_tensor_npp = appended_tensor

  results = torch.load(file_path)
  appended_tensor = torch.tensor(evapo_encoded).reshape((40,1))
  appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((40,1))),dim=1)


In [44]:
import torch
import numpy as np


# Initialize an empty tensor
appended_tensor = None

# Define the file path
file_path = '/content/epoch_vae_evapo_results.pt'

# Load the file using torch.load
results = torch.load(file_path)

# Initialize a list to store evapo_encoded tensors
evapo_tensors = torch.tensor([])


# Iterate through the results and collect the evapo_encoded tensors
for result in results:
    evapo_encoded = result.get("evapo_encoded")

    # Check if it's the first iteration
    if appended_tensor is None:
        appended_tensor = torch.tensor(evapo_encoded).reshape((50,1))
    else:
        # Append 'tensor_to_append' to 'appended_tensor' along dimension 0
        appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((50,1))),dim=1)

appended_tensor_evapo = appended_tensor

  results = torch.load(file_path)
  appended_tensor = torch.tensor(evapo_encoded).reshape((50,1))
  appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((50,1))),dim=1)


In [45]:

appended_tensor_evapo.shape

torch.Size([50, 16729])

In [46]:
latent_dimLST = 10
latent_dimLAI = 20
latent_Elevation = 40

In [47]:
# Initialize an empty tensor
appended_tensor = None

# Define the file path
file_path = '/content/epoch_vae_lst_results.pt'

# Load the file using torch.load
results = torch.load(file_path)

# Initialize a list to store evapo_encoded tensors
evapo_tensors = torch.tensor([])


# Iterate through the results and collect the evapo_encoded tensors
for result in results:
    evapo_encoded = result.get("lst_encoded")

    # Check if it's the first iteration
    if appended_tensor is None:
        appended_tensor = torch.tensor(evapo_encoded).reshape((10,1))
    else:
        # Append 'tensor_to_append' to 'appended_tensor' along dimension 0
        appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((latent_dimLST,1))),dim=1)

appended_tensor_lst = appended_tensor

  results = torch.load(file_path)
  appended_tensor = torch.tensor(evapo_encoded).reshape((10,1))
  appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((latent_dimLST,1))),dim=1)


In [48]:
# Initialize an empty tensor
appended_tensor = None

# Define the file path
file_path = '/content/epoch_vae_elevation_results.pt'

# Load the file using torch.load
results = torch.load(file_path)

# Initialize a list to store evapo_encoded tensors
evapo_tensors = torch.tensor([])


# Iterate through the results and collect the evapo_encoded tensors
for result in results:
    evapo_encoded = result.get("elevation_encoded")

    # Check if it's the first iteration
    if appended_tensor is None:
        appended_tensor = torch.tensor(evapo_encoded).reshape((latent_Elevation,1))
    else:
        # Append 'tensor_to_append' to 'appended_tensor' along dimension 0
        appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((latent_Elevation,1))),dim=1)

appended_tensor_elevation = appended_tensor

  results = torch.load(file_path)
  appended_tensor = torch.tensor(evapo_encoded).reshape((latent_Elevation,1))
  appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((latent_Elevation,1))),dim=1)


In [49]:
# Initialize an empty tensor
appended_tensor = None

# Define the file path
file_path = '/content/epoch_vae_lai_results.pt'

# Load the file using torch.load
results = torch.load(file_path)

# Initialize a list to store evapo_encoded tensors
evapo_tensors = torch.tensor([])


# Iterate through the results and collect the evapo_encoded tensors
for result in results:
    evapo_encoded = result.get("lai_encoded")

    # Check if it's the first iteration
    if appended_tensor is None:
        appended_tensor = torch.tensor(evapo_encoded).reshape((latent_dimLAI,1))
    else:
        # Append 'tensor_to_append' to 'appended_tensor' along dimension 0
        appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((latent_dimLAI,1))),dim=1)

appended_tensor_lai = appended_tensor

  results = torch.load(file_path)
  appended_tensor = torch.tensor(evapo_encoded).reshape((latent_dimLAI,1))
  appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((latent_dimLAI,1))),dim=1)


In [50]:
# Initialize an empty tensor
appended_tensor = None

# Define the file path
file_path = '/content/epoch_vae_lai_results.pt'

# Load the file using torch.load
results = torch.load(file_path)

# Initialize a list to store evapo_encoded tensors
evapo_tensors = torch.tensor([])


# Iterate through the results and collect the evapo_encoded tensors
for result in results:
    evapo_encoded = result.get("OC")

    # Check if it's the first iteration
    if appended_tensor is None:
        appended_tensor = torch.tensor(evapo_encoded).reshape((dimension,1))
    else:
        # Append 'tensor_to_append' to 'appended_tensor' along dimension 0
        appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((dimension,1))),dim=1)

appended_tensor_OC = appended_tensor

  results = torch.load(file_path)
  appended_tensor = torch.tensor(evapo_encoded).reshape((dimension,1))
  appended_tensor = torch.cat((appended_tensor, torch.tensor(evapo_encoded).reshape((dimension,1))),dim=1)


In [51]:
ones_tensor,lst,elevation,lai,evapo,npp

(tensor([[1., 1., 1.,  ..., 1., 1., 1.]]),
 tensor([[14826.9131, 14995.0000, 14947.6953,  ..., 14394.1816, 14328.2002,
          14288.0000]]),
 tensor([[774., 461.,  33.,  ..., 107., 370., 449.]]),
 tensor([[243.0000, 248.9091, 185.2174,  ..., 167.6364, 132.8571, 112.3077]]),
 tensor([[243.0000, 248.9091, 185.2174,  ..., 167.6364, 132.8571, 112.3077]]),
 tensor([[ 8900.,  8703., 10287.,  ...,  5123.,  3742.,  4813.]]))

In [52]:
smallX = torch.cat((ones_tensor,lst,elevation,lai,evapo,npp),dim=0)

### We are putting it together

In [53]:
print('Evapo:',appended_tensor_evapo.shape)
print('LAI:',appended_tensor_lai.shape)
print('Elevation:',appended_tensor_elevation.shape)
print('LST:',appended_tensor_lst.shape)
print('NPP:',appended_tensor_npp.shape)


Evapo: torch.Size([50, 16729])
LAI: torch.Size([20, 16729])
Elevation: torch.Size([40, 16729])
LST: torch.Size([10, 16729])
NPP: torch.Size([40, 16729])


In [54]:
ones_tensor = ones_tensor.to(device)
appended_tensor_X = torch.cat((ones_tensor.to(device),appended_tensor_evapo,appended_tensor_lai,appended_tensor_elevation,appended_tensor_lst,appended_tensor_npp),dim=0)

In [55]:
NN_X = torch.cat((appended_tensor_evapo,appended_tensor_lai,appended_tensor_elevation,appended_tensor_lst,appended_tensor_npp),dim=0)

# Regression

## Smaller Regression

In [56]:
import numpy as np
import statsmodels.api as sm

# Assuming 'X' is your input features and 'Y' is your target values as NumPy arrays
X = np.array(smallX.T)
Y = np.array(appended_tensor_OC.T)

# Define the Tobit model with left-censoring (use 'r' for right-censoring)
tobit_model = sm.OLS(Y, X,'l')  # Ordinary Least Squares (OLS) with censored data

# Fit the Tobit model
results = tobit_model.fit()

# Print the summary of the Tobit regression results
print(results.summary())


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.142
Model:                            OLS   Adj. R-squared:                  0.142
Method:                 Least Squares   F-statistic:                     694.1
Date:                Sun, 13 Oct 2024   Prob (F-statistic):               0.00
Time:                        21:14:29   Log-Likelihood:                -95032.
No. Observations:               16729   AIC:                         1.901e+05
Df Residuals:                   16724   BIC:                         1.901e+05
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       1488.5202     29.192     50.991      0.0

## Larger Regression

In [57]:
import numpy as np
import statsmodels.api as sm

# Assuming 'appended_tensor_X' and 'appended_tensor_OC' are your input features and target values as PyTorch tensors
X = np.array(appended_tensor_X.T.cpu())  # Move tensor to CPU before converting
Y = np.array(appended_tensor_OC.T.cpu())  # Move tensor to CPU before converting

# Define the Tobit model with left-censoring (use 'r' for right-censoring)
# Note: There seems to be a misunderstanding in your use of 'sm.OLS'. Ordinary Least Squares (OLS) does not inherently handle censored data.
# If you intend to use a censored regression model, consider using an appropriate method or package that supports censored data.
tobit_model = sm.OLS(Y, X)  # OLS regression model (for demonstration)

# Fit the Tobit model
results = tobit_model.fit()

# Print the summary of the regression results
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.186
Model:                            OLS   Adj. R-squared:                  0.178
Method:                 Least Squares   F-statistic:                     23.61
Date:                Sun, 13 Oct 2024   Prob (F-statistic):               0.00
Time:                        21:14:30   Log-Likelihood:                -94599.
No. Observations:               16729   AIC:                         1.895e+05
Df Residuals:                   16568   BIC:                         1.908e+05
Df Model:                         160                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         94.0931      8.289     11.351      0.0

## Regression on the Best Features

In [58]:
import torch
print(torch.__version__)


2.4.1+cu121


In [59]:
np.where(results.pvalues < 0.01)[0]

array([  0,   4,  10,  13,  17,  19,  23,  36,  39,  44,  46,  47,  49,
        51,  53,  55,  59,  60,  62,  63,  68, 105, 111, 147, 153, 160])

In [60]:
# Step 1: Examine summary statistics to identify significant features
# Look at p-values in the summary and choose a significance level (e.g., 0.05)
significant_features = np.where(results.pvalues < 0.01)[0]


# Step 2: Filter the significant features
X_significant = X[:, significant_features]

# Step 3: Perform regression using only the significant features
tobit_model_significant = sm.OLS(Y, X_significant)
results_significant = tobit_model_significant.fit()

# Print the summary of the regression results with only significant features
print(results_significant.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.171
Model:                            OLS   Adj. R-squared:                  0.169
Method:                 Least Squares   F-statistic:                     137.6
Date:                Sun, 13 Oct 2024   Prob (F-statistic):               0.00
Time:                        21:14:30   Log-Likelihood:                -94751.
No. Observations:               16729   AIC:                         1.896e+05
Df Residuals:                   16703   BIC:                         1.898e+05
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         82.9579      2.568     32.308      0.0

In [61]:
# appended_tensor_X = appended_tensor_lst


In [62]:
appended_tensor_X.shape

torch.Size([161, 16729])

In [63]:
appended_tensor_OC.shape

torch.Size([1, 16729])

# Some Regression Model

In [64]:
import numpy as np
from sklearn.linear_model import LinearRegression

# Assuming 'X' is your input features and 'Y' is your target values as NumPy arrays
X = np.array(appended_tensor_X.T.cpu())
Y = np.array(appended_tensor_OC.T.cpu())

# Create a linear regression model
model = LinearRegression()

# Fit the model to the data
model.fit(X, Y)

# Get the coefficients and intercept
coefficients = model.coef_
intercept = model.intercept_

# Make predictions
predictions = model.predict(X)


In [65]:
from sklearn.metrics import r2_score

# Calculate the R-squared value
r_squared = r2_score(Y, predictions)

# Print the R-squared value
print("R-squared:", r_squared)

R-squared: 0.1856889683454429


In [66]:
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(Y, predictions))
rmse

69.11972630603873

In [67]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Split your data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit the model on the training data
model = LinearRegression()
model.fit(X_train, Y_train)

# Make predictions on the test data
predictions = model.predict(X_test)

# Calculate adjusted R-squared
n = len(Y_test)
p = X_test.shape[1]  # Number of predictors
r_squared = r2_score(Y_test, predictions)
adjusted_r_squared = 1 - (1 - r_squared) * (n - 1) / (n - p - 1)
adjusted_r_squared

0.13666291508737616

In [68]:
# Split your data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X_significant, Y, test_size=0.2, random_state=42)

# Fit the model on the training data
model = LinearRegression()
model.fit(X_train, Y_train)

# Make predictions on the test data
predictions = model.predict(X_test)

# Calculate adjusted R-squared
n = len(Y_test)
p = X_test.shape[1]  # Number of predictors
r_squared = r2_score(Y_test, predictions)
adjusted_r_squared = 1 - (1 - r_squared) * (n - 1) / (n - p - 1)
adjusted_r_squared

0.1658792964707081

In [69]:
import torch

# Define file paths for the three files
file_paths = [
    '/content/epoch_vae_evapo_results.pt',
    '/content/epoch_vae_lai_results.pt',
    '/content/epoch_vae_lst_results.pt',
]

# Initialize lists to store point_ids from each file
point_ids_lists = []

# Load data and extract point_ids from each file
for file_path in file_paths:
    results = torch.load(file_path)
    point_ids = [result.get("point_id") for result in results if "point_id" in result]
    point_ids_lists.append(point_ids)

# Check if the order of point_ids is the same for all files
same_order = all(point_ids == point_ids_lists[0] for point_ids in point_ids_lists)

if same_order:
    print("The order of point_ids is the same for all files.")
else:
    print("The order of point_ids is different for one or more files.")


  results = torch.load(file_path)


The order of point_ids is the same for all files.


In [70]:
# Define the file path
file_path = '/content/epoch_vae_lst_results.pt'

# Load the file using torch.load
results = torch.load(file_path)

# Assuming the file contains a list of dictionaries
# Print the first 5 items (rows)
for i, result in enumerate(results[:5]):
    print(f"Item {i + 1}:")
    for key, value in result.items():
        print(f"{key}: {value}")
    print("\n")


  results = torch.load(file_path)


Item 1:
lst_encoded: tensor([ 3.0010,  0.9062, -0.7289, -1.7117,  1.0321, -1.8240, -0.6961, -0.6529,
        -1.0488,  0.5545], device='cuda:0', requires_grad=True)
point_ids: tensor([28382290])
OC: tensor([85.4000], dtype=torch.float64)


Item 2:
lst_encoded: tensor([ 3.5936, -0.5233, -2.0914, -0.1449,  0.5875, -0.7096,  0.4047,  1.2617,
        -0.6076,  1.4135], device='cuda:0', requires_grad=True)
point_ids: tensor([28542300])
OC: tensor([14.7000], dtype=torch.float64)


Item 3:
lst_encoded: tensor([ 3.4565, -1.1784,  0.2880, -0.7027,  0.7897, -2.5051, -0.5620, -1.0091,
        -0.0845,  0.6329], device='cuda:0', requires_grad=True)
point_ids: tensor([27922352])
OC: tensor([23.2000], dtype=torch.float64)


Item 4:
lst_encoded: tensor([ 3.3966,  0.2875, -1.0935,  1.2738, -0.3078, -0.7294,  0.8841,  0.3546,
        -0.7162,  1.1394], device='cuda:0', requires_grad=True)
point_ids: tensor([27942398])
OC: tensor([89.4000], dtype=torch.float64)


Item 5:
lst_encoded: tensor([ 3.3873,  0

In [71]:
# Define the file path
file_path = '/content/epoch_vae_npp_results.pt'

# Load the file using torch.load
results = torch.load(file_path)

# Assuming the file contains a list of dictionaries
# Print the first 5 items (rows)
for i, result in enumerate(results[:5]):
    print(f"Item {i + 1}:")
    for key, value in result.items():
        print(f"{key}: {value.shape}")
    print("\n")

# Define the file path
file_path = '/content/epoch_vae_elevation_results.pt'

# Load the file using torch.load
results = torch.load(file_path)

# Assuming the file contains a list of dictionaries
# Print the first 5 items (rows)
for i, result in enumerate(results[:5]):
    print(f"Item {i + 1}:")
    for key, value in result.items():
        print(f"{key}: {value.shape}")
    print("\n")



  results = torch.load(file_path)


Item 1:
npp_encoded: torch.Size([1, 40, 1, 1])
point_ids: torch.Size([1])
OC: torch.Size([1])


Item 2:
npp_encoded: torch.Size([1, 40, 1, 1])
point_ids: torch.Size([1])
OC: torch.Size([1])


Item 3:
npp_encoded: torch.Size([1, 40, 1, 1])
point_ids: torch.Size([1])
OC: torch.Size([1])


Item 4:
npp_encoded: torch.Size([1, 40, 1, 1])
point_ids: torch.Size([1])
OC: torch.Size([1])


Item 5:
npp_encoded: torch.Size([1, 40, 1, 1])
point_ids: torch.Size([1])
OC: torch.Size([1])




  results = torch.load(file_path)


Item 1:
elevation_encoded: torch.Size([1, 40, 1, 1])
point_ids: torch.Size([1])
OC: torch.Size([1])


Item 2:
elevation_encoded: torch.Size([1, 40, 1, 1])
point_ids: torch.Size([1])
OC: torch.Size([1])


Item 3:
elevation_encoded: torch.Size([1, 40, 1, 1])
point_ids: torch.Size([1])
OC: torch.Size([1])


Item 4:
elevation_encoded: torch.Size([1, 40, 1, 1])
point_ids: torch.Size([1])
OC: torch.Size([1])


Item 5:
elevation_encoded: torch.Size([1, 40, 1, 1])
point_ids: torch.Size([1])
OC: torch.Size([1])




In [72]:
import shutil

source_file_path = '/content/epoch_vae_elevation_results.pt'
# Define the destination folder path
destination_folder = '/content/drive/MyDrive/MLTRANS/FilteredLucas/'

# Use shutil to move the file
try:
    shutil.move(source_file_path, destination_folder)
    print(f"File moved successfully to {destination_folder}")
except Exception as e:
    print(f"Error: {e}")

Error: Destination path '/content/drive/MyDrive/MLTRANS/FilteredLucas/epoch_vae_elevation_results.pt' already exists


# Using a MLP to regress on the y

In [73]:
Xtensor = torch.tensor(X, dtype=torch.float32)
ytensor = torch.tensor(y, dtype=torch.float32)

NameError: name 'y' is not defined

In [None]:
from torch import nn

class SimpleMLP(nn.Module):
    def __init__(self):
        super(SimpleMLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(Xtensor.shape[0], 10),  # Input layer with 20 features and first hidden layer with 50 neurons
            nn.ReLU(),          # Activation function
            nn.Linear(10, 10),  # Second hidden layer with 20 neurons
            nn.ReLU(),          # Activation function
            nn.Linear(10, 1),   # Output layer with 1 output for binary classification
        )

    def forward(self, x):
        return self.layers(x)


In [None]:
model = SimpleMLP()
criterion = nn.BCELoss()  # Binary Cross Entropy Loss for binary classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

# Training loop
for epoch in range(100):  # number of epochs
    optimizer.zero_grad()  # clear gradients
    outputs = model(X)     # forward pass
    loss = criterion(outputs.squeeze(), y)  # calculate loss
    loss.backward()        # backward pass (compute gradients)
    optimizer.step()       # update weights

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/100], Loss: {loss.item():.4f}')


In [None]:
with torch.no_grad():  # inference mode
    outputs = model(X)
    predicted = outputs.round()  # for binary classification
    accuracy = (predicted.squeeze() == y).float().mean()
    print(f'Accuracy: {accuracy:.4f}')
