<a href="https://colab.research.google.com/github/ge43jef/GEEHYDRO/blob/block5/NN_camels_fluxnet_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Neural Network with pytorch
In this lab, you will learn how to implement a simple neural network. We will use a library called PyTorch to build the neural network we implemented in the last lab.

In [None]:
#pip install -U torchmetrics

In [None]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch import nn
from torchmetrics.classification import MulticlassAccuracy
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

## B1. Define your dataset

In [None]:
class Dataset(torch.utils.data.Dataset):
    '''
    Map-style dataset pairing a feature array with a target array.

    Parameters
    ----------
    X : features, indexed by sample along the first axis. NumPy arrays,
        torch tensors and anything else ``torch.as_tensor`` accepts.
    y : targets, one entry per sample, same accepted types as ``X``.

    Both are stored as ``torch.float`` tensors in ``self.X`` / ``self.y``.

    Raises
    ------
    ValueError : if ``X`` and ``y`` do not have the same number of samples.
    '''

    def __init__(self, X, y):
        # torch.as_tensor handles ndarrays and tensors alike, so the
        # attributes are always set (previously they were skipped whenever
        # one of the inputs was already a tensor).
        self.X = torch.as_tensor(X).type(torch.float)
        self.y = torch.as_tensor(y).type(torch.float)
        if len(self.X) != len(self.y):
            raise ValueError(
                f'X and y must have the same number of samples, '
                f'got {len(self.X)} and {len(self.y)}')

    def __len__(self):
        # Number of samples = size of the first axis of the features.
        return len(self.X)

    def __getitem__(self, i):
        # One (features, target) pair.
        return self.X[i], self.y[i]

## B2. NN Binary classification (example data of last lab)

### B2.1 Data creation

In [None]:
# Toy data for the binary classifier: a 4 x 4 feature matrix and four 0/1
# labels (presumably one sample per row of X -- confirm against the last lab).
X = np.array([
    [21.04, 5, 0.5, 90],
    [14.16, 3, 1, 80],
    [8.52, 2, 0.5, 70],
    [7.52, 2.3, 1, 80],
])
y = np.array([0, 0, 1, 1])

# Show the array shapes as a quick sanity check.
print(X.shape)
print(y.shape)

### B2.2 Define the NN model

In [None]:
class Network(nn.Module):
    '''
    Small fully connected binary classifier.

    Architecture: 4 inputs -> 5 hidden units (ReLU) -> 1 output squashed by
    a sigmoid, so every output lies strictly between 0 and 1.
    '''

    def __init__(self):
        super().__init__()
        # Layers are created in forward order so the random initialisation
        # draws happen in the same sequence as a literal Sequential(...).
        hidden = nn.Linear(4, 5)
        head = nn.Linear(5, 1)
        self.layers = nn.Sequential(hidden, nn.ReLU(), head, nn.Sigmoid())

    def forward(self, x):
        # x: tensor whose last dimension has size 4.
        return self.layers(x)

### B2.3 Model training

In [None]:
if __name__ == '__main__':
    # X is passed as-is: its rows are the samples (one label in y per row).
    # Transposing it would feed the network one feature across all samples
    # instead of one sample's features.
    dataset = Dataset(X, y)
    trainloader = DataLoader(dataset, batch_size = 4)
    mlp = Network()

    # Binary cross-entropy matches the sigmoid output of Network.
    loss_function = nn.BCELoss()
    optimizer = torch.optim.SGD(mlp.parameters(), lr=0.001)

    # Run the training loop
    for epoch in range(0, 100):

        # Print epoch
        print(f'Starting epoch {epoch + 1}')

        # Iterate over the DataLoader for training data
        for i, data in enumerate(trainloader, 0):
            # Get and prepare inputs; BCELoss needs targets shaped like the
            # (batch, 1) network output.
            inputs, targets = data
            targets = targets.reshape(targets.shape[0] , 1)

            # Zero the gradients, then forward pass
            optimizer.zero_grad()
            outputs = mlp(inputs)

            loss = loss_function( outputs , targets )

            # Perform backward pass
            loss.backward()

            # Perform optimization
            optimizer.step()

            # Report the loss of every mini-batch
            print('Loss after mini-batch %5d: %.3f' %
                  (i + 1, loss.item()))

    # Process is complete.

### Please check the [reference](https://torchmetrics.readthedocs.io/en/stable/classification/accuracy.html) for the accuracy calculation

## B3. NN multiclass classification (CAMELS data from the last lab)

### B3.1 Model definition

In [None]:
class MLP_class(nn.Module):
    '''
      Multilayer perceptron for 4-class classification.

      Layer widths are 2 -> 4 -> 8 -> 4 with ReLU between the linear layers.
      The last layer has no activation, so forward() returns raw scores
      (logits), one per class.
    '''

    def __init__(self):
        super().__init__()
        widths = (2, 4, 8, 4)
        modules = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            modules.append(nn.Linear(n_in, n_out))
            modules.append(nn.ReLU())
        # No activation after the output layer.
        modules.pop()
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        # x: tensor whose last dimension has size 2.
        return self.layers(x)

### B3.2 Load the dataset
In this part, we will use the same CAMELS dataset as in the previous lab to train the classification model.

In [None]:
with open('camels_topo.txt') as f:
    lines = f.readlines()

num_of_rows = len(lines)

# One row per data line (the first line of the file is skipped) and one
# column for each of the first four numeric fields on that line.
var = np.zeros((num_of_rows - 1, 4))

for row, record in enumerate(lines[1:]):
    numbers = []
    for field in record.split(';'):
        try:
            numbers.append(float(field))
        except ValueError:
            # Fields that cannot be parsed as a float are skipped.
            continue
    var[row, :] = numbers[:4]

var = pd.DataFrame(var, columns=['catchment_idx', 'lat', 'lon', 'elev'])

# Bin the elevation into four quantile classes (integer codes); the bin
# edges are kept in range_of_quantile.
elev_class, range_of_quantile = pd.qcut(var['elev'], 4, labels=False, retbins=True)
var['elev_class'] = elev_class
var = np.array(var)

# Features are columns 2 and 1 (lon, lat); the target is column 4 (elev_class).
X = var[:, [2, 1]]
y = var[:, 4]

# Hold out 20 % of the catchments, shuffled, for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True)

### B3.3 Model training

In [None]:
if __name__ == '__main__':

    # Seed torch's RNG before the model is created.
    torch.manual_seed(52)
    dataset = Dataset(X_train, y_train)
    trainloader = DataLoader(dataset, batch_size = 100)
    mlp = MLP_class()

    # Cross-entropy on the raw class scores, optimised with Adam.
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(mlp.parameters(), lr = 0.001)

    for epoch in range(1000):
        print(f'Starting epoch {epoch + 1}')

        # Running loss since the last report.
        current_loss = 0.0

        for i, (inputs, targets) in enumerate(trainloader):
            # CrossEntropyLoss expects integer class indices as targets.
            targets = targets.type(torch.LongTensor)

            # Forward pass on fresh gradients.
            optimizer.zero_grad()
            outputs = mlp(inputs)
            loss = loss_function(outputs, targets)

            # Backward pass and parameter update.
            loss.backward()
            optimizer.step()

            current_loss += loss.item()

            # Report every third mini-batch.
            if (i + 1) % 3 != 0:
                continue
            print('Loss after mini-batch %5d: %.3f' %
                  (i + 1, loss.item()))
            current_loss = 0.0

    # Process is complete.
    print('Training process has finished.')

### B3.4 Model test

In [None]:
# Evaluate the trained classifier on the held-out test set.
dataset = Dataset(X_test, y_test)
testloader = DataLoader(dataset, batch_size = 1000)
m = nn.Softmax(dim=1)
accuracymetric = MulticlassAccuracy(num_classes=4)

# Collect predictions and labels from every batch so the reported accuracy
# (and the `output` used for plotting) covers the whole test set, not just
# the last batch.
batch_predictions = []
batch_targets = []

with torch.no_grad():
    for i, data in enumerate(testloader, 0):
        # Get and prepare inputs
        inputs, targets = data
        targets = targets.type(torch.LongTensor)
        outputs = mlp(inputs)
        probabilities = m(outputs)
        # Convert the class probabilities to a label index
        batch_predictions.append(torch.argmax(probabilities, dim=1))
        batch_targets.append(targets)

output = torch.cat(batch_predictions)
targets = torch.cat(batch_targets)
accuracy = accuracymetric(output, targets)
print(output.detach())
print(targets)
print(accuracy)

In [None]:
# Top panel: predicted class of the test points drawn over all points.
# Bottom panel: true class of every point.
fig, ax = plt.subplots(2, 1 , figsize=(6, 10))
plt.subplots_adjust(wspace=0.4, hspace=0.2)

# plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap takes the
# same (name, lut) arguments and returns a 4-colour discrete map.
cmap = plt.get_cmap('PiYG', 4)
ax[0].scatter(X[: , 0] , X[: , 1])
# vmin/vmax pin the colour scale to the class codes 0..3 so a class gets the
# same colour in both panels even if it is missing from the predictions.
sc1 = ax[0].scatter(X_test[: , 0] , X_test[: , 1] , c=output, cmap=cmap,
                    vmin=0, vmax=3, s=20, edgecolors="k")
ax[0].set_title("Our prediction on test data")
ax[1].set_title("True value")
sc2 = ax[1].scatter(X[: , 0] , X[: , 1] , c=y, cmap=cmap,
                    vmin=0, vmax=3, s=20, edgecolors="k")
bounds = [0, 1, 2, 3, 4]
plt.colorbar(sc1 , ticks=bounds , ax = ax[0])
plt.colorbar(sc2 , ticks=bounds , ax = ax[1])
plt.show()

## B4. NN for regression (FLUXNET data of previous lab)

In [None]:
def normalization(x):
    '''
    Min-max scale ``x`` to the range [0, 1].

    Parameters
    ----------
    x : numeric array-like supporting element-wise arithmetic
        (e.g. a NumPy array or a pandas Series).

    Returns
    -------
    ``(x - min) / (max - min)``. If every value is the same (max == min)
    the result is all zeros instead of NaN from a 0/0 division.

    Raises
    ------
    ValueError : if ``x`` is empty.
    '''
    # Vectorised reductions, each computed once.
    lowest = np.min(x)
    span = np.max(x) - lowest
    if span == 0:
        # Constant input: divide by 1 so the result is 0 everywhere.
        span = 1
    return (x - lowest) / span

In [None]:
class MLP_regree(nn.Module):
    '''
      Multilayer perceptron for regression.

      Layer widths are 6 -> 12 -> 32 -> 6 -> 1 with ReLU between the linear
      layers; the single output value has no activation.
    '''

    def __init__(self):
        super().__init__()
        widths = (6, 12, 32, 6, 1)
        modules = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            modules.append(nn.Linear(n_in, n_out))
            modules.append(nn.ReLU())
        # No activation after the output layer.
        modules.pop()
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        # x: tensor whose last dimension has size 6.
        return self.layers(x)

In [None]:
data = pd.read_csv(
    'FLX_US-Ne1_FLUXNET2015_SUBSET_DD_2001-2013_1-4.csv',
    delimiter=",",
    skipinitialspace=True,
    parse_dates=True,
)

# Pick seven columns of the file and give them short names. Column order
# matters: the first six become the model inputs, the last one the target.
column_names = {
    "sw": "SW_IN_F",
    "lw": "LW_IN_F",
    "tmp": "TA_F",
    "pre": "PA_F",
    "u10": "WS_F",
    "vpd": "VPD_F",
    "lh": "LE_CORR",
}
meteo = pd.DataFrame({short: data[long] for short, long in column_names.items()})

data_all = np.array(meteo)

# Rescale each of the seven columns independently, in place.
for col in range(7):
    data_all[:, col] = normalization(data_all[:, col])

# Inputs: columns 0..5; target: column 6 ("lh").
X = data_all[:, 0:6]
y = data_all[:, 6]

In [None]:
# Hold out 20 % of the samples, shuffled, as the test set.
split = train_test_split(X, y, test_size=0.2, shuffle=True)
X_train, X_test, y_train, y_test = split

In [None]:
if __name__ == '__main__':

    # Seed torch's RNG before the loader and model are created.
    torch.manual_seed(4078)
    dataset = Dataset(X_train, y_train)
    trainloader = DataLoader(dataset, batch_size = 500 , shuffle=True)
    mlp = MLP_regree()

    # Mean squared error, optimised with Adam.
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(mlp.parameters(), lr = 0.0001)

    for epoch in range(1000):
        print(f'Starting epoch {epoch + 1}')

        # Running loss since the last report.
        current_loss = 0.0

        for i, (inputs, targets) in enumerate(trainloader):
            # Give the targets the same (batch, 1) shape as the model output.
            targets = targets.reshape((len(targets), 1))

            # Forward pass on fresh gradients.
            optimizer.zero_grad()
            outputs = mlp(inputs)
            loss = loss_function(outputs, targets)

            # Backward pass and parameter update.
            loss.backward()
            optimizer.step()

            current_loss += loss.item()

            # Report every third mini-batch.
            if (i + 1) % 3 != 0:
                continue
            print('Loss after mini-batch %5d: %.3f' %
                  (i + 1, loss.item()))
            current_loss = 0.0

    # Process is complete.
    print('Training process has finished.')

In [None]:
dataset = Dataset(X_test, y_test)
# One batch that holds the whole test set.
testloader = DataLoader(dataset, batch_size = len(dataset))

with torch.no_grad():
    for i, (inputs, targets) in enumerate(testloader):
        # Flatten the model output to 1-D so it lines up with the targets.
        output = mlp(inputs).reshape(-1)
        loss = loss_function(output, targets)

print('MSE: '+ str(loss.item()))

In [None]:
# Plot the test predictions against the true values.
# Flatten the model output to a 1-D NumPy array so it lines up with y_test.
y_pred = output.detach().cpu().numpy().ravel()

# plt.subplots creates the figure itself; no separate plt.figure() call,
# which would leave an extra empty figure behind.
fig, ax = plt.subplots(2, 1, figsize=(6, 12), sharey=True)

# Top panel: true and predicted series over the test samples.
ax[0].plot(y_test, marker='x', c='r', label='True Value')
ax[0].plot(y_pred, c='b', label='Our Prediction on test data')
ax[0].set(xlabel="time (day)", ylabel="evaporation rate (normalized)")
ax[0].legend()

# Bottom panel: predicted vs. true values with a least-squares line.
ax[1].scatter(y_test, y_pred, c='b')
z = np.polyfit(y_test, y_pred, 1)
# The line was fitted as prediction = z[0] * true + z[1], so it has to be
# evaluated (and drawn) over the true values, i.e. the x-axis of the scatter.
x_line = np.array([np.min(y_test), np.max(y_test)])
y_hat = np.poly1d(z)(x_line)
ax[1].plot(x_line, y_hat, "r--", lw=2)

# Score the model predictions themselves. np.sqrt of the MSE is used
# because the `squared=False` argument was removed from scikit-learn.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# Raw strings keep the LaTeX "\;" from being read as a string escape.
text = (rf"$y={z[0]:0.3f}\;x{z[1]:+0.3f}$" + "\n"
        + rf"$R^2 = {r2_score(y_test, y_pred):0.3f}$" + "\n"
        + rf"$RMSE = {rmse:0.3f} $ ")
ax[1].text(0.05, 0.95, text, transform=ax[1].transAxes,
           fontsize=14, verticalalignment='top')
ax[1].set_ylabel('Predict Value')
# Set the x-axis label
ax[1].set_xlabel('True Value')
plt.show()

### B5. Final case
### Now let us implement a complete deep learning project involving several standard steps:
B5.1 feature scaling\
B5.2 hyperparameter of neural network\
B5.3 cross validation\
B5.4 final model optimization\
B5.5 visualization

### B5.1. Feature scaling

In [None]:
def normalization(x):
    '''
    Min-max scale ``x`` to the range [0, 1].

    Parameters
    ----------
    x : numeric array-like supporting element-wise arithmetic
        (e.g. a NumPy array or a pandas Series).

    Returns
    -------
    ``(x - min) / (max - min)``. If every value is the same (max == min)
    the result is all zeros instead of NaN from a 0/0 division.

    Raises
    ------
    ValueError : if ``x`` is empty.
    '''
    # Vectorised reductions, each computed once.
    lowest = np.min(x)
    span = np.max(x) - lowest
    if span == 0:
        # Constant input: divide by 1 so the result is 0 everywhere.
        span = 1
    return (x - lowest) / span

data = pd.read_csv(
    'FLX_US-Ne1_FLUXNET2015_SUBSET_DD_2001-2013_1-4.csv',
    delimiter=",",
    skipinitialspace=True,
    parse_dates=True,
)

# Pick seven columns of the file and give them short names. Column order
# matters: the first six become the model inputs, the last one the target.
column_names = {
    "sw": "SW_IN_F",
    "lw": "LW_IN_F",
    "tmp": "TA_F",
    "pre": "PA_F",
    "u10": "WS_F",
    "vpd": "VPD_F",
    "lh": "LE_CORR",
}
meteo = pd.DataFrame({short: data[long] for short, long in column_names.items()})

data_all = np.array(meteo)

# Rescale each of the seven columns independently, in place.
for col in range(7):
    data_all[:, col] = normalization(data_all[:, col])

# Inputs: columns 0..5; target: column 6 ("lh").
X = data_all[:, 0:6]
y = data_all[:, 6]

# Hold out 20 % of the samples, shuffled, as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True)

### B5.2. Hyperparameter of neural network
Change the number of layers and the number of neurons per layer manually.

In [None]:
class MLP_regree(nn.Module):
    '''
      Multilayer perceptron for regression.

      Layer widths are 6 -> 12 -> 32 -> 6 -> 1 with ReLU between the linear
      layers; the single output value has no activation.
    '''

    def __init__(self):
        super().__init__()
        widths = (6, 12, 32, 6, 1)
        modules = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            modules.append(nn.Linear(n_in, n_out))
            modules.append(nn.ReLU())
        # No activation after the output layer.
        modules.pop()
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        # x: tensor whose last dimension has size 6.
        return self.layers(x)

### B5.3. Cross validation
Optimize or fine-tune further hyperparameters, such as the learning rate and batch size.
In your future work, you can use an optimization algorithm such as "grid search" for hyperparameter tuning.

In [None]:
from sklearn.model_selection import KFold
kfold = KFold(n_splits=5, shuffle=True)
if __name__ == '__main__':

    # Set fixed random number seed
    torch.manual_seed(4078)
    dataset = Dataset(X_train, y_train)

    # Validation MSE of every fold; this is the score the cross-validation
    # is run for.
    fold_mse = []
    # Summed (not averaged) squared error, so folds whose last batch is
    # smaller are still averaged correctly over their samples.
    sum_squared_error = nn.MSELoss(reduction='sum')

    for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):

        print(f'FOLD {fold}')
        print('--------------------------------')
        # Each loader draws only from the indices of its side of the split.
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
        test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
        trainloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=100, sampler=train_subsampler )
        testloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=100 , sampler=test_subsampler)

        # Fresh model and optimizer for every fold.
        mlp = MLP_regree()
        loss_function = nn.MSELoss()
        optimizer = torch.optim.Adam(mlp.parameters(), lr = 0.0001)

        # Run the training loop
        for epoch in range(0, 100):

            # Print epoch
            print(f'Starting epoch {epoch + 1}')

            # Iterate over the DataLoader for training data
            for i, data in enumerate(trainloader, 0):
                # Get and prepare inputs
                inputs, targets = data
                targets = targets.reshape((targets.shape[0], 1))

                # Zero the gradients
                optimizer.zero_grad()
                outputs = mlp(inputs)
                loss = loss_function( outputs , targets )

                # Perform backward pass
                loss.backward()

                # Perform optimization
                optimizer.step()

                # Print statistics
                if i % 10 == 0:
                    print('Loss after mini-batch %5d: %.3f' %
                          (i + 1, loss.item()))

        # Evaluate the trained model on the held-out part of this fold.
        squared_error = 0.0
        with torch.no_grad():
            for val_inputs, val_targets in testloader:
                val_targets = val_targets.reshape((val_targets.shape[0], 1))
                squared_error += sum_squared_error(
                    mlp(val_inputs), val_targets).item()
        fold_mse.append(squared_error / len(test_ids))
        print(f'Validation MSE for fold {fold}: {fold_mse[-1]:.4f}')

    # Process is complete.
    print('Training process has finished.')
    print(f'Average validation MSE over {len(fold_mse)} folds: '
          f'{np.mean(fold_mse):.4f}')

### B5.4. Final model optimization
Best hyperparameter combinations based on cross validation performance

In [None]:
if __name__ == '__main__':
    # Mean training loss of every epoch, plotted at the end.
    loss_values = []
    # Set fixed random number seed
    torch.manual_seed(4078)
    dataset = Dataset(X_train, y_train)
    trainloader = DataLoader(dataset, batch_size = 500 , shuffle=True)
    mlp = MLP_regree() # model with optimal hyperparameter combinations

    # Define the loss function and optimizer
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(mlp.parameters(), lr = 0.0001)

    # Run the training loop
    for epoch in range(0, 100):

        # Print epoch
        print(f'Starting epoch {epoch + 1}')

        # Sum of the mini-batch losses of this epoch
        current_loss = 0.0

        # Iterate over the DataLoader for training data
        for i, data in enumerate(trainloader, 0):
            # Get and prepare inputs
            inputs, targets = data
            targets = targets.reshape((targets.shape[0], 1))

            # Zero the gradients
            optimizer.zero_grad()
            outputs = mlp(inputs)
            loss = loss_function(outputs , targets )

            # Perform backward pass
            loss.backward()

            # Perform optimization
            optimizer.step()
            current_loss += loss.item()

            # Print statistics
            if i % 5 == 0:
                print('Loss after mini-batch %5d: %.3f' %
                      (i + 1, loss.item()))

        # Record one value per epoch: the mean of its mini-batch losses.
        # (Appending inside the batch loop stored single batch losses
        # divided by the number of batches, i.e. a mis-scaled curve.)
        loss_values.append(current_loss / len(trainloader))

    # Process is complete.
    print('Training process has finished.')
    plt.plot(loss_values)
    plt.xlabel('epoch')
    plt.ylabel('mean training loss (MSE)')

### B5.5. Visualization

In [None]:
# performance on test data
dataset = Dataset(X_test, y_test)
# Use one batch that holds the whole test set, so `output` always has one
# prediction per test sample. With a fixed batch size only the last batch
# would survive the loop once the test set grows beyond it.
testloader = DataLoader(dataset, batch_size = len(dataset))

with torch.no_grad():
    for i, data in enumerate(testloader, 0):
        # Get and prepare inputs
        inputs, targets = data
        output = mlp(inputs)


# Plot the test predictions against the true values.
# Flatten the model output to a 1-D NumPy array so it lines up with y_test.
y_pred = output.detach().cpu().numpy().ravel()

# plt.subplots creates the figure itself; no separate plt.figure() call,
# which would leave an extra empty figure behind.
fig, ax = plt.subplots(2, 1, figsize=(6, 12), sharey=True)

# Top panel: true and predicted series over the test samples.
ax[0].plot(y_test, marker='x', c='r', label='True Value')
ax[0].plot(y_pred, c='b', label='Our Prediction on test data')
ax[0].set(xlabel="time (day)", ylabel="evaporation rate (normalized)")
ax[0].legend()

# Bottom panel: predicted vs. true values with a least-squares line.
ax[1].scatter(y_test, y_pred, c='b')
z = np.polyfit(y_test, y_pred, 1)
# The line was fitted as prediction = z[0] * true + z[1], so it has to be
# evaluated (and drawn) over the true values, i.e. the x-axis of the scatter.
x_line = np.array([np.min(y_test), np.max(y_test)])
y_hat = np.poly1d(z)(x_line)
ax[1].plot(x_line, y_hat, "r--", lw=2)

# Score the model predictions themselves. np.sqrt of the MSE is used
# because the `squared=False` argument was removed from scikit-learn.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# Raw strings keep the LaTeX "\;" from being read as a string escape.
text = (rf"$y={z[0]:0.3f}\;x{z[1]:+0.3f}$" + "\n"
        + rf"$R^2 = {r2_score(y_test, y_pred):0.3f}$" + "\n"
        + rf"$RMSE = {rmse:0.3f} $ ")
ax[1].text(0.05, 0.95, text, transform=ax[1].transAxes,
           fontsize=14, verticalalignment='top')
ax[1].set_ylabel('Predict Value')
# Set the x-axis label
ax[1].set_xlabel('True Value')
plt.show()