In [None]:
import os
from typing import Tuple, List, Dict, Any
import numpy as np
import matplotlib.pyplot as plt

# sklearn
from sklearn.datasets import make_moons
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Torch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

# Keras (Torch) as backend
os.environ["KERAS_BACKEND"] = "torch"   # IF NOT SPECIFIED, TensorFlow will be used as BACKEND
import keras
from keras.optimizers import SGD

# Under construction
<font color="red"><b>Author: Wim R.M. Cardoen (wcardoen@gmail.com)</b></font><br>
If you stumble by chance on this website and find errors, have comments/suggestions, please send 
an email to the author!<br>

# Logistic regression using Keras & PyTorch
* For <font color="green"><b>more advanced networks</b></font>, most AI/deep learning practitioners:
  - neither derive the underlying equations,
  - nor implement these equations from scratch,<br>
  although doing so is not particularly complex.
* However, implementing <font color="green"><b>highly performant code</b></font>
  for the general case is significantly more <font color="green"><b>demanding and time-consuming</b></font>.<br>It requires:
  - Mastery of a <font color="red"><b>compiled language</b></font> (e.g., `C++`)
  - Understanding of <font color="red"><b>parallel computing</b></font> (multi-node, multi-GPU setups).
  - A solid foundation in <font color="red"><b>algorithms and numerical analysis</b></font>.
* To address these kinds of challenges, humanity adopted over time a
  <a href="https://www.marxists.org/reference/archive/smith-adam/works/wealth-of-nations/book01/ch01.htm"><b>division of labour</b></a>.   
* Instead of building everything from scratch, practitioners rely on <font color="green"><b>frameworks</b></font>.<br>The most commonly used frameworks are currently:
  - <a href="https://pytorch.org/"><b>PyTorch</b></a>
  - <a href="https://www.tensorflow.org/"><b>TensorFlow</b></a>
  - <a href="https://docs.jax.dev/en/latest/"><b>Jax</b></a>
  - <a href="https://keras.io/"><b>Keras</b></a>
  
Our goal is to implement the logistic regression model (<a href="./lecture1.ipynb"><b>Lecture 1</b></a>) using Keras & PyTorch.

We will proceed in two different ways:
1. by using <font color="green"><b>Keras</b></font> (PyTorch as backend) : more user-friendly 
2. by using <font color="green"><b>PyTorch</b></font> as such: low-level but versatile.

In <a href="./lecture2.ipynb"><b>Lecture 2</b></a> we will use Keras, but we will also provide its lower-level counterpart (as an addendum).

In [None]:
# Seed every random number generator used further down, so that
# "Restart Kernel -> Run All" reproduces the same initial weights and the
# same mini-batch shuffling. keras.utils.set_random_seed() is documented to
# seed Python, NumPy and the backend framework; torch is seeded explicitly
# as well because the pure PyTorch part of the notebook depends on it.
SEED = 42
keras.utils.set_random_seed(SEED)
torch.manual_seed(SEED)

# Generate a data set (two interleaving half circles with Gaussian noise)
X, y = make_moons(n_samples=500, noise=0.25, random_state=SEED)
print(f"Generate the data set ...")
print(f"  X.shape:{X.shape}")
print(f"  y.shape:{y.shape}")

# Split the data in training and a test set.
test_ratio = 0.30
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_ratio, random_state=SEED)
print(f"Splitting the data set (splitting ratio:{test_ratio})")
print(f"  Training Data Set:")
print(f"    X_train.shape : {X_train.shape}")
print(f"    y_train.shape : {y_train.shape}")
print(f"  Test Data Set:")
print(f"    X_test.shape  : {X_test.shape}")
print(f"    y_test.shape  : {y_test.shape}")

# 1.Keras (with PyTorch backend)

### A.Set up the model

In [None]:
# Setting up the model: one sigmoid unit fed by the 2 input features,
# i.e. a plain logistic regression.
model = keras.Sequential([
          keras.layers.Input(shape=(2,)),                      # Input layer: input vector (2 features)
          keras.layers.Dense(units=1, activation='sigmoid')])  # Output layer: 1 Class

# summary() prints the table itself and returns None; wrapping it in print()
# would add a stray "None" line to the output.
model.summary()

# Info on kernel_regularization, etc.
print(f"Info on the layers ...")
for layer in model.layers:
    print(f"  Layer:'{layer.name}'")
    print(layer.get_config())

### B.Compile the model

In [None]:
# Compilation of the model: plain SGD minimising the binary cross entropy,
# with the accuracy tracked as additional metric.
optimizer = SGD(learning_rate=0.075)
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Report what has been registered on the model
print("Compilation models")
for label, value in (
    ("Optimizer", model.optimizer),
    ("Optimizer Config", model.optimizer.get_config()),
    ("Loss Function", model.loss),
    ("Metrics", model.metrics),
):
    print(f"  {label}: {value}")

### C.Train the model

In [None]:
# Fit the model (no progress output) and keep the per-epoch training curves
history = model.fit(X_train, y_train, epochs=1000, verbose=0)
loss, accuracy = (history.history[key] for key in ('loss', 'accuracy'))
it = np.arange(1, len(loss) + 1)    # epoch numbers 1..len(loss)

In [None]:
# Training loss as a function of the epoch number
plt.plot(it, loss, "-")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.title("Loss of the training data")
plt.show()

In [None]:
# Final weights and bias
print("Parameters at the end of the training")
for layer in model.layers:
    # get_weights() is unpacked into exactly two arrays here
    weights, biases = layer.get_weights()
    print(f"  Layer   : {layer.name}\n"
          f"  Weights : {np.ravel(weights)}\n"
          f"  Bias    : {biases}")

### D.Evaluation of the model

In [None]:
# Loss and accuracy of the trained Keras model on the held-out test set
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test)
print("Evaluation of the test set")
print(f"  Accuracy : {test_accuracy:8.4f}\n"
      f"  Loss     : {test_loss:8.4f}")

# 2.PyTorch

### A.Loading the data
* We have 2 PyTorch data classes (to be discussed later):
  + data.Dataset : to load/create data in a class<br>
    requires: \_\_init\_\_(), \_\_len\_\_(), \_\_getitem\_\_()
  + data.DataLoader: to load data in batches
* For the time being, we only convert the NumPy arrays into PyTorch tensors.

In [None]:
# Generate the PyTorch Tensors from the NumPy Data
# Note: torch.float32 is requested explicitly for both features and labels.
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train, X_test)
)
# The labels are turned into 2D column vectors of shape (n_samples, 1)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.float32).reshape(-1, 1)
    for labels in (y_train, y_test)
)

In [None]:
# Check Conversion from NumPy to PyTorch Tensor:
# print the shape and the rows [START, END) of each array/tensor
START, END = 0, 2
for title, named_arrays in (
    ("NumPy data::", (("X_train", X_train), ("y_train", y_train))),
    ("\nPyTorch data::", (("X_train_tensor", X_train_tensor),
                          ("y_train_tensor", y_train_tensor))),
):
    print(title)
    for name, values in named_arrays:
        print(f"  {name}: {values.shape}\n{values[START:END]}")

### B.The Deep Neural Net (DNN) Model

In [None]:
# Logistic regression expressed as a PyTorch module
class LogisticRegressionModel1(nn.Module):
    """
    Logistic regression: one linear layer followed by a sigmoid.

    Attributes
    ----------
    linear : nn.Linear mapping `num_inputs` features onto 1 output.
    act_fn : nn.Sigmoid applied to the output of `linear`.
    """

    def __init__(self, num_inputs):
        """Create the layer and its activation for `num_inputs` features."""
        super().__init__()
        self.linear = nn.Linear(in_features=num_inputs, out_features=1)
        self.act_fn = nn.Sigmoid()

    def forward(self, x):
        """Forward propagation: return sigmoid(linear(x))."""
        return self.act_fn(self.linear(x))

In [None]:
# Instantiate the PyTorch logistic regression for the 2 input features
model1 = LogisticRegressionModel1(num_inputs=2)
print("  Logistic Model:", model1, sep="")

In [None]:
# In PyTorch, you can use either the parameters() function
# or the named_parameters() function
for name, param in model1.named_parameters():
    print(f"Name:{name:20s} -> param:{param.shape}\n{param.data}\n")

### C.Loss/Objective function
* In order to find the <font color="green"><b>optimal parameters</b></font> for the weights and bias, we need
  to have an <font color="green"><b>objective function</b></font> (a.k.a Loss function)
* There are several options:
  + <a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.BCELoss.html#bceloss"><b>nn.BCELoss()</b></a>: Binary cross entropy => inputs need to be probabilities in $[0,1]$
  + <a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.BCEWithLogitsLoss.html#torch.nn.BCEWithLogitsLoss"><b>nn.BCEWithLogitsLoss()</b></a>: Numerically more stable because the sigmoid and the loss function are combined into a single operation.

In [None]:
# Binary Cross Entropy criterion, averaged over the samples
# ("mean" is the default reduction of nn.BCELoss, spelled out here)
loss_fn1 = nn.BCELoss(reduction="mean")

### D.Optimization
* There are several methods to <font color="green"><b>optimize</b></font> the Loss function/Objective function.<br>
  - In this example we will use the <a href="https://docs.pytorch.org/docs/stable/generated/torch.optim.SGD.html#sgd"><b>Stochastic Gradient Descent (SGD)</b></a> method<br>
    (<a href="https://docs.pytorch.org/docs/stable/optim.html"><b>torch.optim module</b></a>).
  - Later on, we will describe more powerful <font color="green"><b>optimization algorithms</b></font> (Adam, ...).
* Useful methods:
  + step(): updates the parameters
  + zero_grad() : sets the gradients of ALL optimized parameters to zero.

In [None]:
# SGD optimizer acting on the parameters (weights & bias) of model1
optim1 = optim.SGD(params=model1.parameters(), lr=0.005)

### E.Assembling the components \{A,B,C,D\}

#### 1.Training
* Goal: Obtain the optimized parameters i.e. <font color="green"><b>weight matrix</b></font> and <font color="green"><b>bias</b></font>
* If the data set is **small**, then we will use **all** the training data at **once**.
* Terminology:
  - **One** complete iteration over **all** training data: <font color="green"><b>epoch</b></font>
  - For **larger** training data sets, each <font color="green"><b>epoch</b></font> is split into <font color="green"><b>batches</b></font>.<br>
    * The gradient and the parameters are <font color="blue"><b>updated</b></font> after every batch (points are selected <font color="blue"><b>randomly</b></font>):<br>
      <font color="green"><b>stochastic gradient descent (SGD)</b></font>
    * The batch size is a <font color="green"><b>hyperparameter</b></font>. 

In [None]:
def train_model(X_train_tensor, y_train_tensor, model, loss_fn, optim, num_epochs=100000 , delta_print=10000):
    """
    Train `model` using ALL training data at once in every epoch.

    Each epoch consists of one forward pass over `X_train_tensor`, the
    evaluation of the loss, one backward pass and one optimizer step.

    Parameters
    ----------
    X_train_tensor : input handed to `model(...)`.
    y_train_tensor : labels handed to `loss_fn` as second argument.
    model          : callable model providing `train()`.
    loss_fn        : callable `loss_fn(output, labels)` returning a loss
                     which supports `backward()` and `item()`.
    optim          : optimizer providing `zero_grad()` and `step()`
                     (the name shadows the `torch.optim` module inside
                     this function only).
    num_epochs     : number of epochs (>= 1).
    delta_print    : the loss is printed for the first epoch and for every
                     `delta_print`-th epoch; 0 or None switches the
                     progress output off.

    Returns
    -------
    float : the loss evaluated in the last epoch, i.e. before the
            parameter update of that epoch.

    Raises
    ------
    ValueError : if `num_epochs` is smaller than 1 (there would be no
                 loss to return).
    """
    if num_epochs < 1:
        raise ValueError(f"num_epochs must be >= 1 (got {num_epochs})")

    # Set model to train mode
    # Strictly not necessary for our case
    model.train()

    # Loop over the epochs
    for epoch in range(num_epochs):

        # PART A: FORWARD PROPAGATION ( => )
        # Step 1: Generate the output (activation of the linear layer)
        output = model(X_train_tensor)

        # Step 2: Use the activation of the last layer & the labels
        #         to calculate the loss.
        loss = loss_fn(output, y_train_tensor)

        # PART B: BACK PROPAGATION ( <= )
        # Step 3: Calculate the gradients of the parameters
        optim.zero_grad()   # Init. the gradients to ZERO!!
        loss.backward()     # Calc. grad. of param.

        # Step 4: Adjust the parameters
        optim.step()

        # Progress report; the truthiness test avoids a modulo by zero
        if delta_print and ((epoch+1)%delta_print == 0 or epoch==0):
            print(f"  Epoch {epoch+1}/{num_epochs}  Loss:{loss.item():.6f}")

    return loss.item()

In [None]:
# Train the PyTorch model (default number of epochs / print interval)
final_loss1 = train_model(
    X_train_tensor,
    y_train_tensor,
    model=model1,
    loss_fn=loss_fn1,
    optim=optim1,
)
print(f"Loss in the last step:{final_loss1:.6f}")

In [None]:
# HOW TO Check the final parameters
#   Method 1: access the attributes of the linear layer directly
print("METHOD 1::")
print(f"Weights::\n{model1.linear.weight}\n\n"
      f"Bias   ::\n{model1.linear.bias}")

#   Method 2: loop over the entries of the state dictionary
print("\nMETHOD 2::")
for name, param in model1.state_dict().items():
    print(f"{name} -> {param.shape}\n  {param.data}")

#### 2.Save/load the model to & from disk
* To <font color="green"><b>save</b></font> an object to disk, use <a href="https://docs.pytorch.org/docs/stable/generated/torch.save.html#torch-save"><b>torch.save()</b></a>
* To <font color="green"><b>load</b></font> an object from disk, use <a href="https://docs.pytorch.org/docs/stable/generated/torch.load.html#torch-load"><b>torch.load()</b></a>

In [None]:
# Save the trained PyTorch model (model1, NOT the Keras `model` of part 1)
# to disk and read it back.
filename='linreg1.pth'
torch.save(model1, filename)
# NOTE(review): weights_only=False unpickles arbitrary Python objects. This is
# acceptable here only because the file was written by this very notebook;
# never load a file from an untrusted source this way.
newmodel = torch.load(filename, weights_only=False)

#### 3.Obtain predictions

In [None]:
def test_model(X_tensor, y_tensor, model):

    model.eval()
    with torch.no_grad():
        res_tensor = model(X_tensor)
        ypred_tensor =(res_tensor>0.5).float()
    return ypred_tensor    

def get_accuracy(y_pred, y):
    """
    Return the percentage (0.0 - 100.0) of predictions equal to the labels.

    Both arguments are flattened before the comparison. Without this, a
    column vector of shape (N, 1) compared to a vector of shape (N,) would
    be broadcast to an (N, N) matrix and yield a meaningless "accuracy".

    Parameters
    ----------
    y_pred : predicted labels (anything providing `reshape` and `shape`).
    y      : true labels, same number of elements as `y_pred`.

    Returns
    -------
    float : 100 * (number of matching elements) / (number of elements).

    Raises
    ------
    ValueError : if the inputs are empty or differ in number of elements.
    """
    pred_flat = y_pred.reshape(-1)
    true_flat = y.reshape(-1)
    num_items = pred_flat.shape[0]
    if num_items != true_flat.shape[0]:
        raise ValueError(
            f"y_pred and y differ in size: {num_items} vs {true_flat.shape[0]}")
    if num_items == 0:
        raise ValueError("cannot compute the accuracy of an empty prediction")

    num_ok = float((pred_flat == true_flat).sum())
    return (num_ok / num_items) * 100.0

In [None]:
# Predictions of the reloaded model on the training and on the test set
y_trainpred_tensor = test_model(X_train_tensor, y_train_tensor, model=newmodel)
y_testpred_tensor = test_model(X_test_tensor, y_test_tensor, model=newmodel)

# Corresponding accuracies (in %)
acc_train = get_accuracy(y_pred=y_trainpred_tensor, y=y_train_tensor)
acc_test = get_accuracy(y_pred=y_testpred_tensor, y=y_test_tensor)

print(f"Accuracy train:{acc_train:8.4f}")
print(f"Accuracy test:{acc_test:8.4f}")

### F. Alternative implementation within PyTorch (numerical stability)

In the previous section, we implemented the 
* <font color="green"><b>activation function</b></font> $a_i$, i.e.<br>
  $\begin{eqnarray}
     a_i & = & \sigma(z_i)\\
         & = & \frac{1}{1+e^{-z_i}} \\
  \end{eqnarray}$
* loss function $\mathcal{L}^{(i)}$, i.e.<br>
  $\begin{eqnarray}
       \mathcal{L}^{(i)} & = & - \bigg [ y_i \log(a_i) + (1-y_i)\log(1-a_i) \bigg ] 
  \end{eqnarray}$<br>
separately.

To render the <font color="green"><b>optimization numerically more stable</b></font> the <font color="green"><b>activation and the loss function</b></font> can be combined into **one** function.<br>
The corresponding loss function bears the name <a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.BCEWithLogitsLoss.html#torch.nn.BCEWithLogitsLoss"><b>BCEWithLogitsLoss</b></a>
and is given by:

$\begin{eqnarray}
  \mathcal{L}^{(i)} & = & -\bigg [ y_i \log(a_i) + (1-y_i)\log(1-a_i) \bigg ]\\ 
                    & = & z_i(1-y_i) + \log(1+e^{-z_i}) \\
\end{eqnarray}$

#### **Exercise 1**:
* Implement the `class LogisticRegressionModel2(nn.Module)`

In [None]:
# Define the logistic Regression module using PyTorch 
class LogisticRegressionModel2(nn.Module):
    """
    Exercise 1 skeleton: logistic regression meant to be trained with
    BCEWithLogitsLoss (see the text above on the combined activation and loss).

    The class is syntactically valid as it stands, so the notebook can be
    parsed; `forward` raises NotImplementedError until the exercise is done.
    """

    def __init__(self, num_inputs):

        # The class inherits from the class nn.Module
        super(LogisticRegressionModel2,self).__init__()

        # <--- YOUR CODE

    def forward(self, x):

        # Applies the forward propagation
        # <--- YOUR CODE
        raise NotImplementedError(
            "Exercise 1: implement LogisticRegressionModel2.forward()")

In [None]:
# %load solutions/kerastorch/sol_ex1.py

Check the model <font color="blue"><b>(ante optimization)</b></font>

In [None]:
# Instantiate the second model and list its (still untrained) parameters
model2 = LogisticRegressionModel2(num_inputs=2)
print("  Logistic Model:", model2, sep="")

for name, param in model2.named_parameters():
    print(f"Name:{name:20s} -> param:{param.shape}\n{param.data}\n")

#### **Exercise 2**:
* Implement the `loss_fn2` using BCEWithLogitsLoss

In [None]:
# Here comes your code to define the BCEWithLogitsLoss
# (None is only a placeholder that keeps this cell valid Python)
loss_fn2 = None  # <--- Here comes your code

In [None]:
# %load solutions/kerastorch/sol_ex2.py

In [None]:
# Same optimizer settings as for model1, now acting on the parameters of model2
optim2 = optim.SGD(params=model2.parameters(), lr=0.005)
final_loss2 = train_model(
    X_train_tensor,
    y_train_tensor,
    model=model2,
    loss_fn=loss_fn2,
    optim=optim2,
)
print(f"Loss in the last step:{final_loss2:.6f}")

Check the model <font color="blue"><b>(post optimization)</b></font>

In [None]:
# Parameters of model2 after the optimization
for name, param in model2.named_parameters():
    print(f"Name:{name:20s} -> param:{param.shape}\n{param.data}\n")