# **3. - A Neural Network**

# **3. Neural Network for Regression & Binary Classification**

In [None]:
import math
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
from torch.nn import functional as F

We're going to build neural networks for regression and binary classification using `PyTorch`.

## **Task 1**
---

- Create an array & matrix using `torch`

In [None]:
# Build a length-3 vector filled with zeros
bias = torch.zeros(3)

# Report the tensor and its Python type, one item per line
print("data:", bias, "", "data type:", type(bias), sep="\n")

data:
tensor([0., 0., 0.])

data type:
<class 'torch.Tensor'>


In [None]:
# Build a 3x4 matrix of values drawn with torch.rand
weight = torch.rand((3, 4))

# Report the tensor and its Python type, one item per line
print("data:", weight, "", "data type:", type(weight), sep="\n")

data:
tensor([[7.7397e-01, 7.9601e-01, 8.6669e-01, 7.6944e-01],
        [7.3907e-01, 1.0657e-04, 4.6490e-01, 8.4913e-01],
        [9.1141e-01, 2.9187e-01, 5.1530e-01, 6.7008e-01]])

data type:
<class 'torch.Tensor'>


In [None]:
# Fix the torch RNG seed so the random matrix below is reproducible
torch.manual_seed(123)

weight = torch.rand((5, 2))

# Report the tensor and its Python type, one item per line
print("data:", weight, "", "data type:", type(weight), sep="\n")

data:
tensor([[0.2961, 0.5166],
        [0.2517, 0.6886],
        [0.0740, 0.8665],
        [0.1366, 0.1025],
        [0.1841, 0.7264]])

data type:
<class 'torch.Tensor'>


## **Task 2**
---

- Create a class to represent the pre-activation (linear) function

- Create classes to represent the activation functions
  - For regression: `Linear`
  - For binary classification: `Sigmoid`
  - For hidden layers: `ReLU` and `Tanh`

- Each class has
  - an `__init__` method that initializes the model parameters (for `Linear` only)
  - a `__call__` method that returns the output of the activation function
  - a `parameters` method that returns the model parameters

In [None]:
# For pre-activation layer (including linear activation function)
class Linear:
    """Affine layer computing ``x @ weight`` plus an optional bias."""

    def __init__(self, feature_in, feature_out, bias=True):
        """Create the layer parameters.

        Args:
            feature_in: number of input features (rows of the weight matrix).
            feature_out: number of output features (columns of the weight matrix).
            bias: when truthy, also create a zero-initialised bias vector;
                otherwise the layer has no bias.
        """
        # Sample with torch.rand first and cast afterwards, so seeded values
        # are the float32 draws widened to float64.
        self.weight = torch.rand((feature_in, feature_out)).to(torch.float64)
        self.bias = torch.zeros(feature_out).to(torch.float64) if bias else None

    def __call__(self, x):
        """Apply the layer to ``x``, cache the result in ``self.out`` and return it."""
        result = x @ self.weight
        if self.bias is not None:
            result = result + self.bias
        self.out = result
        return result

    def parameters(self):
        """Return the layer tensors as a list: weight first, then bias if present."""
        params = [self.weight]
        if self.bias is not None:
            params.append(self.bias)
        return params

# For sigmoid layer
class Sigmoid:
    """Element-wise sigmoid activation; holds no parameters."""

    def __call__(self, x):
        """Apply the sigmoid to ``x``, cache the result in ``self.out`` and return it."""
        activated = x.sigmoid()
        self.out = activated
        return activated

    def parameters(self):
        """Return an empty list, since this layer has nothing to train."""
        return list()

# For ReLU layer
class ReLU:
    """Element-wise ReLU activation; holds no parameters."""

    def __call__(self, x):
        """Apply ReLU to ``x``, cache the result in ``self.out`` and return it."""
        activated = x.relu()
        self.out = activated
        return activated

    def parameters(self):
        """Return an empty list, since this layer has nothing to train."""
        return list()

# For TanH layer
class Tanh:
    """Element-wise hyperbolic-tangent activation; holds no parameters."""

    def __call__(self, x):
        """Apply tanh to ``x``, cache the result in ``self.out`` and return it."""
        activated = x.tanh()
        self.out = activated
        return activated

    def parameters(self):
        """Return an empty list, since this layer has nothing to train."""
        return list()


Let's validate

In [None]:
# Seed the RNG so the layer's random weights are reproducible
torch.manual_seed(123)

# Linear layer mapping 5 inputs to 2 outputs, with a bias term
lyr = Linear(5, 2, bias=True)

# Forward pass on a single float64 sample with 5 features
xs = torch.tensor([1., 2., 3., 4., 5.], dtype=torch.float64)
ys = lyr(xs)

# Show the layer object, its parameter tensors and the forward-pass result
print('Layer object :', lyr)
print('Layer param  :\n', lyr.parameters())
print('Called param :', ys)

Layer object : <__main__.Linear object at 0x7c0abd0b2140>
Layer param  :
 [tensor([[0.2961, 0.5166],
        [0.2517, 0.6886],
        [0.0740, 0.8665],
        [0.1366, 0.1025],
        [0.1841, 0.7264]], dtype=torch.float64), tensor([0., 0.], dtype=torch.float64)]
Called param : tensor([2.4880, 8.5354], dtype=torch.float64)


In [None]:
# Apply a Tanh activation to the output of the Linear layer computed above
next_lyr = Tanh()

# Forward pass: feed the linear output `ys` through tanh
ys_tanh = next_lyr(ys)

# Tanh.parameters() returns an empty list, so no parameters are printed
print('Layer object :', next_lyr)
print('Layer param  :', next_lyr.parameters())
print('Called param :', ys_tanh)

Layer object : <__main__.Tanh object at 0x7c0abd0b2920>
Layer param  : []
Called param : tensor([0.9863, 1.0000], dtype=torch.float64)


Nice!

## **Task 3**
---

- From the previous tasks, we know that we can create a sequential operation to do a feed forward network.
- Create a `Sequential` class to build a feed forward neural network where we can manually design the network architectures.
- E.g.
```python
layers = [
    Linear(3, 4), ReLU(),  # 1st layer, 3 inputs 4 neuron ReLU act. func.
    Linear(4, 2), ReLU(),  # 2nd layer, 4 inputs 2 neurons ReLU
    Linear(2, 1)           # Output layer, for Regression cases
]
```

In [None]:
import copy

class Sequential:
    """Feed-forward container that applies a list of layers in order."""

    def __init__(self, layers):
        """Store the layers.

        Args:
            layers: ordered collection of layer objects. Each one must be
                callable on a tensor and expose ``parameters()`` returning
                a list of tensors.
        """
        self.layers = layers

    def __call__(self, x):
        """Run ``x`` through every layer in order and return the result.

        The input is deliberately not deep-copied. Each step only rebinds a
        local name to the layer's output, so the caller's tensor is never
        modified as long as the layers do not operate in place. Skipping the
        copy also lets non-leaf tensors be used as input (deep-copying them
        raises ``RuntimeError``) and keeps the autograd link to the input.

        Args:
            x: input tensor accepted by the first layer.

        Returns:
            The output of the last layer (``x`` itself when there are no
            layers). The same object is cached in ``self.out``.
        """
        out = x
        for layer in self.layers:
            out = layer(out)
        self.out = out
        return self.out

    def parameters(self):
        """Return the parameters of all layers as one flat list, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]


Let's validate

In [None]:
# Target architecture:
#  - 5 input features
#  - 3 hidden layers of 4, 4 and 3 neurons, each followed by Tanh
#  - 1 linear output neuron
torch.manual_seed(123)

# Layers are listed in forward order; the Linear layers are constructed in
# this order, which fixes the sequence of random draws after the seed.
layers = [
    Linear(5, 4),
    Tanh(),
    Linear(4, 4),
    Tanh(),
    Linear(4, 3),
    Tanh(),
    Linear(3, 1),
]

# Wrap the layers in a sequential model
mdl = Sequential(layers)

print(mdl)

<__main__.Sequential object at 0x7c0abd0b1000>


In [None]:
# Forward pass of a single 5-feature float64 sample through the whole network
xs = torch.tensor([1., 2., 3., 4., 5.], dtype=torch.float64)
ys = mdl(xs)

# Show the model, every parameter tensor it owns, and the prediction
print('Layer object :', mdl)
print('Layer param  :\n', mdl.parameters())
print('Called param :', ys)

Layer object : <__main__.Sequential object at 0x7c0abd0b1000>
Layer param  :
 [tensor([[0.2961, 0.5166, 0.2517, 0.6886],
        [0.0740, 0.8665, 0.1366, 0.1025],
        [0.1841, 0.7264, 0.3153, 0.6871],
        [0.0756, 0.1966, 0.3164, 0.4017],
        [0.1186, 0.8274, 0.3821, 0.6605]], dtype=torch.float64), tensor([0., 0., 0., 0.], dtype=torch.float64), tensor([[0.8536, 0.5932, 0.6367, 0.9826],
        [0.2745, 0.6584, 0.2775, 0.8573],
        [0.8993, 0.0390, 0.9268, 0.7388],
        [0.7179, 0.7058, 0.9156, 0.4340]], dtype=torch.float64), tensor([0., 0., 0., 0.], dtype=torch.float64), tensor([[0.0772, 0.3565, 0.1479],
        [0.5331, 0.4066, 0.2318],
        [0.4545, 0.9737, 0.4606],
        [0.5159, 0.4220, 0.5786]], dtype=torch.float64), tensor([0., 0., 0.], dtype=torch.float64), tensor([[0.9455],
        [0.8057],
        [0.6775]], dtype=torch.float64), tensor([0.], dtype=torch.float64)]
Called param : tensor([2.2478], dtype=torch.float64)


## **Task 4**
---

- Now, let's create a loss function.
- In regression task, we define loss as a mean squared error

$$
MSE(\mathbf{y}, \hat{\mathbf{y}})
=
\cfrac{1}{N}
\sum_{i=1}^{N}
(y_{i} - \hat{y}_{i})^{2}
$$

- Name the function `mse_loss`

In [None]:
def mse_loss(target, input):
    """Return the mean squared error between two tensors.

    The error is symmetric, so it does not matter which argument holds the
    predictions and which holds the ground truth.

    When the two tensors have the same number of elements and differ only by
    size-1 dimensions (for example ``(N,)`` against ``(N, 1)``), ``input`` is
    reshaped to ``target``'s shape first. Without this, broadcasting would
    silently build an ``(N, N)`` matrix of pairwise differences and average
    that instead. Any other shape combination follows normal broadcasting.

    Args:
        target: tensor of values.
        input: tensor of values to compare against ``target``.

    Returns:
        A 0-dimensional tensor holding the mean of the squared differences.
    """
    if target.shape != input.shape and target.numel() == input.numel():
        if target.squeeze().shape == input.squeeze().shape:
            input = input.reshape(target.shape)
    return torch.mean((target - input) ** 2)

Let's check

In [None]:
# Sanity check: every element differs by exactly 1, so the answer should be 1.
mse_loss(target = torch.tensor([1., 1., 1., 1.]),
         input = torch.tensor([0., 0., 0., 0.]))

tensor(1.)

## **Task 5**
---

- Let's perform a prediction with a Neuron
- Optimize your model parameter using Gradient Descent

*Load Library*

In [None]:
# Load library
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

*Prepare data*

In [None]:
# Load the diabetes dataset as a (features, target) pair
X, y = load_diabetes(return_X_y=True)

In [None]:
# Split data in two steps:
# 1) hold out 20% of the rows (temporarily named *_test)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size = 0.2,
                                                    random_state=42)
# 2) split that hold-out in half into validation and test sets;
#    X_test / y_test are rebound here, so the order of the two calls matters
X_valid, X_test, y_valid, y_test = train_test_split(X_test, y_test,
                                                    test_size = 0.5,
                                                    random_state = 42)

# Show the resulting shapes of the three feature matrices
X_train.shape, X_valid.shape, X_test.shape

((353, 10), (44, 10), (45, 10))

In [None]:
# Fit a scaler on the training features only
scaler = StandardScaler()
scaler.fit(X_train)

In [None]:
# Transform every split with the scaler fitted on the training features
X_train_clean = scaler.transform(X_train)
X_valid_clean = scaler.transform(X_valid)
X_test_clean = scaler.transform(X_test)

In [None]:
# Convert the scaled feature matrices to torch tensors
Xs_train, Xs_valid, Xs_test = (
    torch.tensor(features)
    for features in (X_train_clean, X_valid_clean, X_test_clean)
)

# Convert the targets too, reshaped into column vectors of shape (N, 1)
ys_train, ys_valid, ys_test = (
    torch.tensor(targets).reshape(-1, 1)
    for targets in (y_train, y_valid, y_test)
)

*Create baseline model*

In [None]:
from sklearn.dummy import DummyRegressor

In [None]:
# Baseline regressor using the 'mean' strategy, fitted on the training split
mdl_baseline = DummyRegressor(strategy='mean').fit(Xs_train, ys_train)

# Baseline predictions for the train and validation splits
y_train_pred = mdl_baseline.predict(Xs_train)
y_valid_pred = mdl_baseline.predict(Xs_valid)

# Convert the predictions to torch tensors so mse_loss can consume them
ys_train_pred, ys_valid_pred = torch.tensor(y_train_pred), torch.tensor(y_valid_pred)

# Score the baseline.
# NOTE(review): if predict() returns 1-D arrays they are compared against
# (N, 1) targets — confirm the shapes line up as intended.
mse_train_baseline = mse_loss(ys_train_pred, ys_train)
mse_valid_baseline = mse_loss(ys_valid_pred, ys_valid)

print('Baseline MSE score - Train :', mse_train_baseline.item())
print('Baseline MSE score - Valid :', mse_valid_baseline.item())

Baseline MSE score - Train : 6076.398012984615
Baseline MSE score - Valid : 4977.640423791074


*Great! Now let's create a Neuron model*

In [None]:
torch.manual_seed(123)

# A single neuron: one linear layer mapping 10 features to 1 output
layers = [Linear(10, 1, bias=True)]

# Wrap it in a sequential model
mdl_neuron = Sequential(layers)

# Turn on gradient tracking for every parameter tensor
parameters = mdl_neuron.parameters()
for p in parameters:
    p.requires_grad_(True)

In [None]:
# Gradient-descent settings
n_iter = 1000
learning_rate = 0.1
log_every = 100

# Full-batch gradient descent on the training set
for k in range(n_iter):
    # Forward pass: prediction and loss
    y_pred = mdl_neuron(Xs_train)
    loss = mse_loss(y_pred, ys_train)

    # Reset the gradients so they do not accumulate across iterations
    for p in parameters:
        p.grad = None

    # Backward pass: fills p.grad for every parameter
    loss.backward()

    # Update step. It runs under no_grad so the in-place update is not
    # recorded by autograd (the supported replacement for touching `.data`).
    with torch.no_grad():
        for p in parameters:
            p -= learning_rate * p.grad

    # Log the first and last iteration and every `log_every`-th one
    if k == 0 or k == n_iter - 1 or k % log_every == 0:
        print(f'iter: {k+1} | loss: {loss.item():.4f}')


# Show final scores; no graph is needed for evaluation
with torch.no_grad():
    ys_train_pred = mdl_neuron(Xs_train)
    ys_valid_pred = mdl_neuron(Xs_valid)

print('Neuron MSE score - Train :', mse_loss(ys_train_pred, ys_train).item())
print('Neuron MSE score - Valid :', mse_loss(ys_valid_pred, ys_valid).item())

iter: 1 | loss: 29553.3843
iter: 101 | loss: 2889.9139
iter: 201 | loss: 2883.3081
iter: 301 | loss: 2878.7473
iter: 401 | loss: 2875.5961
iter: 501 | loss: 2873.4186
iter: 601 | loss: 2871.9141
iter: 701 | loss: 2870.8744
iter: 801 | loss: 2870.1560
iter: 901 | loss: 2869.6597
iter: 1000 | loss: 2869.3195
Neuron MSE score - Train : 2869.316661272341
Neuron MSE score - Valid : 2755.875224468187


*Nice! There is a huge improvement in train & valid dataset*

In [None]:
# Print every trained parameter tensor, in the order returned by parameters()
for p in parameters:
    print(p)

tensor([[  1.7891],
        [-11.4982],
        [ 25.7320],
        [ 16.7887],
        [-37.9602],
        [ 19.5654],
        [  4.8145],
        [ 12.2476],
        [ 32.6712],
        [  2.3983]], dtype=torch.float64, requires_grad=True)
tensor([153.7365], dtype=torch.float64, requires_grad=True)


*Remember, 1 neuron = a linear regression model. Let's prove this*

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Fit scikit-learn's linear regression on the same training tensors
mdl_lr = LinearRegression().fit(Xs_train, ys_train)

# MSE on the train and validation splits (the metric is symmetric in its arguments)
lr_train_score = mean_squared_error(mdl_lr.predict(Xs_train), ys_train)
lr_valid_score = mean_squared_error(mdl_lr.predict(Xs_valid), ys_valid)

# Report the scores and the fitted coefficients / intercept for comparison
print('LR MSE score - Train :', lr_train_score)
print('LR MSE score - Valid :', lr_valid_score)
print('Model parameters:')
print(mdl_lr.coef_, mdl_lr.intercept_)

LR MSE score - Train : 2868.549702835577
LR MSE score - Valid : 2765.853591002306
Model parameters:
[[  1.75375799 -11.51180908  25.60712144  16.82887167 -44.44885564
   24.64095356   7.67697768  13.1387839   35.16119521   2.35136365]] [153.73654391]


## **Task 6**
---

- Let's perform a prediction with a Neural Network
- Optimize your model parameter using Gradient Descent

*Now, let's create a Neural Network*

In [None]:
torch.manual_seed(123)

# Network: 10 inputs -> 6 hidden neurons with Tanh -> 1 linear output
layers = [
    Linear(10, 6, bias=True),
    Tanh(),
    Linear(6, 1, bias=True),
]

# Wrap the layers in a sequential model
mdl_nn = Sequential(layers)

# Turn on gradient tracking for every parameter tensor
parameters = mdl_nn.parameters()
for p in parameters:
    p.requires_grad_(True)

In [None]:
# Gradient-descent settings
n_iter = 1000
learning_rate = 0.1
log_every = 100

# Full-batch gradient descent on the training set
for k in range(n_iter):
    # Forward pass: prediction and loss
    y_pred = mdl_nn(Xs_train)
    loss = mse_loss(y_pred, ys_train)

    # Reset the gradients so they do not accumulate across iterations
    for p in parameters:
        p.grad = None

    # Backward pass: fills p.grad for every parameter
    loss.backward()

    # Update step. It runs under no_grad so the in-place update is not
    # recorded by autograd (the supported replacement for touching `.data`).
    with torch.no_grad():
        for p in parameters:
            p -= learning_rate * p.grad

    # Log the first and last iteration and every `log_every`-th one
    if k == 0 or k == n_iter - 1 or k % log_every == 0:
        print(f'iter: {k+1} | loss: {loss.item():.4f}')


# Show final scores; no graph is needed for evaluation
with torch.no_grad():
    ys_train_pred = mdl_nn(Xs_train)
    ys_valid_pred = mdl_nn(Xs_valid)

# NOTE: these are the neural network's scores; the 'Neuron' label is kept
# unchanged so the printed lines match the recorded output.
print('Neuron MSE score - Train :', mse_loss(ys_train_pred, ys_train).item())
print('Neuron MSE score - Valid :', mse_loss(ys_valid_pred, ys_valid).item())

iter: 1 | loss: 29539.4910
iter: 101 | loss: 2779.2292
iter: 201 | loss: 2547.1046
iter: 301 | loss: 2464.0943
iter: 401 | loss: 2473.8480
iter: 501 | loss: 2491.3797
iter: 601 | loss: 2446.9829
iter: 701 | loss: 2447.0473
iter: 801 | loss: 2447.2638
iter: 901 | loss: 2447.3159
iter: 1000 | loss: 2446.7670
Neuron MSE score - Train : 2445.987691411018
Neuron MSE score - Valid : 2822.952408174069


*Nice! We have a better train score, but beware of <u>overfitting</u>: the validation score is worse than the single neuron's.*

## **Exercise**
---

- Build your own Neural Network Model
- You can design your own architecture
- Compare the results with your initial model

In [None]:
# Code here

## **Task 7**
---

- For the binary classification network, we need to build the loss function.
- Remember, we can use the *binary crossentropy loss* or *negative log-likelihood*

$$
NLL = -\cfrac{1}{N}
\sum_{i=1}^{N}
\left[
\sum_{c=1}^{C}
\mathbf{1}_{y^{(i)}=c} \cdot \log (P(y^{(i)}=c|x^{(i)}))
\right]
$$

- Name the function `bce_loss`

In [None]:
def bce_loss(target, input):
    """Return the mean binary cross-entropy (negative log-likelihood).

    Mind the argument roles, which follow how the formula uses them:
    ``target`` holds the predicted probabilities of the positive class and
    ``input`` holds the 0/1 labels.

    A small epsilon is added inside both logarithms, so probabilities of
    exactly 0 or 1 give a large finite loss instead of ``inf`` or ``nan``.
    Writing the second term as ``(1 - target) + eps`` is what makes this hold
    for ``target == 1``: adding the epsilon to ``target`` first is lost to
    rounding there.

    When the two tensors have the same number of elements and differ only by
    size-1 dimensions (for example ``(N,)`` against ``(N, 1)``), ``input`` is
    reshaped to ``target``'s shape, so broadcasting cannot silently build an
    ``(N, N)`` matrix of pairwise terms.

    Args:
        target: tensor of predicted probabilities in ``[0, 1]``.
        input: tensor of binary labels (integer or floating point).

    Returns:
        A 0-dimensional tensor holding the mean loss.
    """
    eps = 1e-16
    if target.shape != input.shape and target.numel() == input.numel():
        if target.squeeze().shape == input.squeeze().shape:
            input = input.reshape(target.shape)
    log_pos = torch.log(target + eps)
    log_neg = torch.log((1 - target) + eps)
    return -torch.mean(input * log_pos + (1 - input) * log_neg)

Let's check

In [None]:
# Sanity check: `target` carries the predicted probabilities and `input` the labels.
# The answer should be 0.2523
bce_loss(target = torch.tensor([0.9, 0.5, 0.1, 0.1]),
         input = torch.tensor([1, 1, 0, 0]))

tensor(0.2523)

## **Task 8**
---

- Let's perform a binary classification prediction with a Neuron
- Optimize your model parameter using Gradient Descent

*Load Library*

In [None]:
# Load library
from sklearn.datasets import load_iris

*Prepare data*

In [None]:
# Load the iris dataset as a (features, target) pair
X, y = load_iris(return_X_y=True)
# Turn the task into binary classification: class 2 becomes 1, everything else 0
y = np.where(y==2, 1, 0)

In [None]:
# Split data in two steps:
# 1) hold out 20% of the rows (temporarily named *_test)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size = 0.2,
                                                    random_state=42)
# 2) split that hold-out in half into validation and test sets;
#    X_test / y_test are rebound here, so the order of the two calls matters
X_valid, X_test, y_valid, y_test = train_test_split(X_test, y_test,
                                                    test_size = 0.5,
                                                    random_state = 42)

# Show the resulting shapes of the three feature matrices
X_train.shape, X_valid.shape, X_test.shape

((120, 4), (15, 4), (15, 4))

In [None]:
# Fit a fresh scaler on the training features only (rebinds `scaler`)
scaler = StandardScaler()
scaler.fit(X_train)

In [None]:
# Transform every split with the scaler fitted on the training features
X_train_clean = scaler.transform(X_train)
X_valid_clean = scaler.transform(X_valid)
X_test_clean = scaler.transform(X_test)

In [None]:
# Convert the scaled feature matrices to torch tensors
Xs_train, Xs_valid, Xs_test = (
    torch.tensor(features)
    for features in (X_train_clean, X_valid_clean, X_test_clean)
)

# Convert the labels too, reshaped into column vectors of shape (N, 1)
ys_train, ys_valid, ys_test = (
    torch.tensor(labels).reshape(-1, 1)
    for labels in (y_train, y_valid, y_test)
)

*Create baseline model*

In [None]:
from sklearn.dummy import DummyClassifier

In [None]:
# Baseline classifier using the 'most_frequent' strategy, fitted on the training split
mdl_baseline = DummyClassifier(strategy='most_frequent').fit(Xs_train, ys_train)

# Baseline probability of the positive class (column 1) for both splits
y_train_pred = mdl_baseline.predict_proba(Xs_train)[:, 1]
y_valid_pred = mdl_baseline.predict_proba(Xs_valid)[:, 1]

# Convert the probabilities to torch tensors so bce_loss can consume them
ys_train_pred, ys_valid_pred = torch.tensor(y_train_pred), torch.tensor(y_valid_pred)

# Score the baseline.
# NOTE(review): the sliced probabilities are 1-D while the labels are (N, 1)
# column vectors — confirm the shapes line up as intended.
bce_train_baseline = bce_loss(ys_train_pred, ys_train)
bce_valid_baseline = bce_loss(ys_valid_pred, ys_valid)

print('Baseline BCE score - Train :', bce_train_baseline.item())
print('Baseline BCE score - Valid :', bce_valid_baseline.item())

Baseline BCE score - Train : 11.973442483569041
Baseline BCE score - Valid : 17.192635361022212


*Great! Now let's create a Neuron model*

In [None]:
torch.manual_seed(123)

# A single neuron: linear layer mapping 4 features to 1 output, then a sigmoid
layers = [
    Linear(4, 1, bias=True),
    Sigmoid(),
]

# Wrap it in a sequential model
mdl_neuron = Sequential(layers)

# Turn on gradient tracking for every parameter tensor
parameters = mdl_neuron.parameters()
for p in parameters:
    p.requires_grad_(True)

In [None]:
# Gradient-descent settings
n_iter = 1000
learning_rate = 0.1
log_every = 100

# Full-batch gradient descent on the training set
for k in range(n_iter):
    # Forward pass: predicted probabilities and loss
    y_pred = mdl_neuron(Xs_train)
    loss = bce_loss(y_pred, ys_train)

    # Reset the gradients so they do not accumulate across iterations
    for p in parameters:
        p.grad = None

    # Backward pass: fills p.grad for every parameter
    loss.backward()

    # Update step. It runs under no_grad so the in-place update is not
    # recorded by autograd (the supported replacement for touching `.data`).
    with torch.no_grad():
        for p in parameters:
            p -= learning_rate * p.grad

    # Log the first and last iteration and every `log_every`-th one
    if k == 0 or k == n_iter - 1 or k % log_every == 0:
        print(f'iter: {k+1} | loss: {loss.item():.4f}')


# Show final scores; no graph is needed for evaluation
with torch.no_grad():
    ys_train_pred = mdl_neuron(Xs_train)
    ys_valid_pred = mdl_neuron(Xs_valid)

print('Neuron BCE score - Train :', bce_loss(ys_train_pred, ys_train).item())
print('Neuron BCE score - Valid :', bce_loss(ys_valid_pred, ys_valid).item())

iter: 1 | loss: 0.0678
iter: 101 | loss: 0.0675
iter: 201 | loss: 0.0673
iter: 301 | loss: 0.0671
iter: 401 | loss: 0.0668
iter: 501 | loss: 0.0666
iter: 601 | loss: 0.0664
iter: 701 | loss: 0.0662
iter: 801 | loss: 0.0660
iter: 901 | loss: 0.0657
iter: 1000 | loss: 0.0655
Neuron BCE score - Train : 0.06553821298705326
Neuron BCE score - Valid : 0.040261311328580913


*Nice! There is a huge improvement in train & valid dataset*

In [None]:
# Print every trained parameter tensor, in the order returned by parameters()
for p in parameters:
    print(p)

tensor([[-0.5392],
        [-1.5131],
        [ 5.2418],
        [ 6.0195]], dtype=torch.float64, requires_grad=True)
tensor([-7.6034], dtype=torch.float64, requires_grad=True)


*Remember, 1 neuron with a sigmoid activation = a logistic regression model. Let's prove this*

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

# Logistic regression without regularisation, to compare against the single
# sigmoid neuron. `penalty=None` replaces the string "none", which was
# deprecated in scikit-learn 1.2 and removed in 1.4 (needs scikit-learn >= 1.2).
mdl_lr = LogisticRegression(penalty=None)
# scikit-learn expects 1-D labels, hence the flatten()
mdl_lr.fit(Xs_train, ys_train.flatten())

# Log-loss on the train and validation splits
lr_train_score = log_loss(ys_train.flatten(), mdl_lr.predict_proba(Xs_train))
lr_valid_score = log_loss(ys_valid.flatten(), mdl_lr.predict_proba(Xs_valid))

# Report the scores and the fitted coefficients / intercept for comparison
print('LR BCE score - Train :', lr_train_score)
print('LR BCE score - Valid :', lr_valid_score)
print('Model parameters:')
print(mdl_lr.coef_, mdl_lr.intercept_)

LR BCE score - Train : 0.04657793270205176
LR BCE score - Valid : 0.02114910543256611
Model parameters:
[[-1.75013656 -3.06911108 14.50150061 12.33229584]] [-18.62682849]


## **Task 9**
---

- Let's perform a prediction with a Neural Network
- Optimize your model parameter using Gradient Descent

*Now, let's create a Neural Network*

In [None]:
torch.manual_seed(123)

# Network: 4 inputs -> 3 hidden neurons with Tanh -> 2 hidden neurons with Tanh
# -> 1 output neuron with Sigmoid
layers = [
    Linear(4, 3, bias=True),
    Tanh(),
    Linear(3, 2, bias=True),
    Tanh(),
    Linear(2, 1, bias=True),
    Sigmoid(),
]

# Wrap the layers in a sequential model
mdl_nn = Sequential(layers)

# Turn on gradient tracking for every parameter tensor
parameters = mdl_nn.parameters()
for p in parameters:
    p.requires_grad_(True)

In [None]:
# Gradient-descent settings
n_iter = 1000
learning_rate = 0.1
log_every = 100

# Full-batch gradient descent on the training set
for k in range(n_iter):
    # Forward pass: predicted probabilities and loss
    y_pred = mdl_nn(Xs_train)
    loss = bce_loss(y_pred, ys_train)

    # Reset the gradients so they do not accumulate across iterations
    for p in parameters:
        p.grad = None

    # Backward pass: fills p.grad for every parameter
    loss.backward()

    # Update step. It runs under no_grad so the in-place update is not
    # recorded by autograd (the supported replacement for touching `.data`).
    with torch.no_grad():
        for p in parameters:
            p -= learning_rate * p.grad

    # Log the first and last iteration and every `log_every`-th one
    if k == 0 or k == n_iter - 1 or k % log_every == 0:
        print(f'iter: {k+1} | loss: {loss.item():.4f}')


# Show final scores; no graph is needed for evaluation
with torch.no_grad():
    ys_train_pred = mdl_nn(Xs_train)
    ys_valid_pred = mdl_nn(Xs_valid)

# NOTE: these are the neural network's scores; the 'Neuron' label is kept
# unchanged so the printed lines match the recorded output.
print('Neuron BCE score - Train :', bce_loss(ys_train_pred, ys_train).item())
print('Neuron BCE score - Valid :', bce_loss(ys_valid_pred, ys_valid).item())

iter: 1 | loss: 0.0262
iter: 101 | loss: 0.0245
iter: 201 | loss: 0.0228
iter: 301 | loss: 0.0212
iter: 401 | loss: 0.0196
iter: 501 | loss: 0.0181
iter: 601 | loss: 0.0167
iter: 701 | loss: 0.0154
iter: 801 | loss: 0.0143
iter: 901 | loss: 0.0132
iter: 1000 | loss: 0.0122
Neuron BCE score - Train : 0.012227674938569438
Neuron BCE score - Valid : 0.016059593978176502


*Nice! We have a better train score, but beware of <u>overfitting</u>.*

In [None]:
# Print every trained parameter tensor, in the order returned by parameters()
for p in parameters:
    print(p)

tensor([[ 1.9349, -0.3910, -0.3746],
        [ 2.3500, -0.7671, -0.5239],
        [ 0.4740,  1.7023,  1.2191],
        [ 0.8445,  0.4981,  0.7815]], dtype=torch.float64, requires_grad=True)
tensor([-0.0442, -1.1513, -1.6828], dtype=torch.float64, requires_grad=True)
tensor([[1.1042, 1.3135],
        [1.5660, 1.6125],
        [1.6127, 2.0754]], dtype=torch.float64, requires_grad=True)
tensor([-0.6029, -0.6148], dtype=torch.float64, requires_grad=True)
tensor([[3.6079],
        [4.4077]], dtype=torch.float64, requires_grad=True)
tensor([-0.3816], dtype=torch.float64, requires_grad=True)


## **Exercise**
---

- Build your own Neural Network Model
- You can design your own architecture
- Compare the results with your initial model

In [None]:
# Code here