# Lab 7 - Introduction to PyTorch

Tensors are data structures similar to arrays and matrices. They resemble NumPy arrays, but they can also run on GPUs or other specialized hardware to accelerate computing. PyTorch is a machine learning framework that allows us to create, train, and test models. In PyTorch, we use tensors to encode the inputs and outputs of a model, as well as the model’s parameters.

In [None]:
import torch
import numpy as np

## Tensors

### Creating a Tensor

In [None]:
# Build a tensor directly from a nested Python list; the element type is
# inferred from the values (integers here).

data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)

print("Tensor from data: \n {} \n".format(x_data))

Tensor from data: 
 tensor([[1, 2],
        [3, 4]]) 



In [None]:
# Wrap an existing NumPy array as a tensor (the array is built from the same
# nested list used for x_data)

np_array = np.array(data)
x_np = torch.from_numpy(np_array)

print("Tensor from NumPy array: \n {} \n".format(x_np))

Tensor from NumPy array: 
 tensor([[1, 2],
        [3, 4]]) 



In [None]:
# Factory functions take the desired shape and decide how to fill it:
# uniform random numbers, all ones, or all zeros.

shape = (2, 3)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

for label, made in (
    ("Random", rand_tensor),
    ("Ones", ones_tensor),
    ("Zero", zeros_tensor),
):
    print(f"{label} Tensor of shape {shape}: \n{made}\n")


Random Tensor of shape (2, 3): 
tensor([[0.0936, 0.6726, 0.8078],
        [0.7598, 0.4147, 0.0898]])

Ones Tensor of shape (2, 3): 
tensor([[1., 1., 1.],
        [1., 1., 1.]])

Zero Tensor of shape (2, 3): 
tensor([[0., 0., 0.],
        [0., 0., 0.]])



In [None]:
# `ones_like` borrows the shape of an existing tensor (x_data here) and fills
# the new tensor with 1's

x_ones = torch.ones_like(x_data)
print("Ones Tensor: \n {} \n".format(x_ones))

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 



In [None]:
# Create a tensor filled with random numbers, with the shape of some other tensor.
# x_data was built from integers, so a floating-point dtype is requested explicitly.

x_rand = torch.rand_like(x_data, dtype=torch.float)
print("Random Tensor: \n {} \n".format(x_rand))

Random Tensor: 
 tensor([[0.1273, 0.5321],
        [0.6923, 0.0168]]) 



### Tensor Attributes and Operations

In [None]:
# Every tensor carries its shape, its element dtype and the device it lives on

tensor = torch.rand(3, 4)

attribute_report = [
    f"Shape of tensor: {tensor.shape}",
    f"Size  of tensor: {tensor.size()} (same as shape)",
    f"Datatype of tensor: {tensor.dtype}",
    f"Device tensor is stored on: {tensor.device}",
]
print("\n".join(attribute_report))

Shape of tensor: torch.Size([3, 4])
Size  of tensor: torch.Size([3, 4]) (same as shape)
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [None]:
# Almost all tensor operations are typically much faster on a GPU/TPU than on a CPU.
# Getting a GPU for Google Colab Notebooks: Go to Edit -> Notebook Settings -> Hardware accelerator

# Only move the tensor when a CUDA device is actually present; otherwise it stays where it is.
if torch.cuda.is_available():
    tensor = tensor.to('cuda')
print("Device tensor is stored on: {}".format(tensor.device))

Device tensor is stored on: cuda:0


In [None]:
# `view` reinterprets the same 12 elements with a different shape.
# -1 lets PyTorch infer that dimension (12 for a 3x4 tensor).

tensor = torch.rand(3, 4)
new_tensor = tensor.view(-1, 1)
print("Original Tensor: \n {} \n".format(tensor))
print("New Tensor: \n {} \n".format(new_tensor))


Original Tensor: 
 tensor([[0.2447, 0.9544, 0.0597, 0.7642],
        [0.8241, 0.6032, 0.0464, 0.4630],
        [0.4666, 0.7372, 0.0474, 0.2222]]) 

New Tensor: 
 tensor([[0.2447],
        [0.9544],
        [0.0597],
        [0.7642],
        [0.8241],
        [0.6032],
        [0.0464],
        [0.4630],
        [0.4666],
        [0.7372],
        [0.0474],
        [0.2222]]) 



In [None]:
# Tensor slicing and indexing work just like in NumPy:
# here the whole third column (index 2) is overwritten with zeros.

tensor = torch.ones((4, 4))
tensor[:, 2] = 0
print(tensor)

tensor([[1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.]])


In [None]:
# Tensor `stack` and `cat` operations on three copies of the same tensor

parts = [tensor, tensor, tensor]

# `cat` joins along an existing dimension (dim=1 widens the rows)
cat_tensor = torch.cat(parts, dim=1)
print("Concatenated Tensor: \n {} \n".format(cat_tensor))

# `stack` joins along a brand-new leading dimension
stack_tensor = torch.stack(parts, dim=0)
print("Stacked Tensor: \n {} \n".format(stack_tensor))


Concatenated Tensor: 
 tensor([[1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1.],
        [1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1.],
        [1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1.],
        [1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1.]]) 

Stacked Tensor: 
 tensor([[[1., 1., 0., 1.],
         [1., 1., 0., 1.],
         [1., 1., 0., 1.],
         [1., 1., 0., 1.]],

        [[1., 1., 0., 1.],
         [1., 1., 0., 1.],
         [1., 1., 0., 1.],
         [1., 1., 0., 1.]],

        [[1., 1., 0., 1.],
         [1., 1., 0., 1.],
         [1., 1., 0., 1.],
         [1., 1., 0., 1.]]]) 



In [None]:
# Element-wise tensor multiplication: the `mul` method and the `*` operator
# are two spellings of the same operation

product_via_method = tensor.mul(tensor)
product_via_operator = tensor * tensor

print("Tensor: \n {} \n".format(tensor))
print("Multiplied Tensor: \n {} \n".format(product_via_method))
print("Multiplied Tensor: \n {} \n".format(product_via_operator))

Tensor: 
 tensor([[1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.]]) 

Multiplied Tensor: 
 tensor([[1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.]]) 

Multiplied Tensor: 
 tensor([[1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.]]) 



In [None]:
# Matrix multiplication for tensors: the `matmul` method and the `@` operator
# are two spellings of the same operation

matmul_via_method = tensor.matmul(tensor)
matmul_via_operator = tensor @ tensor

print("Tensor: \n {} \n".format(tensor))
print("Multiplied Tensor: \n {} \n".format(matmul_via_method))
print("Multiplied Tensor: \n {} \n".format(matmul_via_operator))

Tensor: 
 tensor([[1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.]]) 

Multiplied Tensor: 
 tensor([[3., 3., 0., 3.],
        [3., 3., 0., 3.],
        [3., 3., 0., 3.],
        [3., 3., 0., 3.]]) 

Multiplied Tensor: 
 tensor([[3., 3., 0., 3.],
        [3., 3., 0., 3.],
        [3., 3., 0., 3.],
        [3., 3., 0., 3.]]) 



In [None]:
# Matrix multiplication of a tensor with its own transpose (`.T`)

transposed = tensor.T
gram_via_method = tensor.matmul(transposed)
gram_via_operator = tensor @ transposed

print("Tensor: \n {} \n".format(tensor))
print("Multiplied Tensor: \n {} \n".format(gram_via_method))
print("Multiplied Tensor: \n {} \n".format(gram_via_operator))

Tensor: 
 tensor([[1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.]]) 

Multiplied Tensor: 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]]) 

Multiplied Tensor: 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]]) 



In [None]:
# In-place operations (method names ending in `_`) overwrite the tensor's own
# values. They are problematic when computing derivatives because the previous
# values - the history - are lost immediately.
#
# The demo works on a clone so that the shared `tensor` from the earlier cells
# is left untouched: re-running this cell always starts from the same values
# instead of adding another 5 on every execution.

inplace_demo = tensor.clone()
print(inplace_demo, "\n")
inplace_demo.add_(5)
print(inplace_demo)

tensor([[11., 11., 10., 11.],
        [11., 11., 10., 11.],
        [11., 11., 10., 11.],
        [11., 11., 10., 11.]]) 

tensor([[16., 16., 15., 16.],
        [16., 16., 15., 16.],
        [16., 16., 15., 16.],
        [16., 16., 15., 16.]])


### Autograd for tensors

Autograd is a core torch package for automatic differentiation. It uses a tape-based system: if any input tensor of an operation has `requires_grad=True`, the computation is tracked. In the forward phase, the autograd tape remembers all the operations it executed, and in the backward phase, it replays them to compute gradients.

In [None]:
# requires_grad=True asks autograd to track every operation applied to x
x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [None]:
# The tensor y is created via an addition operation on x. Because x was created
# with requires_grad=True, y records the operation that produced it in `grad_fn`.
y = x + 2
print(y, y.grad_fn)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>) <AddBackward0 object at 0x7efb3edee850>


In [None]:
# Each statement below creates a new tensor `b` from y; autograd stores the
# function that produced it in `b.grad_fn`.

# b is created via an element-wise multiplication
b = y * y
print(b, b.grad_fn)

# b is created via a matrix multiplication
b = y @ y
print(b, b.grad_fn)

# b is created via an element-wise power operation
b = y ** 2
print(b, b.grad_fn)

tensor([[9., 9.],
        [9., 9.]], grad_fn=<MulBackward0>) <MulBackward0 object at 0x7efb3eedbc50>
tensor([[18., 18.],
        [18., 18.]], grad_fn=<MmBackward0>) <MmBackward0 object at 0x7efb3eedbc50>
tensor([[9., 9.],
        [9., 9.]], grad_fn=<PowBackward0>) <PowBackward0 object at 0x7efb3eedbc50>


In [None]:
# Inside `torch.no_grad()` operations are not tracked, even though x itself
# has requires_grad=True
with torch.no_grad():
    squared = x ** 2
    print(squared.requires_grad)

False


## Writing a Simple Neural Network with Pytorch

#### Importing necessary libraries

In [None]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

### Define a Neural network class

In [None]:
class NeuralNetwork(nn.Module):
    """Small fully connected network: n_features -> 256 -> 128 -> 1.

    Two hidden linear layers, each followed by ReLU, and a final linear layer
    with a single output and no activation.

    Args:
        n_features: Size of each input vector. Defaults to 512, the value
            that was previously hard-coded, so `NeuralNetwork()` builds the
            same architecture as before.
    """

    def __init__(self, n_features=512):
        super().__init__()

        # The attribute name and layer order are kept unchanged so that
        # parameter names (e.g. `linear_relu_stack.0.weight`) stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(n_features, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        """Return the raw output of the last linear layer for input `x`.

        `x` must have `n_features` as its last dimension; the result has the
        same leading dimensions and a last dimension of 1.
        """
        logits = self.linear_relu_stack(x)
        return logits

# Prefer the GPU when one is available, otherwise fall back to the CPU,
# then build the model and move it to that device.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))
model = NeuralNetwork().to(device)
print(model)


Using cuda device
NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=1, bias=True)
  )
)


#### Running the above neural network

In [None]:
# Feed a batch of 10 random feature vectors through the (untrained) model.
# The network has a single linear output and no final activation, so the
# values printed are raw regression outputs, not class labels.
n_features = 512
X = torch.rand(10, n_features, device=device)

predictions = model(X)
print(f"Predictions: {predictions}")

Predicted class: tensor([[ 0.0008],
        [-0.0400],
        [-0.0390],
        [-0.0653],
        [-0.0525],
        [-0.0667],
        [-0.0205],
        [-0.0420],
        [-0.0463],
        [-0.0145]], device='cuda:0', grad_fn=<AddmmBackward0>)


#### Breaking down

In [None]:
# A batch of 10 random input vectors, created directly on the chosen device
input_tensor = torch.rand((10, n_features), device=device)
print(input_tensor.shape)

torch.Size([10, 512])


#### A linear layer:

In [None]:
# A linear layer maps each n_features-long input vector to 256 outputs
layer1 = nn.Linear(n_features, 256).to(device)
hidden1 = layer1(input_tensor)
print(hidden1.shape)


torch.Size([10, 256])


#### Activation function

Non-linear activations are what create the complex mappings between the model’s inputs and outputs. They are applied after linear transformations to introduce nonlinearity, helping neural networks learn a wide variety of phenomena.


We use the ReLU activation function here.


In [None]:
# ReLU replaces every negative entry with zero and leaves the rest unchanged
relu = nn.ReLU()
print("Before ReLU: {}\n\n".format(hidden1))
hidden1 = relu(hidden1)
print("After ReLU: {}".format(hidden1))



Before ReLU: tensor([[-0.3035,  0.1893,  0.4050,  ..., -0.0494, -0.5079,  0.3359],
        [-0.2047, -0.0540, -0.0645,  ...,  0.2083, -0.1241,  0.3469],
        [-0.2541, -0.1377, -0.0246,  ..., -0.0854, -0.3087,  0.4069],
        ...,
        [-0.3571,  0.0507, -0.1421,  ...,  0.1342, -0.5627,  0.0896],
        [ 0.0119, -0.0150,  0.2757,  ...,  0.0856, -0.3203,  0.0565],
        [-0.5549, -0.0077,  0.2778,  ...,  0.3448, -0.5390, -0.0488]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.1893, 0.4050,  ..., 0.0000, 0.0000, 0.3359],
        [0.0000, 0.0000, 0.0000,  ..., 0.2083, 0.0000, 0.3469],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.4069],
        ...,
        [0.0000, 0.0507, 0.0000,  ..., 0.1342, 0.0000, 0.0896],
        [0.0119, 0.0000, 0.2757,  ..., 0.0856, 0.0000, 0.0565],
        [0.0000, 0.0000, 0.2778,  ..., 0.3448, 0.0000, 0.0000]],
       device='cuda:0', grad_fn=<ReluBackward0>)


#### Pytorch Sequential module

In [None]:
# nn.Sequential chains modules: data flows through them in the order given.
# The already-created `layer1` is reused as the first stage.
seq_modules = nn.Sequential(layer1, nn.ReLU(), nn.Linear(256, 1)).to(device)

input_tensor = torch.rand((10, n_features), device=device)
logits = seq_modules(input_tensor)
print(logits)


tensor([[-0.1510],
        [-0.0480],
        [-0.1422],
        [ 0.0507],
        [-0.1209],
        [-0.0808],
        [ 0.0631],
        [ 0.0565],
        [ 0.0648],
        [-0.1106]], device='cuda:0', grad_fn=<AddmmBackward0>)


#### Last layer for prediction

Since this is a regression task, no activation function is required for last layer.

In a classification task, the last linear layer of the neural network returns logits - raw values in $(-\infty, \infty)$. They are passed to the nn.Softmax module, which scales the logits to values in $[0, 1]$ representing the model’s predicted probabilities for each class. The dim parameter indicates the dimension along which the values must sum to 1.



#### Neural Network parameters

Layers inside a neural network are parameterized, i.e. have associated weights and biases that are optimized during training. Subclassing nn.Module automatically tracks all fields defined inside your model object, and makes all parameters accessible using your model’s parameters() or named_parameters() methods.

In this example, we iterate over each parameter, and print its size and a preview of its values.



In [None]:
# Show the model, then for every named parameter its size and its first two
# entries along the leading dimension
print("Model structure: {}\n\n".format(model))

for name, param in model.named_parameters():
    preview = param[:2]
    print("Layer: {} | Size: {} | Values : {} \n".format(name, param.shape, preview))



Model structure: NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=1, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([256, 512]) | Values : tensor([[-0.0430, -0.0385, -0.0381,  ..., -0.0442, -0.0035,  0.0032],
        [-0.0386,  0.0177,  0.0319,  ...,  0.0089,  0.0431, -0.0330]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([256]) | Values : tensor([-0.0222, -0.0281], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([128, 256]) | Values : tensor([[-0.0303, -0.0112,  0.0470, -0.0317,  0.0123, -0.0117, -0.0432,  0.0420,
          0.0117,  0.0582, -0.0577, -0.0357, -0.0451, -0.0578, -0.0235, -0.0568,
         -0.0503, -0.0112,  0.0198, -0.0409, -0.0571, -0.0284

#### Neural Network hyperparameters

Hyperparameters are adjustable parameters that let you control the model optimization process. Different hyperparameter values can impact model training and convergence rates.

We define the following hyperparameters for training:
- Number of Epochs - the number of times to iterate over the dataset
- Batch Size - the number of data samples propagated through the network before the parameters are updated
- Learning Rate - how much to update the model's parameters at each batch/epoch. Smaller values yield slow learning speed, while large values may result in unpredictable behavior during training.


In [None]:
# Training hyperparameters.
# NOTE: `train_loop`, as it is called later in this notebook, is not passed
# batch_size or epochs, so it trains with its own values (32 and 50).
learning_rate = 0.001  # step size handed to the optimizer
batch_size = 64        # samples per parameter update
epochs = 5             # passes over the dataset

#### Loss function

When presented with some training data, our untrained network is unlikely to give the correct answer. The loss function measures how dissimilar the obtained result is from the target value, and it is the loss function that we want to minimize during training. To calculate the loss, we make a prediction using the inputs of our given data sample and compare it against the true data label value.

Common loss functions include nn.MSELoss (Mean Square Error) for regression tasks, and nn.NLLLoss (Negative Log Likelihood) for classification. nn.CrossEntropyLoss combines nn.LogSoftmax and nn.NLLLoss.



In [None]:
# Mean squared error; "mean" is the default reduction, spelled out here so it
# is clear that the loss is averaged over the elements of a batch
loss_fn = nn.MSELoss(reduction="mean")



#### Optimizer

Optimization is the process of adjusting model parameters to reduce model error in each training step. Optimization algorithms define how this process is performed (in this example we use Stochastic Gradient Descent). All optimization logic is encapsulated in the optimizer object. Here, we use the SGD optimizer; additionally, there are many different optimizers available in PyTorch such as ADAM and RMSProp, that work better for different kinds of models and data.

We initialize the optimizer by registering the model’s parameters that need to be trained, and passing in the learning rate hyperparameter.




In [None]:
# Plain stochastic gradient descent over all of the model's parameters
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

Inside the training loop, optimization happens in three steps:

- Call optimizer.zero_grad() to reset the gradients of model parameters. Gradients by default add up; to prevent double-counting, we explicitly zero them at each iteration.

- Backpropagate the prediction loss with a call to loss.backward(). PyTorch deposits the gradients of the loss w.r.t. each parameter.

- Once we have our gradients, we call optimizer.step() to adjust the parameters by the gradients collected in the backward pass.


### The full model and training loop

In [None]:
def train_loop(X_train, Y_train, loss_fn, optimizer, batch_size=32, epochs=50, model=None):
    """Train a model with mini-batch gradient descent and print the loss per epoch.

    The data is visited in order (no shuffling) in consecutive slices of
    `batch_size` samples. The final slice may be smaller, so every sample is
    used; previously the trailing `len(X_train) % batch_size` samples were
    silently skipped.

    Args:
        X_train: Indexable collection of feature rows (e.g. a 2-D NumPy array).
        Y_train: Targets, one per row of `X_train`.
        loss_fn: Callable `loss_fn(pred, target)` returning a scalar tensor.
            The epoch loss assumes it averages over the batch.
        optimizer: Optimizer already bound to the model's parameters.
        batch_size: Samples per update. Defaults to 32 (the former fixed value).
        epochs: Passes over the data. Defaults to 50 (the former fixed value).
        model: Module to train. When omitted, the module-level variable
            `model` is used, as the original implementation did.

    Raises:
        ValueError: If the data is empty, the lengths differ, or
            `batch_size` is not positive.
        NameError: If no model is passed and no global `model` exists.
    """
    if model is None:
        # Backward compatibility: callers that do not pass a model rely on
        # the notebook-level global of the same name.
        try:
            model = globals()["model"]
        except KeyError:
            raise NameError("no `model` argument given and no global `model` defined") from None

    size = len(X_train)
    if size == 0:
        raise ValueError("X_train is empty; nothing to train on")
    if len(Y_train) != size:
        raise ValueError(f"X_train has {size} rows but Y_train has {len(Y_train)}")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Build the batches on whatever device the model lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()

    for epoch_id in range(epochs):
        weighted_loss = 0.0
        for start in range(0, size, batch_size):
            stop = start + batch_size
            X_batch = torch.as_tensor(X_train[start:stop], dtype=torch.float32, device=device)
            Y_batch = torch.as_tensor(Y_train[start:stop], dtype=torch.float32, device=device)

            # Compute prediction and loss
            pred = model(X_batch)
            loss = loss_fn(pred.squeeze(-1), Y_batch)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight by batch size so a short final batch does not skew the mean.
            weighted_loss += loss.item() * len(X_batch)
        print(f" Epoch: {epoch_id} loss: {weighted_loss / size:>7f}")


### RDKit Installation



In [None]:
# Download the Miniconda installer (py37 build), run it with /usr/local as the
# install prefix, then install RDKit from the `rdkit` conda channel.
# NOTE(review): the captured wget log below shows repo.continuum.io answering
# with a 301 redirect to repo.anaconda.com for this same file.
!wget -c https://repo.continuum.io/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh
!chmod +x Miniconda3-py37_4.8.3-Linux-x86_64.sh
!time bash ./Miniconda3-py37_4.8.3-Linux-x86_64.sh -b -f -p /usr/local
!time conda install -q -y -c rdkit rdkit

import sys
# Add the Python 3.7 site-packages directory under /usr/local to the import
# path (presumably where conda places the installed packages - TODO confirm).
sys.path.append('/usr/local/lib/python3.7/site-packages/')

--2022-04-30 12:29:14--  https://repo.continuum.io/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh
Resolving repo.continuum.io (repo.continuum.io)... 104.18.200.79, 104.18.201.79, 2606:4700::6812:c94f, ...
Connecting to repo.continuum.io (repo.continuum.io)|104.18.200.79|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh [following]
--2022-04-30 12:29:14--  https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh
Resolving repo.anaconda.com (repo.anaconda.com)... 104.16.130.3, 104.16.131.3, 2606:4700::6810:8303, ...
Connecting to repo.anaconda.com (repo.anaconda.com)|104.16.130.3|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 88867207 (85M) [application/x-sh]
Saving to: ‘Miniconda3-py37_4.8.3-Linux-x86_64.sh’


2022-04-30 12:29:14 (105 MB/s) - ‘Miniconda3-py37_4.8.3-Linux-x86_64.sh’ saved [88867207/88867207]

PREFIX=/usr/local
Unp

In [None]:
import pandas as pd
import numpy as np
import rdkit
from tqdm.auto import tqdm

from rdkit import Chem
from rdkit.Chem import AllChem


In [None]:
# Load the ESOL solubility data, turn every SMILES string into a 512-bit
# Morgan fingerprint (radius 2) and split into train / validation / test sets.
from sklearn.model_selection import train_test_split

SOLUBILITY_COL = 'measured log solubility in mols per litre'
N_BITS = 512  # fingerprint length; matches the 512 input features of NeuralNetwork

df = pd.read_csv('./esol.csv')
df = df[[SOLUBILITY_COL, 'smiles']]
X = []
Y = []

# Iterate over the two columns directly (cheaper than `iterrows`) and tell
# tqdm the total so the progress bar can show real progress.
for smile, sol in tqdm(zip(df['smiles'], df[SOLUBILITY_COL]), total=len(df)):
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        # RDKit returns None for a SMILES string it cannot parse; fail with a
        # message that names the offending input.
        raise ValueError(f"Could not parse SMILES string: {smile!r}")

    fingerprint = np.asarray(AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=N_BITS))

    X.append(fingerprint)
    Y.append(sol)

X = np.vstack(X)
Y = np.hstack(Y)

# 80% train; the remaining 20% is halved into validation and test sets.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_valid, X_test, Y_valid, Y_test = train_test_split(X_test, Y_test, test_size=0.5, random_state=42)
print(X_train.shape,X_valid.shape,X_test.shape)
print(Y_train.shape,Y_valid.shape,Y_test.shape)

0it [00:00, ?it/s]

(902, 512) (113, 512) (113, 512)
(902,) (113,) (113,)


In [None]:
# The training and evaluation data below is built from NumPy arrays on the CPU;
# keep the model on the CPU as well so that model and data share one device.
model = model.to('cpu')

#### Training the model

In [None]:
# Fit the model on the training split; the mean loss is printed once per epoch.
train_loop(X_train, Y_train, loss_fn, optimizer)

 Epoch: 0 loss: 12.674489
 Epoch: 1 loss: 11.373038
 Epoch: 2 loss: 10.126612
 Epoch: 3 loss: 8.877158
 Epoch: 4 loss: 7.604825
 Epoch: 5 loss: 6.348720
 Epoch: 6 loss: 5.236157
 Epoch: 7 loss: 4.428834
 Epoch: 8 loss: 3.972012
 Epoch: 9 loss: 3.755182
 Epoch: 10 loss: 3.646449
 Epoch: 11 loss: 3.574550
 Epoch: 12 loss: 3.513891
 Epoch: 13 loss: 3.456918
 Epoch: 14 loss: 3.401556
 Epoch: 15 loss: 3.347135
 Epoch: 16 loss: 3.293344
 Epoch: 17 loss: 3.240063
 Epoch: 18 loss: 3.187191
 Epoch: 19 loss: 3.134694
 Epoch: 20 loss: 3.082635
 Epoch: 21 loss: 3.031038
 Epoch: 22 loss: 2.979998
 Epoch: 23 loss: 2.929576
 Epoch: 24 loss: 2.879701
 Epoch: 25 loss: 2.830434
 Epoch: 26 loss: 2.781814
 Epoch: 27 loss: 2.733915
 Epoch: 28 loss: 2.686769
 Epoch: 29 loss: 2.640363
 Epoch: 30 loss: 2.594876
 Epoch: 31 loss: 2.550318
 Epoch: 32 loss: 2.506732
 Epoch: 33 loss: 2.464096
 Epoch: 34 loss: 2.422389
 Epoch: 35 loss: 2.381635
 Epoch: 36 loss: 2.341772
 Epoch: 37 loss: 2.302792
 Epoch: 38 loss: 2.

#### Testing

In [None]:
def test_loop(X_test, Y_test, loss_fn):
    size = len(X_test)
    batch_size = 32
    n_batches = int(size/batch_size)
    running_loss = []
    model.eval()

    with torch.no_grad():
      for batch_id in range(n_batches):
          X_batch = X_test[batch_id*batch_size: (batch_id+1)*batch_size]
          Y_batch = Y_test[batch_id*batch_size: (batch_id+1)*batch_size]
          X_batch = torch.FloatTensor(X_batch)
          Y_batch = torch.FloatTensor(Y_batch)

          # Compute prediction and loss
          pred = model(X_batch)
          loss = loss_fn(pred.squeeze(-1), Y_batch)
          running_loss.append(loss.item())

          loss, current = loss.item(), batch_id * len(X)

          
      print(f" MSE: {np.mean(running_loss):>7f}")


In [None]:
# Report the loss of the trained model on the held-out test split.
test_loop(X_test, Y_test, loss_fn)

 MSE: 2.050858


## Assignment: Toxicity classification


You are given a dataset of SMILES strings labelled as toxic or non-toxic. 
The task is to create a neural network model that predicts whether a given molecule is toxic. 



In [None]:
class NeuralNetwork(nn.Module):
    """Binary classifier network: n_features -> 256 -> 128 -> 1 logit.

    Same layer layout as the regression model defined earlier in the
    notebook; this definition replaces it from here on. The single output is
    a raw logit with no sigmoid applied, which is the input expected by the
    `nn.BCEWithLogitsLoss` created in the following cell.

    Args:
        n_features: Size of each input vector. Defaults to 512, the value
            that was previously hard-coded, so `NeuralNetwork()` builds the
            same architecture as before.
    """

    def __init__(self, n_features=512):
        super().__init__()

        # The attribute name and layer order are kept unchanged so that
        # parameter names (e.g. `linear_relu_stack.0.weight`) stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(n_features, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        """Return one raw logit per input row of `x`."""
        logits = self.linear_relu_stack(x)
        return logits

# Build a fresh, untrained model for the classification task on the best
# available device (GPU when present, otherwise CPU).
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))
model = NeuralNetwork().to(device)
print(model)


Using cuda device
NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=1, bias=True)
  )
)


In [None]:
# Binary cross-entropy computed directly on raw logits (the sigmoid is applied
# inside the loss); "mean" is the default reduction, spelled out explicitly
loss_fn = nn.BCEWithLogitsLoss(reduction="mean")