<div style="line-height:1.2;">

<h1 style="color:#BF66F2; margin-bottom: 0.3em;"> Convolutional Neural Networks in PyTorch 2 </h1>

<h4 style="margin-top: 0.3em; margin-bottom: 1em;"> Two examples with two different CNN classes modelled on torch.nn.Module. </h4>

<div style="line-height:1.4; margin-bottom: 0.5em;">
    <h3 style="color: lightblue; display: inline; margin-right: 0.5em;">Keywords:</h3>
    GridSearchCV + RandomizedSearchCV + LeaveOneOut + torch.cuda.amp.GradScaler()
</div>

</div>

In [1]:
import numpy as np

import torch
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch import nn                            # import torch.nn as nn
from torch import optim                         # import torch.optim as optim


from torch.utils.data import (DataLoader,)      # from torch.utils.data import DataLoader
from tqdm import tqdm

from sklearn.metrics import f1_score
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import GridSearchCV, LeaveOneOut, RandomizedSearchCV

In [2]:
from google.colab import files

In [5]:
# Colab-only: ask the user to pick local files and upload them to the runtime.
# NOTE(review): the datasets further down are created with download=True under root="dataset/",
# so this manual upload looks unused — confirm before keeping the cell.
uploaded = files.upload()

Saving MNIST.zip to MNIST (1).zip


In [6]:
!unzip /content/MNIST.zip

Archive:  /content/MNIST.zip
replace MNIST/raw/t10k-images-idx3-ubyte? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [3]:
""" Neural Network class.
The first layer is a convolutional layer with:
    1 input channel + 8 output channels, a kernel size of 3x3, a stride of 1, and (zero-)padding of 1.
The second layer is a max-pooling layer with a kernel size of 2x2 and a stride of 2 (it is applied after each convolution).
The third layer is a convolutional layer with:
    8 input channels, 16 output channels, a kernel size of 3x3, a stride of 1, and padding of 1.
The fourth layer is a fully connected layer with:
    16 * 7 * 7 = 784 input features (the flattened output of the second pooling step) and num_classes output features (10).
"""
class my_CNN(nn.Module):
    """Small CNN classifier: two conv + max-pool stages followed by one linear layer.

    Layers, in the order ``forward`` applies them:
        1. ``conv1``: ``in_channels`` -> 8 channels, 3x3 kernel, stride 1, padding 1
           (height and width are preserved).
        2. ``pool``:  2x2 max-pooling with stride 2 (height and width are halved).
        3. ``conv2``: 8 -> 16 channels, 3x3 kernel, stride 1, padding 1.
        4. ``pool``:  the same pooling layer, applied a second time.
        5. ``fc1``:   linear layer from ``16 * (H // 4) * (W // 4)`` flattened features to
           ``num_classes`` scores. For the default 28x28 input this is 16 * 7 * 7 = 784.

    Args:
        in_channels (int): Number of channels of the input images. Default: 1.
        num_classes (int): Number of scores produced per sample. Default: 10.
        input_size (tuple[int, int]): ``(height, width)`` of the input images. It is only used to
            size ``fc1``. Default: ``(28, 28)``, which reproduces the previously hard-coded value.

    Raises:
        ValueError: If either side of ``input_size`` is smaller than 4 pixels, because two 2x2
            poolings would leave no features for ``fc1``.
    """

    def __init__(self, in_channels=1, num_classes=10, input_size=(28, 28)):
        super().__init__()
        height, width = input_size
        if height < 4 or width < 4:
            raise ValueError(f"input_size must be at least 4x4, got {tuple(input_size)}")

        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=8,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(
            in_channels=8,
            out_channels=16,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        # The convolutions keep H and W; each of the two poolings floors them by a factor of 2,
        # so the feature map reaching fc1 is (H // 4) x (W // 4) with 16 channels.
        self.fc1 = nn.Linear(16 * (height // 4) * (width // 4), num_classes)

    def forward(self, x):
        """Return the class scores for a batch ``x`` of shape (batch, in_channels, H, W)."""
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # Keep the batch dimension and collapse channels/height/width into one feature axis.
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x

In [4]:
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [5]:
""" Hyperparameters """
# Model shape: channels of the input images and number of target classes.
in_channels = 1
num_classes = 10
# Optimizer step size (3e-4 is jokingly known as "Karpathy's constant").
learning_rate = 3e-4    #karpathy's constant
# Samples per mini-batch handed to the DataLoaders.
batch_size = 64
# Number of full passes over the training set.
num_epochs = 10

In [6]:
# MNIST train/test splits; ToTensor() converts each image to a float tensor.
train_dataset = datasets.MNIST(root="dataset/", train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root="dataset/", train=False, transform=transforms.ToTensor(), download=True)
# Shuffle only the training data: shuffling helps SGD, while evaluation does not depend on order
# and a fixed order keeps slices of the test set reproducible between runs.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [7]:
# Loss: the network outputs raw class scores, which CrossEntropyLoss compares with integer labels
criterion = nn.CrossEntropyLoss()

# Network, built from the hyperparameters above and moved to the selected device
model = my_CNN(in_channels=in_channels, num_classes=num_classes)
model = model.to(device)

# Optimizer over all trainable parameters of the network
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [8]:
############### Train
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
        # Move the mini-batch onto the device the model lives on
        data, targets = data.to(device), targets.to(device)

        # Drop the gradients of the previous step before computing new ones
        optimizer.zero_grad()

        # Forward pass: class scores and the resulting loss
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass, then one parameter update
        loss.backward()
        optimizer.step()

100%|██████████| 938/938 [00:09<00:00, 95.40it/s] 
100%|██████████| 938/938 [00:08<00:00, 111.78it/s]
100%|██████████| 938/938 [00:08<00:00, 110.55it/s]
100%|██████████| 938/938 [00:08<00:00, 113.17it/s]
100%|██████████| 938/938 [00:08<00:00, 108.12it/s]
100%|██████████| 938/938 [00:08<00:00, 111.49it/s]
100%|██████████| 938/938 [00:07<00:00, 119.27it/s]
100%|██████████| 938/938 [00:08<00:00, 110.94it/s]
100%|██████████| 938/938 [00:08<00:00, 110.94it/s]
100%|██████████| 938/938 [00:07<00:00, 119.95it/s]


In [9]:
def check_accuracy(loader, model):
    """Compute the fraction of samples in ``loader`` that ``model`` classifies correctly.

    Parameters:
        - loader: iterable of ``(inputs, labels)`` batches (e.g. a DataLoader)
        - model: network to evaluate [torch.nn.Module]

    Returns:
        - Accuracy in [0, 1] as a 0-dim tensor (number of correct predictions / number of samples)

    Raises:
        - ZeroDivisionError if the loader yields no samples

    Details:
        - Batches are moved to the device of the model's own parameters, so the function does not
          rely on a notebook-global ``device`` variable.
        - The model is switched to evaluation mode for the measurement and put back into whatever
          mode (train or eval) it was in before the call, even if evaluation raises.
    """
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device)
                y = y.to(device=target_device)

                scores = model(x)
                # The predicted class is the index of the highest score of each row
                _, predictions = scores.max(1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    return num_correct / num_samples

In [10]:
# Measure both splits first, then report them as percentages with two decimals
train_accuracy = check_accuracy(train_loader, model)
test_accuracy = check_accuracy(test_loader, model)
print(f"Accuracy on training set: {train_accuracy*100:.2f}")
print(f"Accuracy on test set: {test_accuracy*100:.2f}")

Accuracy on training set: 98.30
Accuracy on test set: 97.95


<h2 style="color:#BF66F2 "> <u> Example #2 </u> </h2>

<h3 style="color:#BF66F2 "> Recap: CNN </h3>
<div style="margin-top: -8px;">
The number of channels of each convolutional layer is usually chosen so that it increases gradually
<br> as the spatial resolution of the feature maps decreases.    <br>
This is because the lower layers of the network typically extract low-level features, such as edges and corners, while the higher <br> layers extract more abstract and complex features.<br>
</div>

### => Model #2.1

In [11]:
class MY_CNN(nn.Module):
    """ Wide two-stage convolutional classifier ending in a single fully connected layer.

    Args:
        - in_channels: number of channels of the input images [int, default: 1]
        - num_classes: number of scores produced per sample [int, default: 10]

    Layers:
        - conv1: 3x3 convolution, 'in_channels' -> 420 channels, stride 1, padding 1.
        - pool:  2x2 max pooling with stride 2 (used after each convolution).
        - conv2: 3x3 convolution, 420 -> 1000 channels, stride 1, padding 1.
        - fc1:   fully connected layer, 1000 * 7 * 7 input features -> 'num_classes' outputs.

    Notes:
        - Padding adds extra (zero-valued) pixels around the border of the input before the filter
          is applied; with a 3x3 kernel and padding 1 the height and width are preserved.
        - Stride is the number of pixels the filter is shifted by between two applications.
        - fc1 expects 7x7 feature maps, i.e. 28x28 inputs halved twice by the pooling layer.
    """

    def __init__(self, in_channels=1, num_classes=10):
        """ Create the layers; they are applied in order by forward(). """
        super().__init__()

        # Stage 1: widen the input to 420 feature maps
        self.conv1 = nn.Conv2d(in_channels, 420, kernel_size=3, stride=1, padding=1)
        # Shared down-sampling layer (it has no weights, so it can be reused)
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # Stage 2: 420 -> 1000 feature maps
        self.conv2 = nn.Conv2d(420, 1000, kernel_size=3, stride=1, padding=1)
        # Classifier head on the flattened 1000 x 7 x 7 feature maps
        self.fc1 = nn.Linear(1000 * 7 * 7, num_classes)

    def forward(self, x):
        """ Compute the class scores for a batch of images.

        Called implicitly by 'model(data)', both during training and during evaluation.

        Parameters:
            x: input batch [torch.Tensor of shape (batch_size, in_channels, height, width)]

        Steps:
            - conv1 followed by ReLU, applied element-wise; the non-linearity is what lets
              the stacked layers represent more than a single linear map.
            - 2x2 max pooling.
            - conv2 followed by ReLU.
            - 2x2 max pooling.
            - Flatten everything except the batch dimension, because the fully connected layer
              expects one feature vector per sample: (batch_size, 1000 * 7 * 7).
            - fc1 maps each feature vector to 'num_classes' scores.

        Returns:
            Scores [torch.Tensor of shape (batch_size, 'num_classes')]
        """
        stage1 = self.pool(F.relu(self.conv1(x)))
        stage2 = self.pool(F.relu(self.conv2(stage1)))
        # Batch dimension is kept; the size of the feature axis is inferred from the rest
        features = stage2.reshape(stage2.shape[0], -1)
        return self.fc1(features)

In [12]:
""" Check that GPU is working """
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Deliberate hard stop on CPU-only machines: the cells below use torch.cuda.amp (autocast and
# GradScaler). NOTE(review): `assert` statements are removed under `python -O` — confirm that is fine.
assert device.type == "cuda", "GPU not available"

cuda


In [13]:
""" Hyperparameters (initial guesses). """
# Channels of the input images; spelled `in_channels` like the model's constructor argument
# and the first example of this notebook.
in_channels = 1
# Backward-compatible alias for the original (singular) spelling.
in_channel = in_channels
# Number of target classes.
num_classes = 10
# Optimizer step size.
learning_rate = 3e-4
# Samples per mini-batch.
batch_size = 100
# Number of full passes over the training set.
num_epochs = 5

In [14]:
# Rebuild the loaders so that they pick up the batch size of this example.
train_dataset = datasets.MNIST(root="dataset/", train=True, transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(root="dataset/", train=False, transform=transforms.ToTensor(), download=True)
# Evaluation does not depend on sample order; keeping the test set unshuffled makes slices of it
# (e.g. "the first 100 samples") identical between runs.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [15]:
# Define model, loss and optimizer.
# Pass the hyperparameters explicitly so that editing them in the cell above actually changes the
# network (the values currently equal the constructor defaults).
model = MY_CNN(in_channels=in_channel, num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

<h3 style="color:#BF66F2 "> Recap: Forward pass </h3>
<div style="margin-top: -8px;">

`torch.cuda.amp.GradScaler()` scales the loss, and therefore the gradients, during backpropagation to avoid numerical underflow <br> when training with mixed precision; optimizer steps whose gradients overflow to inf/NaN are skipped. <br>
Mixed precision training is a technique that involves using a combination of single-precision and half-precision floating-point numbers <br> to perform forward and backward passes through the neural network. <br>
This technique can help reduce the memory requirements and computational cost of training deep learning models without sacrificing accuracy. <br>

In mixed precision training, the gradients that are computed during the backward pass may be very small or very large, <br> depending on the scale of the input data and the model parameters, so without scaling they may not be representable in half precision.
</div>

In [16]:
# Define a Scaler (necessary for FP16!)
# It multiplies the loss by a scale factor before backward() so that small gradients do not
# underflow in half precision; scaler.step()/scaler.update() are then used in the training loop.
# NOTE(review): recent PyTorch releases deprecate torch.cuda.amp.GradScaler in favour of
# torch.amp.GradScaler("cuda") — confirm against the installed version.
scaler = torch.cuda.amp.GradScaler()

### => Train the model

In [17]:
for epoch in range(num_epochs):
    # Wrap train_loader with tqdm
    loop = tqdm(enumerate(train_loader), total=len(train_loader), leave=True)
    # The label does not change within an epoch, so set it once instead of on every batch.
    # It is shown 1-based so that the last epoch reads [num_epochs/num_epochs].
    loop.set_description(f"Epoch [{epoch + 1}/{num_epochs}]")

    for batch_idx, (data, targets) in loop:
        # Send data to cuda
        data = data.to(device=device)
        targets = targets.to(device=device)

        ########## Forward pass through the model to get the predicted scores
        # autocast runs eligible operations in reduced precision inside this block
        with torch.cuda.amp.autocast():
            scores = model(data)
            # Calculate the loss using the predicted scores and the actual targets
            loss = criterion(scores, targets)

        ######## Backward pass to calculate the gradients of the loss function w.r.t. the model parameters
        # Reset the gradients of all model parameters
        optimizer.zero_grad()
        # Scale the loss to avoid numerical underflow and compute gradients
        scaler.scale(loss).backward()
        # Update the model parameters using the computed gradients
        scaler.step(optimizer)
        # Update the scale factor for the next iteration
        scaler.update()

        # Show the loss of the current batch next to the progress bar
        loop.set_postfix(loss=loss.item())

Epoch [0/5]: 100%|██████████| 600/600 [00:26<00:00, 22.28it/s, loss=0.032]
Epoch [1/5]: 100%|██████████| 600/600 [00:27<00:00, 22.12it/s, loss=0.0256]
Epoch [2/5]: 100%|██████████| 600/600 [00:27<00:00, 21.94it/s, loss=0.0805]
Epoch [3/5]: 100%|██████████| 600/600 [00:27<00:00, 21.79it/s, loss=0.00576]
Epoch [4/5]: 100%|██████████| 600/600 [00:27<00:00, 21.94it/s, loss=0.00144]


In [18]:
def check_accuracy(loader, model):
    """ Print the model's accuracy on the data from the given dataloader.

    Parameters:
        - loader: iterable of (inputs, labels) batches, e.g. a torch.utils.data.DataLoader
        - model: model to be evaluated [torch.nn.Module]

    Returns:
        - None; the result is printed as "Got <correct> / <total> with accuracy <percent>"

    Raises:
        - ZeroDivisionError if the loader yields no samples

    Details:
        - Batches are moved to the device of the model's own parameters, so the function does not
          depend on a notebook-global 'device' variable
        - The model is put in evaluation mode and gradient computation is disabled with
          'torch.no_grad()' while the predictions are made
        - The predicted label of a sample is the class with the highest score
        - Afterwards the model is put back into the mode (train or eval) it was in before the
          call, even if evaluation raises
    """
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct, num_samples = 0, 0
    was_training = model.training
    model.eval()

    try:
        # Disable gradient computation for efficiency reasons
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device)
                y = y.to(device=target_device)

                ## Compute the predicted labels as the class with the highest score
                scores = model(x)
                _, predictions = scores.max(1)

                ## Update the counters (kept as plain Python ints)
                num_correct += int((predictions == y).sum())
                num_samples += predictions.size(0)
    finally:
        # Restore the mode the model had before the call
        model.train(was_training)

    print(f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}")

In [19]:
# Report the accuracy on the training split first, then on the held-out test split
for split_loader in (train_loader, test_loader):
    check_accuracy(split_loader, model)

Got 59784 / 60000 with accuracy 99.64
Got 9907 / 10000 with accuracy 99.07


In [20]:
def f1_scorer(model, X, y_true, batch_size=256):
    """ Compute the macro-averaged F1 score for multi-class classification.

    The positional signature (estimator, X, y) is the one scikit-learn expects from a callable
    passed as 'scoring', so the function can also be used with GridSearchCV / RandomizedSearchCV.

    Parameters:
        - model: either a trained PyTorch model [nn.Module] or any object exposing 'predict(X)'
          that returns class labels (e.g. a scikit-learn style estimator)
        - Input data X [torch.Tensor or numpy array]
        - Target labels y_true [torch.Tensor or numpy array]
        - batch_size: number of samples sent through a PyTorch model at once [int, default: 256].
          Predicting in chunks avoids the CUDA out-of-memory error caused by pushing the whole
          dataset through the network in a single forward pass.

    Returns:
        - F1 score [float]

    Raises:
        - ValueError if batch_size is smaller than 1

    Side effects:
        - A PyTorch model is moved to the GPU when one is available (CPU otherwise); it is
          evaluated in eval mode and afterwards restored to the mode it was in before the call.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    if isinstance(y_true, torch.Tensor):
        y_true_np = y_true.detach().cpu().numpy()
    else:
        y_true_np = np.asarray(y_true)

    # scikit-learn style estimator: let it make its own predictions
    if not isinstance(model, nn.Module):
        y_pred_np = np.asarray(model.predict(X))
        return f1_score(y_true_np, y_pred_np, average='macro')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    was_training = model.training
    model.to(device)
    model.eval()

    inputs = torch.as_tensor(X)
    predicted_chunks = []
    try:
        with torch.no_grad():
            for start in range(0, inputs.shape[0], batch_size):
                # Only the current chunk lives on the device; predictions go straight back to CPU
                chunk = inputs[start:start + batch_size].to(device)
                predicted_chunks.append(torch.argmax(model(chunk), dim=1).cpu())
    finally:
        model.train(was_training)

    if predicted_chunks:
        y_pred = torch.cat(predicted_chunks, dim=0)
    else:
        y_pred = torch.empty(0, dtype=torch.long)

    score = f1_score(y_true_np, y_pred.numpy(), average='macro')
    return score

In [21]:
# Ensure model is in evaluation mode
model.eval()

# Walk the test loader exactly once, keeping a CPU copy of every batch
with torch.no_grad():
    test_batches = [(data.cpu(), targets.cpu()) for data, targets in test_loader]
X_test_list = [inputs for inputs, _ in test_batches]
y_test_list = [labels for _, labels in test_batches]

# Glue the per-batch tensors together along the sample dimension
X_test = torch.cat(X_test_list, dim=0)
y_test = torch.cat(y_test_list, dim=0)

""" Calculate F1 score
=> N.B.
To avoid the OutOfMemoryError: CUDA out of memory. Tried to allocate 12.27 GiB, it is necessary to consider only a small part of the dataset)
"""
f1 = f1_scorer(model, X_test[:100], y_test[:100])
print(f"F1 Score: {f1}")

F1 Score: 0.9714501537110232


### => Model #2.2

In [22]:
class MY_CNN(nn.Module):
    """ Same network as Model #2.1, extended with a 'fit' method that runs the training loop.

    NOTE: this definition replaces the MY_CNN class defined earlier in the notebook.

    Args:
        - in_channels: number of channels of the input images [int, default: 1]
        - num_classes: number of scores produced per sample [int, default: 10]

    Layers:
        - conv1: 3x3 convolution, 'in_channels' -> 420 channels, stride 1, padding 1
        - pool:  2x2 max pooling with stride 2 (applied after each convolution)
        - conv2: 3x3 convolution, 420 -> 1000 channels, stride 1, padding 1
        - fc1:   fully connected layer, 1000 * 7 * 7 input features -> 'num_classes' outputs
    """

    def __init__(self, in_channels=1, num_classes=10):
        super(MY_CNN, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=420,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )

        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

        self.conv2 = nn.Conv2d(
            in_channels=420,
            out_channels=1000,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )

        self.fc1 = nn.Linear(1000 * 7 * 7, num_classes)

    def forward(self, x):
        """ Return the class scores, shape (batch_size, num_classes), for a batch of images. """
        x = nn.functional.relu(self.conv1(x))
        x = self.pool(x)
        x = nn.functional.relu(self.conv2(x))
        x = self.pool(x)
        # Keep the batch dimension, flatten channels/height/width into one feature axis
        x = x.view(x.size(0), -1)
        out = self.fc1(x)
        return out

    def fit(self, train_loader, criterion, optimizer, lr, num_epochs):
        """ Trains the CNN model using the specified hyperparameters.

        Parameters:
            - train_loader: iterable of (inputs, labels) batches, e.g. a torch.utils.data.DataLoader
            - criterion: loss function called as criterion(outputs, labels)
            - optimizer: either an optimizer class/factory such as torch.optim.Adam, which is
              called as optimizer(self.parameters(), lr=lr), or an already constructed
              torch.optim.Optimizer instance. An instance is used as it is (it must have been
              built over this model's parameters) and its learning rate is set to 'lr'.
              Passing an instance used to fail with "TypeError: Adam object is not callable".
            - lr: learning rate to use for training [float]
            - num_epochs: number of epochs to train for [int]

        Returns:
            - Trained CNN model (self)

        Details:
            - The model is moved to the GPU when one is available (CPU otherwise) and put in
              training mode.
            - After every epoch the summed training loss is given to a ReduceLROnPlateau
              scheduler (patience=3, factor=0.1).
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(device)

        ## Define optimizer and scheduler
        if isinstance(optimizer, torch.optim.Optimizer):
            for group in optimizer.param_groups:
                group["lr"] = lr
        else:
            optimizer = optimizer(self.parameters(), lr=lr)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.1)

        # A previous evaluation may have left the model in eval mode
        self.train()

        ###################################### Train the model
        for epoch in range(num_epochs):
            train_loss = 0.0
            for inputs, labels in train_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Forward pass
                outputs = self(inputs)
                loss = criterion(outputs, labels)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Weight the batch-mean loss by the batch size, so that batches of different
                # sizes contribute proportionally to the epoch total
                train_loss += loss.item() * inputs.size(0)

            scheduler.step(train_loss)

        return self

<div style="line-height:0.8">
<h4> <b> Note: </b> </h4>
</div>
<div style="line-height:0.1">
<h4> Common problems with "grid_search.fit([train_loader], y_train)" </h4>
</div>
<div style="line-height:1.4">

- ValueError: Cannot have number of splits n_splits=3 greater than the number of samples: n_samples=1. <br>
- ValueError: k-fold cross-validation requires at least one train/test split by setting n_splits=2 or more, got n_splits=1. <br>
- TypeError: Singleton array array(<torch.utils.data.dataloader.DataLoader object at 0x780366fd59f0>, <br>
      dtype=object) cannot be considered a valid collection.
</div>

In [23]:
""" Get dataset as numpy arrays.
N.B.
Using:
    #train_data_numpy = train_dataset.numpy()
    #train_labels_numpy = train_dataset.targets.numpy()
will lead to Error: 'MNIST' object has no attribute 'numpy'...
"""
# Walk the dataset once; every item is an (image tensor, integer label) pair
samples = [(image.numpy(), label) for image, label in train_dataset]

# Stack the per-sample arrays into one array of images and one array of labels
train_data_numpy = np.array([pixels for pixels, _ in samples])
train_labels_numpy = np.array([label for _, label in samples])

In [24]:
# Peek at the first three converted images only (never display the whole array).
train_data_numpy[:3]

array([[[[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]]],


       [[[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]]],


       [[[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]]]], dtype=float32)

<h3 style="color:#BF66F2 "> Grid Search </h3>
Adding too many parameters to the search grid quickly makes the code require too much RAM, and the program may crash!

In [25]:
class MyCNNWrapper(BaseEstimator, ClassifierMixin):
    """ scikit-learn style wrapper around MY_CNN, usable with GridSearchCV / RandomizedSearchCV.

    Parameters (these are the names that can be searched over):
        - in_channels: channels of the input images [int, default: 1]
        - num_classes: number of target classes [int, default: 10]
        - learning_rate: learning rate of the Adam optimizer used by fit [float, default: 0.001]
        - epochs: number of training epochs [int, default: 10]
        - batch_size: mini-batch size used by fit and predict [int, default: 64]

    Attributes:
        - model: the wrapped MY_CNN network (re-created by set_params)
        - criterion: the loss used for training (cross-entropy)
        - classes_: sorted class labels seen by fit (set by fit)
    """

    def __init__(self, in_channels=1, num_classes=10, learning_rate=0.001, epochs=10, batch_size=64):
        """
        Initialize the wrapper with default parameters.\\
        The parameters can then be adjusted with grid search.
        """
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = MY_CNN(in_channels=in_channels, num_classes=num_classes)
        self.criterion = torch.nn.CrossEntropyLoss()

    def fit(self, X, y):
        """ Train the model on array-like inputs.

        Parameters:
            - X: images, shape (n_samples, in_channels, height, width) [numpy array or tensor]
            - y: integer class labels, shape (n_samples,) [numpy array or tensor]

        Returns:
            - self

        The arrays are wrapped in a shuffled DataLoader because MY_CNN.fit iterates over
        (inputs, labels) batches; handing it the raw X array would neither batch the data nor
        provide the labels.
        """
        features = torch.as_tensor(X, dtype=torch.float32)
        labels = torch.as_tensor(y, dtype=torch.long)
        train_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(features, labels),
            batch_size=self.batch_size,
            shuffle=True,
        )
        # scikit-learn convention: fitted classifiers expose the labels they were trained on
        self.classes_ = np.unique(labels.cpu().numpy())
        self.model.fit(train_loader, self.criterion, torch.optim.Adam, self.learning_rate, self.epochs)
        return self

    def predict(self, X):
        """ Make predictions.

        Parameters:
            - X: images, shape (n_samples, in_channels, height, width) [numpy array or tensor]

        Returns:
            - Predicted class index for every sample [numpy array of shape (n_samples,)]

        Inputs are processed in chunks of 'batch_size' on the device the model currently lives
        on (fit may have moved it to the GPU), without tracking gradients, and the predictions
        are copied back to the CPU before the conversion to numpy.
        """
        # Set model to evaluation mode
        self.model.eval()
        device = next(self.model.parameters()).device
        inputs = torch.as_tensor(X, dtype=torch.float32)

        predicted_chunks = []
        with torch.no_grad():
            for start in range(0, inputs.shape[0], self.batch_size):
                chunk = inputs[start:start + self.batch_size].to(device)
                _, preds = torch.max(self.model(chunk), 1)
                predicted_chunks.append(preds.cpu())

        if not predicted_chunks:
            return np.empty(0, dtype=np.int64)
        return torch.cat(predicted_chunks).numpy()

    def get_params(self, deep=True):
        """ Get parameters for this estimator. """
        return {
            "in_channels": self.in_channels,
            "num_classes": self.num_classes,
            "learning_rate": self.learning_rate,
            "epochs": self.epochs,
            "batch_size": self.batch_size,
        }

    def set_params(self, **parameters):
        """ Set the parameters of the estimator and rebuild the network.

        Raises:
            - ValueError for a name that get_params() does not return. Unknown names used to be
              stored silently as unused attributes, so a search could "tune" parameters that
              had no effect at all.
        """
        valid_names = self.get_params(deep=False)
        for parameter in parameters:
            if parameter not in valid_names:
                raise ValueError(
                    f"Invalid parameter {parameter!r} for estimator {type(self).__name__}. "
                    f"Valid parameters are: {sorted(valid_names)!r}."
                )
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        # Update model
        self.model = MY_CNN(in_channels=self.in_channels, num_classes=self.num_classes)
        # Update loss
        self.criterion = torch.nn.CrossEntropyLoss()
        return self

In [26]:
%%script echo Skipping, it may cause RAM crash.
""" GridSearchCV.
N.B.1
It works but it use too much RAM!
N.B.2
The fit function need to be defined also to use it ! (self, train_loader, criterion, optimizer, lr, num_epochs).
"""
##### Define the hyperparameters to search over
param_grid = {
    'hidden_size': [64, 128, 256],
    'learning_rate': [1e-3, 3e-4, 1e-4],
    'batch_size': [50, 100, 200],
    'num_epochs': [5, 10, 20],
}
# Create an instance of the wrapper class
wrapper = MyCNNWrapper()

# Create an instance of the LeaveOneOut class
loo = LeaveOneOut()

grid_search = GridSearchCV(estimator=wrapper, param_grid=param_grid, cv=loo, scoring=f1_scorer) #cv=1 scoring='f1_macro')

# Train the model using grid search to find the best hyperparameters
grid_search.fit(train_data_numpy, train_labels_numpy)

print(grid_search.best_params_)
# Evaluate the best model found by grid search
best_model = grid_search.best_estimator_

Skipping, it may cause RAM crash.


In [27]:
%%script echo Skipping, it may cause RAM crash.
""" Random GridSearchCV
N.B.1
It works but it use too much RAM!
N.B.2
#ValueError: --> fits failed. It is very likely that your model is misconfigured.
N.B.3
TypeError: Adam object is not callable
"""
###### Define the hyperparameters to search over
param_grid = {
    'hidden_size': [64, 128, 256],
    'learning_rate': [1e-3, 3e-4, 1e-4],
    'batch_size': [50, 100, 200],
    'num_epochs': [5, 10, 20],
}
# Create an instance of the wrapper class
wrapper = MyCNNWrapper()

# Create an instance of the LeaveOneOut class
loo = LeaveOneOut()

# Create an instance of the RandomizedSearchCV class
random_search = RandomizedSearchCV(estimator=wrapper, param_distributions=param_grid, n_iter=10, cv=3) # cv=loo, scoring=f1_scorer)

# Train the model using random search to find the best hyperparameters
random_search.fit(train_data_numpy, train_labels_numpy)

print(random_search.best_params_)
# Evaluate the best model found by grid search
best_model = grid_search.best_estimator_

Skipping, it may cause RAM crash.


In [28]:
%%script echo Skipping, it may cause RAM crash.
best_model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = best_model(images)
        _, predicted = torch.max(outputs.data, dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    accuracy = correct / total
    print("Test accuracy: %.2f%%" % (accuracy * 100))

Skipping, it may cause RAM crash.
