# Packages 

* PIL for images
* torchvision for pytorch computer vision support 
* datasets -> loading dataset with huggingface dataset api
* matplotlib for showing images
* torch and its many packages and modules for nn
* tqdm for progress bar when training and testing
* numpy for math stuff
* ray tune for hyperparameter otpimization
* torch summary for model params count
* typing for better clarity 
* CNN storing the cnn models

In [15]:
# install packages
!pip -q install accelerate datasets evaluate torchvision Pillow 'transformers[torch]' tqdm ray tensorboardX torchmetrics torchsummary scikit-learn numpy torch matplotlib

You should consider upgrading via the '/Users/kelley/Desktop/drawAI/.env/bin/python3 -m pip install --upgrade pip' command.[0m


In [2]:
from PIL import Image
import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torch.optim as optim
from datasets import load_dataset
from torch.utils.data import Subset
import matplotlib as plt
from tqdm import tqdm
import numpy as np
from torch.utils.data import DataLoader, Subset
import random
from ray import tune
from tensorboardX import SummaryWriter
from ray.tune.stopper import MaximumIterationStopper, TrialPlateauStopper
from ray.tune.schedulers import ASHAScheduler
from torchmetrics import AveragePrecision
from sklearn.metrics import f1_score, precision_score, recall_score
from torchsummary import summary
from datasets import Dataset, DatasetDict
from typing import List, Dict, Any
import ray

  from .autonotebook import tqdm as notebook_tqdm
2024-04-26 21:17:44,796	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-04-26 21:17:44,845	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


# Dataset infromation 
* 10% Subset of Google's 50 million quickdraw dataset 
    * training -> 4.5 million
    * validation -> .5 million 
    * test ->.5 million
* link -> https://huggingface.co/datasets/Xenova/quickdraw-small?row=0
* 28 x 28 images of drawings 
* 345 labels 


In [3]:
# mac gpu loading 
device = 'mps' if torch.backends.mps.is_available() else 'cpu'

# loading dataset into huggign face dataset dictionary/object
dataset:DatasetDict = load_dataset('Xenova/quickdraw-small')
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 4500000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 250000
    })
    valid: Dataset({
        features: ['image', 'label'],
        num_rows: 250000
    })
})


In [4]:
# 3 batches tested for each, previously generated with random
# example
# batcha_selected_classes_10  = random.sample(range(0, 344), 10)
batcha_selected_classes_10 = [121, 162, 120, 211, 43, 185, 177, 171, 232, 322]
batcha_selected_classes_50 = [143, 149, 306, 233, 107, 196, 129, 186, 22, 227, 16, 253, 199, 194, 333, 122, 297, 307, 217, 42, 251, 158, 324, 124, 334, 270, 165, 78, 214, 289, 69, 169, 206, 133, 278, 262, 298, 54, 115, 272, 110, 215, 209, 172, 328, 5, 207, 14, 9, 51]
batchb_selected_classes_10 = [223, 60, 237, 51, 234, 94, 53, 161, 300, 320]
batchb_selected_classes_50 = [148, 30, 79, 253, 18, 6, 141, 116, 150, 273, 68, 339, 262, 302, 197, 56, 333, 182, 265, 323, 101, 226, 308, 155, 171, 199, 25, 28, 48, 10, 342, 66, 104, 318, 185, 330, 263, 257, 35, 296, 70, 228, 272, 316, 42, 69, 233, 127, 217, 169]
batchc_selected_classes_10 = [302, 323, 107, 312, 184, 235, 179, 146, 38, 149]
batchc_selected_classes_50 = [248, 32, 60, 128, 290, 77, 49, 282, 95, 176, 193, 72, 189, 226, 100, 107, 304, 308, 14, 287, 35, 250, 111, 297, 291, 112, 62, 168, 136, 11, 323, 327, 239, 88, 159, 120, 257, 240, 24, 16, 148, 160, 40, 96, 155, 53, 280, 36, 64, 151]

# pytorch must have labels 0 - x, so using dict to create mapping
# i.e 121::0, 162:1, using this mapping later
def create_label_mapping(selected_classes: List[int]) -> Dict[int, int]:
    return {old_label: new_label for new_label, old_label in enumerate(selected_classes)}

# function to create a flag in the dataset to filter based on
# there are better ways to do this but trying to use pretrained models later
# and want it as flexible as possible
def transform_example(example: Dict[str, Any], selected_classes: List[int], label_mapping: Dict[int, int]) -> Dict[str, Any]:
    if example['label'] in selected_classes:
        return {'image': example['image'], 'label': label_mapping[example['label']], 'is_selected': True}
    else:
        return {'is_selected': False}


# dictorying storing each diction
datasets: Dict[str, DatasetDict[str, Dataset]] = {}

# applying everything to all 6 batches 
for batch, selected_classes in [('batcha', batcha_selected_classes_10),
                                ('batcha', batcha_selected_classes_50),
                                ('batchb', batchb_selected_classes_10),
                                ('batchb', batchb_selected_classes_50),
                                ('batchc', batchc_selected_classes_10),
                                ('batchc', batchc_selected_classes_50)]:
    # getting the label mapping
    label_mapping:Dict[int, int] = create_label_mapping(selected_classes)
    # flagging and mapping labels each class
    dataset_with_updated_labels: DatasetDict = dataset.map(lambda x: transform_example(x, selected_classes, label_mapping))
    
    # filtering classes that are flagged/selected
    # very pythonic syntax cuz train, val, test splits 
    filtered_dataset: Dict[str, Dataset] = {split: ds.filter(lambda x: x['is_selected']) for split, ds in dataset_with_updated_labels.items()}
    datasets[f"{batch}_{len(selected_classes)}"] = filtered_dataset


# Data Pipeline 

- transforing dataset to tensors
- storing everything in a dict 
- subsetting data based on batches 

In [5]:
def transform(dataset: DatasetDict) -> Dict[str, torch.Tensor]:
    # pytorch preprocess
    processor: transforms.Compose = transforms.Compose([
        # augment stuff for later to see if it improves performance
        # transforms.RandomHorizontalFlip(),
        # transforms.RandomVerticalFlip(),
        transforms.ToTensor(),  # range of values 0 - 1
        transforms.Normalize((0.5,), (0.5,)),  # std .5, mean .5
    ])
    # convert each image to 3d numpy array then reshape into a (height, width)) tensor
    images: List[torch.Tensor] = [processor(np.array(x).reshape(28, 28)) for x in dataset['image']]
    # return dictionary containing single 3d tensor storing all images and 1d tensor for labels
    return {'image': torch.stack(images), 'label': torch.tensor(dataset['label'])}

## Subsetting data 
Original Data 4.5 Million rows 90/5/5 split
Updated 

- 10 Classes: 100% Train. 100% Validation% 100% Testing

- 50 Classes: 30% Train.  60% Validation% 60% Testing

- 345 Classes: 25% Train. 50% Validation% 50% Testing

- Using pytorch subset to do this!

Setting up dataloaders


In [6]:
# BatchA 10 classes
transformed_dataset_batcha_10 = {split: ds.with_transform(transform) for split, ds in datasets['batcha_10'].items()}

subset_ratio = 1
train_subset_size_batcha_10 = int(subset_ratio * len(transformed_dataset_batcha_10['train']))
subset_train_dataset_batcha_10 = Subset(transformed_dataset_batcha_10['train'], range(train_subset_size_batcha_10))

subset_ratio = 1
val_subset_size_batcha_10 = int(subset_ratio * len(transformed_dataset_batcha_10['valid']))
subset_validation_dataset_batcha_10 = Subset(transformed_dataset_batcha_10['valid'], range(val_subset_size_batcha_10))

subset_ratio = 1
test_subset_size_batcha_10 = int(subset_ratio * len(transformed_dataset_batcha_10['test']))
subset_test_dataset_batcha_10 = Subset(transformed_dataset_batcha_10['test'], range(test_subset_size_batcha_10))

train_loader_batcha_10 = DataLoader(subset_train_dataset_batcha_10, batch_size=16, shuffle=True)
validation_loader_batcha_10 = DataLoader(subset_validation_dataset_batcha_10, batch_size=16, shuffle=False)
test_loader_batcha_10 = DataLoader(subset_test_dataset_batcha_10, batch_size=16, shuffle=False)

# BatchB 10 classes
transformed_dataset_batchb_10 = {split: ds.with_transform(transform) for split, ds in datasets['batchb_10'].items()}

subset_ratio = 1
train_subset_size_batchb_10 = int(subset_ratio * len(transformed_dataset_batchb_10['train']))
subset_train_dataset_batchb_10 = Subset(transformed_dataset_batchb_10['train'], range(train_subset_size_batchb_10))

subset_ratio = 1
val_subset_size_batchb_10 = int(subset_ratio * len(transformed_dataset_batchb_10['valid']))
subset_validation_dataset_batchb_10 = Subset(transformed_dataset_batchb_10['valid'], range(val_subset_size_batchb_10))

subset_ratio = 1
test_subset_size_batchb_10 = int(subset_ratio * len(transformed_dataset_batchb_10['test']))
subset_test_dataset_batchb_10 = Subset(transformed_dataset_batchb_10['test'], range(test_subset_size_batchb_10))

train_loader_batchb_10 = DataLoader(subset_train_dataset_batchb_10, batch_size=16, shuffle=True)
validation_loader_batchb_10 = DataLoader(subset_validation_dataset_batchb_10, batch_size=16, shuffle=False)
test_loader_batchb_10 = DataLoader(subset_test_dataset_batchb_10, batch_size=16, shuffle=False)

# BatchC 10 classes
transformed_dataset_batchc_10 = {split: ds.with_transform(transform) for split, ds in datasets['batchc_10'].items()}

subset_ratio = 1
train_subset_size_batchc_10 = int(subset_ratio * len(transformed_dataset_batchc_10['train']))
subset_train_dataset_batchc_10 = Subset(transformed_dataset_batchc_10['train'], range(train_subset_size_batchc_10))

subset_ratio = 1
val_subset_size_batchc_10 = int(subset_ratio * len(transformed_dataset_batchc_10['valid']))
subset_validation_dataset_batchc_10 = Subset(transformed_dataset_batchc_10['valid'], range(val_subset_size_batchc_10))

subset_ratio = 1
test_subset_size_batchc_10 = int(subset_ratio * len(transformed_dataset_batchc_10['test']))
subset_test_dataset_batchc_10 = Subset(transformed_dataset_batchc_10['test'], range(test_subset_size_batchc_10))

train_loader_batchc_10 = DataLoader(subset_train_dataset_batchc_10, batch_size=16, shuffle=True)
validation_loader_batchc_10 = DataLoader(subset_validation_dataset_batchc_10, batch_size=16, shuffle=False)
test_loader_batchc_10 = DataLoader(subset_test_dataset_batchc_10, batch_size=16, shuffle=False)

# BatchA 50 classes
transformed_dataset_batcha_50 = {split: ds.with_transform(transform) for split, ds in datasets['batcha_50'].items()}

subset_ratio = 0.6
train_subset_size_batcha_50 = int(subset_ratio * len(transformed_dataset_batcha_50['train']))
subset_train_dataset_batcha_50 = Subset(transformed_dataset_batcha_50['train'], range(train_subset_size_batcha_50))

subset_ratio = 0.3
val_subset_size_batcha_50 = int(subset_ratio * len(transformed_dataset_batcha_50['valid']))
subset_validation_dataset_batcha_50 = Subset(transformed_dataset_batcha_50['valid'], range(val_subset_size_batcha_50))

subset_ratio = 0.3
test_subset_size_batcha_50 = int(subset_ratio * len(transformed_dataset_batcha_50['test']))
subset_test_dataset_batcha_50 = Subset(transformed_dataset_batcha_50['test'], range(test_subset_size_batcha_50))

train_loader_batcha_50 = DataLoader(subset_train_dataset_batcha_50, batch_size=16, shuffle=True)
validation_loader_batcha_50 = DataLoader(subset_validation_dataset_batcha_50, batch_size=16, shuffle=False)
test_loader_batcha_50 = DataLoader(subset_test_dataset_batcha_50, batch_size=16, shuffle=False)

# BatchB 50 classes
transformed_dataset_batchb_50 = {split: ds.with_transform(transform) for split, ds in datasets['batchb_50'].items()}

subset_ratio = 0.6
train_subset_size_batchb_50 = int(subset_ratio * len(transformed_dataset_batchb_50['train']))
subset_train_dataset_batchb_50 = Subset(transformed_dataset_batchb_50['train'], range(train_subset_size_batchb_50))

subset_ratio = 0.3
val_subset_size_batchb_50 = int(subset_ratio * len(transformed_dataset_batchb_50['valid']))
subset_validation_dataset_batchb_50 = Subset(transformed_dataset_batchb_50['valid'], range(val_subset_size_batchb_50))

subset_ratio = 0.3
test_subset_size_batchb_50 = int(subset_ratio * len(transformed_dataset_batchb_50['test']))
subset_test_dataset_batchb_50 = Subset(transformed_dataset_batchb_50['test'], range(test_subset_size_batchb_50))

train_loader_batchb_50 = DataLoader(subset_train_dataset_batchb_50, batch_size=16, shuffle=True)
validation_loader_batchb_50 = DataLoader(subset_validation_dataset_batchb_50, batch_size=16, shuffle=False)
test_loader_batchb_50 = DataLoader(subset_test_dataset_batchb_50, batch_size=16, shuffle=False)

# BatchC 50 classes
transformed_dataset_batchc_50 = {split: ds.with_transform(transform) for split, ds in datasets['batchc_50'].items()}

subset_ratio = 0.6
train_subset_size_batchc_50 = int(subset_ratio * len(transformed_dataset_batchc_50['train']))
subset_train_dataset_batchc_50 = Subset(transformed_dataset_batchc_50['train'], range(train_subset_size_batchc_50))

subset_ratio = 0.3
val_subset_size_batchc_50 = int(subset_ratio * len(transformed_dataset_batchc_50['valid']))
subset_validation_dataset_batchc_50 = Subset(transformed_dataset_batchc_50['valid'], range(val_subset_size_batchc_50))

subset_ratio = 0.3
test_subset_size_batchc_50 = int(subset_ratio * len(transformed_dataset_batchc_50['test']))
subset_test_dataset_batchc_50 = Subset(transformed_dataset_batchc_50['test'], range(test_subset_size_batchc_50))

train_loader_batchc_50 = DataLoader(subset_train_dataset_batchc_50, batch_size=8, shuffle=True)
validation_loader_batchc_50 = DataLoader(subset_validation_dataset_batchc_50, batch_size=8, shuffle=False)
test_loader_batchc_50 = DataLoader(subset_test_dataset_batchc_50, batch_size=8, shuffle=False)

transformed_dataset = {split: ds.with_transform(transform) for split, ds in dataset.items()}
# subset selected
train_subset_size_345 = int(0.25 * len(transformed_dataset['train']))
subset_train_dataset_345 = Subset(transformed_dataset['train'], range(train_subset_size_345))

val_subset_size_345 = int(0.50 * len(transformed_dataset['valid']))
subset_validation_dataset_345 = Subset(transformed_dataset['valid'], range(val_subset_size_345))

test_subset_size_345 = int(0.50 * len(transformed_dataset['test']))
subset_test_dataset_345 = Subset(transformed_dataset['test'], range(test_subset_size_345))

train_loader_345 = DataLoader(subset_train_dataset_345, batch_size=16, shuffle=True)
validation_loader_345 = DataLoader(subset_validation_dataset_345, batch_size=16, shuffle=False)
test_loader_345 = DataLoader(subset_test_dataset_345, batch_size=16, shuffle=False)

# Models
- Models for each batch 7 models total!
- 9 CNN models written in CNN_models.py
    - CNN1 - A simple convolutional neural network model.
    - CNN2 - An improved convolutional neural network model with additional layers and modifications.
    - CNN3 - A version with more filters than the base model.
    - CNN4 - A version with additional dense layers.
    - CNN5 - A version with more filters.
    - CNN6 - A version with double convolutional blocks.
    - bestCNN - A version with hyperparameter optimization using Ray Tune.
    - CNN8 - A version with residual blocks based on the Dive into Deep Learning textbook.
    - CNN9 - A version with more layers of residual blocks.


In [7]:
from CNN_models import bestCNN
model_full = bestCNN(345)
print(summary(model_full, (1, 28, 28)))
model_full.to(device)

model_batcha_10 = bestCNN(10)
model_batcha_10.to(device)

model_batchb_10 = bestCNN(10)
model_batchb_10.to(device)

model_batchc_10 = bestCNN(10)
model_batchc_10.to(device)

model_batcha_50 = bestCNN(50)
model_batcha_50.to(device)

model_batchb_50 = bestCNN(50)
model_batchb_50.to(device)

model_batchc_50 = bestCNN(50)
model_batchc_50.to(device)


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             288
       BatchNorm2d-2           [-1, 32, 28, 28]              64
              ReLU-3           [-1, 32, 28, 28]               0
           Dropout-4           [-1, 32, 28, 28]               0
            Conv2d-5           [-1, 32, 28, 28]           9,216
       BatchNorm2d-6           [-1, 32, 28, 28]              64
              ReLU-7           [-1, 32, 28, 28]               0
           Dropout-8           [-1, 32, 28, 28]               0
         MaxPool2d-9           [-1, 32, 14, 14]               0
           Conv2d-10           [-1, 64, 14, 14]          18,432
      BatchNorm2d-11           [-1, 64, 14, 14]             128
             ReLU-12           [-1, 64, 14, 14]               0
          Dropout-13           [-1, 64, 14, 14]               0
           Conv2d-14           [-1, 64,

bestCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout1): Dropout(p=0.1, inplace=False)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout2): Dropout(p=0.1, inplace=False)
  (conv5): Conv2d(64, 128, k

# Running the model 
- creating to handle training and so it's flexible for all 7 batches

In [8]:
def train_model(model, train_loader, validation_loader, criterion, optimizer, device, num_epochs, writer_name, save_folder):
    # initialize tensorboard
    writer = SummaryWriter(writer_name)
    
    # training/eval loop
    for epoch in range(num_epochs):
        running_loss = 0.0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')
        
        # train mode
        model.train()
        
        # process batches
        for batch in progress_bar:
            images = batch['image'].to(device)
            labels = batch['label'].to(device)
            
            # forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())
        
        epoch_loss = running_loss / len(train_loader)
        
        # validation
        model.eval()
        val_loss = 0.0
        correct_top1 = 0
        correct_top3 = 0
        total = 0
        preds_list = []
        labels_list = []
        
        # validation loop
        with torch.no_grad():
            for batch in validation_loader:
                images = batch['image'].to(device)
                labels = batch['label'].to(device)
                
                # model predictions
                preds = model(images)
                loss = criterion(preds, labels)
                val_loss += loss.item()
                
                # top-1 and top-3 accuracy
                _, predicted_top1 = torch.max(preds.data, 1)
                _, predicted_top3 = torch.topk(preds.data, 3, dim=1)
                total += labels.size(0)
                preds_list.append(preds)
                labels_list.append(labels)
                
                # update correct guesses
                correct_top1 += (predicted_top1 == labels).sum().item()
                correct_top3 += (predicted_top3 == labels.unsqueeze(1)).any(dim=1).sum().item()
        
        preds_tensor = torch.cat(preds_list, dim=0)
        labels_tensor = torch.cat(labels_list, dim=0)
        
        # final stats
        val_loss /= len(validation_loader)
        val_accuracy_top1 = correct_top1 / total
        val_accuracy_top3 = correct_top3 / total
        
        # additional metrics
        preds_top1 = torch.argmax(preds_tensor, dim=1).cpu().numpy()
        labels_numpy = labels_tensor.cpu().numpy()
        f1 = f1_score(labels_numpy, preds_top1, average='weighted')
        precision = precision_score(labels_numpy, preds_top1, average='weighted')
        recall = recall_score(labels_numpy, preds_top1, average='weighted')
        
        # log to tensorboard
        writer.add_scalar('Train Loss', epoch_loss, epoch)
        writer.add_scalar('Val Loss', val_loss, epoch)
        writer.add_scalar('Val AccuracyTop1', val_accuracy_top1, epoch)
        writer.add_scalar('Val AccuracyTop3', val_accuracy_top3, epoch)
        writer.add_scalar('F1 Score', f1, epoch)
        writer.add_scalar('Precision', precision, epoch)
        writer.add_scalar('Recall', recall, epoch)
        
        # print stats
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}, '
              f'Val Accuracy Top1: {val_accuracy_top1:.4f}, Val Accuracy Top3: {val_accuracy_top3:.4f}, '
              f'F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}')
    
    # save model
    save_path = f"{save_folder}"
    torch.save(model.state_dict(), save_path)
    
    writer.close()

In [9]:
criterion = nn.CrossEntropyLoss()

# optimizers 
optimizer_batcha_10 = torch.optim.Adam(model_batcha_10.parameters(), lr=0.001)
optimizer_batchb_10 = torch.optim.Adam(model_batchb_10.parameters(), lr=0.001)
optimizer_batchc_10 = torch.optim.Adam(model_batchc_10.parameters(), lr=0.001)
optimizer_batcha_50 = torch.optim.Adam(model_batcha_50.parameters(), lr=0.001)
optimizer_batchb_50 = torch.optim.Adam(model_batchb_50.parameters(), lr=0.001)
optimizer_batchc_50 = torch.optim.Adam(model_batchc_50.parameters(), lr=0.001)
optimizer_full= torch.optim.Adam(model_full.parameters(), lr=0.001)

# training each model
train_model(model_batcha_10, train_loader_batcha_10, validation_loader_batcha_10, criterion, optimizer_batcha_10, device, num_epochs=3, writer_name='training_data/batcha_10', save_folder = 'weights/batcha_10')

train_model(model_batchb_10, train_loader_batchb_10, validation_loader_batchb_10, criterion, optimizer_batchb_10, device, num_epochs=5, writer_name='training_data/batchb_10', save_folder = 'weights/batchb_10')

train_model(model_batchc_10, train_loader_batchc_10, validation_loader_batchc_10, criterion, optimizer_batchc_10, device, num_epochs=5, writer_name='training_data/batchc_10', save_folder = 'weights/batchc_10')

train_model(model_batcha_50, train_loader_batcha_50, validation_loader_batcha_50, criterion, optimizer_batcha_50, device, num_epochs=5, writer_name='training_data/batcha_50', save_folder = 'weights/batcha_50')

train_model(model_batchb_50, train_loader_batchb_50, validation_loader_batchb_50, criterion, optimizer_batchb_50, device, num_epochs=5, writer_name='training_data/batchb_50', save_folder = 'weights/batchb_50')

train_model(model_batchc_50, train_loader_batchc_50, validation_loader_batchc_50, criterion, optimizer_batchc_50, device, num_epochs=5, writer_name='training_data/batchc_50', save_folder = 'weights/batchc_50')

train_model(model_full, train_loader_345, validation_loader_345, criterion, optimizer_full, device, num_epochs=5, writer_name='training_data/', save_folder = 'weights/345')

Epoch 1/3:   1%|          | 99/9586 [00:02<04:12, 37.60batch/s, loss=0.852]


KeyboardInterrupt: 

# Hyperparamter tuning with raytune!
- raytune will parallize the training!

In [10]:
class TuneCNN(nn.Module):
    def __init__(self, config):
        super(TuneCNN, self).__init__()

        # cnn block 1
        self.conv1 = nn.Conv2d(1, config["channels_1"], kernel_size=config["kernal"], padding='same', bias=False)
        self.bn1 = nn.BatchNorm2d(config["channels_1"])
        self.conv2 = nn.Conv2d(config["channels_1"], config["channels_1"], kernel_size=config["kernal"], padding='same', bias=False)
        self.bn2 = nn.BatchNorm2d(config["channels_1"])
        self.pool1 = nn.MaxPool2d(2)
        self.dropout1 = nn.Dropout(config["dropout"])

        # cnn block 2
        self.conv3 = nn.Conv2d(config["channels_1"], config["channels_2"], kernel_size=config["kernal"], padding='same', bias=False)
        self.bn3 = nn.BatchNorm2d(config["channels_2"])
        self.conv4 = nn.Conv2d(config["channels_2"], config["channels_2"], kernel_size=config["kernal"], padding='same', bias=False)
        self.bn4 = nn.BatchNorm2d(config["channels_2"])
        self.pool2 = nn.MaxPool2d(2)
        self.dropout2 = nn.Dropout(config["dropout"])

        # cnn block 3
        self.conv5 = nn.Conv2d(config["channels_2"], config["channels_3"], kernel_size=config["kernal"], padding='same', bias=False)
        self.bn5 = nn.BatchNorm2d(config["channels_3"])
        self.conv6 = nn.Conv2d(config["channels_3"], config["channels_3"], kernel_size=config["kernal"], padding='same', bias=False)
        self.bn6 = nn.BatchNorm2d(config["channels_3"])
        self.pool3 = nn.MaxPool2d(2)
        self.dropout3 = nn.Dropout(config["dropout"])

        # calculate output size after pooling layers
        input_size = 28
        output_size = input_size // 2  # after first pooling layer
        output_size //= 2  # after second pooling layer
        output_size //= 2  # after third pooling layer

        # fully connected layers
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(config["channels_3"] * output_size * output_size, config["hidden"])
        self.fc2 = nn.Linear(config["hidden"], config["hidden"])
        self.fc3 = nn.Linear(config["hidden"], 10)

        self.relu = nn.ReLU()
        if config["activation"] == "ReLU":
            self.tanh = nn.ReLU()
        elif config["activation"] == "Sigmoid":
            self.tanh = nn.Sigmoid()
        else:
            self.tanh = nn.Tanh()
        self.dropout4 = nn.Dropout(config["dropout"])

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.pool1(x)
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.relu(self.bn4(self.conv4(x)))
        x = self.pool2(x)
        x = self.relu(self.bn5(self.conv5(x)))
        x = self.relu(self.bn6(self.conv6(x)))
        x = self.pool3(x)
        x = self.dropout4(x)
        x = self.flatten(x)
        x = self.tanh(self.fc1(x))
        x = self.tanh(self.fc2(x))
        x = self.fc3(x)
        return x

In [13]:
# creating configuration dictionary!
config = {
    "channels_1": 32,
    "channels_2": 64,
    "channels_3": 128,
    "kernal": tune.choice([3, 5]),
    "dropout": tune.uniform(0.1, 0.5),
    "hidden": 256,
    "activation": tune.choice(["ReLU", "Sigmoid", "Tanh"]),
    "lr": tune.loguniform(1e-4, 1e-2),
}

# two funcitons necessary to prevent rewritting calling
# calling original training function
def train_model_new(config, model, train_loader, validation_loader, criterion, device, num_epochs, writer_name, save_folder):
    # create the optimizer based on the configuration
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])

    # call the original train_model function with the configured optimizer
    train_model(
        model=model,
        train_loader=train_loader,
        validation_loader=validation_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
        num_epochs=num_epochs,
        writer_name=writer_name,
        save_folder=save_folder
    )

def tune_model(config):
    # create the model based on the configuration
    model = TuneCNN(config)
    model.to(device)

    # call the train_model function with the configured model and other arguments
    train_model_new(
        config=config,
        model=model_batcha_10,
        train_loader=train_loader_batcha_10,
        validation_loader=validation_loader_batcha_10,
        criterion=criterion,
        device=device,
        num_epochs=3,
        writer_name='training_data/batcha_10',
        save_folder='weights/batcha_10'
    )

analysis = tune.run(
    tune_model,
    config=config,
    num_samples=10,
    resources_per_trial={"cpu": 2},
)

best_config = analysis.get_best_config(metric="accuracy", mode="max")
print("Best configuration: ", best_config)

2024-04-26 21:19:04,527	INFO worker.py:1621 -- Started a local Ray instance.
2024-04-26 21:19:05,209	INFO tune.py:226 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2024-04-26 21:19:05,226	INFO tune.py:657 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-04-26 21:19:37
Running for:,00:00:32.52
Memory:,13.5/16.0 GiB

Trial name,status,loc,activation,dropout,kernal,lr
tune_model_238a1_00000,RUNNING,127.0.0.1:14790,ReLU,0.31604,3,0.000571614
tune_model_238a1_00001,RUNNING,127.0.0.1:14789,Tanh,0.328681,5,0.000184132
tune_model_238a1_00002,RUNNING,127.0.0.1:14791,Tanh,0.337156,5,0.000279503
tune_model_238a1_00003,RUNNING,127.0.0.1:14792,Tanh,0.394202,5,0.000210923
tune_model_238a1_00004,PENDING,,ReLU,0.108964,5,0.00130743
tune_model_238a1_00005,PENDING,,Tanh,0.239099,5,0.00430575
tune_model_238a1_00006,PENDING,,Sigmoid,0.386518,5,0.00250953
tune_model_238a1_00007,PENDING,,Tanh,0.45088,3,0.000106302
tune_model_238a1_00008,PENDING,,Tanh,0.117641,3,0.00931059
tune_model_238a1_00009,PENDING,,Tanh,0.293567,5,0.00134919


Epoch 1/3:   0%|          | 0/9586 [00:00<?, ?batch/s]
Epoch 1/3:   0%|          | 1/9586 [00:00<1:47:37,  1.48batch/s, loss=1.11]
Epoch 1/3:   0%|          | 4/9586 [00:00<25:00,  6.39batch/s, loss=1.22]   
Epoch 1/3:   0%|          | 1/9586 [00:00<1:51:25,  1.43batch/s, loss=0.686]
Epoch 1/3:   0%|          | 9/9586 [00:00<10:52, 14.68batch/s, loss=0.703]
Epoch 1/3:   0%|          | 10/9586 [00:00<10:08, 15.75batch/s, loss=1.25] 
Epoch 1/3:   0%|          | 14/9586 [00:01<07:40, 20.77batch/s, loss=0.636]
Epoch 1/3:   0%|          | 18/9586 [00:01<06:16, 25.39batch/s, loss=1.12] 
Epoch 1/3:   1%|          | 74/9586 [00:02<03:42, 42.80batch/s, loss=0.988]
Epoch 1/3:   0%|          | 0/9586 [00:00<?, ?batch/s][32m [repeated 3x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m
Epoch 1/3:   2%|▏         | 145/9586 [00:05<06:2

Best configuration:  None


