<a href="https://colab.research.google.com/github/eliav98/code_snippets/blob/master/Nueral_Networks_ex1_208674556.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, Configurations, Constants

In [None]:
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.14.2-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m26.3 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 KB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.19.1-py2.py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.2/199.2 KB[0m [31m27.5 MB/s[0m eta [36m0:00:00[0m
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting setproctitle
  Downloading setproctitle-1.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)

In [None]:
import os
import random

import numpy as np

import torch
import torch.nn as nn

import torchvision
import torchvision.transforms as transforms

from tqdm.auto import tqdm

import wandb

In [None]:
# Pick the compute device: the first CUDA GPU when one is available, the CPU otherwise.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(f"Active device is: {device}")

# Authenticate with Weights & Biases; left as the last expression so the cell
# displays the value returned by the login call.
wandb.login()

Active device is: cuda:0


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# Names of the hyperparameters that describe one CNN experiment.
# HPTuple (defined further down) is built from this list, so the order here is
# the field order of that tuple.
HP_LIST = [
    # what is being trained, and on what
    'dataset',
    'classes',
    'architecture',
    'optimizer',
    # fixed layer settings
    'padding',
    'activation_function',
    'pooling',
    'pooling_size',
    'pooling_stride',
    # convolution settings
    'filters',
    'filter_size',
    'filter_stride',
    # optimisation schedule
    'epochs',
    'batch_size',
    'learning_rate',
]

# --- Dataset description -------------------------------------------------
DATASET = 'CIFAR-10'
# Human-readable label names, indexed by class id (CIFAR-10 classification).
CLASSES = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
N_CLASSES = len(CLASSES)  # 10
# Input image geometry: number of channels, height, width.
IN_C, IN_H, IN_W = 3, 32, 32

# --- Settings shared by every CNN experiment ------------------------------
ARCHITECTURE = 'CNN'
OPTIMIZER = 'SGD'
PADDING = 1
ACTIVATION_FUNCTION = nn.ReLU  # the class itself, not an instance
POOLING = 'MaxPooling'
POOLING_SIZE = 2
POOLING_STRIDE = 2

# --- Loss instance and optimizer class used by make() / test() ------------
CROSS_ENTROPY = nn.CrossEntropyLoss()
SGD = torch.optim.SGD

# Number of images whose labels show_results() prints.
IMG_DISPLAY_BATCH_SIZE = 10

# Define Experiments and Pipeline

In [None]:
# Hyperparameters that are identical for every CNN experiment; each experiment
# dict below starts from these and adds its own filter / schedule settings.
DEFAULTS = {
    'dataset': DATASET,
    'classes': N_CLASSES,
    'architecture': ARCHITECTURE,
    'optimizer': OPTIMIZER,
    'padding': PADDING,
    'activation_function': ACTIVATION_FUNCTION,
    'pooling': POOLING,
    'pooling_size': POOLING_SIZE,
    'pooling_stride': POOLING_STRIDE,
}

In [None]:
# Experiment 1: a single 16-filter convolution stage, short schedule.
hyperparameters1 = {
    **DEFAULTS,
    'filters': [16],
    'filter_size': 3,
    'filter_stride': 1,
    'epochs': 10,
    'batch_size': 32,
    'learning_rate': 0.001,
}

# Experiment 2: three convolution stages (16 -> 32 -> 64 filters), larger batches.
hyperparameters2 = {
    **DEFAULTS,
    'filters': [16, 32, 64],
    'filter_size': 3,
    'filter_stride': 1,
    'epochs': 50,
    'batch_size': 128,
    'learning_rate': 0.001,
}

# Experiment 3: four wide stages with 5x5 filters and stride 2, lower learning rate.
hyperparameters3 = {
    **DEFAULTS,
    'filters': [128, 256, 512, 1024],
    'filter_size': 5,
    'filter_stride': 2,
    'epochs': 100,
    'batch_size': 64,
    'learning_rate': 0.0001,
}
# Display the merged configuration as the cell output.
hyperparameters3

{'dataset': 'CIFAR-10',
 'classes': 10,
 'architecture': 'CNN',
 'optimizer': 'SGD',
 'padding': 1,
 'activation_function': torch.nn.modules.activation.ReLU,
 'pooling': 'MaxPooling',
 'pooling_size': 2,
 'pooling_stride': 2,
 'filters': [128, 256, 512, 1024],
 'filter_size': 5,
 'filter_stride': 2,
 'epochs': 100,
 'batch_size': 64,
 'learning_rate': 0.0001}

In [None]:
# Run configurations consumed by model_pipeline(). All three share batch size,
# momentum and epoch count; they differ in learning rate and architecture name.
baseline = {
    'learning_rate': 0.001,
    'batch_size': 4,
    'momentum': 0.9,
    'epochs': 2,
    'architecture': 'BaselineCNN',
}

# Same as `baseline` but with a 10x smaller learning rate.
baseline2 = {
    'learning_rate': 0.0001,
    'batch_size': 4,
    'momentum': 0.9,
    'epochs': 2,
    'architecture': 'BaselineCNN',
}

# Same schedule as `baseline2`, labelled with the SimpleCNN architecture.
simple_cnn = {
    'learning_rate': 0.0001,
    'batch_size': 4,
    'momentum': 0.9,
    'epochs': 2,
    'architecture': 'SimpleCNN',
}


In [None]:
from collections import namedtuple
HPTuple = namedtuple('HPTuple', HP_LIST)

def model_pipeline(hyperparameters, track=False, save=False):
    """Build, train and evaluate one model from a hyperparameter dict.

    Args:
        hyperparameters: dict of settings for the run; its keys are read as
            attributes (``config.batch_size``, ``config.epochs``, ...) by
            ``make`` and ``train``.
        track: when True the run is recorded with wandb and the settings are
            read back from ``wandb.config``, so what is logged matches what is
            executed.
        save: forwarded to ``train`` and ``test``.

    Returns:
        A ``(model, final_metrics)`` tuple: the trained model and a dict with
        the ``"test/accuracy"`` and ``"test/loss"`` values returned by the
        final ``test`` call.
    """
    # Local import keeps this cell runnable without touching the import cell.
    from types import SimpleNamespace

    if track:
        wandb.init(project="idkhowshouldicallthis", config=hyperparameters)

    try:
        # A namespace accepts any set of keys. The fixed-field HPTuple rejected
        # every run configuration that did not define exactly the HP_LIST names.
        config = wandb.config if track else SimpleNamespace(**hyperparameters)

        # make the model, data, and optimization problem
        model, train_loader, test_loader, criterion, optimizer = make(config)

        # and use them to train the model (save is now forwarded; it used to be dropped)
        train(model, train_loader, criterion, optimizer, config,
              track=track, save=save, test_loader=test_loader)

        # and test its final performance; test() returns metrics, so they must
        # not be unpacked into `model`
        test_acc, test_loss = test(model, test_loader, track, save)
    finally:
        # Close the wandb run even when training fails or is interrupted.
        if track:
            wandb.finish()

    final_metrics = {"test/accuracy": test_acc, "test/loss": test_loss}
    return model, final_metrics

In [None]:
def make(config):
    """Create the model, data loaders, loss and optimizer for one run.

    Args:
        config: object exposing the run settings as attributes. ``batch_size``,
            ``learning_rate`` and ``architecture`` are always read; ``filters``,
            ``filter_size``, ``filter_stride`` and ``classes`` are read only for
            the generic ``ConvNet``; ``momentum`` is optional.

    Returns:
        ``(model, train_loader, test_loader, criterion, optimizer)``.

    Raises:
        ValueError: if ``config.architecture`` names no known model.
    """
    print(config)

    # Make the data (local names chosen so the global train()/test() functions
    # are not shadowed inside this function)
    train_set, test_set = get_data(train=True), get_data(train=False)
    train_loader = make_loader(train_set, batch_size=config.batch_size)
    test_loader = make_loader(test_set, batch_size=config.batch_size)

    # Make the model that the configuration asks for; previously BaselineCNN
    # was built regardless of config.architecture.
    architecture = config.architecture
    if architecture in models:
        model = models[architecture]()
    elif architecture == ARCHITECTURE:
        model = ConvNet(filters=list(config.filters),
                        filter_size=config.filter_size,
                        filter_stride=config.filter_stride,
                        classes=config.classes)
    else:
        known = sorted(list(models) + [ARCHITECTURE])
        raise ValueError(f"Unknown architecture {architecture!r}; expected one of {known}")
    model = model.to(device)

    # Configurations without a momentum entry fall back to 0.0.
    # NOTE(review): 0.0 is believed to be torch.optim.SGD's own default — confirm.
    # KeyError is caught as well because some config objects may signal a
    # missing attribute that way.
    try:
        momentum = config.momentum
    except (AttributeError, KeyError):
        momentum = 0.0

    # Make the loss and optimizer
    criterion = CROSS_ENTROPY
    optimizer = SGD(model.parameters(), lr=config.learning_rate, momentum=momentum)

    return model, train_loader, test_loader, criterion, optimizer

# Define the Data Loading and Models

In [None]:
# Preprocessing applied to every image: convert it to a tensor, then normalise
# each of the three channels with mean 0.5 and std 0.5, which maps values in
# [0, 1] to the range [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

def get_data(slice=1, train=True):
    """Load one CIFAR-10 split, optionally keeping every ``slice``-th example.

    The subset used to be computed and then discarded, so ``slice`` had no
    effect and the full split was always returned. The argument is now
    honoured. Its default is 1 (keep everything) so that existing calls, which
    never pass ``slice``, still receive the full split exactly as before.

    Args:
        slice: positive step between kept examples, equivalent to indexing the
            dataset with ``[::slice]``. 1 keeps the whole split.
        train: True for the training split, False for the test split.

    Returns:
        The torchvision CIFAR10 dataset when ``slice == 1``, otherwise a
        ``torch.utils.data.Subset`` over it.

    Raises:
        ValueError: if ``slice`` is smaller than 1.
    """
    if slice < 1:
        raise ValueError(f"slice must be a positive integer, got {slice!r}")

    full_dataset = torchvision.datasets.CIFAR10(root='./data',
                                                train=train,
                                                transform=transform,
                                                download=True)
    if slice == 1:
        return full_dataset

    # equivalent to slicing with [::slice]
    return torch.utils.data.Subset(
        full_dataset, indices=range(0, len(full_dataset), slice))


def make_loader(dataset, batch_size):
    """Wrap ``dataset`` in a DataLoader.

    The loader is created with shuffling enabled, ``pin_memory=True`` and two
    worker processes; only the batch size is configurable.

    Args:
        dataset: the dataset to iterate over.
        batch_size: number of examples per batch.

    Returns:
        The configured ``torch.utils.data.DataLoader``.
    """
    loader_options = dict(
        batch_size=batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=2,
    )
    return torch.utils.data.DataLoader(dataset, **loader_options)

In [None]:
# Scratch check of how one conv stage is assembled from (in, out) channel pairs.
# The pooling layer must be MaxPool2d: it follows a Conv2d, whose output is a
# batch of 2-D feature maps, which MaxPool1d does not accept.
channels = [(1, 2)]
[nn.Sequential(nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=PADDING),
               nn.ReLU(),
               nn.MaxPool2d(kernel_size=POOLING_SIZE, stride=POOLING_STRIDE))
 for c1, c2 in channels]

[Sequential(
   (0): Conv2d(1, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (1): ReLU()
   (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 )]

In [None]:
class ConvNet(nn.Module):
    """Configurable CNN: a chain of Conv2d -> ReLU -> MaxPool2d stages and one linear classifier.

    Args:
        filters: non-empty sequence with the number of output channels of each
            convolution stage, in order.
        filter_size: convolution kernel size (assumed to be a single int used
            for both dimensions).
        filter_stride: convolution stride (assumed to be a single int).
        classes: number of output classes.

    Raises:
        ValueError: if ``filters`` is empty, or if the configuration shrinks the
            IN_H x IN_W input below one pixel before the classifier.
    """

    def __init__(self, filters, filter_size, filter_stride, classes):
        super(ConvNet, self).__init__()
        if len(filters) == 0:
            raise ValueError("filters must contain at least one output-channel count")
        self.filters = filters

        in_channels = [IN_C] + list(filters[:-1])
        # nn.ModuleList registers the stages as sub-modules. A plain Python list
        # hides them from .parameters() (so the optimizer never updates them)
        # and from .to(device) (so they stay on the CPU).
        self.layers = nn.ModuleList(
            nn.Sequential(
                nn.Conv2d(in_c, out_c, kernel_size=filter_size, stride=filter_stride, padding=PADDING),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=POOLING_SIZE, stride=POOLING_STRIDE),
            )
            for in_c, out_c in zip(in_channels, filters)
        )
        print("The layers are: ", self.layers)

        # Follow the spatial size through every stage rather than assuming each
        # stage halves it, which only holds when the convolution preserves size.
        out_h, out_w = IN_H, IN_W
        windows = ((filter_size, filter_stride, PADDING),   # convolution
                   (POOLING_SIZE, POOLING_STRIDE, 0))       # max pooling (no padding)
        for stage in range(1, len(filters) + 1):
            for kernel, stride, padding in windows:
                out_h = self._out_size(out_h, kernel, stride, padding)
                out_w = self._out_size(out_w, kernel, stride, padding)
                if out_h < 1 or out_w < 1:
                    raise ValueError(
                        f"Stage {stage} reduces the {IN_H}x{IN_W} input below 1 pixel; "
                        "use fewer stages, a smaller filter_size or a smaller filter_stride")

        print("The calculation of the FC layer input size is:")
        print(out_h, '*', out_w, '*', filters[-1])
        self.fc = nn.Linear(out_h * out_w * filters[-1], classes)

    @staticmethod
    def _out_size(size, kernel, stride, padding):
        """Output length along one axis of a conv/pool window (floor rounding, no dilation)."""
        return (size + 2 * padding - kernel) // stride + 1

    def forward(self, x):
        """Run ``x`` through every stage, flatten per sample, and classify."""
        for layer in self.layers:
            x = layer(x)
        x = x.reshape(x.size(0), -1)
        out = self.fc(x)
        return out

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class BaselineCNN(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three fully connected layers.

    The first linear layer expects 16 * 5 * 5 features, which is what the two
    stages produce for a 3 x 32 x 32 input. The network outputs 10 scores per
    sample.
    """

    def __init__(self):
        super(BaselineCNN, self).__init__()
        # Feature extractor; the single pooling layer is reused after both convolutions.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # keep the batch dimension, flatten everything else
        flat = features.flatten(start_dim=1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [None]:
class SimpleCNN(nn.Module):
    """Three conv + ReLU + max-pool stages (3 -> 32 -> 64 -> 128 channels) and a two-layer classifier.

    The classifier expects 128 * 4 * 4 features, which is what the three
    size-halving stages produce for a 3 x 32 x 32 input. The network outputs
    10 scores per sample.
    """

    def __init__(self):
        super().__init__()

        # Every stage has the same shape; only the channel counts change.
        stage_modules = []
        for in_channels, out_channels in ((3, 32), (32, 64), (64, 128)):
            stage_modules.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ])
        self.conv_layers = nn.Sequential(*stage_modules)

        classifier_modules = [
            nn.Linear(128 * 4 * 4, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 10),
        ]
        self.fc_layers = nn.Sequential(*classifier_modules)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        feature_maps = self.conv_layers(x)
        # one flat feature vector per sample
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.fc_layers(flat)

In [None]:
# Registry mapping an architecture name to its model class, keyed by the class name.
models = {model_cls.__name__: model_cls for model_cls in (BaselineCNN, SimpleCNN)}

Sanity check:

In [None]:
# cnn1 = ConvNet(filters=[16], filter_size=3, filter_stride=1, classes=10)

# Define Training Logic

In [None]:
def train(model, loader, criterion, optimizer, config, track=False, save=False, test_loader=None,
          log_every=1000):
    """Train ``model`` for ``config.epochs`` epochs, periodically logging and evaluating.

    Args:
        model: network to train; updated in place.
        loader: iterable of ``(images, labels)`` training batches.
        criterion: loss function handed to ``train_batch``.
        optimizer: optimizer handed to ``train_batch``.
        config: object exposing ``epochs``.
        track: when True, metrics are reported through ``train_log`` / ``test_log``.
        save: when True, the model's state dict is written to ``./cifar_net.pth``
            after the last epoch; also forwarded to ``test``.
        test_loader: optional loader evaluated with ``test`` at every report.
        log_every: report after every ``log_every`` training batches.

    Raises:
        ValueError: if ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError(f"log_every must be a positive integer, got {log_every!r}")

    # Tell wandb to watch what the model gets up to: gradients, weights, and more!
    # if track: wandb.watch(model, criterion, log="all", log_freq=1000)

    example_ct = 0  # number of examples seen
    batch_ct = 0    # number of batches seen, across all epochs
    for epoch in tqdm(range(config.epochs)):
        # test() switches the model to eval mode, so training mode is restored explicitly.
        model.train()

        running_loss = 0.0  # sum of batch losses within the current epoch
        for images, labels in loader:
            # float() stores a plain number, so the running sum does not keep a
            # reference to every batch's autograd graph.
            batch_loss = float(train_batch(images, labels, model, optimizer, criterion))
            example_ct += len(images)
            batch_ct += 1
            running_loss += batch_loss

            # Report metrics every `log_every`-th batch. batch_ct is already
            # incremented here, so no "+ 1" is needed.
            if track and batch_ct % log_every == 0:
                train_log(batch_loss, running_loss, example_ct, epoch)
                if test_loader is not None:
                    test_acc, test_loss = test(model, test_loader, track=track, save=save)
                    # report the number of test images, not the number of batches
                    test_log(test_acc, test_loss, len(test_loader.dataset), epoch)
                    model.train()  # leave the eval mode set by test()
    if save:
        PATH = './cifar_net.pth'
        torch.save(model.state_dict(), PATH)


def train_batch(images, labels, model, optimizer, criterion):
    """Run one optimisation step on a single batch.

    Args:
        images: batch of inputs; moved to the global ``device``.
        labels: matching targets; moved to the global ``device``.
        model: network being trained.
        optimizer: optimizer that owns the model's parameters.
        criterion: loss function called as ``criterion(outputs, labels)``.

    Returns:
        The batch loss as a tensor detached from the autograd graph, so callers
        can accumulate or log it without keeping the graph alive.
    """
    images, labels = images.to(device), labels.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward pass: clear the gradients left by the previous step first
    optimizer.zero_grad()
    loss.backward()

    # Step with optimizer
    optimizer.step()

    return loss.detach()

In [None]:
def train_log(loss, running_loss, example_ct, epoch):
    """Send the current training metrics to wandb and print a one-line summary.

    Args:
        loss: loss of the most recent batch.
        running_loss: accumulated loss passed in by the caller.
        example_ct: number of examples seen so far; used as the wandb step.
        epoch: current epoch index.
    """
    metrics = {
        "train/epoch": epoch,
        "train/loss": loss,
        'train/running_loss': running_loss,
    }
    wandb.log(metrics, step=example_ct)
    print(f"Train: Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

# Define Testing Logic

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

def get_conf_mat(y_true, y_pred):
    """Build a row-normalised confusion matrix labelled with the CLASSES names.

    Args:
        y_true: ground-truth labels (assumed to be integer indices into CLASSES).
        y_pred: predicted labels, same encoding and length as ``y_true``.

    Returns:
        A ``len(CLASSES) x len(CLASSES)`` DataFrame in which each row (true
        class) sums to 1. Rows of classes that never occur in ``y_true`` are
        all zeros.
    """
    class_ids = list(range(len(CLASSES)))
    # Passing `labels` fixes the matrix to one row/column per class even when
    # some class is missing from the inputs; otherwise the matrix is smaller
    # and no longer matches the CLASSES index below.
    cf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)

    # Normalise each row by its total, leaving empty rows at zero instead of
    # dividing by zero.
    row_totals = cf_matrix.sum(axis=1, keepdims=True)
    normalized = np.divide(cf_matrix, row_totals,
                           out=np.zeros(cf_matrix.shape, dtype=float),
                           where=row_totals != 0)

    df_cm = pd.DataFrame(normalized, index=list(CLASSES), columns=list(CLASSES))
    return df_cm

def show_conf_mat(y_true, y_pred):
    """Draw the confusion matrix of the predictions as an annotated heatmap and save it.

    The matrix comes from ``get_conf_mat``; the figure is written to
    ``output.png`` in the current working directory.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels.
    """
    conf_mat = get_conf_mat(y_true, y_pred)
    plt.figure(figsize=(12, 7))
    sn.heatmap(conf_mat, annot=True)
    plt.savefig('output.png')

In [None]:
def test(model, test_loader, track=False, save=False):
    """Evaluate ``model`` on every batch of ``test_loader``.

    The model is switched to evaluation mode and left in it; callers that keep
    training afterwards must call ``model.train()`` themselves.

    Args:
        model: network to evaluate.
        test_loader: iterable of ``(images, labels)`` batches.
        track: accepted for call compatibility; currently unused.
        save: accepted for call compatibility; currently unused.

    Returns:
        ``(accuracy, loss)`` as Python floats: the fraction of correctly
        classified samples and the mean cross-entropy per sample.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    model.eval()

    correct, total = 0, 0
    loss_sum = 0.0
    # Run the model on the test examples without recording gradients
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)

            n_batch = labels.size(0)
            total += n_batch
            correct += (predicted == labels).sum().item()
            # CROSS_ENTROPY returns the mean over the batch; weighting by the
            # batch size and dividing by `total` below gives a per-sample mean
            # that does not grow with the number of batches.
            loss_sum += CROSS_ENTROPY(outputs, labels).item() * n_batch

    if total == 0:
        raise ValueError("test_loader yielded no samples; cannot compute accuracy or loss")

    return correct / total, loss_sum / total

In [None]:
def test_log(acc, loss, n_samples, epoch):
    print(f"Test: Accuracy  on {n_samples}  test images: {acc:%}, test_loss: {loss:.3f}, epoch: {epoch}")
    wandb.log({"test/accuracy": acc, 'test/epoch':epoch, 'test/loss':loss})

In [None]:
import numpy as np

def imshow(img):
    """Display an image tensor that was normalised to [-1, 1].

    Args:
        img: tensor laid out as (channels, height, width); it is rescaled with
            ``img / 2 + 0.5`` and rearranged to (height, width, channels)
            before being plotted.
    """
    unnormalized = img / 2 + 0.5  # undo the mean-0.5 / std-0.5 normalisation
    channels_last = np.transpose(unnormalized.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()

In [None]:
def show_results(net, test_loader):
    """Show a few test images with their true and predicted class names.

    At most IMG_DISPLAY_BATCH_SIZE images from the first batch of
    ``test_loader`` are displayed, so batches smaller than that no longer cause
    an index error and the same number of labels and predictions is printed.

    Args:
        net: trained model used for the predictions.
        test_loader: loader yielding ``(images, labels)`` batches.
    """
    # get one batch of test images and keep only as many as will be displayed
    images, labels = next(iter(test_loader))
    n_shown = min(IMG_DISPLAY_BATCH_SIZE, len(labels))
    images, labels = images[:n_shown], labels[:n_shown]

    # show images
    imshow(torchvision.utils.make_grid(images))

    # print labels
    print(' '.join(f'{CLASSES[labels[j]]:5s}' for j in range(n_shown)))

    # Run the model on whatever device its parameters live on; the loader
    # yields CPU tensors, which a model on the GPU cannot consume.
    first_param = next(net.parameters(), None)
    target_device = images.device if first_param is None else first_param.device
    with torch.no_grad():
        outputs = net(images.to(target_device))
    _, predicted = torch.max(outputs, 1)

    print('Predicted: ', ' '.join(f'{CLASSES[predicted[j]]:5s}'
                                  for j in range(n_shown)))

# Run Experiments

In [None]:
# Run the full pipeline on the `baseline` configuration with wandb tracking enabled.
baseline_model, imgs_baseline = model_pipeline(baseline, track=True)

VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.121731…

{'learning_rate': 0.001, 'batch_size': 4, 'momentum': 0.9, 'epochs': 2, 'architecture': 'BaselineCNN'}
Files already downloaded and verified
Files already downloaded and verified


  0%|          | 0/2 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
# Run the full pipeline on the `baseline2` configuration (lower learning rate) with wandb tracking enabled.
baseline2_model, imgs_baseline2 = model_pipeline(baseline2, track=True)

VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.121550…

{'learning_rate': 0.0001, 'batch_size': 4, 'momentum': 0.9, 'epochs': 2, 'architecture': 'BaselineCNN'}
Files already downloaded and verified
Files already downloaded and verified


  0%|          | 0/2 [00:00<?, ?it/s]

Train: Loss after 03996 examples: 2.268
Test: Accuracy  on 2500  test images: 9.940000%, test_loss: 5759.336, epoch: 0
Train: Loss after 07996 examples: 2.328
Test: Accuracy  on 2500  test images: 10.680000%, test_loss: 5756.984, epoch: 0
Train: Loss after 11996 examples: 2.327
Test: Accuracy  on 2500  test images: 9.910000%, test_loss: 5754.996, epoch: 0
Train: Loss after 15996 examples: 2.294
Test: Accuracy  on 2500  test images: 11.270000%, test_loss: 5752.806, epoch: 0
Train: Loss after 19996 examples: 2.281
Test: Accuracy  on 2500  test images: 13.120000%, test_loss: 5750.566, epoch: 0
Train: Loss after 23996 examples: 2.303
Test: Accuracy  on 2500  test images: 12.350000%, test_loss: 5747.219, epoch: 0
Train: Loss after 27996 examples: 2.296
Test: Accuracy  on 2500  test images: 15.980000%, test_loss: 5742.845, epoch: 0
Train: Loss after 31996 examples: 2.298
Test: Accuracy  on 2500  test images: 17.560000%, test_loss: 5735.312, epoch: 0
Train: Loss after 35996 examples: 2.267
Te

In [None]:
# Bind the result to a new name: assigning it back to `simple_cnn` would replace
# the hyperparameter dict, so re-running this cell would pass the wrong object
# to model_pipeline.
simple_cnn_model, _ = model_pipeline(simple_cnn, track=True)

In [None]:
# wandb.init(project="idkhowshouldicallthis", config=hyperparameters1)
# hyperparameters1 = wandb.config

In [None]:
# # make the model, data, and optimization problem
# model1, train_loader, test_loader, criterion, optimizer = make(hyperparameters1)
# print(model1)
#
# # and use them to train the model1
# train(model1, train_loader, criterion, optimizer, hyperparameters1)
#
# # and test its final performance
# model1, images, y_true, y_pred = test(model1, test_loader)

In [None]:
# show_results(model1, test_loader)

In [None]:
# model1, images1 = model_pipeline(hyperparameters1, track=True)
# model2, images2 = model_pipeline(hyperparameters2, track=True)
# model3, images3 = model_pipeline(hyperparameters3, track=True)