<a href="https://www.kaggle.com/code/typicalmango/simple-pytorch-ensemble-cnn?scriptVersionId=198577610" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import numpy as np
import pandas as pd
import torch
# Seed PyTorch's global random number generator so repeated runs start from the same state.
torch.manual_seed(42)

<torch._C.Generator at 0x7e3f48282d90>

In [2]:
# Pick the compute backend by preference: CUDA first, then Apple MPS, otherwise CPU.
# The MPS check is only evaluated when CUDA is unavailable.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f"Using {device}")

Using cpu


In [3]:
# NOTE: this notebook isn't CUDA-compatible yet.
from torch.multiprocessing import Pool, Process, set_start_method

# On CUDA, try to switch the multiprocessing start method to 'spawn'.
# set_start_method raises RuntimeError when it cannot be applied; that is
# reported and otherwise ignored so the notebook keeps running.
if device == 'cuda':
    try:
        set_start_method('spawn')
    except RuntimeError:
        print("Unable to set start method")
    else:
        print("Start method set")

# From here on `device` is a torch.device object instead of a plain string.
device = torch.device(device)

In [4]:
from torchvision.transforms import transforms


def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize(mean 0.5, std 0.5) steps shared by both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]


# Training adds a random horizontal flip (p=0.5) in front of the shared steps.
train_transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(p=0.5)] + _tensor_and_normalize()
)

# Evaluation uses only the shared steps, with no random augmentation.
test_transform = transforms.Compose(_tensor_and_normalize())

In [5]:
from torchvision.datasets import FashionMNIST

# Both splits live under the same local directory and are downloaded on demand.
_dataset_root = './data'

# Training split, with the training transform pipeline.
train_set = FashionMNIST(root=_dataset_root, train=True, transform=train_transform, download=True)

# Held-out split (train=False), with the evaluation transform pipeline.
test_set = FashionMNIST(root=_dataset_root, train=False, transform=test_transform, download=True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:14<00:00, 1878598.29it/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 136450.08it/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:08<00:00, 520357.30it/s] 


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 11164569.28it/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw






In [6]:
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate

# Both loaders use batches of 64 and two worker processes that are kept alive
# between passes (persistent_workers=True). The custom collate_fn builds the
# default batch and then moves every tensor in it to `device`.
# NOTE(review): with num_workers > 0 the collate_fn runs inside the worker
# processes, so the `.to(device)` call happens there. This is presumably why the
# notebook is flagged as not CUDA-compatible -- confirm before running on a GPU.

# Training loader: reshuffled on every pass.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True, num_workers=2, persistent_workers=True, collate_fn=lambda x: [y.to(device) for y in default_collate(x)])
# Variant without persistent workers, kept for reference:
# train_loader = DataLoader(train_set, batch_size=64, shuffle=True, num_workers=2, collate_fn=lambda x: [y.to(device) for y in default_collate(x)])

# Evaluation loader: no shuffle argument is passed.
test_loader = DataLoader(test_set, batch_size=64, num_workers=2, persistent_workers=True, collate_fn=lambda x: [y.to(device) for y in default_collate(x)])
# Variant without persistent workers, kept for reference:
# test_loader = DataLoader(test_set, batch_size=64, num_workers=2, collate_fn=lambda x: [y.to(device) for y in default_collate(x)])

In [7]:
# Lookup from integer class index (0-9) to a display name.
# Presumably matches the FashionMNIST target encoding -- verify against the dataset.
labels_map = dict(enumerate([
    "T-Shirt",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle Boot",
]))

In [8]:
import torch.nn as nn

class net(nn.Module):
    """Fully connected classifier: flattened 28*28 input -> 512 -> 512 -> 10 logits.

    The input is flattened from dimension 1 onward, so the leading dimension
    is treated as the batch. The output is raw (un-normalised) scores, one
    per class; no softmax is applied here.
    """

    def __init__(self):
        super().__init__()

        self.flatten = nn.Flatten()

        # Layers are created in the same order they are applied.
        hidden_width = 512
        stack = [
            nn.Linear(28*28, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*stack)

    def forward(self, x):
        """Return the 10 class logits for each item in the batch `x`."""
        return self.linear_relu_stack(self.flatten(x))

In [9]:
# Instantiate a throwaway model so the cell displays its layer summary.
net()

net(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [10]:
# Training configuration read by the cells below.
learning_rate = 0.1  # SGD step size used inside train_loop
batch_size = 64      # used by train_loop for its progress counter; the DataLoaders set their own 64
epochs = 5           # number of train-then-test rounds
num_models = 5       # number of networks in the ensemble

In [11]:
# Build `num_models` separately constructed networks, each moved to `device`.
model_ensemble = [net().to(device) for _ in range(num_models)]

In [12]:
def train_loop(data_loader, model_ensemble, loss_fn, lr=None):
    """Train every model in the ensemble for one full pass over `data_loader`.

    Models are trained one after another, each with its own freshly created
    SGD optimizer. Only the learning rate is configured on the optimizer, so
    recreating it on every call discards nothing but the object itself.

    Args:
        data_loader: iterable of ``(X, y)`` batches that also exposes
            ``.dataset`` (its length is used for the progress display).
        model_ensemble: sequence of ``nn.Module`` instances to train.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        lr: SGD learning rate. When omitted, the notebook-level
            ``learning_rate`` variable is used.

    Returns:
        None. Progress is printed every 200 batches.
    """
    if lr is None:
        # Fall back to the hyperparameter defined in the configuration cell.
        lr = learning_rate

    size = len(data_loader.dataset)
    for i, model in enumerate(model_ensemble):
        print(f'model {i+1}:')
        optimizer = torch.optim.SGD(model.parameters(), lr=lr)
        model.train()

        # Count the samples actually consumed rather than assuming a batch
        # size, so the display stays right for any loader configuration
        # (including a short final batch).
        seen = 0
        for batch, (X, y) in enumerate(data_loader):
            pred = model(X)
            loss = loss_fn(pred, y)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            seen += len(X)
            if batch % 200 == 0:
                print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

        print("- - - - - - - - - - - - - - - -")
                
def test_loop(data_loader, model_ensemble, loss_fn):
    for model in model_ensemble:
        model.eval()
    size = len(data_loader.dataset)
    num_batches = len(data_loader)
    test_loss, correct = 0, 0
    
    with torch.no_grad():
        for X, y in data_loader:
            ensemble_preds = [model(X) for model in model_ensemble]
            pred = torch.mean(torch.stack(ensemble_preds), dim=0)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [13]:
# Cross-entropy loss, applied to the raw logits returned by `net.forward`.
loss_fn = nn.CrossEntropyLoss()

In [14]:
# Each epoch: train every ensemble member for one full pass over the training
# loader, then evaluate the averaged ensemble output on the test loader.
for t in range(epochs):
    print(f"Epoch {t+1}\n===============================")
    train_loop(train_loader, model_ensemble, loss_fn)
    test_loop(test_loader, model_ensemble, loss_fn)
print("Done!")

Epoch 1
model 1:
loss: 2.301896  [   64/60000]
loss: 0.678145  [12864/60000]
loss: 0.548742  [25664/60000]
loss: 0.548167  [38464/60000]
loss: 0.564799  [51264/60000]
- - - - - - - - - - - - - - - -
model 2:
loss: 2.303048  [   64/60000]
loss: 0.618793  [12864/60000]
loss: 0.315016  [25664/60000]
loss: 0.419797  [38464/60000]
loss: 0.716389  [51264/60000]
- - - - - - - - - - - - - - - -
model 3:
loss: 2.332223  [   64/60000]
loss: 0.752687  [12864/60000]
loss: 0.432703  [25664/60000]
loss: 0.463325  [38464/60000]
loss: 0.498069  [51264/60000]
- - - - - - - - - - - - - - - -
model 4:
loss: 2.329745  [   64/60000]
loss: 0.416089  [12864/60000]
loss: 0.556852  [25664/60000]
loss: 0.509201  [38464/60000]
loss: 0.602536  [51264/60000]
- - - - - - - - - - - - - - - -
model 5:
loss: 2.296519  [   64/60000]
loss: 0.788691  [12864/60000]
loss: 0.545077  [25664/60000]
loss: 0.578153  [38464/60000]
loss: 0.544312  [51264/60000]
- - - - - - - - - - - - - - - -
Test Error: 
 Accuracy: 83.1%, Avg lo