In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

from pathlib import Path

In [2]:
# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


# FashionMNIST
## Data

In [4]:
path = Path('../data/')

In [7]:
if  not (path / 'FashionMNIST').exists():
    print('hola')

In [8]:
# Transformations applied on each image
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to tensor
    transforms.Normalize((0.5,), (0.5,))  # Normalize the images
])

# Load the dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=1000, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100.0%


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz


100.0%

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw






## Model

In [9]:
# Define the CNN model
class FashionCNN(nn.Module):
    def __init__(self):
        super(FashionCNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),  # Input: 1 x 28 x 28, Output: 64 x 28 x 28
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))  # Output: 64 x 14 x 14

        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3),  # Output: 64 x 12 x 12
            nn.ReLU(),
            nn.MaxPool2d(2))  # Output: 64 x 6 x 6

        self.fc1 = nn.Linear(64 * 6 * 6, 128)
        self.fc2 = nn.Linear(128, 10)  # 10 classes

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)  # Flatten the output
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [13]:
from torchsummary import summary

model = FashionCNN().to(device)  # Assuming your model is already defined and moved to the device
summary(model, input_size=(1, 28, 28))  # (Channels, Height, Width)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 28, 28]             640
              ReLU-2           [-1, 64, 28, 28]               0
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4           [-1, 64, 12, 12]          36,928
              ReLU-5           [-1, 64, 12, 12]               0
         MaxPool2d-6             [-1, 64, 6, 6]               0
            Linear-7                  [-1, 128]         295,040
            Linear-8                   [-1, 10]           1,290
Total params: 333,898
Trainable params: 333,898
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.02
Params size (MB): 1.27
Estimated Total Size (MB): 2.30
----------------------------------------------------------------


### Train

In [10]:
# Instantiate the model, loss function, and optimizer
model = FashionCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Function to train the model
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    print(f'Train Epoch: {epoch} -- Loss: {loss.item():.6f}')

# Training the model
for epoch in range(1, 50):  # Train for 50 epochs
    train(epoch)

Train Epoch: 1 -- Loss: 0.480955
Train Epoch: 2 -- Loss: 0.379081
Train Epoch: 3 -- Loss: 0.290524
Train Epoch: 4 -- Loss: 0.340867
Train Epoch: 5 -- Loss: 0.279709
Train Epoch: 6 -- Loss: 0.264307
Train Epoch: 7 -- Loss: 0.260853
Train Epoch: 8 -- Loss: 0.215272
Train Epoch: 9 -- Loss: 0.203518
Train Epoch: 10 -- Loss: 0.195016
Train Epoch: 11 -- Loss: 0.217275
Train Epoch: 12 -- Loss: 0.202714
Train Epoch: 13 -- Loss: 0.187355
Train Epoch: 14 -- Loss: 0.179235
Train Epoch: 15 -- Loss: 0.175244
Train Epoch: 16 -- Loss: 0.179273
Train Epoch: 17 -- Loss: 0.187090
Train Epoch: 18 -- Loss: 0.197918
Train Epoch: 19 -- Loss: 0.137742
Train Epoch: 20 -- Loss: 0.151495
Train Epoch: 21 -- Loss: 0.141840
Train Epoch: 22 -- Loss: 0.164637
Train Epoch: 23 -- Loss: 0.181047
Train Epoch: 24 -- Loss: 0.142565
Train Epoch: 25 -- Loss: 0.108934
Train Epoch: 26 -- Loss: 0.121558
Train Epoch: 27 -- Loss: 0.130847
Train Epoch: 28 -- Loss: 0.116375
Train Epoch: 29 -- Loss: 0.113763
Train Epoch: 30 -- Loss

### Evaluate

In [11]:
# Evaluation
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')

Accuracy of the network on the 10000 test images: 91.59%


# Horses or Humans

## Data

In [14]:
import urllib.request
import zipfile

# training set
url = "https://storage.googleapis.com/learning-datasets/horse-or-human.zip"
file_name = "horse-or-human.zip"
training_dir = 'horse-or-human/training/'
urllib.request.urlretrieve(url, file_name)

zip_ref = zipfile.ZipFile(file_name, 'r')
zip_ref.extractall(training_dir)
zip_ref.close()

# validation set
url = "https://storage.googleapis.com/learning-datasets/validation-horse-or-human.zip"
file_name = "validation-horse-or-human.zip"
validation_dir = 'horse-or-human/validation/'
urllib.request.urlretrieve(url, file_name)

zip_ref = zipfile.ZipFile(file_name, 'r')
zip_ref.extractall(validation_dir)
zip_ref.close()

### `DataLoader`

In [38]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define transformations
train_transform = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.RandomAffine(
        degrees=0,  # No rotation
        translate=(0.2, 0.2),  # Translate up to 20% vertically and horizontally
        scale=(0.8, 1.2),  # Zoom in or out by 20%
        shear=20,  # Shear by up to 20 degrees
    ),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Transforms for the validation data
val_transform = transforms.Compose([
    transforms.Resize(150),
    transforms.CenterCrop(150),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load the datasets
train_dataset = datasets.ImageFolder(root=training_dir, transform=train_transform)
val_dataset = datasets.ImageFolder(root=validation_dir, transform=val_transform)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)

## Model

In [39]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class HorsesHumansCNN(nn.Module):
    def __init__(self):
        super(HorsesHumansCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 18 * 18, 512)
        self.drop = nn.Dropout(0.25)
        self.fc2 = nn.Linear(512, 1)  # Only 1 output neuron for binary classification

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 64 * 18 * 18)
        x = F.relu(self.fc1(x))
        x = self.drop(x)
        x = self.fc2(x)
        x = torch.sigmoid(x)  # Use sigmoid to output probabilities
        return x

In [40]:
# instantiate the model
model = HorsesHumansCNN().to(device)
summary(model, input_size=(3, 150, 150))  # (Channels, Height, Width)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 150, 150]             448
         MaxPool2d-2           [-1, 16, 75, 75]               0
            Conv2d-3           [-1, 32, 75, 75]           4,640
         MaxPool2d-4           [-1, 32, 37, 37]               0
            Conv2d-5           [-1, 64, 37, 37]          18,496
         MaxPool2d-6           [-1, 64, 18, 18]               0
            Linear-7                  [-1, 512]      10,617,344
           Dropout-8                  [-1, 512]               0
            Linear-9                    [-1, 1]             513
Total params: 10,641,441
Trainable params: 10,641,441
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.26
Forward/backward pass size (MB): 5.98
Params size (MB): 40.59
Estimated Total Size (MB): 46.83
-----------------------------------

### Train

In [41]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(num_epochs):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device).float()  # Convert labels to float
            optimizer.zero_grad()
            outputs = model(images).view(-1)  # Flatten outputs to match label shape
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')

        # Evaluate on training set
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device).float()
                outputs = model(images).view(-1)
                predicted = outputs > 0.5  # Threshold predictions
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print(f'Training Set Accuracy: {100 * correct / total}%')

        # Evaluate on validation set
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device).float()
                outputs = model(images).view(-1)
                predicted = outputs > 0.5  # Threshold predictions
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print(f'Validation Set Accuracy: {100 * correct / total}%')

In [42]:
# train for 15 epochs
train_model(15)

Epoch 1, Loss: 0.6193670251152732
Training Set Accuracy: 75.26777020447906%
Validation Set Accuracy: 46.484375%
Epoch 2, Loss: 0.4475600204684518
Training Set Accuracy: 80.81791626095423%
Validation Set Accuracy: 52.734375%
Epoch 3, Loss: 0.34609963270750915
Training Set Accuracy: 85.97857838364168%
Validation Set Accuracy: 80.859375%
Epoch 4, Loss: 0.2705128515760104
Training Set Accuracy: 88.60759493670886%
Validation Set Accuracy: 60.15625%
Epoch 5, Loss: 0.2848006807493441
Training Set Accuracy: 86.56280428432328%
Validation Set Accuracy: 67.96875%
Epoch 6, Loss: 0.18927325184146562
Training Set Accuracy: 93.4761441090555%
Validation Set Accuracy: 60.546875%
Epoch 7, Loss: 0.175696986868526
Training Set Accuracy: 93.7682570593963%
Validation Set Accuracy: 60.15625%
Epoch 8, Loss: 0.18096410424561438
Training Set Accuracy: 94.0603700097371%
Validation Set Accuracy: 57.421875%
Epoch 9, Loss: 0.1891976407531536
Training Set Accuracy: 96.10516066212269%
Validation Set Accuracy: 60.9375

### Evaluate

In [25]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device).float()
        outputs = model(images).view(-1)
        predicted = outputs > 0.5  # Threshold predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        print(outputs)
        print(labels)
    print(f'Validation Accuracy: {100 * correct / total}%')

tensor([1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 9.9999e-01,
        9.3058e-01, 1.0000e+00, 3.0574e-04, 1.0000e+00, 1.6046e-02, 1.0000e+00,
        1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 6.3237e-06,
        9.9981e-01, 1.0000e+00, 1.0000e+00, 9.9999e-01, 7.8751e-01, 1.0000e+00,
        1.1637e-01, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 7.3181e-01,
        9.9940e-01, 1.0000e+00], device='cuda:0')
tensor([1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 0., 1., 0., 0., 1., 1., 1., 0.,
        0., 1., 1., 0., 0., 1., 0., 1., 1., 0., 1., 0., 0., 1.],
       device='cuda:0')
tensor([9.8918e-01, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00,
        2.9353e-09, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 2.1095e-03,
        1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 2.3674e-07, 7.1404e-04,
        1.0000e+00, 9.9519e-01, 1.0000e+00, 8.8512e-01, 1.0000e+00, 1.0000e+00,
        1.0000e+00, 6.0471e-04, 1.0000e+00, 1.4796e-01, 1.000

### Deploy

In [26]:
from PIL import Image
from torchvision import transforms

# Define transformations
transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

def load_image(image_path, transform):
    # Load image
    image = Image.open(image_path).convert('RGB')  # Convert to RGB just in case it's not
    # Apply transformations
    image = transform(image)
    # Add batch dimension, as the model expects batches
    image = image.unsqueeze(0)
    return image

# Prediction function
def predict(image_path, model, device, transform):
    model.eval()
    image = load_image(image_path, transform)
    image = image.to(device)
    with torch.no_grad():
        output = model(image)
        prediction = output > 0.5
        class_name = "Human" if prediction.item() == 1 else "Horse"
        print(image_path)
        print(f"The image is predicted to be a {class_name}.")
        print(output)

In [37]:
deploy_dir = Path('./horse-or-human/deploy/')
for file in deploy_dir.glob('*.jpg'):
    predict(file, model, device, transform)

horse-or-human/deploy/horse-1330690_640.jpg
The image is predicted to be a Horse.
tensor([[6.7261e-08]], device='cuda:0')
horse-or-human/deploy/portrait-7942151_640.jpg
The image is predicted to be a Horse.
tensor([[0.0174]], device='cuda:0')


In [None]:
# from google.colab import files
# uploaded = files.upload()

# for img in uploaded.keys():
#   predict(img, model, device, transform)


# Transfer Learning

## Data

In [None]:
import urllib.request
import zipfile

url = "https://storage.googleapis.com/learning-datasets/horse-or-human.zip"
file_name = "horse-or-human.zip"
training_dir = 'horse-or-human/training/'
urllib.request.urlretrieve(url, file_name)

zip_ref = zipfile.ZipFile(file_name, 'r')
zip_ref.extractall(training_dir)
zip_ref.close()

url = "https://storage.googleapis.com/learning-datasets/validation-horse-or-human.zip"
file_name = "validation-horse-or-human.zip"
validation_dir = 'horse-or-human/validation/'
urllib.request.urlretrieve(url, file_name)

zip_ref = zipfile.ZipFile(file_name, 'r')
zip_ref.extractall(validation_dir)
zip_ref.close()

## Model

In [43]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.optim import RMSprop

# Load the pre-trained Inception V3 model
pre_trained_model = models.inception_v3(pretrained=True, aux_logits=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pre_trained_model.to(device)

def print_model_summary(model):
    for name, module in model.named_modules():
        print(f"{name} : {module.__class__.__name__}")

# Example of how to use the function with your pre-trained model
print_model_summary(pre_trained_model)

from torchsummary import summary
summary(pre_trained_model, input_size=(3, 299, 299))  # (Channels, Height, Width)

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /home/casadoj/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100.0%


 : Inception3
Conv2d_1a_3x3 : BasicConv2d
Conv2d_1a_3x3.conv : Conv2d
Conv2d_1a_3x3.bn : BatchNorm2d
Conv2d_2a_3x3 : BasicConv2d
Conv2d_2a_3x3.conv : Conv2d
Conv2d_2a_3x3.bn : BatchNorm2d
Conv2d_2b_3x3 : BasicConv2d
Conv2d_2b_3x3.conv : Conv2d
Conv2d_2b_3x3.bn : BatchNorm2d
maxpool1 : MaxPool2d
Conv2d_3b_1x1 : BasicConv2d
Conv2d_3b_1x1.conv : Conv2d
Conv2d_3b_1x1.bn : BatchNorm2d
Conv2d_4a_3x3 : BasicConv2d
Conv2d_4a_3x3.conv : Conv2d
Conv2d_4a_3x3.bn : BatchNorm2d
maxpool2 : MaxPool2d
Mixed_5b : InceptionA
Mixed_5b.branch1x1 : BasicConv2d
Mixed_5b.branch1x1.conv : Conv2d
Mixed_5b.branch1x1.bn : BatchNorm2d
Mixed_5b.branch5x5_1 : BasicConv2d
Mixed_5b.branch5x5_1.conv : Conv2d
Mixed_5b.branch5x5_1.bn : BatchNorm2d
Mixed_5b.branch5x5_2 : BasicConv2d
Mixed_5b.branch5x5_2.conv : Conv2d
Mixed_5b.branch5x5_2.bn : BatchNorm2d
Mixed_5b.branch3x3dbl_1 : BasicConv2d
Mixed_5b.branch3x3dbl_1.conv : Conv2d
Mixed_5b.branch3x3dbl_1.bn : BatchNorm2d
Mixed_5b.branch3x3dbl_2 : BasicConv2d
Mixed_5b.branc

In [44]:
# Freeze all layers up to and including the 'Mixed_7c'
for name, parameter in pre_trained_model.named_parameters():
    parameter.requires_grad = False
    if 'Mixed_7c' in name:
        break

# Modify the existing fully connected layer
num_ftrs = pre_trained_model.fc.in_features
pre_trained_model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 1024),  # New fully connected layer with 1024 outputs
    nn.ReLU(),                # Activation layer
    nn.Linear(1024, 2)         # Final layer for binary classification
)

### DataLoader

In [46]:
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # Resize to match Inception V3 input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load datasets using ImageFolder
train_dataset = ImageFolder(root=training_dir, transform=transform)
val_dataset = ImageFolder(root=validation_dir, transform=transform)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

### Train

In [47]:
def train_model(model, criterion, optimizer, train_loader, num_epochs=10):
    model.train()  # Set the model to training mode
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(num_epochs):
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            # Forward pass
            outputs = model(inputs)
            # Handle multiple outputs for training with auxiliary logits
            if isinstance(outputs, tuple):
                output, aux_output = outputs
                loss1 = criterion(output, labels)
                loss2 = criterion(aux_output, labels)
                loss = loss1 + 0.4 * loss2  # Scale the auxiliary loss as is standard for Inception
            else:
                loss = criterion(outputs, labels)

            _, preds = torch.max(output, 1)  # Ensure you use the main output for accuracy calculation

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Update statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects / len(train_loader.dataset)

        print(f'Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}')

In [48]:
# Only optimize parameters that are set to be trainable
optimizer = RMSprop(filter(lambda p: p.requires_grad, pre_trained_model.parameters()), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train the model
train_model(pre_trained_model, criterion, optimizer, train_loader, num_epochs=3)

Epoch 1/3 - Loss: 4.2077, Acc: 0.9513
Epoch 2/3 - Loss: 3.4964, Acc: 0.9942
Epoch 3/3 - Loss: 3.4556, Acc: 0.9981


### Evaluate

In [49]:
def evaluate_model(model, data_loader, device):
    model.eval()  # Set the model to evaluation mode
    total = 0
    corrects = 0

    with torch.no_grad():  # No need to track gradients for evaluation
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            # Handle multiple outputs during evaluation
            if isinstance(outputs, tuple):
                outputs = outputs[0]  # Use only the main output for evaluation

            _, preds = torch.max(outputs, 1)
            corrects += torch.sum(preds == labels).item()
            total += labels.size(0)

    accuracy = corrects / total
    print(f'Accuracy on the validation set: {accuracy:.4f} ({corrects}/{total})')
    return accuracy

In [None]:
# # Assuming the necessary imports and pre_trained_model are defined and set up
# # Ensure the model and data loaders are on the appropriate device
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# pre_trained_model = pre_trained_model.to(device)

In [50]:
# Assuming val_loader is defined and set up as previously shown
accuracy = evaluate_model(pre_trained_model, val_loader, device)

Accuracy on the validation set: 0.9414 (241/256)


# Cats versus Dogs
(Note the following cells will only work if you have already run the above cells for training Horses v Humans)

In [None]:
import urllib.request
import zipfile

!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip \
    -O "cats_and_dogs_filtered.zip"


zip_ref = zipfile.ZipFile("cats_and_dogs_filtered.zip", 'r')
zip_ref.extractall("/tmp")
zip_ref.close()

training_dir = "/tmp/cats_and_dogs_filtered/train/"
validation_dir = "/tmp/cats_and_dogs_filtered/validation/"

In [None]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.optim import RMSprop

# Load the pre-trained Inception V3 model
pre_trained_model = models.inception_v3(pretrained=True, aux_logits=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pre_trained_model.to(device)

def print_model_summary(model):
    for name, module in model.named_modules():
        print(f"{name} : {module.__class__.__name__}")


# Example of how to use the function with your pre-trained model
print_model_summary(pre_trained_model)

from torchsummary import summary
summary(pre_trained_model, input_size=(3, 299, 299))  # (Channels, Height, Width)

In [None]:
# Freeze all layers up to and including the 'Mixed_7c'
for name, parameter in pre_trained_model.named_parameters():
    parameter.requires_grad = False
    if 'Mixed_7c' in name:
        break

# Modify the existing fully connected layer
num_ftrs = pre_trained_model.fc.in_features
pre_trained_model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 1024),  # New fully connected layer with 1024 outputs
    nn.ReLU(),                # Activation layer
    nn.Linear(1024, 2)         # Final layer for binary classification
)

transform = transforms.Compose([
    transforms.Resize((299, 299)),  # Resize to match Inception V3 input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


# Load datasets using ImageFolder
train_dataset = ImageFolder(root=training_dir, transform=transform)
val_dataset = ImageFolder(root=validation_dir, transform=transform)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

def train_model(model, criterion, optimizer, train_loader, num_epochs=10):
    model.train()  # Set the model to training mode
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(num_epochs):
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            # Handle multiple outputs for training with auxiliary logits
            if isinstance(outputs, tuple):
                output, aux_output = outputs
                loss1 = criterion(output, labels)
                loss2 = criterion(aux_output, labels)
                loss = loss1 + 0.4 * loss2  # Scale the auxiliary loss as is standard for Inception
            else:
                loss = criterion(outputs, labels)

            _, preds = torch.max(output, 1)  # Ensure you use the main output for accuracy calculation

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Update statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects / len(train_loader.dataset)

        print(f'Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}')





In [None]:
# Only optimize parameters that are set to be trainable
optimizer = RMSprop(filter(lambda p: p.requires_grad, pre_trained_model.parameters()), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train the model
train_model(pre_trained_model, criterion, optimizer, train_loader, num_epochs=3)

In [None]:
def evaluate_model(model, data_loader, device):
    model.eval()  # Set the model to evaluation mode
    total = 0
    corrects = 0

    with torch.no_grad():  # No need to track gradients for evaluation
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            # Handle multiple outputs during evaluation
            if isinstance(outputs, tuple):
                outputs = outputs[0]  # Use only the main output for evaluation

            _, preds = torch.max(outputs, 1)
            corrects += torch.sum(preds == labels).item()
            total += labels.size(0)

    accuracy = corrects / total
    print(f'Accuracy on the validation set: {accuracy:.4f} ({corrects}/{total})')
    return accuracy

# Assuming the necessary imports and pre_trained_model are defined and set up
# Ensure the model and data loaders are on the appropriate device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pre_trained_model = pre_trained_model.to(device)

# Assuming val_loader is defined and set up as previously shown

accuracy = evaluate_model(pre_trained_model, val_loader, device)

In [None]:
from PIL import Image
from torchvision import transforms


def load_image(image_path, transform):
    # Load image
    image = Image.open(image_path).convert('RGB')  # Convert to RGB just in case it's not
    # Apply transformations
    image = transform(image)
    # Add batch dimension, as the model expects batches
    image = image.unsqueeze(0)
    return image

    # Prediction function
def predict(image_path, model, device, transform):
    model.eval()
    image = load_image(image_path, transform)
    image = image.to(device)
    with torch.no_grad():
        output = model(image)
        print(output)
        prediction = torch.max(output, 1)
        print(prediction)

In [None]:
from google.colab import files
uploaded = files.upload()

for img in uploaded.keys():
  predict(img, pre_trained_model, device, transform)

# Rock Paper Scissors

## Data
### Download

In [53]:
import urllib.request
import zipfile

!wget --no-check-certificate \
    https://storage.googleapis.com/learning-datasets/rps.zip -O "rps.zip"

zip_ref = zipfile.ZipFile("rps.zip", 'r')
zip_ref.extractall("/tmp")
zip_ref.close()

training_dir = "/tmp/rps/"

--2025-12-04 10:57:13--  https://storage.googleapis.com/learning-datasets/rps.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 142.250.200.91, 216.58.215.187, 142.250.178.187, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.250.200.91|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 200682221 (191M) [application/zip]
Saving to: ‘rps.zip’

rps.zip              24%[===>                ]  46.11M  3.54MB/s    eta 47s    ^C


BadZipFile: File is not a zip file

### DataLoader

In [54]:
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # Resize to match Inception V3 input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load datasets using ImageFolder
train_dataset = ImageFolder(root=training_dir, transform=transform)
val_dataset = ImageFolder(root=validation_dir, transform=transform)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

## Model

### Architecture

In [52]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.optim import RMSprop

# Load the pre-trained Inception V3 model
pre_trained_model = models.inception_v3(pretrained=True, aux_logits=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pre_trained_model.to(device)

def print_model_summary(model):
    for name, module in model.named_modules():
        print(f"{name} : {module.__class__.__name__}")

# Example of how to use the function with your pre-trained model
print_model_summary(pre_trained_model)

from torchsummary import summary
summary(pre_trained_model, input_size=(3, 299, 299))  # (Channels, Height, Width)



 : Inception3
Conv2d_1a_3x3 : BasicConv2d
Conv2d_1a_3x3.conv : Conv2d
Conv2d_1a_3x3.bn : BatchNorm2d
Conv2d_2a_3x3 : BasicConv2d
Conv2d_2a_3x3.conv : Conv2d
Conv2d_2a_3x3.bn : BatchNorm2d
Conv2d_2b_3x3 : BasicConv2d
Conv2d_2b_3x3.conv : Conv2d
Conv2d_2b_3x3.bn : BatchNorm2d
maxpool1 : MaxPool2d
Conv2d_3b_1x1 : BasicConv2d
Conv2d_3b_1x1.conv : Conv2d
Conv2d_3b_1x1.bn : BatchNorm2d
Conv2d_4a_3x3 : BasicConv2d
Conv2d_4a_3x3.conv : Conv2d
Conv2d_4a_3x3.bn : BatchNorm2d
maxpool2 : MaxPool2d
Mixed_5b : InceptionA
Mixed_5b.branch1x1 : BasicConv2d
Mixed_5b.branch1x1.conv : Conv2d
Mixed_5b.branch1x1.bn : BatchNorm2d
Mixed_5b.branch5x5_1 : BasicConv2d
Mixed_5b.branch5x5_1.conv : Conv2d
Mixed_5b.branch5x5_1.bn : BatchNorm2d
Mixed_5b.branch5x5_2 : BasicConv2d
Mixed_5b.branch5x5_2.conv : Conv2d
Mixed_5b.branch5x5_2.bn : BatchNorm2d
Mixed_5b.branch3x3dbl_1 : BasicConv2d
Mixed_5b.branch3x3dbl_1.conv : Conv2d
Mixed_5b.branch3x3dbl_1.bn : BatchNorm2d
Mixed_5b.branch3x3dbl_2 : BasicConv2d
Mixed_5b.branc

In [None]:
# Freeze all layers up to and including the 'Mixed_7c'
for name, parameter in pre_trained_model.named_parameters():
    parameter.requires_grad = False
    if 'Mixed_7c' in name:
        break

# Modify the existing fully connected layer
num_ftrs = pre_trained_model.fc.in_features
pre_trained_model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 1024),  # New fully connected layer with 1024 outputs
    nn.ReLU(),                # Activation layer
    # nn.DropOut(0.5),
    nn.Linear(1024, 3)         # Final layer for binary classification
)

### Train

In [62]:
def train_model(model, criterion, optimizer, train_loader, num_epochs=10):
    model.train()  # Set the model to training mode
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        running_corrects = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            # Forward pass
            outputs = model(inputs)
            # Handle multiple outputs for training with auxiliary logits
            if isinstance(outputs, tuple):
                output, aux_output = outputs
                loss1 = criterion(output, labels)
                loss2 = criterion(aux_output, labels)
                loss = loss1 + 0.4 * loss2  # Scale the auxiliary loss as is standard for Inception
            else:
                loss = criterion(outputs, labels)

            _, preds = torch.max(output, 1)  # Ensure you use the main output for accuracy calculation

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Update statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects / len(train_loader.dataset)

        print(f'Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}')

In [63]:
# Only optimize parameters that are set to be trainable
optimizer = RMSprop(filter(lambda p: p.requires_grad, pre_trained_model.parameters()), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train the model
train_model(pre_trained_model, criterion, optimizer, train_loader, num_epochs=3)

Epoch 1/3 - Loss: 3.8386, Acc: 0.9528
Epoch 2/3 - Loss: 3.2533, Acc: 1.0000
Epoch 3/3 - Loss: 3.2535, Acc: 1.0000


### Evaluate

In [None]:
from PIL import Image
from torchvision import transforms

def load_image(image_path, transform):
    # Load image
    image = Image.open(image_path).convert('RGB')  # Convert to RGB just in case it's not
    # Apply transformations
    image = transform(image)
    # Add batch dimension, as the model expects batches
    image = image.unsqueeze(0)
    return image

# Prediction function
def predict(image_path, model, device, transform):
    model.eval()
    image = load_image(image_path, transform)
    image = image.to(device)
    with torch.no_grad():
        output = model(image)
        print(output)
        prediction = torch.max(output, 1)
        print(prediction)

In [None]:
from google.colab import files
uploaded = files.upload()

for img in uploaded.keys():
  predict(img, pre_trained_model, device, transform)