In [1]:
# 🧩 Step 1: Download and Extract Dataset
import urllib.request
import zipfile

# Training set
url = "https://storage.googleapis.com/learning-datasets/horse-or-human.zip"
file_name = "horse-or-human.zip"
training_dir = 'horse-or-human/training/'
urllib.request.urlretrieve(url, file_name)
zip_ref = zipfile.ZipFile(file_name, 'r')
zip_ref.extractall(training_dir)
zip_ref.close()

# Validation set
url = "https://storage.googleapis.com/learning-datasets/validation-horse-or-human.zip"
file_name = "validation-horse-or-human.zip"
validation_dir = 'horse-or-human/validation/'
urllib.request.urlretrieve(url, file_name)
zip_ref = zipfile.ZipFile(file_name, 'r')
zip_ref.extractall(validation_dir)
zip_ref.close()


In [2]:
# 🧩 Step 2: Data Augmentation and Loaders
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

train_transform = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.RandomAffine(degrees=0, translate=(0.2,0.2), scale=(0.8,1.2), shear=20),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
])

# Validation usually shouldn't be augmented, but we'll reuse same transform here for simplicity
val_transform = train_transform

train_dataset = datasets.ImageFolder(root=training_dir, transform=train_transform)
val_dataset = datasets.ImageFolder(root=validation_dir, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)


In [3]:
# 🧩 Step 3: CNN Model
import torch
import torch.nn as nn
import torch.nn.functional as F

class H_H_CNN(nn.Module):
    def __init__(self):
        super(H_H_CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 18 * 18, 512)
        self.drop = nn.Dropout(0.25)
        self.fc2 = nn.Linear(512, 1)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # -> (16, 75, 75)
        x = self.pool(F.relu(self.conv2(x)))  # -> (32, 37, 37)
        x = self.pool(F.relu(self.conv3(x)))  # -> (64, 18, 18)
        x = x.view(-1, 64 * 18 * 18)
        x = F.relu(self.fc1(x))
        x = self.drop(x)
        x = self.fc2(x)
        return torch.sigmoid(x)


In [4]:
# 🧩 Step 4: Training Setup
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = H_H_CNN().to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [5]:
# 🧩 Step 5: Train the Model
def train_model(num_epochs):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device).float()
            optimizer.zero_grad()
            outputs = model(images).view(-1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f'Epoch {epoch+1}, Loss: {running_loss / len(train_loader):.4f}')

        # Training Accuracy
        model.eval()
        with torch.no_grad():
            correct = total = 0
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device).float()
                outputs = model(images).view(-1)
                predicted = outputs > 0.5
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print(f"Training Accuracy: {100 * correct / total:.2f}%")

        # Validation Accuracy
        with torch.no_grad():
            correct = total = 0
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device).float()
                outputs = model(images).view(-1)
                predicted = outputs > 0.5
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print(f"Validation Accuracy: {100 * correct / total:.2f}%")

train_model(5)


Epoch 1, Loss: 0.7032
Training Accuracy: 69.62%
Validation Accuracy: 52.34%
Epoch 2, Loss: 0.5515
Training Accuracy: 78.19%
Validation Accuracy: 51.17%
Epoch 3, Loss: 0.4452
Training Accuracy: 81.79%
Validation Accuracy: 54.69%
Epoch 4, Loss: 0.3770
Training Accuracy: 85.69%
Validation Accuracy: 62.50%
Epoch 5, Loss: 0.2936
Training Accuracy: 76.44%
Validation Accuracy: 57.03%


In [6]:
# 🧩 Step 7: Model Summary
from torchsummary import summary
summary(model, input_size=(3, 150, 150))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 150, 150]             448
         MaxPool2d-2           [-1, 16, 75, 75]               0
            Conv2d-3           [-1, 32, 75, 75]           4,640
         MaxPool2d-4           [-1, 32, 37, 37]               0
            Conv2d-5           [-1, 64, 37, 37]          18,496
         MaxPool2d-6           [-1, 64, 18, 18]               0
            Linear-7                  [-1, 512]      10,617,344
           Dropout-8                  [-1, 512]               0
            Linear-9                    [-1, 1]             513
Total params: 10,641,441
Trainable params: 10,641,441
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.26
Forward/backward pass size (MB): 5.98
Params size (MB): 40.59
Estimated Total Size (MB): 46.83
-----------------------------------

In [7]:
# 🧩 Step 8: Predict Custom Image
from PIL import Image
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
])

def load_image(image_path, transform):
    image = Image.open(image_path).convert('RGB')
    image = transform(image)
    image = image.unsqueeze(0)  # add batch dimension
    return image

def predict(image_path, model, device, transform):
    model.eval()
    image = load_image(image_path, transform).to(device)
    with torch.no_grad():
        output = model(image)
        prediction = output > 0.5
        class_name = "Horse" if prediction.item() == 1 else "Human"
        print(f"Image: {image_path}")
        print(f"Prediction: {class_name}")
        print("Probability:", output.item())


In [8]:
# 🧩 Step 9: Upload and Predict from File (Colab)
from google.colab import files
uploaded = files.upload()

for img_path in uploaded.keys():
    predict(img_path, model, device, transform)


Saving download (7).jpg to download (7).jpg
Image: download (7).jpg
Prediction: Horse
Probability: 0.9990899562835693
