In [3]:
import os
import zipfile
import urllib.request
import shutil

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [4]:
import tarfile

def extract_tar_gz(file_path, extract_to):
    if not os.path.exists(extract_to):
        print(f"Extraient {file_path}...")
        with tarfile.open(file_path, 'r:gz') as tar:
            tar.extractall(path=extract_to)
        print(f"Extret a: {extract_to}")
    else:
        print(f"a extret: {extract_to}")

# Descomprimir
extract_tar_gz("../data/images.tar.gz", "../data/images")
extract_tar_gz("../data/annotations.tar.gz", "../data/annotations")


a extret: ../data/images
a extret: ../data/annotations


In [5]:
import shutil
from sklearn.model_selection import train_test_split

trainval_file = "../data/annotations/annotations/trainval.txt"
images_dir = "../data/images/images"

with open(trainval_file, "r") as f:
    lines = f.readlines()

train_lines, val_lines = train_test_split(lines, test_size=0.2, random_state=42)

def reorganize(lines, split="train"):
    for line in lines:
        parts = line.strip().split()
        image_name = parts[0] + ".jpg"
        species = int(parts[2])  # 1 = cat, 2 = dog
        
        label = "cats" if species == 1 else "dogs"

        src_path = os.path.normpath(os.path.join(images_dir, image_name))
        dst_dir = os.path.join("../data", split, label)
        dst_path = os.path.join(dst_dir, image_name)

        os.makedirs(dst_dir, exist_ok=True)

        if os.path.exists(src_path):
            shutil.copyfile(src_path, dst_path)

# Reorganitzar imatges
reorganize(train_lines, split="train")
reorganize(val_lines, split="val")
print("Imatges reorganitzades correctament.")


Imatges reorganitzades correctament.


In [6]:
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Transformació bàsica
mean = (0.5, 0.5, 0.5)
std = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Datasets i DataLoaders
dataset_train = ImageFolder("../data/train", transform=transform)
dataset_val = ImageFolder("../data/val", transform=transform)

train_loader = DataLoader(dataset_train, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=32, shuffle=False)


In [7]:
import torch.nn as nn
import torch.nn.functional as F

class SoftMax(nn.Module):
    def __init__(self, input_size=128*128*3, num_classes=2):
        super(SoftMax, self).__init__()
        self.linear = nn.Linear(input_size, num_classes)
        
    def forward(self, x):
        x = x.view(x.size(0), -1)  # flatten
        out = self.linear(x)
        return out


In [8]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=2):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(32 * 32 * 32, 128)  # 128x128 input, 2 pools (halven la mida 128 -> 64 -> 32)
        self.fc2 = nn.Linear(128, num_classes)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # batch x 16 x 64 x 64
        x = self.pool(F.relu(self.conv2(x)))  # batch x 32 x 32 x 32
        x = x.view(x.size(0), -1)             # flatten
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [9]:

#imatges 128x128 RGB
softmax_model = SoftMax(input_size=128*128*3, num_classes=2)

simplecnn_model = SimpleCNN(num_classes=2)

print(softmax_model)
print(simplecnn_model)


SoftMax(
  (linear): Linear(in_features=49152, out_features=2, bias=True)
)
SimpleCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=32768, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)


In [14]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()

optimizer_softmax = optim.SGD(softmax_model.parameters(), lr=0.1, momentum=0.1)
optimizer_simplecnn = optim.SGD(simplecnn_model.parameters(), lr=0.1, momentum=0.1)


In [15]:
train_loader = DataLoader(dataset_train, batch_size=5, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=5, shuffle=False)

In [12]:
max_accuracy = 0
n_epochs = 5
model = simplecnn_model
optimizer = optimizer_simplecnn
iteracions = 0
print(len(train_loader))

for epoch in range(n_epochs):
    model.train()
    running_loss = 0

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
        if iteracions % 250 == 0:
            print(f"Iter {iteracions+1}, Loss: {running_loss:.4f}")
        iteracions+=1

    # Validació
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f"Epoch {epoch+1}, Loss: {running_loss:.4f}, Accuracy: {accuracy:.4f}")

    if accuracy > max_accuracy:
        max_accuracy = accuracy

print("Maximum validation accuracy:", max_accuracy)


589
Iter 1, Loss: 0.6843
Iter 31, Loss: 22.8428
Iter 61, Loss: 41.5551
Iter 91, Loss: 61.1551
Iter 121, Loss: 79.3222
Iter 151, Loss: 97.5567
Iter 181, Loss: 118.5495
Iter 211, Loss: 137.8146
Iter 241, Loss: 155.1523
Iter 271, Loss: 174.0639
Iter 301, Loss: 192.4060
Iter 331, Loss: 211.7655
Iter 361, Loss: 230.2677
Iter 391, Loss: 249.9545
Iter 421, Loss: 269.7781
Iter 451, Loss: 288.3966
Iter 481, Loss: 305.2344
Iter 511, Loss: 323.5838
Iter 541, Loss: 340.1402
Iter 571, Loss: 357.4550
Epoch 1, Loss: 368.7257, Accuracy: 0.6576
Iter 601, Loss: 6.7772
Iter 631, Loss: 23.8060
Iter 661, Loss: 41.0741
Iter 691, Loss: 57.0322
Iter 721, Loss: 73.7836
Iter 751, Loss: 90.8226
Iter 781, Loss: 108.4393
Iter 811, Loss: 124.1039
Iter 841, Loss: 142.7686
Iter 871, Loss: 158.8139
Iter 901, Loss: 174.3726
Iter 931, Loss: 191.8272
Iter 961, Loss: 209.0023
Iter 991, Loss: 226.4794
Iter 1021, Loss: 242.5197
Iter 1051, Loss: 258.9680
Iter 1081, Loss: 276.0611
Iter 1111, Loss: 293.3380
Iter 1141, Loss: 30

In [39]:
from PIL import Image
import torch

def predict_image(image_path, model, transform):
    model.eval()
    image = Image.open(image_path).convert('RGB')
    image = transform(image)
    image = image.unsqueeze(0) 

    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)
    
    classes = ['cat', 'dog']  
    return classes[predicted.item()]


In [48]:
import random

transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

idx_to_class = {0: "cat", 1: "dog"}

val_dir = "../data/val"
all_images = []

for label_dir in os.listdir(val_dir):
    full_path = os.path.join(val_dir, label_dir)
    if os.path.isdir(full_path):
        for fname in os.listdir(full_path):
            if fname.lower().endswith(('.png', '.jpg', '.jpeg')):
                all_images.append(os.path.join(full_path, fname))

sample_images = random.sample(all_images, 5)

model.eval()
for img_path in sample_images:
    image = Image.open(img_path).convert("RGB")
    input_tensor = transform(image).unsqueeze(0)

    with torch.no_grad():
        output = model(input_tensor)
        pred = torch.argmax(output, dim=1).item()
        label = idx_to_class[pred]

    print(f"{os.path.basename(img_path)} → Predicció: {label}")


german_shorthaired_115.jpg → Predicció: dog
Bengal_136.jpg → Predicció: dog
Birman_165.jpg → Predicció: cat
english_setter_160.jpg → Predicció: dog
basset_hound_170.jpg → Predicció: dog


In [None]:
import os
import torch
import matplotlib.pyplot as plt
from torchvision import transforms
from PIL import Image, ImageOps

def predict_image(img_path, model, transform, class_names=["cats", "dogs"]):
    image = ImageOps.exif_transpose(Image.open(img_path)).convert("RGB")
    input_tensor = transform(image).unsqueeze(0) 
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        _, predicted = torch.max(output, 1)
    return class_names[predicted.item()], image

directory_in_str = "../data/val/proves"
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

image_files = [f for f in os.listdir(directory_in_str) if f.lower().endswith((".jpg", ".png", ".jpeg"))]

for filename in image_files:
    img_path = os.path.join(directory_in_str, filename)
    prediction, image = predict_image(img_path, model, transform)

    plt.imshow(image)
    plt.title(f"Prediction: {prediction}")
    plt.axis("off")
    plt.show()
