In [1]:
import os
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
from joblib import Parallel, delayed
from PIL import Image
import numpy as np

In [2]:
import torch.nn as nn
import torch
from torch.utils.data import random_split, Dataset, DataLoader
from torchvision import datasets, transforms
from fastbook import *

In [3]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


# Download the datatset

In [None]:
cats = 'bird', 'forest'
path = Path('dataset')

In [None]:
if not path.exists():
    path.mkdir(exist_ok=True)
    for c in cats:
        dest = (path/c)
        dest.mkdir(exist_ok=True)
        results = search_images_ddg(f'{c} photo')
        download_images(dest, urls=results[:200])
        resize_image(dest, max_size=400, dest=dest)

In [None]:
failed = verify_images(get_image_files(path))
failed

In [None]:
failed.map(Path.unlink)

# ----------------------------------------------------------------

In [4]:
path = Path('dataset')

In [5]:
ds = datasets.ImageFolder(path)

In [6]:
train_ds, valid_ds = random_split(ds, [0.8, 0.2])

In [7]:
class CustomDataset(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset
        
    def __len__(self, ):
        return len(self.dataset)
        
    def __getitem__(self, idx):
        image, label = self.dataset[idx]
        image = np.array(image)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_LINEAR)
        image = image / 255.0
        return torch.tensor(image, dtype=torch.float).permute(2, 0, 1), label

In [8]:
training_dataset = CustomDataset(train_ds)
validation_dataset = CustomDataset(valid_ds)

In [9]:
training_dataset[1]

(tensor([[[0.0392, 0.0510, 0.0000,  ..., 0.0510, 0.0706, 0.0902],
          [0.0000, 0.0039, 0.1451,  ..., 0.0000, 0.0392, 0.0118],
          [0.0706, 0.0353, 0.0824,  ..., 0.0000, 0.1569, 0.0000],
          ...,
          [0.0039, 0.0000, 0.0118,  ..., 0.0588, 0.0000, 0.0510],
          [0.0078, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0314],
          [0.0235, 0.1098, 0.0000,  ..., 0.0118, 0.0000, 0.0549]],
 
         [[0.6078, 0.5843, 0.3333,  ..., 0.4784, 0.4627, 0.5608],
          [0.4431, 0.7020, 0.6039,  ..., 0.1961, 0.5020, 0.3608],
          [0.5922, 0.6235, 0.6118,  ..., 0.1569, 0.5686, 0.4275],
          ...,
          [0.2902, 0.2941, 0.1373,  ..., 0.0549, 0.0000, 0.1176],
          [0.3176, 0.3020, 0.1608,  ..., 0.0196, 0.0039, 0.0549],
          [0.2078, 0.2980, 0.0588,  ..., 0.0039, 0.0000, 0.1255]],
 
         [[0.9922, 0.9451, 0.7333,  ..., 0.9412, 0.9412, 0.9686],
          [0.7961, 1.0000, 0.9804,  ..., 0.5255, 0.9059, 0.8549],
          [0.9804, 0.9765, 0.9765,  ...,

In [10]:
# Updated parallel function
def parallel_load_and_preprocess(dataset, indices):
    return Parallel(n_jobs=-1)(
        delayed(dataset.__getitem__)(idx) for idx in indices
    )

In [11]:
# Use parallel processing on a subset of indices
indices = range(len(training_dataset))  # or any specific subset
processed_data = parallel_load_and_preprocess(training_dataset, indices)

In [12]:
indices = range(len(validation_dataset))  # or any specific subset
processed_val_data = parallel_load_and_preprocess(validation_dataset, indices)

In [13]:
training_dataloader = DataLoader(processed_data, batch_size=32, shuffle=True, num_workers=4)

In [14]:
validation_dataloader = DataLoader(processed_val_data, batch_size=16, shuffle=False, num_workers=4)

In [16]:
import torch.nn as nn
import torch.optim as optim

In [15]:
# Define your model (example: a simple CNN)
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(16 * 112 * 112, 10)  # Adjust the output size

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = x.view(-1, 16 * 112 * 112)
        x = self.fc1(x)
        return x

model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [17]:
class ResNetModel(nn.Module):
    def __init__(self, num_classes=2):
        super(ResNetModel, self).__init__()
        self.resnet = models.resnet18(pretrained=True)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)

    def forward(self, x):
        return self.resnet(x)

# Instantiate the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNetModel(num_classes=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [18]:
def training():
    #for epoch in range(3):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for i, (inputs, labels) in enumerate(training_dataloader, 0):
        inputs, labels = inputs.to(device), labels.to(device)  # Move to GPU

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Print every 5 mini-batches
        if (i + 1) % 5 == 0:
            print(f"Loss: {running_loss / 5:.3f}")
            running_loss = 0.0

    epoch_accuracy = correct / total
    return epoch_accuracy


In [19]:
from torch.cuda.amp import GradScaler, autocast

scaler = GradScaler()

def smth():
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for i, (inputs, labels) in enumerate(training_dataloader, 0):
        inputs, labels = inputs.to(device), labels.to(device)  # Move to GPU

        optimizer.zero_grad()
        with autocast():  # Mixed precision context
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Print every 5 mini-batches
        if (i + 1) % 5 == 0:
            print(f"Loss: {running_loss / 5:.3f}")
            running_loss = 0.0

    epoch_accuracy = correct / total
    return epoch_accuracy

In [20]:
def validate():
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for val_data in validation_dataloader:
            val_inputs, val_labels = val_data
            val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)

            val_outputs = model(val_inputs)
            val_loss += criterion(val_outputs, val_labels).item()
            _, predicted = torch.max(val_outputs, 1)
            total += val_labels.size(0)
            correct += (predicted == val_labels).sum().item()

    val_loss /= len(validation_dataloader)
    val_accuracy = correct / total
    print(f"Validation loss: {val_loss:.3f}, Accuracy: {val_accuracy:.3f}")

In [21]:
def run():
    for epoch in range(5):
        epoch_accuracy = smth()
        print(f"Epoch {epoch + 1} completed. Training Accuracy: {epoch_accuracy:.3f}")
        validate()

In [22]:
%prun -l 10 -s cumtime run()

Loss: 0.646
Loss: 0.333
Epoch 1 completed. Training Accuracy: 0.750
Validation loss: 0.153, Accuracy: 1.000
Loss: 0.114
Loss: 0.052
Epoch 2 completed. Training Accuracy: 0.997
Validation loss: 0.026, Accuracy: 1.000
Loss: 0.025
Loss: 0.040
Epoch 3 completed. Training Accuracy: 1.000
Validation loss: 0.016, Accuracy: 1.000
Loss: 0.017
Loss: 0.026
Epoch 4 completed. Training Accuracy: 0.997
Validation loss: 0.009, Accuracy: 1.000
Loss: 0.017
Loss: 0.019
Epoch 5 completed. Training Accuracy: 0.997
Validation loss: 0.008, Accuracy: 1.000
 

         164733 function calls (149746 primitive calls) in 8.646 seconds

   Ordered by: cumulative time
   List reduced from 362 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    8.646    8.646 {built-in method builtins.exec}
        1    0.000    0.000    8.646    8.646 <string>:1(<module>)
        1    0.003    0.003    8.646    8.646 2616560762.py:1(run)
        5    0.075    0.015    7.604    1.521 1364531468.py:5(smth)
      210    5.093    0.024    5.093    0.024 {method 'item' of 'torch._C.TensorBase' objects}
       50    0.002    0.000    4.488    0.090 grad_scaler.py:354(step)
       50    0.001    0.000    4.440    0.089 grad_scaler.py:342(_maybe_opt_step)
       50    0.001    0.000    4.363    0.087 {built-in method builtins.sum}
      100    0.000    0.000    4.363    0.044 grad_scaler.py:350(<genexpr>)
 5850/150    0.012    0.000    1.609    0.011 module.py:1528(_wrapped_call_impl)