### import and implement model

In [24]:
# packages
import torch.nn as nn
import torch.optim as optim
import torch.profiler
from torch.nn import Conv2d, LeakyReLU, MaxPool2d, Linear # import them seperetly because I think its more readable
from torchvision.io import read_image
from torch.utils.data import DataLoader, random_split
import torch
import pandas as pd
import numpy as np
import os
from torchvision.transforms import Resize, ConvertImageDtype, Normalize, Compose
from torchvision import transforms

In [25]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [26]:
# set seed
torch.manual_seed(126)

# potential opti
torch.backends.cudnn.benchmark = True

# cores set
torch.set_num_threads(8)
torch.set_num_interop_threads(8)

RuntimeError: Error: cannot set number of interop threads after parallel work has started or set_num_interop_threads called

import data and load onto tensors

In [52]:
class ImageDataset(torch.utils.data.Dataset): # Inherit from torch.utils.data.Dataset
    def __init__(self, class_dir, img_dir): # Pass transforms in
        self.img_labels = pd.read_csv(class_dir)
        self.img_dir = img_dir
        self.transform = transforms.Compose([
                transforms.Resize((224, 224), antialias=True),
                transforms.ConvertImageDtype(torch.float32), # Convert to float
            ])

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        if torch.is_tensor(idx): # Handle potential tensor index
            idx = idx.tolist()

        img_filename = self.img_labels.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_filename)
        label = self.img_labels.iloc[idx, 1]

        image = read_image(img_path)
        image = image.type(torch.float32) # convert to more efficent dtype

        # Apply transforms
        if self.transform:
            image = self.transform(image)

        # Convert label to tensor (assuming classification)
        label = torch.tensor(label, dtype=torch.int8)

        return image, label


In [53]:
# directories for classes and images
class_dir = '/content/drive/MyDrive/colab/class_ids.csv'
image_dir = '/content/drive/MyDrive/colab/resize_data'

# load dataset using made class function
data_set = ImageDataset(class_dir, image_dir) # create dataset

# set train and test set
train_size = int(.75 * len(data_set))
test_size = int(.125 * len(data_set))
val_size = len(data_set) - test_size - train_size

# random split
training, testing, val = random_split(data_set, [train_size, test_size, val_size])

In [54]:
batch_size = 128 # batch size, up from 32

In [55]:
# load the split data on the tensors | added the colleate fn bc of corrupted files
train_loader = DataLoader(training, batch_size=batch_size, num_workers=0, pin_memory=True,  prefetch_factor=None, shuffle=True) # workers to preload and increase training speed
test_loader = DataLoader(testing, batch_size=batch_size, num_workers=8, pin_memory=True,  prefetch_factor=2,shuffle=True)
val_loader = DataLoader(val, batch_size=batch_size, num_workers=8,pin_memory=True, prefetch_factor=2,shuffle=True)

model without transfer learning (will add just wanted to build one from stratch)

In [42]:
# I chose to use a CNN for the image classifcation.
# CNNs preform much better then feed forward networks for image classification tasks and are still easy to implement

class CNN (nn.Module):
    def __init__(self):
        super().__init__()
        # 1 input layer, to 32 filters, stride of one pixel, 3x3 kernal, padding = (kernal - 1)/2

        # 3 layers like this
        self.conv1 = Conv2d(in_channels=1, out_channels=32, stride=1, kernel_size=3, padding=1)
        self.Lrelu1 = LeakyReLU() # better preformance on average compared to regular ReLu
        self.bn1 = nn.BatchNorm2d(32)# prevent exploding / vanishing gradients
        self.conv2 = Conv2d(in_channels=32, out_channels=32, stride=1, kernel_size=5, padding=2)
        self.Lrelu2 = LeakyReLU()
        self.bn2 = nn.BatchNorm2d(32)
        self.maxpool1 = MaxPool2d(kernel_size = 2, stride = 2)

        # 3 layers like this
        self.conv3 = Conv2d(in_channels=32, out_channels=64, stride=1, kernel_size=5, padding=2)
        self.Lrelu3 = LeakyReLU() # better preformance on average compared to regular ReLu
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = Conv2d(in_channels=64, out_channels=64, stride=1, kernel_size=7, padding=3)
        self.Lrelu4 = LeakyReLU()
        self.bn4 = nn.BatchNorm2d(64)
        self.maxpool2 = MaxPool2d(kernel_size = 2, stride = 2)

        # 3 layers like this
        self.conv5 = Conv2d(in_channels=64, out_channels=128, stride=1, kernel_size=5, padding=2)
        self.Lrelu5 = LeakyReLU() # better preformance on average compared to regular ReLu
        self.bn5 = nn.BatchNorm2d(128)
        self.conv6 = Conv2d(in_channels=128, out_channels=128, stride=1, kernel_size=3, padding=1)
        self.Lrelu6 = LeakyReLU()
        self.bn6 = nn.BatchNorm2d(128)
        self.maxpool3 = MaxPool2d(kernel_size = 2, stride = 2)

        # reduce the number of features
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

        # fully connected layers
        self.fc1 = Linear(128, 512) # pool layer reduces
        self.relu1 = LeakyReLU()
        self.fc3 = Linear(512, 256)
        self.relu3 = LeakyReLU()
        self.dropout2 = nn.Dropout(p = 0.4)
        self.fc4 = nn.Linear(256, 1) # one output


        # this reduces overfitting making one neuron not resposnible for everything, also improves regualrization

    def forward(self, x):
        # Pass through Convolutional Block 1
        x = self.conv1(x)
        x = self.Lrelu1(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.Lrelu2(x)
        x = self.bn2(x)
        x = self.maxpool1(x)

        # Pass through Convolutional Block 2
        x = self.conv3(x)
        x = self.Lrelu3(x)
        x = self.bn3(x)
        x = self.conv4(x)
        x = self.Lrelu4(x)
        x = self.bn4(x)
        x = self.maxpool2(x)

        # Pass through Convolutional Block 3
        x = self.conv5(x)
        x = self.Lrelu5(x)
        x = self.bn5(x)
        x = self.conv6(x)
        x = self.Lrelu6(x)
        x = self.bn6(x)
        x = self.maxpool3(x)

        # pooling layer
        x = self.global_avg_pool(x)

        # flatten
        x = torch.flatten(x, 1)

        # Pass through Fully Connected Layers
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc3(x)
        x = self.dropout2(x) # Apply dropout
        x = self.relu3(x)

        # Pass through the final Linear layer
        x = self.fc4(x)

        # Apply Dropout, sigmoind applied in loss function, better preformance

        return x


model training loop

In [43]:
cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {cuda_available}")
if cuda_available:
    print(f"Number of CUDA devices: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")

device = "cuda"

CUDA Available: True
Number of CUDA devices: 1
Device 0: Tesla T4


In [44]:
# run model on GPU
model = CNN()
model.to(device)

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (Lrelu1): LeakyReLU(negative_slope=0.01)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (Lrelu2): LeakyReLU(negative_slope=0.01)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (Lrelu3): LeakyReLU(negative_slope=0.01)
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(64, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
  (Lrelu4): LeakyReLU(negative_slope=0.01)
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1,

In [45]:
learning_rate = 0.1 # standard learning rate
loss_fn = nn.BCEWithLogitsLoss() # add activation function in here
optimizer = optim.AdamW(model.parameters(), lr=learning_rate) # adam w has better preformance, weight decay is applied sep,
# leads to more peak ram may have to reduce batch size

In [46]:
def val_set_test():
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    n_rounds = 0
    with torch.no_grad():  # Disable gradient computation
        for batch_idx, (image, label) in enumerate(val_loader):
            # set up
            image = image.to(device).float()
            label = label.to(device).float()

            # make predictions on val
            predictions = model(image)
            predictions = predictions.squeeze()
            loss = loss_fn(predictions, label)

            # loss
            val_loss += loss.item()
            n_rounds = batch_idx + 1

    model.train()  # Set model back to training mode
    return val_loss / n_rounds


In [47]:
def training(epochs):
    model.train()  # Set the model to training mode
    training_losses = []  # To track loss history
    min_loss = float('inf') # es min loss
    patience = 0 # es track
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1)# learning rate decay, 0.1 is default gammma
    for i in range(epochs):
        tloss = 0.0
        n_rounds = 0
        for batch_idx, (image, label) in enumerate(train_loader):
            # Move data to device and ensure correct data types
            image = image.to(device).float()
            label = label.to(device).float()

            # Zero gradients
            optimizer.zero_grad()

            # Forward pass
            predictions = model(image)
            predictions = predictions.squeeze()
            loss = loss_fn(predictions, label)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Accumulate loss
            tloss += loss.item()
            n_rounds = batch_idx + 1
            avg_loss = tloss / n_rounds

            # Update training loss stats
            training_losses.append(avg_loss)

        # early stopping
        val_loss = val_set_test() # return loss for validation set

        if val_loss > min_loss:
            patience += 1

        if val_loss < min_loss:
            min_loss = val_loss
            patience = 0

        if patience > 10: # early stopping after 5 rounds
            print(f"early stopping at round {i}")
            return model, training_losses

        scheduler.step() # step for learning rate decay
        print(f"{avg_loss} is the average loss at epoch {i}") # still provide avg and epoch after through early stopping


    return model, training_losses



#### fake testing

In [18]:
import time # For basic timing

# --- Create Fake Data (adjust shape/size/type) ---
print("Creating fake data...")
start_time = time.time()
num_samples = 500
batch_size = 64 # Your batch size
input_shape = (1, 224, 224) # Example image shape
num_classes = 2 # Example output classes
# Use float() for typical model inputs, long() for typical classification labels
fake_inputs = torch.randn(num_samples, *input_shape, dtype=torch.float32)
fake_labels = torch.randint(0, num_classes, (num_samples,), dtype=torch.long)

# Create fake class IDs DataFrame
fake_df = pd.DataFrame({
    'filename': [f'fake_image_{i}.jpg' for i in range(num_samples)],
    'label': fake_labels.numpy()
})

# Create a temporary directory to store fake images
fake_images = torch.randn(num_samples, *input_shape, dtype=torch.float32)
label_array = fake_labels.float()  # Convert to float for binary classification

# Create fake dataset directly without saving files
class FakeDataset():
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels
        self.transforms = Resize((224,224), antialias=True)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        image = self.transforms(image)
        return image, label

# Create dataset and dataloader
fake_dataset = FakeDataset(fake_images, label_array)
print(f"Fake data created in {time.time() - start_time:.2f} seconds.")

# Create DataLoader with same parameters as training loader
fake_loader = DataLoader(fake_dataset, batch_size=batch_size, shuffle=True,
                        num_workers=0, prefetch_factor=None)
print(f"Fake DataLoader created with workers={fake_loader.num_workers}, pin_memory={fake_loader.pin_memory}")

Creating fake data...
Fake data created in 0.43 seconds.
Fake DataLoader created with workers=0, pin_memory=False


In [56]:
# Get a batch of data from test_loader
# Get only 2 batches (small subset) from test_loader
images, _ = next(iter(train_loader))
images = images[:1].to(device).float()  # Take only 2 samples

with torch.profiler.profile(
    activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA],
    record_shapes=True,
    ) as prof:
        # Code to be profiled, e.g., model inference or training loop
        with torch.profiler.record_function("training_loop"):
            output = model(images)

print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          training_loop         0.00%       0.000us         0.00%       0.000us       0.000us       6.518ms       111.38%       6.518ms       6.518ms             1  
                                          training_loop        17.05%       1.370ms        75.72%       6.085ms       6.085ms       0.000us         0.00%       5.852ms       5.852ms             1  
         

#### training call and eval

In [None]:
t_model, t_loss = training(300)

In [None]:
results = [] # array for res
model.eval() # set model to eval mode, disables dropout

# testing loop
for images, labels in test_loader:
    images = images.to(device).float() # load onto device with correct data type
    labels = labels.to(device).float()

    predictions = model(images) # make predections on image in model
    preds_1_0 = torch.where(predictions>0.5, 1, 0)
    correct = (preds_1_0 == labels)
    results.append(correct.detach().cpu().numpy().mean())

    precision = BinaryPrecision().to(device) # binary percison
    result = precision(preds_1_0.squeeze(), labels)


accuracy = np.array(results).mean()
print(accuracy)
print(f"Binary Precision: {result}")
