In [6]:
import os 
import time 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch 
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader


print("Torch version: ", torch.__version__)

# Choose the compute backend in priority order: CUDA first, then MPS, else CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Device: {device}")

Torch version:  2.0.1
Device: mps


### Data Preparation

In [24]:
# Data augmentation + normalization for the train split;
# deterministic resize + normalization only for the test split.

data_path = 'data'

# NOTE(review): the recorded output of this notebook reports the classes as
# ['train', 'val'], which means ImageFolder is using the split folders directly
# under `data_path` as labels. If the real class folders live one level deeper,
# `data_path` should point at the directory that contains them -- TODO confirm.

# Seed for the train/test partition so Restart & Run All reproduces the same split.
SPLIT_SEED = 42

# Per-channel mean/std; these are the values torchvision documents for its
# ImageNet-pretrained models.
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Train pipeline: includes the random flip. The name `transform` is kept so any
# later cell that refers to it keeps working.
transform = transforms.Compose([
        transforms.Resize((256,256)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])

# Test pipeline: identical preprocessing but with no random augmentation, so
# evaluation results are deterministic.
test_transform = transforms.Compose([
        transforms.Resize((256,256)),
        transforms.ToTensor(),
        normalize
    ])

# Two views over the same image folder. ImageFolder enumerates files in sorted
# order, so index i refers to the same image in both views; only the transform
# applied on access differs.
dataset = datasets.ImageFolder(root=data_path, transform=transform)
eval_dataset = datasets.ImageFolder(root=data_path, transform=test_transform)

# Define the split ratio (80% train, 20% test)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Split the dataset: draw one seeded permutation of the indices and slice it, so
# the two subsets are disjoint and each one reads through its own transform.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
shuffled_indices = torch.randperm(len(dataset), generator=split_generator).tolist()
train_dataset = torch.utils.data.Subset(dataset, shuffled_indices[:train_size])
test_dataset = torch.utils.data.Subset(eval_dataset, shuffled_indices[train_size:])



In [25]:
# Mini-batch size used by both loaders
batch_size = 32

# Keyword arguments common to the train and test loaders
_loader_kwargs = dict(batch_size=batch_size, num_workers=4)

# Training data is shuffled; test data is iterated in dataset order
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)


In [30]:
# Report how many samples ended up in the training split
num_train_images = len(train_dataset)
print(f"Number of images in the train dataset: {num_train_images}")

# Class labels are stored on the dataset wrapped by the split, not on the split
# itself; read them once and derive the count from that list.
class_names = train_dataset.dataset.classes
num_classes = len(class_names)
print(f"Number of classes: {num_classes}")
print("Class names:", class_names)



Number of images in the train dataset: 4283
Number of classes: 2
Class names: ['train', 'val']


### Model Building