In [7]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from torchvision.io import read_image
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import pandas as pd
import os
from IPython.utils import io
from collections import OrderedDict

Device Setup
Use the GPU on CUDA-capable systems; otherwise, fall back to the CPU.

In [None]:
# Pick the CUDA device when PyTorch reports one as available, else the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f'Using device: {device}')

Load Data
Local Dataset and Data Loader Functions
The CustomImageDataset class fetches images from the [train, test, valid] directories and stores them in a Dataset object, along with their labels from [train_labels.csv, test_labels.csv, valid_labels.csv].

The create_dataloader function, on the other hand, builds the image transform for the Dataset (which converts each image to a Tensor) and wraps the Dataset in a batch-based DataLoader object, ready for training.

In [9]:
class CustomImageDataset(Dataset):
    """
    Map-style dataset pairing image files on disk with labels from a table.

    - input labels: Pandas DataFrame; column 0 is read as the image file name
      (joined onto img_dir) and column 1 as the label
    - input img_dir: string, the path to the samples directory
    - input transform (optional): callable applied to every loaded image,
      e.g. a Torch transforms object

    - output (per index): an (image, label) pair - the RGB image, passed
      through transform when one was given (otherwise the PIL image itself),
      and the label value stored in that row
    """
    def __init__(self, labels, img_dir, transform=None):
        self.transform = transform
        self.img_dir = img_dir
        self.img_labels = labels

    def __len__(self):
        # one sample per row of the labels table
        n_samples = len(self.img_labels)
        return n_samples

    def __getitem__(self, idx):
        # column 0 holds the file name, column 1 the label
        file_name = self.img_labels.iloc[idx, 0]
        sample = Image.open(os.path.join(self.img_dir, file_name)).convert('RGB')
        target = self.img_labels.iloc[idx, 1]

        # without a (truthy) transform the converted PIL image is returned as-is
        if not self.transform:
            return sample, target
        return self.transform(sample), target

def create_dataloader(data_dir, batch_size, labels, size=224, shuffle=True):
    """
    transforms custom dataset and converts it into a pytorch dataloader object
    - input data_dir: string with the path to a data directory
    - input batch_size: an integer that represents the batch size
    - input labels: Pandas DataFrame that stores the data labels
    - input size (optional): sets the final pixel width and height of the
      samples; only 224 and 512 are supported
    - input shuffle (optional): whether the loader reshuffles the samples on
      every epoch; defaults to True, which was the previous fixed behaviour

    - output dataloader: a pytorch dataloader object
    - raises ValueError: if size is neither 224 nor 512
    """
    if size == 224:
        # Resize(224) on its own only scales the shorter edge to 224 and keeps
        # the aspect ratio, so a non-square image would keep a longer side and
        # could not be stacked into a batch. The centre crop guarantees a
        # 224x224 result and is a no-op for square inputs.
        transform = transforms.Compose([
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
        ])
    elif size == 512:
        # no resizing and cropping: samples are used at their stored resolution
        # NOTE(review): assumes the images on disk are already 512x512 - confirm
        transform = transforms.Compose([
            transforms.ToTensor(),
        ])
    else:
        # Fail fast: with no ToTensor transform the dataset would hand PIL
        # images to the DataLoader, whose default collate function cannot
        # batch them, so the failure would only surface later while iterating.
        raise ValueError(
            f"sorry, only size 224 or 512 can be loaded, please try again (got size={size!r})"
        )

    # load samples into a custom image dataset
    data = CustomImageDataset(labels, data_dir, transform=transform)

    # convert custom image dataset into a data loader
    dataloader = DataLoader(data, batch_size=batch_size, shuffle=shuffle)
    return dataloader

Data Loader

In [None]:
# Dataset root, relative to the notebook's working directory. The image
# folders were already addressed this way ("data/train", ...); the label files
# previously used an absolute, machine-specific path.
DATA_DIR = "data"
BATCH_SIZE = 5
SPLITS = ["train", "test", "valid"]

# One labels table per split, read from <DATA_DIR>/labels/<split>_labels.csv.
# The earlier string concatenation dropped the separator between the "labels"
# folder and the file name, producing ".../labelstrain_labels.csv".
# NOTE(review): assumes the CSV files live in DATA_DIR/labels - confirm.
label_df = {
    split: pd.read_csv(os.path.join(DATA_DIR, "labels", f"{split}_labels.csv"))
    for split in SPLITS
}

# One batch-based loader per split, with images taken from <DATA_DIR>/<split>.
dataloaders = {
    split: create_dataloader(os.path.join(DATA_DIR, split), BATCH_SIZE, label_df[split], size=224)
    for split in SPLITS
}