In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [10]:
! pip install dask-image

Collecting dask-image
  Downloading dask_image-2023.8.1-py2.py3-none-any.whl.metadata (23 kB)
Collecting pims>=0.4.1 (from dask-image)
  Downloading PIMS-0.6.1.tar.gz (86 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting slicerator>=0.9.8 (from pims>=0.4.1->dask-image)
  Downloading slicerator-1.1.0-py3-none-any.whl.metadata (1.9 kB)
Downloading dask_image-2023.8.1-py2.py3-none-any.whl (42 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading slicerator-1.1.0-py3-none-any.whl (10 kB)
Building wheels for collected packages: pims
  Building wheel for pims (setup.py) ... [?25ldone
[?25h  Created wheel for pims: filename=PIMS-0.6.1-py3-none-any.whl size=82615 sha256=6908fbe00d8039e0a458eb35173b60fe914a6705042cac222172967d733fe38b
  Stored in directo

In [11]:
import os
import shutil
from sklearn.model_selection import train_test_split
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch import nn, optim
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.cuda.amp import GradScaler, autocast
from torchvision.models import resnet50
import torch.multiprocessing as mp
import torch.distributed as dist
import dask.array as da
import dask.dataframe as dd
from dask_image.imread import imread
import pandas as pd

In [2]:
os.listdir("/kaggle/input/")

['hms-brain-activity']

# Data Augmentation

In [2]:
def create_splits(data_dir, output_dir, train_size=0.7, val_size=0.15, test_size=0.15):
    classes = [d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))]
    splits = {'train': train_size, 'val': val_size, 'test': test_size}

    # Ensure the split sizes sum to 1
    assert sum(splits.values()) == 1, "Sum of split sizes should be 1."

    # Create output directories for the splits
    for split in splits.keys():
        for cls in classes:
            os.makedirs(os.path.join(output_dir, split, cls), exist_ok=True)

    # Process each class directory
    for cls in classes:
        class_dir = os.path.join(data_dir, cls)
        images = [img for img in os.listdir(class_dir) if img.lower().endswith(('png', 'jpg', 'jpeg'))]
        # Create stratified splits
        train_val, test = train_test_split(images, test_size=splits['test'], random_state=42, stratify=None)
        train, val = train_test_split(train_val, test_size=splits['val'] / (splits['train'] + splits['val']), random_state=42, stratify=None)

        # Function to copy files to their respective directories
        def copy_files(files, split):
            for f in files:
                src = os.path.join(class_dir, f)
                dst = os.path.join(output_dir, split, cls, f)
                shutil.copy(src, dst)

        # Copy files to respective split directories
        copy_files(train, 'train')
        copy_files(val, 'val')
        copy_files(test, 'test')

if __name__ == "__main__":
    data_dir = '/kaggle/input/hms-brain-activity/dataset/'
    output_dir = '/kaggle/working/dataset_splits/'
    create_splits(data_dir, output_dir)

In [21]:
# Resnet 50 - Fine-Tuning / Transfer Learning on Spectrogram Dataset

# Set device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Data directories
data_dir = '/kaggle/working/dataset_splits/'
train_dir = f'{data_dir}/train'
val_dir = f'{data_dir}/val'
test_dir = f'{data_dir}/test'

# Transformations
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Data Loaders
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
val_dataset = datasets.ImageFolder(val_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Model setup
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(train_dataset.classes))  # Adjusting the last layer
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# Training Function
def train_model(num_epochs):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}')

        validate_model()

# Validation Function
def validate_model():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Validation Accuracy: {100 * correct / total:.2f}%')

# Main
if __name__ == "__main__":
    train_model(num_epochs=10)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:04<00:00, 25.0MB/s]


Epoch 1/10, Loss: 1.8131
Validation Accuracy: 25.51%
Epoch 2/10, Loss: 1.7796
Validation Accuracy: 26.73%
Epoch 3/10, Loss: 1.7760
Validation Accuracy: 28.41%
Epoch 4/10, Loss: 1.7589
Validation Accuracy: 28.22%
Epoch 5/10, Loss: 1.7668
Validation Accuracy: 26.24%
Epoch 6/10, Loss: 1.7683
Validation Accuracy: 28.10%
Epoch 7/10, Loss: 1.7627
Validation Accuracy: 25.67%
Epoch 8/10, Loss: 1.7572
Validation Accuracy: 25.29%
Epoch 9/10, Loss: 1.7509
Validation Accuracy: 29.59%
Epoch 10/10, Loss: 1.7508
Validation Accuracy: 25.89%


In [3]:
torch.cuda.device_count()

2

In [None]:
# Distributed Data Parallelizm

# Data directories
data_dir = '/kaggle/working/dataset_splits/'
train_dir = f'{data_dir}/train'
val_dir = f'{data_dir}/val'
test_dir = f'{data_dir}/test'

# def get_data_loader(batch_size, data_path):
#     transform = transforms.Compose([
#         transforms.Resize(256),
#         transforms.CenterCrop(224),
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#     ])
# #     train_set = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
#     _dataset = datasets.ImageFolder(data_path, transform=transform)
#     data_loader = DataLoader(_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
#     return data_loader

# With augmentation using dask
def get_data_loader(batch_size, data_path, train=True):
    if train:
        transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            transforms.RandomRotation(10),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
    else:
        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
    _dataset = datasets.ImageFolder(data_path, transform=transform)
    data_loader = DataLoader(_dataset, batch_size=batch_size, shuffle=train, num_workers=4, pin_memory=True)
    return data_loader

def setup(rank, world_size):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    dist.init_process_group("gloo", rank=rank, world_size=world_size)
    torch.cuda.set_device(rank)

def save_checkpoint(model, optimizer, epoch, filename="checkpoint.pth"):
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.module.state_dict(),  # Note: unwrap the model from DDP
        'optimizer_state_dict': optimizer.state_dict(),
    }
    torch.save(checkpoint, filename)

def train(model, loader, optimizer, criterion, scaler, rank, epoch, save_interval):
    model.train()
    total_loss = 0.0
    for batch_idx, (data, target) in enumerate(loader):
        data, target = data.to(rank), target.to(rank)
        optimizer.zero_grad()
        with autocast():  # Using automatic mixed precision
            output = model(data)
            loss = criterion(output, target)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()

        if rank == 0 and batch_idx % save_interval == 0:
            save_checkpoint(model, optimizer, epoch, filename=f"checkpoint_epoch_{epoch}_batch_{batch_idx}.pth")
    
    return total_loss / len(loader)

# Validation Function
def validate_model(model, val_loader, rank):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(rank), labels.to(rank)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Validation Accuracy: {100 * correct / total:.2f}%')


def ddp_train(rank, world_size, epochs):
    global train_dir, val_dir
    setup(rank, world_size)
    model = resnet50().to(rank)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[rank])
    batch_size = 256
    train_loader = get_data_loader(batch_size // world_size, train_dir, train=True)
    val_loader = get_data_loader(batch_size // world_size, val_dir, train=False)
    criterion = nn.CrossEntropyLoss().to(rank)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    scaler = GradScaler()  # Initialize the gradient scaler for AMP
    save_interval = 100  # Define your interval for saving checkpoints

    for epoch in range(epochs):
        loss = train(model, train_loader, optimizer, criterion, scaler, rank, epoch, save_interval)
        if rank == 0:
            print(f"Epoch {epoch+1}, Loss: {loss}")
            save_checkpoint(model, optimizer, epoch, filename=f"checkpoint_epoch_{epoch}.pth")
            
            validate_model(model, val_loader, rank)

    if rank == 0:
        save_checkpoint(model, optimizer, epochs, filename="final_checkpoint.pth")
    dist.destroy_process_group()

if __name__ == "__main__":
    world_size = 2 
    epochs = 20
#     mp.spawn(ddp_train, args=(world_size, epochs), nprocs=world_size, join=True)
    processes = []
    #mp.set_start_method("spawn")
    for rank in range(world_size):
        p = mp.Process(target=ddp_train, args=(rank, world_size, epochs))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

Epoch 1, Loss: 3.557675179094076
Validation Accuracy: 19.19%
Epoch 2, Loss: 1.8107595816254616
Validation Accuracy: 19.19%
Epoch 3, Loss: 1.8026292957365513
Validation Accuracy: 18.24%
Epoch 4, Loss: 1.796021254112323
Validation Accuracy: 18.16%
Epoch 5, Loss: 1.7934595321615536
Validation Accuracy: 22.05%
Epoch 6, Loss: 1.7822927708427112
Validation Accuracy: 22.05%
Epoch 7, Loss: 1.7736726043124993
Validation Accuracy: 23.80%
Epoch 8, Loss: 1.7747712917625904
Validation Accuracy: 24.18%
Epoch 9, Loss: 1.7644984213014443
Validation Accuracy: 24.83%
Epoch 10, Loss: 1.7621460258960724
Validation Accuracy: 24.52%
Epoch 11, Loss: 1.7574532441794872
Validation Accuracy: 22.92%
Epoch 12, Loss: 1.7439448311924934
Validation Accuracy: 24.60%
Epoch 13, Loss: 1.7423655614256859
Validation Accuracy: 26.24%
Epoch 14, Loss: 1.737213892241319
Validation Accuracy: 26.12%
Epoch 15, Loss: 1.7350679946442444
Validation Accuracy: 28.03%
Epoch 16, Loss: 1.7264911110202472
Validation Accuracy: 26.92%
Epoc