In [1]:
import os
import yaml
import json
import wandb
import torch
import shutil
import torchvision
import torch.nn.functional as F

from PIL import Image
from roboflow import Roboflow
from torch.utils.data import Dataset
from torchvision import transforms, datasets
from sklearn.model_selection import train_test_split
from datasets import load_dataset, DatasetDict, load_metric
from transformers import ViTForImageClassification, ViTImageProcessor, TrainingArguments, Trainer

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


## Dataset


In [2]:
class ChordsDataset(Dataset):
    def __init__(self, root_dir, annotation_file, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        
        with open(annotation_file, 'r') as f:
            self.coco = json.load(f)
        
        self.images = self.coco['images']
        self.annotations = self.coco['annotations']
        self.categories = self.coco['categories']

        self.images = {img['id']: img for img in self.images}

        self.nr_of_classes = len(self.categories)

        # Create a mapping from image_id to annotations
        self.image_to_label = {}
        for annotation in self.annotations:
            img = self.images[annotation['image_id']]
            self.image_to_label[img["id"]] = {
                "file_name": img["file_name"],
                "category": annotation["category_id"]
            }

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        metadata = self.image_to_label[idx]
        img_path = os.path.join(self.root_dir, metadata["file_name"])
        image = Image.open(img_path).convert('RGB')        
        
        if self.transform:
            image = self.transform(image)

        # Convert labels to tensor
        return {
            "image": image,
            "label": torch.tensor(metadata["category"]),
        }

In [3]:
def download_roboflow_data(config):
    """
    Download dataset from RoboFlow.
    """
    roboflow_config = config['data']['roboflow']
    roboflow = Roboflow(api_key=roboflow_config["api_key"])
    project = roboflow.workspace(roboflow_config["workspace"]).project(roboflow_config["project"])
    version = project.version(roboflow_config["version"])
    dataset = version.download(model_format=roboflow_config["version_download"])

    dest_path = config['data']['path'] + "/" + dataset.name

    if not os.path.exists(dest_path):
        shutil.move(src=dataset.location, dst=dest_path)

    print(f"Dataset downloaded and extracted to {config['data']['path']}")
    return dataset, dest_path

In [4]:
def load_config(config_path):
    with open(config_path, 'r') as file:
        return yaml.safe_load(file)

In [5]:
def create_transform(aug_config, processor):
    transform_list = []
    
    # Add transforms based on configuration
    # if 'random_resize_crop' in aug_config:
    #     transform_list.append(transforms.RandomResizedCrop(**aug_config['random_resize_crop']))
    # if 'random_horizontal_flip' in aug_config:
    #     transform_list.append(transforms.RandomHorizontalFlip(aug_config['random_horizontal_flip']))
    # if 'color_jitter' in aug_config:
    #     transform_list.append(transforms.ColorJitter(**aug_config['color_jitter']))
    # if 'random_rotation' in aug_config:
    #     transform_list.append(transforms.RandomRotation(aug_config['random_rotation']))
    
    # Always include resizing, ToTensor, and normalization
    transform_list.extend([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # transforms.Normalize(mean=processor.image_mean, std=processor.image_std),
    ])
    
    return transforms.Compose(transform_list)

In [6]:
def get_transforms(config, processor):
    train_transform = create_transform(config['data']['train_augmentation'], processor)
    val_transform = create_transform(config['data'].get('val_augmentation', {}), processor)
    
    return train_transform, val_transform

In [7]:
def load_data(data_dir, transform):
    return datasets.ImageFolder(data_dir, transform=transform)

In [18]:
def organize_images_by_class(root_dir, destination):
    # List of subdirectories to process
    subdirs = ['train', 'valid', 'test']
    
    # Process all images from train, valid, and test directories    
    for subdir in subdirs:
        dir_path = os.path.join(root_dir, subdir)
        
        # Skip if the directory doesn't exist
        if not os.path.exists(dir_path):
            print(f"Directory {dir_path} not found. Skipping...")
            continue

        # Get all image files in the directory
        image_files = [f for f in os.listdir(dir_path) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))]

        # Move and organize images
        for img in image_files:
            first_letter = img[0].upper()
            letter_dir = os.path.join(destination, first_letter)
            
            # Create letter directory if it doesn't exist
            os.makedirs(letter_dir, exist_ok=True)

    print("Image organization complete!")

In [9]:
f_run_config = "config.yml"
f_wandb_config = "wandb.yml" 

In [10]:
# Load configuration
config = load_config(f_run_config)
wandb_config = load_config(f_wandb_config)

In [21]:
# Download data from RoboFlow if specified
if config['data'].get('use_roboflow', False):
    _, location = download_roboflow_data(config)

destination = "/datasets"

organize_images_by_class(location, destination)

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Guitar-Chord-1 to coco:: 100%|██████████| 166698/166698 [00:24<00:00, 6725.74it/s]





Extracting Dataset Version Zip to Guitar-Chord-1 in coco:: 100%|██████████| 2525/2525 [00:00<00:00, 4127.68it/s]


Dataset downloaded and extracted to ./Project/src/classification/dataset


PermissionError: [Errno 13] Permission denied: '/datasets'

In [None]:
# Initialize wandb
wandb.require("core")
wandb.init(
    project=wandb_config["project"],
    name=wandb_config['name'] + "-" + wandb.util.generate_id(),
    config=wandb_conf,
    entity=wandb_config["entity"]
)

In [None]:
 # Load pre-trained model and processor
model = ViTForImageClassification.from_pretrained(config['model']['pretrained_weights'])
processor = ViTImageProcessor.from_pretrained(config['model']['pretrained_weights'])

In [None]:
# Get transforms
train_transform, base_transform = get_transforms(config, processor)

In [None]:
# Load the ds
root_dir = 'path/to/custom_dataset'
ds = load_dataset("imagefolder", data_dir=root_dir)

# Split the data
ds = ds['train'].train_test_split(test_size=0.3, stratify_by_column="label")  # 70% train, 30% test
ds_test = ds['test'].train_test_split(test_size=0.5, stratify_by_column="label")  # 30% test --> 15% valid, 15% test
ds = DatasetDict({
    'train': ds['train'],
    'test': ds_test['test'],
    'valid': ds_test['train']
})
    
del ds_test

ds

In [None]:
# Define training arguments
training_args = TrainingArguments(
    output_dir=config['training']['output_dir'],
    num_train_epochs=config['training']['num_epochs'],
    per_device_train_batch_size=config['training']['batch_size'],
    per_device_eval_batch_size=config['training']['batch_size'],
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=float(config['training']['learning_rate']),
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="wandb",
)

# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=valid_ds,
    compute_metrics=lambda p: {"accuracy": (p.predictions.argmax(-1) == p.label_ids).mean()},
)

# # # Train the model
trainer.train()

# # Save the fine-tuned model
# trainer.save_model(config['training']['final_model_path'])

# # Close wandb run
# wandb.finish()