In [None]:
import pandas as pd  # circular import?
import torch
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import timm
from sklearn.model_selection import train_test_split
from prodigyopt import Prodigy
from torchsampler import ImbalancedDatasetSampler
import lightning as L
import wandb
from sklearn.metrics import f1_score, accuracy_score#, classification_report
import torch.nn.functional as F
import os

print(torch.__version__)

In [2]:
# Ask Lightning Fabric's MPS accelerator class whether it is available on this
# machine; the boolean result is shown as the cell output.
L.fabric.accelerators.mps.MPSAccelerator.is_available()

False

In [3]:
# PARAMS
# Notebook-wide configuration. Later cells read these names as globals.
single_shot = False
learning_rate = 0.001  # Only passed to Adam; Prodigy is constructed without an lr
use_prodigy = True
epochs = 7  # Max_limit
batch_size = 64
architecture = "vit_base_patch16_224"
os.environ['WANDB_NOTEBOOK_NAME'] = 'week3-mps-class4.ipynb'
use_class4 = False

# Label names used for reporting; class '4' is only present when use_class4 is on.
if use_class4:
    class_names = ['0', '1', '2', '3', '4']
else:
    class_names = ['0', '1', '2', '3']

# Human-readable optimizer name (used for logging/config only).
# learning_rate stays numeric in both cases: the Adam branch used to overwrite
# it with the string "Prodigy", which would then have been handed to
# torch.optim.Adam(lr=...) and to ViTModel's learning_rate default.
optimizer = "Prodigy" if use_prodigy else "Adam"

run_name = f'max_ep-{epochs}_bs-{batch_size}_cls4-{use_class4}_earlystop_{architecture}'

In [6]:
# Scratch instantiation of the configured architecture with a 4-class head and
# pretrained weights. `obj` is not referenced by any other visible cell.
obj = timm.create_model(architecture, num_classes=4, pretrained=True)

In [9]:
# Show the model names timm can create (used to pick `architecture`).
# The returned list is long; the cell output is the raw list.
timm.list_models()

['bat_resnext26ts',
 'beit_base_patch16_224',
 'beit_base_patch16_384',
 'beit_large_patch16_224',
 'beit_large_patch16_384',
 'beit_large_patch16_512',
 'beitv2_base_patch16_224',
 'beitv2_large_patch16_224',
 'botnet26t_256',
 'botnet50ts_256',
 'caformer_b36',
 'caformer_m36',
 'caformer_s18',
 'caformer_s36',
 'cait_m36_384',
 'cait_m48_448',
 'cait_s24_224',
 'cait_s24_384',
 'cait_s36_384',
 'cait_xs24_384',
 'cait_xxs24_224',
 'cait_xxs24_384',
 'cait_xxs36_224',
 'cait_xxs36_384',
 'coat_lite_medium',
 'coat_lite_medium_384',
 'coat_lite_mini',
 'coat_lite_small',
 'coat_lite_tiny',
 'coat_mini',
 'coat_small',
 'coat_tiny',
 'coatnet_0_224',
 'coatnet_0_rw_224',
 'coatnet_1_224',
 'coatnet_1_rw_224',
 'coatnet_2_224',
 'coatnet_2_rw_224',
 'coatnet_3_224',
 'coatnet_3_rw_224',
 'coatnet_4_224',
 'coatnet_5_224',
 'coatnet_bn_0_rw_224',
 'coatnet_nano_cc_224',
 'coatnet_nano_rw_224',
 'coatnet_pico_rw_224',
 'coatnet_rmlp_0_rw_224',
 'coatnet_rmlp_1_rw2_224',
 'coatnet_rmlp_1_r

In [4]:
class ViTModel(L.LightningModule):
    """LightningModule wrapping a pretrained timm classifier.

    The backbone is created from the notebook-level ``architecture`` name.
    Training and validation use cross-entropy loss; the test loop collects
    predictions and logs a confusion matrix, weighted F1 and accuracy to W&B.

    Args:
        num_classes: Size of the classification head.
        learning_rate: Learning rate passed to Adam (not passed to Prodigy).
        use_prodigy: If truthy, optimise with Prodigy; otherwise with Adam.
    """

    def __init__(
        self,
        num_classes,
        learning_rate=learning_rate,
        use_prodigy=use_prodigy,
    ):
        super().__init__()
        self.model = timm.create_model(
            architecture, pretrained=True, num_classes=num_classes
        )  # Get model architecture
        self.save_hyperparameters()
        # Keep the constructor arguments on the instance so that
        # configure_optimizers honours them instead of the notebook globals.
        self.learning_rate = learning_rate
        self.use_prodigy = use_prodigy
        # Per-batch test outputs, accumulated until on_test_epoch_end.
        self.predictions = []
        self.labels = []
        self.num_classes = num_classes

    def forward(self, x):
        """Return the backbone's logits for a batch ``x``."""
        return self.model(x)

    def configure_optimizers(self):
        """Build the optimizer selected by this instance's constructor arguments."""
        if self.use_prodigy:
            return Prodigy(self.model.parameters())
        return torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one training batch."""
        x, y = batch
        y_hat = self.model(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("train_loss", loss, on_epoch=True)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Log cross-entropy loss and accuracy for one validation batch."""
        x, y = val_batch
        y_hat = self.model(x)
        loss = F.cross_entropy(y_hat, y)
        preds = torch.argmax(y_hat, dim=1)
        val_accuracy = (preds == y).float().mean()

        self.log_dict(
            {"val_loss": loss, "val_accuracy": val_accuracy},
            on_step=True,
            on_epoch=True,
            prog_bar=True,
        )

    def test_step(self, batch, batch_idx):
        """Store predictions/labels for the epoch-end report and log the loss."""
        x, y = batch
        logits = self(x)
        preds = torch.argmax(logits, dim=1)
        # Store preds and labels for later use in on_test_epoch_end
        self.predictions.append(preds)
        self.labels.append(y)
        loss = F.cross_entropy(logits, y)
        self.log('test_loss', loss)
        return {'loss': loss}

    def on_test_epoch_end(self):
        """Log confusion matrix, weighted F1 and accuracy for the whole test set."""
        # torch.cat raises on an empty list, so skip the report when no test
        # batch was seen.
        if not self.predictions:
            return

        # Concatenate everything collected in test_step and move to CPU numpy
        # arrays for sklearn / wandb.
        preds = torch.cat(self.predictions, dim=0).cpu().numpy()
        labels = torch.cat(self.labels, dim=0).cpu().numpy()
        # Reset so a second trainer.test() call does not see stale entries.
        self.predictions = []
        self.labels = []

        # Derive the label names from this model's own head size so they always
        # match num_classes ('0', '1', ...), independent of notebook globals.
        label_names = [str(index) for index in range(self.num_classes)]
        wandb.log({
            "confusion_matrix": wandb.plot.confusion_matrix(
                probs=None, y_true=labels, preds=preds, class_names=label_names
            )
        })

        # Calculate and log overall F1-score and accuracy to wandb
        f1 = f1_score(labels, preds, average='weighted')
        acc = accuracy_score(labels, preds)
        wandb.log({"overall_f1_score": f1, "overall_accuracy": acc})

# Notebook-level model instance: 5 output classes when class 4 is kept, else 4.
vit = ViTModel(num_classes=5 if use_class4 else 4, use_prodigy=use_prodigy)

In [5]:
class CustomDataset(Dataset):
    """Image dataset backed by a DataFrame of file names and labels.

    Columns are read by position: column 0 is the image file name and
    column 2 is the label.

    Args:
        df: DataFrame with one row per image.
        transform: Optional callable applied to the loaded RGB PIL image.
        img_dir: Directory the file names are relative to (default ``'img'``).
    """

    def __init__(self, df, transform=None, img_dir='img'):
        self.df = df
        self.transform = transform
        self.img_dir = img_dir

    def __len__(self):
        """Number of rows (images) in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; label is a long tensor."""
        img_name = self.df.iloc[idx, 0]
        img_path = f'{self.img_dir}/{img_name}'
        # Open inside a context manager so the file handle is closed in every
        # case (decode errors, multi-frame formats); convert() returns a new
        # image that stays usable after the source file is closed.
        with Image.open(img_path) as source_image:
            image = source_image.convert('RGB')
        label = torch.tensor(self.df.iloc[idx, 2], dtype=torch.long)
        if self.transform is not None:
            image = self.transform(image)
        return image, label

    def get_labels(self):
        """Return all labels (column 2) as one long tensor.

        Passed to ImbalancedDatasetSampler via the dataset in this notebook.
        """
        return torch.tensor(self.df.iloc[:, 2].tolist(), dtype=torch.long)

In [6]:
def init_dataloaders():
    """Build the train, validation and test DataLoaders.

    Reads ``img_labels_ALL.csv``, optionally drops class 4, keeps only rows
    whose image file exists under ``img/``, and makes two stratified 80/20
    splits (train+val vs. test, then train vs. val). Uses the notebook globals
    ``use_class4``, ``batch_size`` and ``vit`` (for the timm transform config).

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    label_frame = pd.read_csv('img_labels_ALL.csv')

    # Class 4 marks images labelled as bad examples; drop it unless requested.
    if not use_class4:
        label_frame = label_frame[label_frame['score'] != 4.0]

    # Keep only rows whose image file is actually present on disk.
    image_folder = 'img'
    on_disk = label_frame['img'].apply(
        lambda file_name: os.path.isfile(os.path.join(image_folder, file_name))
    )
    present_frame = label_frame[on_disk]
    print(f"Original DataFrame size: {len(label_frame)}, Filtered DataFrame size: {len(present_frame)}")

    # Stratified splits so every subset keeps the class proportions.
    train_val_df, test_df = train_test_split(
        present_frame,
        test_size=0.2,
        random_state=42,
        stratify=present_frame['score'].values,
    )
    train_df, val_df = train_test_split(
        train_val_df,
        test_size=0.2,
        random_state=42,
        stratify=train_val_df['score'].values,
    )

    # Preprocessing derived from the pretrained model's own data config.
    data_config = timm.data.resolve_data_config(vit.model.pretrained_cfg)
    transform = timm.data.create_transform(**data_config)

    test_data = CustomDataset(test_df, transform)
    train_data = CustomDataset(train_df, transform)
    val_data = CustomDataset(val_df, transform)

    # More workers for GPU/lambda; otherwise DataLoader's default of 0.
    worker_count = 13 if torch.cuda.is_available() else 0

    test_loader = DataLoader(test_data, batch_size=batch_size, num_workers=worker_count)
    train_loader = DataLoader(
        train_data,
        batch_size=batch_size,
        sampler=ImbalancedDatasetSampler(train_data),
        num_workers=worker_count,
    )
    val_loader = DataLoader(val_data, batch_size=batch_size, num_workers=worker_count)
    return train_loader, val_loader, test_loader

# Build the notebook-level loaders once. init_dataloaders reads the global
# `vit` for its transform config, so the model cell must have run first.
train_loader, val_loader, test_loader = init_dataloaders()


Original DataFrame size: 1738, Filtered DataFrame size: 1735


In [7]:
# from lightning.pytorch.loggers import WandbLogger
# from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint

# wandb_logger = WandbLogger(project="BA1", name=run_name)

# # Assuming you have a metric to monitor, e.g., 'val_loss'
# early_stopping = EarlyStopping(monitor='val_loss', patience=3)

# # Assuming you have a run_name variable defined somewhere in your code
# model_checkpoint = ModelCheckpoint(
#     monitor='val_loss',
#     dirpath='./models',
#     filename=f'{run_name}_epoch-{{epoch}}-{{val_loss:.2f}}_early_stopped',
#     save_top_k=1,
#     mode='min',
#     every_n_epochs=1
# )

# trainer = L.Trainer(
#     max_epochs=epochs,
#     log_every_n_steps=1,
#     logger=wandb_logger,
#     callbacks=[early_stopping, model_checkpoint],  # Add the early stopping callback here
# )
# #default_root_dir="./models", #if starting from prev checkpoint

In [8]:
# wandb_logger.watch(vit)
# # Dynamically update W&B configuration
# wandb.config.update({
#     "single_shot": single_shot,
#     "max_epochs": epochs,
#     "batch_size": batch_size,  # Assuming this is how you access batch size
#     "learning_rate": learning_rate,  # Dynamically get the learning rate from optimizer
#     "optimizer": optimizer,  # Dynamically get the optimizer class name
#     "model_architecture": architecture,
#     'use_class4': use_class4,
#     # Include any other dynamic parameters here
#     })

In [None]:
import wandb
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks import LearningRateMonitor
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks.model_checkpoint import ModelCheckpoint

def train(sweep_config=None):
    """Train and test one model; usable directly or as the ``wandb.agent`` target.

    Args:
        sweep_config: Optional sweep definition dict. When given, the first
            entry of each parameter's ``values`` list is used, as before.
            When omitted (``wandb.agent`` calls its function without
            arguments), the values are read from the W&B run's config,
            falling back to the notebook-level ``epochs`` / ``use_class4``.

    Note:
        Batch size and architecture are not read here: ``init_dataloaders``
        and ``ViTModel`` take them from the notebook-level globals.
    """
    # One logger and one Trainer per call, so repeated agent invocations never
    # share an already-finished run or an already-fitted Trainer.
    run_logger = WandbLogger(project='BA1', group='week3-mps-class4-sweep1')
    # Accessing .experiment starts (or attaches to) the W&B run for this call.
    run_config = run_logger.experiment.config

    def setting(key, fallback):
        # Explicit dict (legacy call style) wins; otherwise use the run config.
        if sweep_config is not None:
            return sweep_config['parameters'][key]['values'][0]
        return run_config.get(key, fallback)

    max_epochs = setting('epochs', epochs)
    with_class4 = setting('use_class4', use_class4)

    # Five classes only when class 4 is kept (this condition was inverted), and
    # a plain boolean for use_prodigy (a trailing comma used to make it a tuple).
    model = ViTModel(num_classes=5 if with_class4 else 4, use_prodigy=True)

    run_trainer = Trainer(
        max_epochs=max_epochs,
        logger=run_logger,
        callbacks=[
            LearningRateMonitor(),
            EarlyStopping(monitor='val_loss', patience=3),
            ModelCheckpoint(monitor='val_loss', dirpath='./models', filename='{epoch}-{val_loss:.2f}', save_top_k=1, mode='min'),
        ],
    )

    train_loader, val_loader, test_loader = init_dataloaders()
    run_trainer.fit(model, train_loader, val_loader)
    run_trainer.test(model, test_loader)

# Sweep definition handed to wandb.sweep below: 'random' method with
# 'val_loss' as the metric to minimize. Only 'epochs' has more than one
# candidate value; the other parameters are single-valued.
sweep_config = {
    'name': 'week3-mps-class4-sweep1',
    'method': 'random',
    'metric': {
        'name': 'val_loss',
        'goal': 'minimize'
    },
    'parameters': {
        'epochs': {
            'values': [5, 6, 7]
        },
        'batch_size': {
            'values': [64]
        },
        'architecture': {
            'values': ["vit_base_patch16_224"]
        },
        'use_class4': {
            'values': [False]
        },
    }
}

# Register the sweep in project "BA1" and keep its id for the agent call.
sweep_id = wandb.sweep(sweep_config, project="BA1")

# Logger uses the same project; the group string matches the sweep name above.
wandb_logger= WandbLogger(project='BA1', group='week3-mps-class4-sweep1')

# Module-level Trainer: LR monitoring, early stopping on val_loss (patience 3)
# and a single best-val_loss checkpoint under ./models.
# NOTE(review): no max_epochs is passed to this Trainer, so the swept 'epochs'
# value is not applied by it -- confirm whether that is intended.
trainer = Trainer(
    logger=wandb_logger,
    callbacks=[
        LearningRateMonitor(), 
        EarlyStopping(monitor='val_loss', patience=3),
        ModelCheckpoint(monitor='val_loss', dirpath='./models', filename='{epoch}-{val_loss:.2f}', save_top_k=1, mode='min')
    ]
)



# No run count is given to the agent.
# NOTE(review): with the 'random' method this presumably keeps launching runs
# until stopped manually -- confirm, or pass an explicit count.
wandb.agent(sweep_id, train)  # This will run the sweep

In [10]:
# try:
#     trainer.fit(vit, train_loader, val_loader)
# except KeyboardInterrupt:
#     print("Training interrupted by user. Saving model...")
#     # Perform any necessary cleanup or save operations here
#     trainer.save_checkpoint("interrupted_model.ckpt")
#     print("Model saved successfully.")



# # automatically restores model, epoch, step, LR schedulers, etc...
# #trainer.fit(vit, train_loader, val_loader, ckpt_path="BA1/n9o5487v/checkpoints/epoch=0-step=26.ckpt")


## parallel evaluations of test set

In [11]:
# report = trainer.test(vit, test_loader)
# # print(report)


In [12]:
# import datetime

# # Get the current date and time
# now = datetime.datetime.now()

# # Format the date and time as a string
# timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")

# # Include the timestamp in the filename
# torch.save(vit.state_dict(), f"models/{architecture}_{timestamp}_{epochs}.pt")

In [13]:
# pip freeze > pip_requirements.txt

# pip freeze | grep -v ' @ ' > pip_requirements.txt
