In [1]:
import pandas as pd  # circular import?
import torch
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import timm
from sklearn.model_selection import train_test_split
from prodigyopt import Prodigy
from torchsampler import ImbalancedDatasetSampler
import lightning as L
from torchmetrics import F1Score
import wandb
from sklearn.metrics import classification_report, f1_score, accuracy_score
import torch.nn.functional as F
import os

print(torch.__version__)

2.2.0


In [2]:
# PARAMS
# Central run configuration: every value a reader might want to tune lives here.
single_shot = False
# Only the Adam branch of the model reads this value; Prodigy is built without an
# explicit lr. It must stay numeric in both cases because it is handed to Adam.
learning_rate = 0.001
use_prodigy = True
epochs = 15  # Max_limit
batch_size = 32
architecture = "vit_base_patch16_224"
os.environ['WANDB_NOTEBOOK_NAME'] = 'week2-lightning.ipynb'
use_class4 = False

run_name = f'epochs-{epochs}_bs-{batch_size}_class4-{use_class4}_{architecture}'

# Human-readable optimizer label (used for experiment logging).
# Note: the Adam branch used to overwrite learning_rate with the string "Prodigy",
# which made Adam receive a non-numeric lr; that assignment has been removed.
optimizer = "Prodigy" if use_prodigy else "Adam"




In [3]:
class ViTModel(L.LightningModule):
    """Lightning wrapper around a pretrained timm classifier.

    Args:
        num_classes: Number of target classes; sizes the classifier head and
            the macro-averaged F1 metric.
        learning_rate: Learning rate passed to Adam. Not used when
            ``use_prodigy`` is true (Prodigy is built without an explicit lr).
        use_prodigy: If true the model is optimised with Prodigy, otherwise
            with Adam.
    """

    def __init__(
        self,
        num_classes,
        learning_rate=learning_rate,
        use_prodigy=use_prodigy,
    ):
        super().__init__()
        self.model = timm.create_model(
            architecture, pretrained=True, num_classes=num_classes
        )  # Get model architecture
        self.f1 = F1Score(task="multiclass", num_classes=num_classes, average="macro")
        # Stores num_classes / learning_rate / use_prodigy in self.hparams so that
        # configure_optimizers uses what was passed in, not the notebook globals.
        self.save_hyperparameters()
        # Per-batch test predictions / targets, consumed in on_test_epoch_end.
        self.predictions = []
        self.labels = []
        self.num_classes = num_classes

    def forward(self, x):
        """Return the raw class logits for a batch of images."""
        return self.model(x)

    def configure_optimizers(self):
        """Build the optimizer selected by the constructor arguments."""
        if self.hparams.use_prodigy:
            return Prodigy(self.model.parameters())
        return torch.optim.Adam(
            self.model.parameters(), lr=self.hparams.learning_rate
        )

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one training batch."""
        x, y = batch
        y_hat = self.model(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("train_loss", loss, on_epoch=True)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Log loss, accuracy and macro-F1 for one validation batch."""
        x, y = val_batch
        y_hat = self.model(x)
        loss = F.cross_entropy(y_hat, y)

        preds = torch.argmax(y_hat, dim=1)
        val_accuracy = (preds == y).float().mean()

        self.log_dict(
            {"val_loss": loss, "val_accuracy": val_accuracy},
            on_step=True,
            on_epoch=True,
            prog_bar=True,
        )

        # Update the metric state, then log the metric *object*: the epoch-level
        # value is then computed from the accumulated state instead of being the
        # mean of per-batch macro-F1 scores (which is not the epoch macro-F1).
        self.f1(y_hat, y)
        self.log("val_f1", self.f1, on_step=True, on_epoch=True, prog_bar=True)

    def test_step(self, batch, batch_idx):
        """Collect predictions/targets for the epoch-end report and log the loss."""
        x, y = batch
        logits = self(x)
        preds = torch.argmax(logits, dim=1)
        # Store preds and labels for later use in on_test_epoch_end
        self.predictions.append(preds)
        self.labels.append(y)
        loss = F.cross_entropy(logits, y)
        self.log('test_loss', loss)
        return {'loss': loss}

    def on_test_epoch_end(self):
        """Report test metrics over everything collected by ``test_step``.

        Prints a per-class classification report, logs accuracy and macro-F1,
        and sends a confusion matrix to the active wandb run (if there is one).
        """
        if not self.predictions:
            # No test batches were seen; torch.cat would raise on an empty list.
            return

        # Concatenate all batches and convert to CPU numpy arrays for sklearn
        preds = torch.cat(self.predictions, dim=0).cpu().numpy()
        labels = torch.cat(self.labels, dim=0).cpu().numpy()
        # Reset the buffers to avoid duplicate entries on multiple test runs
        self.predictions = []
        self.labels = []

        class_ids = list(range(self.num_classes))
        report = classification_report(
            labels,
            preds,
            labels=class_ids,
            target_names=[f"class_{i}" for i in class_ids],
            output_dict=True,
            zero_division=0,
        )
        print(report)

        # Log accuracy and f1 through the Lightning logger
        self.log_dict(
            {
                "test_accuracy": float(accuracy_score(labels, preds)),
                "test_f1": float(
                    f1_score(labels, preds, average="macro", zero_division=0)
                ),
            }
        )

        # Log the confusion matrix to wandb (skipped when no run is active)
        if wandb.run is not None:
            wandb.log({"confusion_matrix": wandb.plot.confusion_matrix(probs=None, y_true=labels, preds=preds)})


# Five output classes when class 4 is kept, four otherwise.
vit = ViTModel(num_classes=5 if use_class4 else 4, use_prodigy=use_prodigy)

In [4]:
class CustomDataset(Dataset):
    """Image-classification dataset backed by a DataFrame.

    Rows are read positionally: column 0 holds the image file name and
    column 2 holds the class label.

    Args:
        df: DataFrame describing the samples (see column layout above).
        transform: Optional callable applied to the loaded RGB PIL image.
        img_dir: Directory the image file names are relative to.
            Defaults to ``'img'``.
    """

    def __init__(self, df, transform=None, img_dir='img'):
        self.df = df
        self.transform = transform
        self.img_dir = img_dir

    def __len__(self):
        """Return the number of samples (rows in the DataFrame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        img_name = self.df.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, str(img_name))
        # Image.open is lazy and keeps the file open; convert() forces the pixel
        # data to load, and the context manager then closes the file handle.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = torch.tensor(self.df.iloc[idx, 2], dtype=torch.long)
        if self.transform is not None:
            image = self.transform(image)
        return image, label

    def get_labels(self):
        """Return every label as one ``torch.long`` tensor (used by the sampler)."""
        return torch.tensor(self.df.iloc[:, 2].tolist(), dtype=torch.long)

In [5]:
# DATA LABELS
df = pd.read_csv('img_labels_ALL.csv')

# Class 4 marks images labelled as bad examples; drop those rows unless requested.
if not use_class4:
    df = df[df['score'] != 4.0]

# Keep only the rows whose image file is actually present in the image directory.
image_folder = 'img'
image_exists = df['img'].map(
    lambda name: os.path.isfile(os.path.join(image_folder, name))
)
filtered_df = df[image_exists]
print(f"Original DataFrame size: {len(df)}, Filtered DataFrame size: {len(filtered_df)}")  # Sanity check
df = filtered_df

# Stratified 80/20 split into train+val / test, then 80/20 again into train / val,
# so that every split keeps the class proportions.
labels = df['score'].values
train_df, test_df = train_test_split(
    df, test_size=0.2, random_state=42, stratify=labels
)
train_labels = train_df['score'].values
train_df, val_df = train_test_split(
    train_df, test_size=0.2, random_state=42, stratify=train_labels
)

# Image transform derived from the pretrained model's own data configuration.
data_config = timm.data.resolve_data_config(vit.model.pretrained_cfg)
transform = timm.data.create_transform(**data_config)

# One dataset per split, all sharing the same transform.
test_data = CustomDataset(test_df, transform)
train_data = CustomDataset(train_df, transform)
val_data = CustomDataset(val_df, transform)

# More workers for GPU/lambda; without CUDA the DataLoader default is used.
if torch.cuda.is_available():
    num_workers_local = 13
    loader_kwargs = {"batch_size": batch_size, "num_workers": num_workers_local}
else:
    loader_kwargs = {"batch_size": batch_size}

# Only the training loader rebalances classes through the imbalanced sampler.
test_loader = DataLoader(test_data, **loader_kwargs)
train_loader = DataLoader(
    train_data, sampler=ImbalancedDatasetSampler(train_data), **loader_kwargs
)
val_loader = DataLoader(val_data, **loader_kwargs)


Original DataFrame size: 1738, Filtered DataFrame size: 1735


In [6]:
# conda list --export > requirements.txt

In [8]:
from lightning.pytorch.loggers import WandbLogger

# Route Lightning's logging to the W&B project "BA1" under this run's name.
wandb_logger = WandbLogger(name=run_name, project="BA1")

# Train for at most `epochs` epochs and log on every step.
trainer = L.Trainer(
    logger=wandb_logger,
    log_every_n_steps=1,
    max_epochs=epochs,
)
# If starting from a previous checkpoint, also pass default_root_dir="./models".

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [9]:
# Have the W&B logger watch the model.
wandb_logger.watch(vit)

# Record the notebook-level settings of this run in the W&B config.
# All values are the plain variables defined in the PARAMS cell.
run_settings = {
    "single_shot": single_shot,
    "max_epochs": epochs,
    "batch_size": batch_size,
    "learning_rate": learning_rate,
    "optimizer": optimizer,  # optimizer label string, not the optimizer object
    "model_architecture": architecture,
    'use_class4': use_class4,
}
wandb.config.update(run_settings)

[34m[1mwandb[0m: Currently logged in as: [33malexandermittet[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


In [10]:
# Train on the training loader, validating on the validation loader.
trainer.fit(model=vit, train_dataloaders=train_loader, val_dataloaders=val_loader)

# To resume instead (restores model, epoch, step, LR schedulers, etc.), pass ckpt_path:
#trainer.fit(vit, train_loader, val_loader, ckpt_path="BA1/n9o5487v/checkpoints/epoch=0-step=26.ckpt")



  | Name  | Type              | Params
--------------------------------------------
0 | model | VisionTransformer | 85.8 M
1 | f1    | MulticlassF1Score | 0     
--------------------------------------------
85.8 M    Trainable params
0         Non-trainable params
85.8 M    Total params
343.207   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/alexandermittet/miniconda3/envs/BA/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/Users/alexandermittet/miniconda3/envs/BA/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

/Users/alexandermittet/miniconda3/envs/BA/lib/python3.11/site-packages/lightning/pytorch/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


## parallel evaluations of test set

In [11]:
# Run the test loop on the held-out split; `report` holds the logged test metrics.
report = trainer.test(model=vit, dataloaders=test_loader)
# print(report)


/Users/alexandermittet/miniconda3/envs/BA/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

{'class_0': {'precision': 0.9248554913294798, 'recall': 0.975609756097561, 'f1-score': 0.9495548961424333, 'support': 164.0}, 'class_1': {'precision': 0.8656716417910447, 'recall': 0.6444444444444445, 'f1-score': 0.7388535031847133, 'support': 90.0}, 'class_2': {'precision': 0.6705882352941176, 'recall': 0.8142857142857143, 'f1-score': 0.7354838709677419, 'support': 70.0}, 'class_3': {'precision': 0.6363636363636364, 'recall': 0.6086956521739131, 'f1-score': 0.6222222222222222, 'support': 23.0}, 'accuracy': 0.8328530259365994, 'macro avg': {'precision': 0.7743697511945695, 'recall': 0.7607588917504082, 'f1-score': 0.7615286231292777, 'support': 347.0}, 'weighted avg': {'precision': 0.8390901684327969, 'recall': 0.8328530259365994, 'f1-score': 0.8300253611897301, 'support': 347.0}}
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
───────────────────────────────────────────

In [12]:
import datetime

# Get the current date and time
now = datetime.datetime.now()

# Format the date and time as a string that is safe to use in a file name
timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")

# torch.save does not create missing directories, so make sure the target exists.
os.makedirs("models", exist_ok=True)

# Include the timestamp in the filename so that repeated runs with the same
# parameters do not overwrite each other's weights.
model_path = f"models/{run_name}_{timestamp}.pt"
torch.save(vit.state_dict(), model_path)

In [None]:
# pip freeze > pip_requirements.txt

# pip freeze | grep -v ' @ ' > pip_requirements.txt
