In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from fungiclef.model.dataset import FungiDataset
from fungiclef.model.wrapper import FungiModel
from fungiclef.model.transforms import get_transforms
from fungiclef.model.vit_model import classifier_model
import pandas as pd

from torch.utils.data import DataLoader
import lightning as L

In [3]:
TRAIN_PQ_LOCATION = (
    "../part-00000-592f41dc-03ed-4b8e-80d5-182bf83d55ec-c000.snappy.parquet"
)
VALID_PQ_LOCATION = (
    "../part-00000-c05be10c-f201-43bd-a795-e3e92bba7f3f-c000.snappy.parquet"
)

In [4]:
# Fetch data parquet
train_df = pd.read_parquet(TRAIN_PQ_LOCATION)
valid_df = pd.read_parquet(VALID_PQ_LOCATION)

In [5]:
# Load it as torch dataset
train_dataset = FungiDataset(train_df, transform=get_transforms(data="train"))
valid_dataset = FungiDataset(valid_df, transform=get_transforms(data="valid"))

In [6]:
# Define model. Here we are using a pretrained vision transformer similar to the training the other guys did
PRETRAINED_PATH = "../pretrained_ViT_1605_classes.pth"
N_CLASSES = 1605  # 1604 classes + 1 unknown class

model = classifier_model(1605, pretrained_path=PRETRAINED_PATH)

In [7]:
# Load it to dataloader
BATCH_SIZE = 32
# Adjust BATCH_SIZE and ACCUMULATION_STEPS to values that if multiplied results in 64
ACCUMULATION_STEPS = 64 // BATCH_SIZE
EPOCHS = 10
WORKERS = 4

train_loader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1
)
valid_loader = DataLoader(
    valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1
)

In [8]:
# Use our wrapper module to get a PyTorch Lightning trainer

module = FungiModel(model)
trainer = L.Trainer()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/chris/miniconda3/envs/fungiclef/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default


In [10]:
trainer.fit(module, train_loader, valid_loader)

/home/chris/miniconda3/envs/fungiclef/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:653: Checkpoint directory /home/chris/fungiclef-2024/notebooks/lightning_logs/version_3/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type              | Params
--------------------------------------------
0 | model | VisionTransformer | 304 M 
--------------------------------------------
304 M     Trainable params
0         Non-trainable params
304 M     Total params
1,219.787 Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [17]:
# Evaluate model on test set
from fungiclef.evaluate import DEFUNCT_predict, scoring

TEST_PQ_LOCATION = (
    "../part-00000-c05be10c-f201-43bd-a795-e3e92bba7f3f-c000.snappy.parquet"
)
test_df = pd.read_parquet(TEST_PQ_LOCATION)
test_dataset = FungiDataset(test_df, transform=get_transforms(data="valid"))
test_loader = DataLoader(
    valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1
)

In [39]:
from fungiclef.evaluate.inference import generate_logits, predict_class
from fungiclef.evaluate.scoring import score_model

logits = generate_logits(model, test_loader)
predicted_class = predict_class(logits)


gt_df = test_df[["observationID", "class_id"]]
score_model(predicted_class, gt_df)

Using device: cuda


100%|██████████| 29/29 [00:03<00:00,  8.04it/s]


In [50]:
# We can also use embeddings with a linear layer.
# Assuming we have an embedding size of 384

# TODO: Need to load embeddings instead of images for train / validation stuff. Will get amongst it

from fungiclef.model.embedding_classifier import get_linear_classifier

model = get_linear_classifier(n_classes=N_CLASSES, embedding_size=384)
module = FungiModel(model)
trainer.fit(module, train_loader, valid_loader)

/home/chris/miniconda3/envs/fungiclef/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:653: Checkpoint directory /home/chris/fungiclef-2024/notebooks/lightning_logs/version_3/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type   | Params
---------------------------------
0 | model | Linear | 617 K 
---------------------------------
617 K     Trainable params
0         Non-trainable params
617 K     Total params
2.472     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/chris/miniconda3/envs/fungiclef/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


RuntimeError: mat1 and mat2 shapes cannot be multiplied (21504x224 and 384x1605)

In [1]:
import torch

torch.cuda.empty_cache()

: 