In [None]:
# Turn off Jedi-based tab completion in the IPython completer.
%config Completer.use_jedi = False
# Load the autoreload extension; mode 2 reloads all imported modules before each
# cell executes, so edits to local packages are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

## Steps
1. Join the Kaggle competition - https://www.kaggle.com/competitions/planttraits2024
2. Install the Kaggle CLI - https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md
3. Download the data - `kaggle competitions download -c planttraits2024`
4. Unzip the data
5. Install the FGVC repo - run `pip install -e .` and `pip install -r requirement.txt` in the desired env
6. Train the model

## Setup

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Root folder of the unzipped competition data. Every path in this cell is
# derived from it, so this is the only line to change on another machine.
DATA_DIR = '/home/ubuntu/FGVC11/data/PlantTrait'


def add_image_paths(split: str) -> pd.DataFrame:
    """Load one split's CSV, attach image paths, and save the result.

    Reads ``<DATA_DIR>/<split>.csv``, adds a ``path`` column pointing at
    ``<DATA_DIR>/<split>_images/<id>.jpeg`` for every row, and writes the
    augmented table to ``<DATA_DIR>/df_<split>.csv`` (without the index).

    Args:
        split: Name of the split, e.g. ``'train'`` or ``'test'``.

    Returns:
        The loaded DataFrame including the new ``path`` column.
    """
    df = pd.read_csv(f'{DATA_DIR}/{split}.csv')
    # `id` is cast to str so it can be concatenated into the file name.
    df['path'] = f'{DATA_DIR}/{split}_images/' + df['id'].astype(str) + '.jpeg'
    df.to_csv(f'{DATA_DIR}/df_{split}.csv', index=False)
    return df


df_train = add_image_paths('train')
df_test = add_image_paths('test')

## Submission

In [None]:
from typing import List, Tuple

import hydra
import lightning as pl
from omegaconf import DictConfig
from lightning import Callback, LightningDataModule, LightningModule, Trainer
from lightning.pytorch.loggers import Logger

from terralearn import utils

import torch
import hydra
import omegaconf
import pyrootutils
import pandas as pd
from tqdm import tqdm
import numpy as np

In [None]:
# Build the datamodule from its Hydra config and let it prepare its datasets.
cfg = omegaconf.OmegaConf.load("/home/ubuntu/FGVC11/configs/data/plant_traits_data.yaml")
datamodule: LightningDataModule = hydra.utils.instantiate(cfg)
datamodule.setup()

# Build the model from its Hydra config; at this point it only has its
# freshly-initialised weights.
cfg = omegaconf.OmegaConf.load("/home/ubuntu/FGVC11/configs/model/plant_traits_model.yaml")
model = hydra.utils.instantiate(cfg)

# `load_from_checkpoint` is a classmethod that returns a *new* module, so
# calling it on the instance and dropping the result never puts the trained
# weights into `model`. Load the checkpoint's weights into the instance we
# already built instead.
CKPT_PATH = "/home/ubuntu/FGVC11/logs/train/runs/2024-04-04_08-39-28/checkpoints/epoch_058.ckpt"
# weights_only=False because a Lightning checkpoint stores more than tensors.
# This unpickles arbitrary objects, so only use it on checkpoints you trust.
checkpoint = torch.load(CKPT_PATH, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["state_dict"])

# Fall back to CPU when no GPU is visible, and switch to eval mode so layers
# such as dropout / batch norm behave deterministically during inference.
# Written as an assignment so the cell does not print the whole module repr.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device).eval()

In [None]:
# Inference must run in eval mode; a freshly built module is in training mode
# by default, which keeps dropout active and updates batch-norm statistics.
model.eval()

# Run the model over the test set, collecting one NumPy array per batch.
batch_predictions = []
with torch.no_grad():
    for batch in tqdm(datamodule.test_dataloader()):
        images = batch["image"].to(device)
        # Call the module itself (not `.forward`) so registered hooks also run.
        predictions = model(images)
        batch_predictions.append(predictions.cpu().numpy())

# Stack the per-batch arrays into one (num_samples, num_outputs) array.
all_predictions = np.concatenate(batch_predictions, axis=0)

# Predictions are matched to ids purely by position, so the counts must agree.
# NOTE(review): this also assumes the test dataloader iterates in the same row
# order as `df_test` (no shuffling) - confirm against the datamodule.
test_ids = df_test['id'].values
assert len(all_predictions) == len(test_ids), (
    f"got {len(all_predictions)} predictions for {len(test_ids)} test ids"
)

# Submission column -> model output index. Column order is kept as X50 before X26.
# NOTE(review): X50 reads output 4 and X26 reads output 3, i.e. this presumes the
# model emits X26 before X50 - verify against the target order used in training.
OUTPUT_INDEX = {'X4': 0, 'X11': 1, 'X18': 2, 'X50': 4, 'X26': 3, 'X3112': 5}

submission_df = pd.DataFrame({
    'id': test_ids,
    **{trait: all_predictions[:, idx] for trait, idx in OUTPUT_INDEX.items()},
})
submission_df.to_csv('submission.csv', index=False)

In [None]:
# Upload submission.csv to the planttraits2024 competition with the Kaggle CLI;
# -m sets the description attached to the submission.
!kaggle competitions submit -c planttraits2024 -f submission.csv -m "Message"