In [None]:
# Turn off Jedi-based tab completion in the IPython completer.
%config Completer.use_jedi = False
# Load the autoreload extension; mode 2 re-imports modules before each cell runs,
# so edits to project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Run Submission

In [None]:
from typing import List, Tuple

import hydra
import lightning as pl
from omegaconf import DictConfig
from lightning import Callback, LightningDataModule, LightningModule, Trainer
from lightning.pytorch.loggers import Logger

from terralearn import utils

import torch
import hydra
import omegaconf
import pyrootutils
import pandas as pd
from tqdm import tqdm
import numpy as np
import pickle
import timm
import matplotlib.pyplot as plt

# Names of the six target columns, each carrying a "_mean" suffix.
trait_columns = ["X4_mean", "X11_mean", "X18_mean", "X50_mean", "X26_mean", "X3112_mean"]

# The same names with the "_mean" suffix stripped; used as the submission columns.
sub_cols = [column.replace("_mean", "") for column in trait_columns]
from sklearn.metrics import mean_squared_error, r2_score
from torchmetrics.regression import R2Score
from fgvc.models.plant_traits_model import *
from torchmetrics.functional import r2_score
from glob import glob

### Setup

In [None]:
# Run on the first GPU when one is visible; otherwise fall back to CPU so the
# notebook still executes end to end (just slower).
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Set up the datamodule from its Hydra config, overriding the batch size for inference.
cfg = omegaconf.OmegaConf.load("/home/ubuntu/FGVC11/configs/data/plant_traits_data.yaml")
cfg.batch_size = 128
datamodule: LightningDataModule = hydra.utils.instantiate(cfg)
datamodule.setup()

# Set up the model from its Hydra config with every prediction head enabled.
cfg = omegaconf.OmegaConf.load("/home/ubuntu/FGVC11/configs/model/plant_traits_model.yaml")
cfg.reg_traits = True
cfg.clf_traits = True
cfg.bld_traits = True
cfg.soft_clf_traits = True
cfg.model.body = "vitl"
model = hydra.utils.instantiate(cfg)

# Load the checkpoint ("/path/to/ckpt" is a placeholder to replace per experiment).
# `load_from_checkpoint` is a classmethod that returns a NEW module; recent
# Lightning releases raise a TypeError when it is called on an instance, so it
# is invoked on the class of the module instantiated above.
# NOTE(review): the returned module is presumably rebuilt from the
# hyperparameters stored in the checkpoint, so confirm the cfg overrides above
# match what the checkpoint was trained with.
model = type(model).load_from_checkpoint("/path/to/ckpt", map_location=device)

# Inference only: switch to eval mode and move the weights to the target device.
model = model.eval().to(device)

### Inference on test data

In [None]:
# Read the test-set table and append a `path` column with each image's file
# location, built from the row's `id`.
df_test = pd.read_csv('/home/ubuntu/FGVC11/data/PlantTrait/test.csv').assign(
    path=lambda frame: (
        '/home/ubuntu/FGVC11/data/PlantTrait/test_images/'
        + frame['id'].astype(str)
        + '.jpeg'
    )
)

In [None]:
# Per-head prediction buffers; each collects one CPU tensor per test batch.
reg_pred = []
clf_pred = []
clf_soft_pred = []
bld_pred = []

# NOTE(review): the submission cell pairs these predictions with
# df_test['id'] by position, so the test dataloader is assumed to yield samples
# in test.csv order without shuffling -- confirm against the datamodule.
for batch in tqdm(datamodule.test_dataloader()):
    # Unpack the batch and move both inputs to the inference device.
    x, x_ = batch["image"].to(device), batch["metadata"].to(device)

    # Make predictions without tracking gradients.
    with torch.no_grad():
        pred_enc, specie_logits = model.model.forward_alt(x, x_)

        if model.reg_traits:
            assert not torch.isnan(pred_enc).any()
            # Map the encoded regression output back to raw trait values.
            pred = model.model.le.inverse_transform(pred_enc.clone().detach())
            reg_pred.append(pred.cpu())

        if model.clf_traits:
            # Hard assignment: look up the traits of the arg-max species.
            pred_specie = torch.argmax(specie_logits, dim=1)
            pred_specie_traits = model.specie_traits[pred_specie]
            clf_pred.append(pred_specie_traits.cpu())

        if model.soft_clf_traits:
            # Soft assignment: probability-weighted mix of the per-species traits.
            # torch.softmax is used so this cell does not rely on `F` leaking in
            # through a wildcard import.
            specie_probs = torch.softmax(specie_logits, dim=1)
            pred_specie_traits_soft = torch.matmul(
                specie_probs,
                model.specie_traits
                )
            clf_soft_pred.append(pred_specie_traits_soft.cpu())

        if model.bld_traits:
            assert (
                sum([model.reg_traits, model.clf_traits, model.soft_clf_traits]) > 1
            ), "At least two heads should be active to blend traits"
            # (weight, traits) for the active heads only, in evaluation order.
            heads = []
            if model.reg_traits:
                heads.append((model.reg_weight, pred))
            if model.clf_traits:
                heads.append((model.clf_weight, pred_specie_traits))
            if model.soft_clf_traits:
                heads.append((model.soft_clf_weight, pred_specie_traits_soft))

            # Weighted mean of the head outputs. The accumulator takes its
            # shape/dtype from the last active head's output.
            bld_traits = torch.zeros_like(heads[-1][1])
            denominator = torch.zeros_like(model.dummy_weights)
            for weight, traits in heads:
                bld_traits += weight * traits
                denominator += weight
            bld_traits = bld_traits / denominator
            bld_pred.append(bld_traits.cpu())

# Concatenate predictions from all batches into one array per active head.
if model.reg_traits:
    reg_pred = torch.concat(reg_pred, dim=0).numpy()
if model.clf_traits:
    clf_pred = torch.concat(clf_pred, dim=0).numpy()
if model.soft_clf_traits:
    clf_soft_pred = torch.concat(clf_soft_pred, dim=0).numpy()
if model.bld_traits:
    bld_pred = torch.concat(bld_pred, dim=0).numpy()

In [None]:
def _make_submission_df(ids, preds):
    """Build a submission frame: an `id` column followed by one column per trait.

    Args:
        ids: array-like of test ids, one per prediction row.
        preds: 2-D array whose k-th column holds the prediction for sub_cols[k].

    Returns:
        pd.DataFrame with columns ['id', *sub_cols].
    """
    return pd.DataFrame(
        {'id': ids, **{col: preds[:, k] for k, col in enumerate(sub_cols)}}
    )


# Create one submission frame per active head.
test_ids = df_test['id'].values
if model.reg_traits:
    reg_df = _make_submission_df(test_ids, reg_pred)
if model.clf_traits:
    clf_df = _make_submission_df(test_ids, clf_pred)
if model.soft_clf_traits:
    clf_soft_df = _make_submission_df(test_ids, clf_soft_pred)
if model.bld_traits:
    bld_df = _make_submission_df(test_ids, bld_pred)

# Sanity check: compare each head against the best submission so far (avg.csv).
# Every column after the first one of the reference file is compared.
sub = pd.read_csv('avg.csv')
ref_cols = sub.columns[1:]
reference = torch.tensor(sub[ref_cols].values)

# NOTE(review): `r2_score` here is the torchmetrics.functional import, which
# shadows the earlier sklearn import of the same name. Its positional order is
# (preds, target), so the reference submission is currently passed as `preds`
# -- confirm this is the intended direction, since R2 is not symmetric.
if model.reg_traits:
    print(r2_score(reference, torch.tensor(reg_df[ref_cols].values)))
if model.clf_traits:
    print(r2_score(reference, torch.tensor(clf_df[ref_cols].values)))
if model.soft_clf_traits:
    print(r2_score(reference, torch.tensor(clf_soft_df[ref_cols].values)))
if model.bld_traits:
    print(r2_score(reference, torch.tensor(bld_df[ref_cols].values)))

In [None]:
# Write one CSV per active head, each named after the experiment.
m_name = "experiment name"

# Gather (frame, destination) pairs for the enabled heads, then write them in order.
pending_writes = []
if model.reg_traits:
    pending_writes.append((reg_df, f'{m_name}_reg.csv'))
if model.clf_traits:
    pending_writes.append((clf_df, f'{m_name}_clf.csv'))
if model.soft_clf_traits:
    pending_writes.append((clf_soft_df, f'{m_name}_clf_soft.csv'))
if model.bld_traits:
    pending_writes.append((bld_df, f'{m_name}_bld.csv'))

for frame, csv_path in pending_writes:
    frame.to_csv(csv_path, index=False)

In [None]:
# Submit a single-model prediction file to the planttraits2024 competition.
# NOTE(review): no cell visible in this notebook writes `sub.csv` (the save cell
# writes `{m_name}_reg.csv`, `{m_name}_clf.csv`, ...), so point -f at the file
# you actually want to submit.
!kaggle competitions submit -c planttraits2024 -f sub.csv -m "sub"

### Ensemble

In [None]:
# (submission csv, ensemble weight) pairs that make up the weighted average.
# NOTE(review): "v5_vitb_alldata_clf_soft.csv" is listed twice below, which
# gives it an effective weight of 18 -- confirm this is intentional and not a
# copy-paste slip.
df_names = [
    ("v2_4_reg.csv", 1),
    ("v2_4_clf.csv", 1),
    ("v2_4_4_reg.csv", 1),
    ("v2_4_4_clf.csv", 1),
    ("v2_1_2_reg.csv", 1),
    ("v2_1_2_clf.csv", 1),
    ("v2_1_3_reg.csv", 1),
    ("v2_1_3_clf.csv", 1),
    ("v4_all_v2_all_data_clf_soft.csv", 5),
    ("v4_all_v2_vitl_all_data_clf_soft.csv", 7),
    ("v5_vitb_clf_soft.csv", 7),
    ("v5_vitl_clf_soft.csv", 9),
    ("v5_vitb_alldata_clf_soft.csv", 9),
    ("v5_vitb_alldata_clf_soft.csv", 9),
    ("v5_vitl_alldata_clf_soft.csv", 11),
    ("v5_vitl_alldata_6blk_clf_soft.csv", 11),
    ("v6_vitl_alldata_8blk_clf_soft.csv", 13),
]

# Load the csvs and take the weighted average of the predictions for the
# sub_cols columns. Rows are matched on `id` rather than on row position, so a
# file with a different row order is re-aligned and a file covering different
# ids fails loudly instead of silently averaging unrelated rows.
out = None
reference_ids = None
weight_sum = 0
for filename, weight in df_names:
    df = pd.read_csv(filename).set_index("id")[sub_cols]
    if not df.index.is_unique:
        raise ValueError(f"{filename} contains duplicate ids")
    if reference_ids is None:
        # The first file fixes the id set and the output row order.
        reference_ids = df.index
    elif not df.index.equals(reference_ids):
        same_ids = len(df) == len(reference_ids) and df.index.isin(reference_ids).all()
        if not same_ids:
            raise ValueError(
                f"{filename} does not cover the same ids as {df_names[0][0]}"
            )
        df = df.loc[reference_ids]
    out = df * weight if out is None else out + df * weight
    weight_sum += weight

if out is None:
    raise ValueError("df_names is empty; nothing to ensemble")

out = out / weight_sum
# Final frame: `id` column followed by the averaged sub_cols columns.
df_avg = out.reset_index()

In [None]:
# Persist the ensembled predictions without the index column.
# Note: this overwrites the same avg.csv that the sanity-check cell reads as its
# reference submission.
df_avg.to_csv("avg.csv", index=False)

In [None]:
# Submit the ensembled avg.csv to the planttraits2024 competition via the Kaggle CLI.
!kaggle competitions submit -c planttraits2024 -f avg.csv -m "avg"