In [1]:
%config Completer.use_jedi = False
%load_ext autoreload
%autoreload 2

## Steps
1. Join the kaggle contest - https://www.kaggle.com/competitions/planttraits2024
2. Install kaggle cli - https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md
3. Download the data - `kaggle competitions download -c planttraits2024`
4. Unzip the data
5. Install FGVC repo - `pip install -e .` and `pip install -r requirement.txt` in the desired env
5. Train the model

## Setup

In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from typing import List, Union, Callable

import fiftyone as fo
import fiftyone.core.fields as fof
import fiftyone.brain as fob
import fiftyone.zoo as foz
from fiftyone import ViewField as F
from fiftyone.core.labels import Attribute


In [3]:
df_train = pd.read_csv('/home/ubuntu/FGVC11/data/PlantTrait/train.csv')
df_train['path'] = '/home/ubuntu/FGVC11/data/PlantTrait/train_images/' + df_train['id'].astype(str) + '.jpeg'
df_train.to_csv('/home/ubuntu/FGVC11/data/PlantTrait/df_train.csv', index=False)

df_test = pd.read_csv('/home/ubuntu/FGVC11/data/PlantTrait/test.csv')
df_test['path'] = '/home/ubuntu/FGVC11/data/PlantTrait/test_images/' + df_test['id'].astype(str) + '.jpeg'
df_test.to_csv('/home/ubuntu/FGVC11/data/PlantTrait/df_test.csv', index=False)

In [4]:
label_col = ['X4_mean', 'X11_mean', 'X18_mean', 'X26_mean', 'X50_mean', 'X3112_mean']

In [5]:
if "plant_trait" in fo.list_datasets():
    dataset = fo.load_dataset("plant_trait")
    dataset.delete()
    
samples = []
# go through all the tiles
for _, row in tqdm(df_train.iterrows(), total=len(df_train)):
    
    # add images
    sample = fo.Sample(filepath=row['path'])
    for col in label_col:
        sample[col] = row[col]
    samples.append(sample)

# initialize the dataset (restart the notebook if you face and error) 
dataset = fo.Dataset("plant_trait")
dataset.add_samples(samples)

100%|██████████| 55489/55489 [00:07<00:00, 7386.43it/s]


 100% |█████████████| 55489/55489 [9.8s elapsed, 0s remaining, 5.5K samples/s]       


['6610b8604d16316b031ff067',
 '6610b8604d16316b031ff068',
 '6610b8604d16316b031ff069',
 '6610b8604d16316b031ff06a',
 '6610b8604d16316b031ff06b',
 '6610b8604d16316b031ff06c',
 '6610b8604d16316b031ff06d',
 '6610b8604d16316b031ff06e',
 '6610b8604d16316b031ff06f',
 '6610b8604d16316b031ff070',
 '6610b8604d16316b031ff071',
 '6610b8604d16316b031ff072',
 '6610b8604d16316b031ff073',
 '6610b8604d16316b031ff074',
 '6610b8604d16316b031ff075',
 '6610b8604d16316b031ff076',
 '6610b8604d16316b031ff077',
 '6610b8604d16316b031ff078',
 '6610b8604d16316b031ff079',
 '6610b8604d16316b031ff07a',
 '6610b8604d16316b031ff07b',
 '6610b8604d16316b031ff07c',
 '6610b8604d16316b031ff07d',
 '6610b8604d16316b031ff07e',
 '6610b8604d16316b031ff07f',
 '6610b8604d16316b031ff080',
 '6610b8604d16316b031ff081',
 '6610b8604d16316b031ff082',
 '6610b8604d16316b031ff083',
 '6610b8604d16316b031ff084',
 '6610b8604d16316b031ff085',
 '6610b8604d16316b031ff086',
 '6610b8604d16316b031ff087',
 '6610b8604d16316b031ff088',
 '6610b8604d16

In [6]:
# launch voxel51 on the desired port
session = fo.launch_app(dataset, port=5151)

In [7]:
# choose the model for generating the embeddings
model = foz.load_zoo_model("clip-vit-base32-torch")

emb = fob.compute_visualization(
    model=model,
    samples=dataset,
    num_dims=2,
    brain_key=f"emb",
    verbose=True,
    seed=51,
)

Computing embeddings...
 100% |█████████████| 55489/55489 [6.5m elapsed, 0s remaining, 141.0 samples/s]      
Generating visualization...


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


UMAP(random_state=51, verbose=True)
Sat Apr  6 02:57:29 2024 Construct fuzzy simplicial set
Sat Apr  6 02:57:29 2024 Finding Nearest Neighbors
Sat Apr  6 02:57:29 2024 Building RP forest with 17 trees
Sat Apr  6 02:57:33 2024 NN descent for 16 iterations
	 1  /  16
	 2  /  16
	 3  /  16
	 4  /  16
	 5  /  16
	 6  /  16
	Stopping threshold met -- exiting after 6 iterations
Sat Apr  6 02:57:44 2024 Finished Nearest Neighbor Search
Sat Apr  6 02:57:46 2024 Construct embedding


Epochs completed:   0%|            0/200 [00:00]

	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epochs
	completed  100  /  200 epochs
	completed  120  /  200 epochs
	completed  140  /  200 epochs
	completed  160  /  200 epochs
	completed  180  /  200 epochs
Sat Apr  6 02:58:14 2024 Finished embedding
