In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision import models
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import os
from PIL import Image
import time
from torcheval.metrics import R2Score
from sklearn.ensemble import RandomForestRegressor
from transformers import AutoImageProcessor, AutoModel

In [None]:
# Report the library / toolkit versions this kernel is running against.
# `torch.version.cuda` is the CUDA version PyTorch was built with (None on
# CPU-only builds); the original line printed the label without any value.
print(f'PyTorch version: {torch.__version__}')
print('*'*10)
print(f'CUDA version: {torch.version.cuda}')
print('*'*10)
print(f'CUDNN version: {torch.backends.cudnn.version()}')
print(f'Available GPU devices: {torch.cuda.device_count()}')

In [None]:
# Run on the GPU when CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
class PlantTraitDataset(Dataset):
    """Dataset pairing each image with its row of auxiliary tabular values.

    Column 0 of the CSV is used as the image id (the image is read from
    ``<root_dir>/<id>.jpeg``); every remaining column is converted to float,
    standardised with ``mean`` / ``std`` and split into a leading part and the
    trailing ``NUM_TARGETS`` values.
    """

    # Number of trailing values returned separately as the "labels" element.
    NUM_TARGETS = 6

    def __init__(self, csv_file, root_dir, mean, std, transform=None, return_id = False, test_set = True):
        """
        Arguments:
            csv_file (string): Path to the csv file with auxiliary data.
            root_dir (string): Directory with all the images.
            mean (sequence): Values subtracted from the auxiliary columns.
            std (sequence): Values the centred auxiliary columns are divided by.
            transform (callable, optional): Optional transform to be applied
                on the image array of a sample.
            return_id (bool): When true, each sample is prefixed with the
                image id.
            test_set (bool): When true together with ``return_id``, only
                ``mean[:-NUM_TARGETS]`` / ``std[:-NUM_TARGETS]`` are used for
                the standardisation.
        """
        self.auxiliary_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.return_id = return_id
        self.test_set = test_set
        self.mean = mean
        self.std = std

    def __len__(self):
        """Return the number of rows in the auxiliary CSV."""
        return len(self.auxiliary_frame)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, data, labels)``, or ``(img_id, image, data, labels)``
            when ``return_id`` is set. ``image`` is a float64 array (after the
            optional transform), ``labels`` holds the last ``NUM_TARGETS``
            standardised values and ``data`` everything before them.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Index the id column directly so it keeps its own dtype; the id is
        # turned into the file name below.
        img_id = self.auxiliary_frame.iloc[idx, 0]
        img_name = os.path.join(self.root_dir, str(img_id)+".jpeg")

        # Image.open is lazy; the context manager guarantees the underlying
        # file handle is released once the pixels are copied into the array.
        with Image.open(img_name) as image_pil:
            image = np.array(image_pil).astype(np.float64)

        auxiliary = np.array(self.auxiliary_frame.iloc[idx, 1:], dtype=float)

        if self.return_id and self.test_set:
            # In this mode the row is standardised with the statistics minus
            # their trailing NUM_TARGETS entries.
            mean = self.mean[:-self.NUM_TARGETS]
            std = self.std[:-self.NUM_TARGETS]
        else:
            mean, std = self.mean, self.std
        auxiliary = (auxiliary - mean) / std

        # NOTE: in the return_id/test_set mode the trailing slice is presumably
        # not real targets; produce_test_features re-joins both parts.
        data, labels = auxiliary[:-self.NUM_TARGETS], auxiliary[-self.NUM_TARGETS:]

        if self.transform:
            image = self.transform(image)

        return (img_id, image, data, labels) if self.return_id else (image, data, labels)

In [None]:
# Mini-batch size shared by every DataLoader built in this notebook.
batch_size = 128

# Empty transform pipeline: images are handed over exactly as the dataset
# loads them.
NO_TRANSFORM = transforms.Compose([])

In [None]:
# Column-wise statistics of the training table; the leading (id) column is
# dropped from both vectors after the reduction.
auxiliary_data = pd.read_csv("./data/train.csv").to_numpy()
means = np.mean(auxiliary_data, axis=0)[1:]
stds = np.std(auxiliary_data, axis=0)[1:]

In [None]:
# Training split: samples come back as (image, data, labels).
train_dataset = PlantTraitDataset(
    csv_file="./data/train.csv",
    root_dir="./data/train_images/",
    mean=means,
    std=stds,
    transform=NO_TRANSFORM,
)
train_loader = DataLoader(train_dataset, batch_size=batch_size)

# Test split: return_id=True makes every sample start with the image id.
test_dataset = PlantTraitDataset(
    csv_file="./data/test.csv",
    root_dir="./data/test_images/",
    mean=means,
    std=stds,
    transform=NO_TRANSFORM,
    return_id=True,
)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [None]:
## Old Backbones
class GoogleVitFeatureExtractor:
    """Feature extractor built on the ``google/vit-base-patch16-224`` checkpoint."""

    def __init__(self):
        self.processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
        self.model = AutoModel.from_pretrained("google/vit-base-patch16-224").to(device)
        # The other backbones in this notebook expose their network as `.vit`
        # and the code consuming a backbone calls `<backbone>.vit.eval()`.
        # Keep `.model` for existing users and add the alias so this class is
        # interchangeable with the others.
        self.vit = self.model
        self.model.eval()

    def extract_features(self, images):
        """Preprocess ``images`` and return the network's ``pooler_output``.

        No gradient context is set here; callers are expected to wrap the call
        in ``torch.no_grad()`` when gradients are not needed.
        """
        # NOTE(review): this checkpoint may not ship weights for the base
        # model's pooling layer, in which case `pooler_output` would come from
        # a freshly initialised layer -- confirm against the load-time warnings.
        inputs = self.processor(images, return_tensors="pt").to(device)
        outputs = self.model(**inputs)
        return outputs.pooler_output

class DinoV2SmallFeatureExtractor:
    """Feature extractor built on the ``facebook/dinov2-small`` checkpoint."""

    def __init__(self):
        checkpoint = "facebook/dinov2-small"
        self.processor = AutoImageProcessor.from_pretrained(checkpoint)
        backbone = AutoModel.from_pretrained(checkpoint)
        # Move the network to the notebook-wide device and switch it to
        # evaluation mode.
        self.vit = backbone.to(device)
        self.vit.eval()

    def extract_features(self, images):
        """Preprocess ``images`` and return the network's ``pooler_output``."""
        batch = self.processor(images, return_tensors="pt")
        batch = batch.to(device)
        return self.vit(**batch).pooler_output


## MAIN BACKBONE
class DinoV2GiantFeatureExtractor:
    """Feature extractor built on the ``facebook/dinov2-giant`` checkpoint."""

    def __init__(self):
        name = "facebook/dinov2-giant"
        processor = AutoImageProcessor.from_pretrained(name)
        network = AutoModel.from_pretrained(name).to(device)
        network.eval()  # evaluation mode before the network is stored
        self.processor = processor
        self.vit = network

    def extract_features(self, images):
        """Run ``images`` through the processor and network; return ``pooler_output``."""
        model_inputs = self.processor(images, return_tensors="pt").to(device)
        result = self.vit(**model_inputs)
        pooled = result.pooler_output
        return pooled

In [None]:
# Backbone used for feature extraction. DinoV2SmallFeatureExtractor can be
# swapped in here for quicker test runs.
model = DinoV2GiantFeatureExtractor() ## Can swap to the small feature extractor for testing

In [None]:
class PlantFeatureExtractor:
    """Runs a backbone over a loader and collects features, auxiliary data and labels.

    After :meth:`setup`, ``features_with_aux`` holds one row per sample
    (flattened backbone features followed by the auxiliary values) and
    ``labels`` holds the matching label rows.
    """

    def __init__(self, model):
        """
        Arguments:
            model: Backbone wrapper exposing ``extract_features(images)``. Its
                underlying network, found under ``.vit`` or ``.model``, is put
                into evaluation mode.
        """
        # Backbones in this notebook keep their network under either `.vit`
        # or `.model`; accept both instead of hard-requiring `.vit`.
        network = getattr(model, "vit", None)
        if network is None:
            network = getattr(model, "model", None)
        if network is not None:
            network.eval()
        self.model = model

        self.labels = np.array([])
        self.features_with_aux = np.array([])

    def setup(self, image_loader):
        """Extract features for every batch of ``image_loader``.

        Each batch must unpack into ``(images, aux, labels)``. Results replace
        ``self.features_with_aux`` and ``self.labels``.
        """
        feature_arrays = []
        label_arrays = []

        num_steps = len(image_loader)

        with torch.no_grad():
            # 1-based step counter so the progress line reports how many
            # batches are actually done.
            for step, (images, aux, labels) in enumerate(image_loader, start=1):
                images = images.to(device)

                features = self.model.extract_features(images).cpu().numpy()
                # Flatten everything but the batch dimension.
                features = features.reshape(features.shape[0], -1)

                feature_arrays.append(np.concatenate((features, aux.numpy()), axis=1))
                label_arrays.append(labels.numpy())

                if step % 10 == 0:
                    print(f"Finished step {step}/{num_steps}")

        self.features_with_aux = np.vstack(feature_arrays)
        self.labels = np.vstack(label_arrays)

    def save_features(self):
        """Write features, auxiliary data and labels side by side to CSV.

        Two copies are written under ``./features``: a timestamped file and
        ``features_big.csv``.
        """
        combined = np.hstack((self.features_with_aux, self.labels))
        frame = pd.DataFrame(combined)
        # Create the output directory up front so a missing folder cannot
        # throw away the result of the extraction run.
        os.makedirs("./features", exist_ok=True)
        ts = time.time()
        frame.to_csv(f"./features/features_{ts}.csv", index=False)
        frame.to_csv("./features/features_big.csv", index=False)


# Wrap the chosen backbone; features are only computed once setup() is called.
prf = PlantFeatureExtractor(model)

In [None]:
# Run the backbone over every training batch and keep the resulting
# features, auxiliary values and labels on `prf`.
prf.setup(train_loader)

In [None]:
# Write the collected training features and labels to CSV under ./features.
prf.save_features()

In [None]:
def produce_test_features(model: "PlantFeatureExtractor", data_loader):
    """Build the feature matrix for a loader whose samples carry an id.

    Arguments:
        model: Wrapper whose ``.model`` attribute is the backbone providing
            ``extract_features(images)`` (e.g. a ``PlantFeatureExtractor``).
        data_loader: Iterable with ``len()`` yielding
            ``(ids, images, data, labels)`` batches.

    Returns:
        2-D numpy array with one row per sample: the id, then the flattened
        backbone features, then the auxiliary values.
    """
    backbone = model.model
    # The network sits under `.vit` or `.model` depending on the backbone.
    network = getattr(backbone, "vit", None)
    if network is None:
        network = getattr(backbone, "model", None)
    if network is not None:
        network.eval()

    num_steps = len(data_loader)
    batches = []

    with torch.no_grad():
        for step, (ids, images, aux_head, aux_tail) in enumerate(data_loader, start=1):
            # The dataset always splits off the trailing values as "labels";
            # here they are ordinary auxiliary columns, so join them back.
            aux = torch.concat((aux_head, aux_tail), dim=1).numpy()
            images = images.to(device)

            features = backbone.extract_features(images).cpu().numpy()
            features = features.reshape(features.shape[0], -1)

            # Assemble the row directly in numpy; the integer ids are promoted
            # to the floating dtype of the other columns.
            id_column = ids.to(torch.int64).cpu().numpy().reshape(-1, 1)
            batches.append(np.concatenate((id_column, features, aux), axis=1))

            if step % 10 == 0:
                print(f"Finished step {step}/{num_steps}")

    predictions = np.vstack(batches)
    print(predictions.shape)

    return predictions

# Feature matrix for the test loader; per the concatenation inside
# produce_test_features, column 0 of each row is the image id.
features = produce_test_features(prf, test_loader)


In [None]:
# Persist the test feature matrix twice: a timestamped copy and a copy under
# a fixed file name.
features_df = pd.DataFrame(features)
# Make sure the output directory exists so the write cannot fail after the
# extraction has already been paid for.
os.makedirs("./features", exist_ok=True)
ts = time.time()
features_df.to_csv(f"./features/features_test_{ts}.csv", index=False)
features_df.to_csv("./features/features_test.csv", index=False)