In [1]:
import os
import torch
import pickle
import numpy as np
import pandas as pd

from PIL import Image
import torch.nn as nn
from tqdm import tqdm
import lightgbm as lgb

from torchvision import transforms
from sklearn.model_selection import KFold
from torchvision import models, transforms
from sklearn.preprocessing import LabelEncoder
from torchvision.models import efficientnet_b0
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader, Dataset
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [2]:
class CONFIG:
    """Notebook-wide settings: seed, CSV locations, compute device and target weights."""

    # Seed value kept for reproducibility; no visible cell applies it.
    SEED = 67

    # Competition CSV files under the Kaggle input mount.
    TRAIN_PATH = '/kaggle/input/csiro-biomass/train.csv'
    TEST_PATH = '/kaggle/input/csiro-biomass/test.csv'

    # Use the GPU when torch can see one, otherwise run on the CPU.
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # Weight attached to each target column name.
    weights = dict(
        Dry_Clover_g=0.1,
        Dry_Dead_g=0.1,
        Dry_Green_g=0.1,
        Dry_Total_g=0.5,
        GDM_g=0.2,
    )


# Single shared settings object used by the cells below.
cfg = CONFIG()

In [5]:
class BiomassDataset(Dataset):
    """Dataset yielding normalized 224x224 image tensors, optionally with targets.

    Args:
        df: DataFrame with an 'image_path' column. When ``train`` is true it
            must also contain every column listed in ``TARGET_COLUMNS``.
        train: If true, items are ``(image, targets)`` pairs; otherwise only
            the image tensor is returned.
        root: Directory that the 'image_path' values are relative to.
    """

    # Order of the values in each target tensor.
    TARGET_COLUMNS = ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g']

    def __init__(self, df, train=True, root='/kaggle/input/csiro-biomass'):
        self.train = train
        self.df = df
        self.root = root
        self.tf = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            ),
        ])
        # Convert the targets once here instead of copying the target columns
        # of the whole frame on every __getitem__ call.
        self.targets = (
            torch.tensor(df[self.TARGET_COLUMNS].to_numpy(), dtype=torch.float)
            if train
            else None
        )

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, convert to RGB and transform the image at position ``idx``."""
        path = self.df['image_path'].iloc[idx]
        # Context manager closes the underlying file deterministically.
        with Image.open(f'{self.root}/{path}') as handle:
            img = handle.convert("RGB")
        img = self.tf(img)

        if self.train:
            return img, self.targets[idx]
        return img

In [6]:
def weighted_r2_torch(y_true, y_pred, w):
    """Weighted R^2-style score computed with torch ops.

    The baseline for each row is the weighted sum of that row's true values
    (reduced over dim 1), so ``y_true`` must have at least two dimensions.
    ``w`` is broadcast against ``y_true``.

    Returns:
        ``1 - SS_res / SS_tot`` as a tensor, where both sums of squares are
        weighted by ``w`` and reduced over all elements.
    """
    row_baseline = torch.sum(w * y_true, dim=1, keepdim=True)
    residual_ss = torch.sum(w * (y_true - y_pred).pow(2))
    total_ss = torch.sum(w * (y_true - row_baseline).pow(2))
    return 1 - residual_ss / total_ss

In [7]:
def clean_ids(data):
    """Return the part of the string ``data`` that precedes the first '__'."""
    return data.split('__')[0]


def preprocessing(data):
    """Reshape a long-format competition CSV into one row per image.

    The ``sample_id`` values are shortened with ``clean_ids``. The caller's
    DataFrame is left untouched: the cleaned column lives on a copy.

    Args:
        data: DataFrame with 'sample_id' and 'image_path' columns, plus
            'target_name' and 'target' for labelled data.

    Returns:
        With a 'target' column: a frame indexed by (sample_id, image_path)
        pivoted to one column per 'target_name', index reset to columns.
        Without it: the unique (sample_id, image_path) pairs.
    """
    # assign() builds a new frame, so the input is not mutated in place.
    data = data.assign(sample_id=data['sample_id'].apply(clean_ids))

    if 'target' in data.columns:
        return data.pivot_table(
            index=['sample_id', 'image_path'],
            columns='target_name',
            values='target',
        ).reset_index()

    return data[['sample_id', 'image_path']].drop_duplicates()

In [8]:
# Read the test CSV and run it through preprocessing(); when the file has no
# 'target' column this leaves one row per unique (sample_id, image_path) pair.
test = preprocessing(pd.read_csv(cfg.TEST_PATH))
# Inference-mode dataset: items are image tensors only, without targets.
test_dataset = BiomassDataset(df=test, train=False)

In [9]:
# Serve the test images in batches of 8. shuffle=False keeps the dataset order,
# so the i-th prediction row corresponds to the i-th row of the dataset's frame.
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

In [21]:
# EfficientNet-B0 without pretrained weights; all parameters come from the
# checkpoint loaded below.
model = efficientnet_b0(weights=None)
# Replace the final classifier layer with a 5-output linear head. The trailing
# ReLU clamps every output to be non-negative.
model.classifier[1] = torch.nn.Sequential(
    torch.nn.Linear(1280, 5),
    torch.nn.ReLU()
)
# NOTE(review): the checkpoint file name says "b7" but it is loaded into a B0
# network — confirm the file really holds B0 weights.
# map_location remaps the stored tensors onto cfg.device, so a checkpoint saved
# from GPU tensors still loads when cfg.device has fallen back to 'cpu'.
model.load_state_dict(
    torch.load(
        '/kaggle/input/model-0990u09u09/efficientnet_b7_checkpoint.pt',
        map_location=cfg.device,
    )
)
model = model.to(cfg.device)

In [22]:
# Switch to evaluation mode, then run every test batch through the network with
# gradient tracking disabled. Each batch output is moved to the CPU and kept in
# a list; a later cell concatenates them.
model.eval()
with torch.no_grad():
    preds = [model(batch.to(cfg.device)).cpu() for batch in test_loader]

In [23]:
# Join the per-batch output tensors along the batch axis and convert the result
# to a NumPy array (one row per image; the model head has 5 outputs).
# NOTE: this rebinds `preds` from a list of tensors to an ndarray, so the cell
# cannot be re-run on its own — re-run the inference cell first.
preds = torch.cat(preds, dim=0).numpy()

In [26]:
# Re-read the raw test CSV: `test` was earlier replaced by the output of
# preprocessing(), while the submission needs the original sample_id values.
test = pd.read_csv(cfg.TEST_PATH)

In [36]:
# Build the submission by looking up each row's prediction explicitly instead
# of relying on preds.flatten() happening to line up with the CSV row order.

# Column order of the model output (the target order used by BiomassDataset).
target_order = ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g']

# sample_id has the form '<image id>__<target name>'.
id_parts = test['sample_id'].str.split('__', n=1, expand=True)

# Row of `preds` for each CSV row: images are numbered in order of first
# appearance, the order in which preprocessing() keeps the unique
# (sample_id, image_path) pairs.
image_row = test.groupby([id_parts[0], test['image_path']], sort=False).ngroup().to_numpy()

# Column of `preds` for each CSV row; -1 marks a suffix that is not a known target.
target_col = pd.Index(target_order).get_indexer(id_parts[1])
if (target_col < 0).any():
    raise ValueError("sample_id suffix is not one of the expected target names")

output = pd.DataFrame({
    'sample_id': test['sample_id'].to_numpy(),
    'target': preds[image_row, target_col]
})

In [39]:
# Write the submission to 'submission.csv' in the working directory, omitting
# the DataFrame index so the file has only the sample_id and target columns.
output.to_csv('submission.csv', index=False)

In [40]:
# Preview the first rows of the submission as a sanity check.
output.head()

Unnamed: 0,sample_id,target
0,ID1001187975__Dry_Clover_g,1.205311
1,ID1001187975__Dry_Dead_g,22.831341
2,ID1001187975__Dry_Green_g,21.47506
3,ID1001187975__Dry_Total_g,43.720478
4,ID1001187975__GDM_g,22.299875
