In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle
import os
import sys
import warnings
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import timm



In [3]:
# --- Configuration -----------------------------------------------------------
# Directory containing the per-fold student checkpoints.
STUDENT_MODEL_PATH = '/kaggle/input/csiro-model/pytorch/default/2'

# Competition data root; the test CSV and the test image folder live under it.
RAW_DATA_DIR = "/kaggle/input/csiro-biomass"
TEST_IMG_DIR = os.path.join(RAW_DATA_DIR, "test")
TEST_CSV = os.path.join(RAW_DATA_DIR, "test.csv")

# Target names, in the order used to label the columns of the prediction
# matrix when the submission is assembled.
TARGET_NAMES = [
    'Dry_Green_g',
    'Dry_Dead_g',
    'Dry_Clover_g',
    'GDM_g',
    'Dry_Total_g',
]

In [4]:
class TestDataset(Dataset):
    """Inference dataset that yields one sample per distinct test image.

    Args:
        df_test: DataFrame with an ``image_path`` column; repeated paths are
            collapsed so each image is visited once.
        img_dir: Directory that holds the image files.
        transforms: Callable applied to the loaded RGB PIL image.
        img_size: Side length of the all-zero ``(3, img_size, img_size)``
            placeholder returned when an image cannot be loaded/transformed.
    """

    def __init__(self, df_test, img_dir, transforms, img_size):
        self.img_dir = img_dir
        self.img_size = img_size
        self.transforms = transforms
        self.df = df_test
        # Distinct paths, in order of first appearance in the frame.
        self.image_paths = df_test['image_path'].unique()

    def __len__(self):
        """Number of distinct images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``{'image_path': <path from the frame>, 'image': <tensor>}``."""
        rel_path = self.image_paths[idx]
        sample = {'image_path': rel_path}

        # Only the last '/'-separated component is used to locate the file
        # inside ``img_dir``.
        full_img_path = os.path.join(self.img_dir, rel_path.rsplit('/', 1)[-1])

        try:
            sample['image'] = self.transforms(Image.open(full_img_path).convert('RGB'))
        except Exception as e:
            # Best effort: report the failure and substitute a zero tensor so
            # the batch can still be assembled.
            print(f"Error loading {full_img_path}: {e}")
            placeholder_shape = (3, self.img_size, self.img_size)
            sample['image'] = torch.zeros(placeholder_shape)

        return sample


In [5]:
class StudentModel(nn.Module):
    """Image regressor: CNN feature map -> learned-query transformer decoder -> one scalar per target.

    The backbone's un-pooled feature map is flattened into a sequence of
    patch vectors, projected to ``embed_dim`` and used as decoder memory.
    ``num_targets`` learned query tokens attend to that memory, and a shared
    MLP head maps every decoded query to a single value.

    Args:
        img_model_name: timm architecture name for the image backbone.
    """

    def __init__(self, img_model_name='efficientnet_b2'):
        super().__init__()

        # Backbone without classifier and without global pooling, so it
        # returns a spatial feature map. No pretrained weights are fetched
        # here (presumably they come from a checkpoint loaded afterwards).
        self.img_backbone = timm.create_model(
            img_model_name,
            pretrained=False,
            num_classes=0,
            global_pool='',
        )

        # Hyper-parameters.
        self.num_img_features = self.img_backbone.num_features
        self.embed_dim = 256
        self.num_heads = 8
        self.num_targets = 5

        # Maps each patch vector from backbone width to the decoder width.
        self.patch_projector = nn.Linear(self.num_img_features, self.embed_dim)

        # One learned query token per regression target.
        self.query_tokens = nn.Parameter(
            torch.randn(1, self.num_targets, self.embed_dim)
        )

        # Three-layer decoder; feed-forward width is 4x the embedding width.
        self.transformer_decoder = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(
                d_model=self.embed_dim,
                nhead=self.num_heads,
                dim_feedforward=self.embed_dim * 4,
                dropout=0.1,
                batch_first=True,
            ),
            num_layers=3,
        )

        # Shared head applied to every decoded query: embed_dim -> 2*embed_dim -> 1.
        hidden_dim = self.embed_dim * 2
        self.prediction_head = nn.Sequential(
            nn.LayerNorm(self.embed_dim),
            nn.Linear(self.embed_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, 1),
        )

        self._apply_selective_finetuning()

    def _apply_selective_finetuning(self):
        """Freeze the backbone, then re-enable gradients for its last three
        blocks and, when the backbone has them, ``conv_head`` and ``bn2``."""
        backbone = self.img_backbone

        for weight in backbone.parameters():
            weight.requires_grad = False

        unfrozen = [backbone.blocks[-3:]]
        for attr_name in ('conv_head', 'bn2'):
            if hasattr(backbone, attr_name):
                unfrozen.append(getattr(backbone, attr_name))

        for module in unfrozen:
            for weight in module.parameters():
                weight.requires_grad = True

    def forward(self, image):
        """Predict all targets for a batch of images.

        Args:
            image: Batched image tensor; the first dimension is the batch.

        Returns:
            Tensor with one row per image and one column per query token.
        """
        batch_size = image.shape[0]

        # Spatial feature map [B, C, H, W] -> patch sequence [B, H*W, C].
        feature_map = self.img_backbone(image)
        patch_seq = feature_map.flatten(2).transpose(1, 2)

        # Keys/values for cross-attention: [B, H*W, embed_dim].
        memory = self.patch_projector(patch_seq)

        # The same learned queries are shared (as a view) across the batch.
        query = self.query_tokens.expand(batch_size, -1, -1)

        decoded = self.transformer_decoder(tgt=query, memory=memory)

        # Head emits [B, num_queries, 1]; drop the trailing singleton.
        return self.prediction_head(decoded).squeeze(-1)

In [8]:
# ## 3. loading models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


def _load_student(i):
    """Build a StudentModel on `device`, load the checkpoint of fold `i`
    from STUDENT_MODEL_PATH, and return it in eval mode."""
    weights_file = os.path.join(STUDENT_MODEL_PATH, f"best_student_model_fold_{i}.pth")
    net = StudentModel().to(device)
    net.load_state_dict(torch.load(weights_file, map_location=device))
    net.eval()
    return net


# One model per fold, folds numbered 1..5.
student_models = [_load_student(i) for i in range(1, 6)]


Using device: cuda


In [9]:
# ## 4. handling test dataset

# Keep only the image path column, one row per distinct image.
df_test = pd.read_csv(TEST_CSV)[['image_path']].drop_duplicates()
print(f"Found {len(df_test)} unique test images.")

# Resize to 260x260, convert to tensor, then normalise per channel.
_resize = transforms.Resize((260, 260))
_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
test_transforms = transforms.Compose([_resize, transforms.ToTensor(), _normalize])

# The dataset's placeholder size (260) matches the resize above.
test_dataset = TestDataset(df_test, TEST_IMG_DIR, test_transforms, 260)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

print("Test DataLoader created.")

print("Running five student models on test set...")
for model in student_models:
    model.eval()

all_preds = []
all_image_paths = []

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Running Ensemble Inference"):
        images = batch['image'].to(device)

        # Each model's raw output is passed through expm1 (presumably the
        # models predict log1p-scaled targets -- confirm against training),
        # and the ensemble is the mean of those values across models.
        per_model = [torch.expm1(net(images)) for net in student_models]
        ensemble_pred = torch.stack(per_model, dim=0).mean(dim=0)

        # Predictions and paths are collected in the same batch order so
        # they stay aligned row-for-row.
        all_preds.append(ensemble_pred.cpu())
        all_image_paths.extend(batch['image_path'])

preds_tensor = torch.cat(all_preds, dim=0).numpy()

Found 1 unique test images.
Test DataLoader created.
Running five student models on test set...


Running Ensemble Inference: 100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

Inference complete. 预测形状: (1, 5)





In [10]:
# Formatting submission file
#
# Reshape the wide prediction matrix (one row per image, one column per
# target) into the long layout written to submission.csv: one row per
# (image, target) pair, keyed by `sample_id`.

df_preds_wide = pd.DataFrame(preds_tensor, columns=TARGET_NAMES)
df_preds_wide['image_path'] = all_image_paths

df_preds_long = df_preds_wide.melt(
    id_vars='image_path',
    var_name='target_name',
    value_name='target'
)

# image_id = last '/'-separated path component, truncated at its first '.'.
# Vectorised string ops replace the per-row Python lambda.
df_preds_long['image_id'] = (
    df_preds_long['image_path']
    .str.split('/').str[-1]
    .str.split('.').str[0]
)
df_preds_long['sample_id'] = df_preds_long['image_id'] + '__' + df_preds_long['target_name']

# Select the output columns and clip negative predictions to 0 in a single
# chained expression. Assigning through `.loc` on a column slice of
# `df_preds_long` (as was done before) raises pandas' SettingWithCopyWarning;
# `.assign` returns a fresh frame, so there is no write into a slice.
submission_df = (
    df_preds_long[['sample_id', 'target']]
    .assign(target=lambda frame: frame['target'].clip(lower=0))
)

submission_df.to_csv('submission.csv', index=False)

print("submission.csv created successfully!")
print("Submission file head:")
print(submission_df.head(10))

Formatting submission file...
submission.csv created successfully!
Submission file head:
                    sample_id     target
0   ID1001187975__Dry_Green_g  23.954681
1    ID1001187975__Dry_Dead_g  21.282005
2  ID1001187975__Dry_Clover_g   0.745209
3         ID1001187975__GDM_g  24.372406
4   ID1001187975__Dry_Total_g  49.692810
