In [9]:
# Import Libraries
import numpy as np
import pandas as pd
import os
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split, Subset
from torchvision import models, transforms
from sklearn.preprocessing import StandardScaler

# Check GPU Availability
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load and Prepare Data
train_df = pd.read_csv('./content/train.csv')
test_df = pd.read_csv('./content/test.csv')

# Define Dataset and DataLoader
class PlantDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None, is_test=False, target_trait=None):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.is_test = is_test
        self.target_trait = target_trait
        self.label_columns = ['X4_mean', 'X11_mean', 'X18_mean', 'X26_mean', 'X50_mean', 'X3112_mean']
        
        # Separate features and labels
        if self.is_test:
            self.features = self.data.drop(columns=['id'])
            self.labels = pd.DataFrame(columns=self.label_columns)
        else:
            self.features = self.data.drop(columns=['id'] + self.label_columns)
            self.labels = self.data[self.label_columns]
        
        # Initialize StandardScaler
        self.scaler = StandardScaler()
        self.features = pd.DataFrame(self.scaler.fit_transform(self.features), columns=self.features.columns)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, f"{self.data.iloc[idx, 0]}.jpeg")
        image = Image.open(img_name)

        if self.transform:
            image = self.transform(image)

        features = self.features.iloc[idx].values.astype(np.float32)

        if self.is_test:
            label = np.array([0], dtype=np.float32)  # Dummy label for test data
        else:
            if self.target_trait:
                label = np.array([self.labels.iloc[idx][self.target_trait]], dtype=np.float32)
            else:
                label = self.labels.iloc[idx].values.astype(np.float32)

        return image, features, label

# Image transformations
transform = transforms.Compose([
        transforms.Resize(224, interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

# Define Model Architecture
class PlantTraitPredictor(nn.Module):
    def __init__(self):
        super(PlantTraitPredictor, self).__init__()
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, 256)
        self.fc1 = nn.Linear(256 + 163, 128)
        self.fc2 = nn.Linear(128, 1)  # Predict a single trait

    def forward(self, image, ancillary_data):
        img_features = self.resnet(image)
        combined = torch.cat((img_features, ancillary_data), dim=1)
        x = nn.ReLU()(self.fc1(combined))
        x = self.fc2(x)
        return x



True
NVIDIA GeForce RTX 4090
Using device: cuda:0


In [11]:
# Training Function
def train_model(trait, num_epochs=25, batch_size=32, learning_rate=0.001):
    # Load the CSV file to get access to all the data
    df = pd.read_csv('./content/train.csv')
    
    # Calculate the percentiles for the target trait
    trait_data = df[trait]
    lower_bound = np.percentile(trait_data, 0.1)
    upper_bound = np.percentile(trait_data, 98)
    
    # Create a mask for the dataset
    mask = (trait_data > lower_bound) & (trait_data < upper_bound)
    
    # Apply the mask to get indices of non-outlier data
    valid_indices = np.where(mask)[0]

    # Create the full dataset
    full_dataset = PlantDataset(csv_file='./content/train.csv', img_dir='./content/train_images', transform=transform, target_trait=trait)

    # Create a new dataset with only the non-outlier data
    filtered_dataset = Subset(full_dataset, valid_indices)
    
    # Split into training and validation sets
    train_size = int(0.8 * len(filtered_dataset))
    val_size = len(filtered_dataset) - train_size
    train_dataset, val_dataset = random_split(filtered_dataset, [train_size, val_size])

    # smaller datasrt4
    #train_dataset = Subset(train_dataset, range(3200))
    #val_dataset = Subset(val_dataset, range(3200))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Print statistics about the filtering
    print(f"Original dataset size: {len(full_dataset)}")
    print(f"Filtered dataset size: {len(filtered_dataset)}")
    print(f"Removed {len(full_dataset) - len(filtered_dataset)} samples ({(1 - len(filtered_dataset)/len(full_dataset))*100:.2f}%)")

    model = PlantTraitPredictor().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

    best_val_loss = float('inf')
    patience = 7
    counter = 0

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for images, ancillary_data, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            images, ancillary_data, labels = images.to(device), ancillary_data.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images, ancillary_data)
            loss = criterion(outputs, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            train_loss += loss.item()

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, ancillary_data, labels in val_loader:
                images, ancillary_data, labels = images.to(device), ancillary_data.to(device), labels.to(device)
                outputs = model(images, ancillary_data)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
        
        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
            torch.save(model.state_dict(), f'./content/best_model_{trait}.pth')
        else:
            counter += 1
            if counter >= patience:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break

    return model


# Train Models for Each Trait
traits = ['X4_mean', 'X11_mean', 'X18_mean', 'X26_mean', 'X50_mean', 'X3112_mean']

trained_models = {}

# for trait, index in traits:
#     print(f"\nTraining model for {trait}")
#     model = train_model(trait, num_epochs=epochs[index], batch_size=32)
#     trained_models[trait] = model

index = 0

def train_one(index, epochs=30):
    trait = traits[index]
    print(f"\nTraining model for {trait}")
    model = train_model(trait, num_epochs=epochs, batch_size=32)
    trained_models[trait] = model


In [12]:
train_one(0)


Training model for X4_mean
Original dataset size: 43363
Filtered dataset size: 42448
Removed 915 samples (2.11%)


Epoch 1/30: 100%|██████████| 1062/1062 [01:39<00:00, 10.69it/s]


Epoch [1/30], Train Loss: 0.0220, Val Loss: 0.0241


Epoch 2/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.29it/s]


Epoch [2/30], Train Loss: 0.0155, Val Loss: 0.0159


Epoch 3/30: 100%|██████████| 1062/1062 [01:37<00:00, 10.94it/s]


Epoch [3/30], Train Loss: 0.0153, Val Loss: 0.0286


Epoch 4/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.07it/s]


Epoch [4/30], Train Loss: 0.0154, Val Loss: 0.0535


Epoch 5/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.15it/s]


Epoch [5/30], Train Loss: 0.0152, Val Loss: 0.0164


Epoch 6/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.04it/s]


Epoch [6/30], Train Loss: 0.0150, Val Loss: 0.0155


Epoch 7/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.15it/s]


Epoch [7/30], Train Loss: 0.0150, Val Loss: 0.0144


Epoch 8/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.09it/s]


Epoch [8/30], Train Loss: 0.0148, Val Loss: 0.0144


Epoch 9/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.12it/s]


Epoch [9/30], Train Loss: 0.0148, Val Loss: 0.0144


Epoch 10/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.16it/s]


Epoch [10/30], Train Loss: 0.0147, Val Loss: 0.0146


Epoch 11/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.22it/s]


Epoch [11/30], Train Loss: 0.0146, Val Loss: 0.0148


Epoch 12/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.16it/s]


Epoch [12/30], Train Loss: 0.0145, Val Loss: 0.0142


Epoch 13/30: 100%|██████████| 1062/1062 [01:46<00:00, 10.01it/s]


Epoch [13/30], Train Loss: 0.0145, Val Loss: 0.0164


Epoch 14/30: 100%|██████████| 1062/1062 [01:36<00:00, 10.98it/s]


Epoch [14/30], Train Loss: 0.0145, Val Loss: 0.0143


Epoch 15/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.12it/s]


Epoch [15/30], Train Loss: 0.0146, Val Loss: 0.0143


Epoch 16/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.12it/s]


Epoch [16/30], Train Loss: 0.0145, Val Loss: 0.0142


Epoch 17/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.10it/s]


Epoch [17/30], Train Loss: 0.0145, Val Loss: 0.0143


Epoch 18/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.16it/s]


Epoch [18/30], Train Loss: 0.0144, Val Loss: 0.0144


Epoch 19/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.13it/s]


Epoch [19/30], Train Loss: 0.0145, Val Loss: 0.0154


Epoch 20/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.09it/s]


Epoch [20/30], Train Loss: 0.0144, Val Loss: 0.0153


Epoch 21/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.08it/s]


Epoch [21/30], Train Loss: 0.0139, Val Loss: 0.0139


Epoch 22/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.05it/s]


Epoch [22/30], Train Loss: 0.0138, Val Loss: 0.0139


Epoch 23/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.08it/s]


Epoch [23/30], Train Loss: 0.0138, Val Loss: 0.0139


Epoch 24/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.18it/s]


Epoch [24/30], Train Loss: 0.0138, Val Loss: 0.0139


Epoch 25/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.07it/s]


Epoch [25/30], Train Loss: 0.0137, Val Loss: 0.0139


Epoch 26/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.07it/s]


Epoch [26/30], Train Loss: 0.0137, Val Loss: 0.0143


Epoch 27/30: 100%|██████████| 1062/1062 [01:32<00:00, 11.43it/s]


Epoch [27/30], Train Loss: 0.0137, Val Loss: 0.0140


Epoch 28/30: 100%|██████████| 1062/1062 [01:33<00:00, 11.36it/s]


Epoch [28/30], Train Loss: 0.0136, Val Loss: 0.0138


Epoch 29/30: 100%|██████████| 1062/1062 [01:32<00:00, 11.45it/s]


Epoch [29/30], Train Loss: 0.0136, Val Loss: 0.0138


Epoch 30/30: 100%|██████████| 1062/1062 [01:31<00:00, 11.58it/s]


Epoch [30/30], Train Loss: 0.0136, Val Loss: 0.0138


In [13]:
train_one(1)


Training model for X11_mean
Original dataset size: 43363
Filtered dataset size: 42450
Removed 913 samples (2.11%)


Epoch 1/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.06it/s]


Epoch [1/30], Train Loss: 319.6569, Val Loss: 64.0803


Epoch 2/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.08it/s]


Epoch [2/30], Train Loss: 49.0624, Val Loss: 80.7666


Epoch 3/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.06it/s]


Epoch [3/30], Train Loss: 49.4234, Val Loss: 41.6860


Epoch 4/30: 100%|██████████| 1062/1062 [01:37<00:00, 10.94it/s]


Epoch [4/30], Train Loss: 43.9640, Val Loss: 45.0796


Epoch 5/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.07it/s]


Epoch [5/30], Train Loss: 42.7867, Val Loss: 34.3601


Epoch 6/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.10it/s]


Epoch [6/30], Train Loss: 44.9389, Val Loss: 33.1783


Epoch 7/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.05it/s]


Epoch [7/30], Train Loss: 41.5594, Val Loss: 51.1266


Epoch 8/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.05it/s]


Epoch [8/30], Train Loss: 42.2325, Val Loss: 33.5872


Epoch 9/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.05it/s]


Epoch [9/30], Train Loss: 39.8519, Val Loss: 39.6247


Epoch 10/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.03it/s]


Epoch [10/30], Train Loss: 40.3711, Val Loss: 32.9527


Epoch 11/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.13it/s]


Epoch [11/30], Train Loss: 39.0943, Val Loss: 60.2623


Epoch 12/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.12it/s]


Epoch [12/30], Train Loss: 38.4529, Val Loss: 55.7677


Epoch 13/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.11it/s]


Epoch [13/30], Train Loss: 39.8097, Val Loss: 37.8055


Epoch 14/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.15it/s]


Epoch [14/30], Train Loss: 39.7348, Val Loss: 41.5441


Epoch 15/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.27it/s]


Epoch [15/30], Train Loss: 32.9371, Val Loss: 33.0006


Epoch 16/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.13it/s]


Epoch [16/30], Train Loss: 32.7081, Val Loss: 34.8827


Epoch 17/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.03it/s]


Epoch [17/30], Train Loss: 32.5885, Val Loss: 32.8357


Epoch 18/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.09it/s]


Epoch [18/30], Train Loss: 32.5442, Val Loss: 33.0086


Epoch 19/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.02it/s]


Epoch [19/30], Train Loss: 32.4407, Val Loss: 32.6149


Epoch 20/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.05it/s]


Epoch [20/30], Train Loss: 32.3079, Val Loss: 32.3411


Epoch 21/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.07it/s]


Epoch [21/30], Train Loss: 32.5509, Val Loss: 32.4761


Epoch 22/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.11it/s]


Epoch [22/30], Train Loss: 32.3622, Val Loss: 35.4159


Epoch 23/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.09it/s]


Epoch [23/30], Train Loss: 32.2846, Val Loss: 32.6941


Epoch 24/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.24it/s]


Epoch [24/30], Train Loss: 32.2723, Val Loss: 32.5967


Epoch 25/30: 100%|██████████| 1062/1062 [01:33<00:00, 11.33it/s]


Epoch [25/30], Train Loss: 31.7175, Val Loss: 32.3171


Epoch 26/30: 100%|██████████| 1062/1062 [01:33<00:00, 11.30it/s]


Epoch [26/30], Train Loss: 31.6657, Val Loss: 32.2200


Epoch 27/30: 100%|██████████| 1062/1062 [01:33<00:00, 11.30it/s]


Epoch [27/30], Train Loss: 31.6297, Val Loss: 32.2249


Epoch 28/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.22it/s]


Epoch [28/30], Train Loss: 31.6012, Val Loss: 32.2218


Epoch 29/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.23it/s]


Epoch [29/30], Train Loss: 31.6320, Val Loss: 32.2158


Epoch 30/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.24it/s]


Epoch [30/30], Train Loss: 31.5514, Val Loss: 32.7479


In [14]:
train_one(2, 30)


Training model for X18_mean
Original dataset size: 43363
Filtered dataset size: 42442
Removed 921 samples (2.12%)


Epoch 1/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.27it/s]


Epoch [1/30], Train Loss: 16704864.5887, Val Loss: 569580.7493


Epoch 2/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.28it/s]


Epoch [2/30], Train Loss: 20619.0316, Val Loss: 3783318.1212


Epoch 3/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.30it/s]


Epoch [3/30], Train Loss: 16089.2341, Val Loss: 67244.7320


Epoch 4/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.14it/s]


Epoch [4/30], Train Loss: 16774.2359, Val Loss: 692347.0310


Epoch 5/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.27it/s]


Epoch [5/30], Train Loss: 17142.7388, Val Loss: 261314.3670


Epoch 6/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.24it/s]


Epoch [6/30], Train Loss: 29564.4363, Val Loss: 1559894.7773


Epoch 7/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.19it/s]


Epoch [7/30], Train Loss: 13729.0958, Val Loss: 38352246.4962


Epoch 8/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.21it/s]


Epoch [8/30], Train Loss: 181.0877, Val Loss: 63645.2511


Epoch 9/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.26it/s]


Epoch [9/30], Train Loss: 171.7852, Val Loss: 4227.6307


Epoch 10/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.26it/s]


Epoch [10/30], Train Loss: 149.6403, Val Loss: 203.1710


Epoch 11/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.24it/s]


Epoch [11/30], Train Loss: 197.1270, Val Loss: 3620.9496


Epoch 12/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.25it/s]


Epoch [12/30], Train Loss: 161.4236, Val Loss: 33119.1944


Epoch 13/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.27it/s]


Epoch [13/30], Train Loss: 159.3828, Val Loss: 1022459.4391


Epoch 14/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.20it/s]


Epoch [14/30], Train Loss: 188.6956, Val Loss: 6848.7142


Epoch 15/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.28it/s]


Epoch [15/30], Train Loss: 14.7557, Val Loss: 660.4821


Epoch 16/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.23it/s]


Epoch [16/30], Train Loss: 13.4155, Val Loss: 8470.0273


Epoch 17/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.25it/s]


Epoch [17/30], Train Loss: 13.9413, Val Loss: 2308.2051
Early stopping triggered at epoch 17


In [15]:
train_one(3)


Training model for X26_mean
Original dataset size: 43363
Filtered dataset size: 42447
Removed 916 samples (2.11%)


Epoch 1/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.23it/s]


Epoch [1/30], Train Loss: 295911.7853, Val Loss: 124692.0570


Epoch 2/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.27it/s]


Epoch [2/30], Train Loss: 3557.3705, Val Loss: 1770667.3543


Epoch 3/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.25it/s]


Epoch [3/30], Train Loss: 2822.0342, Val Loss: 79784.4722


Epoch 4/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.23it/s]


Epoch [4/30], Train Loss: 2714.7964, Val Loss: 5990.9621


Epoch 5/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.29it/s]


Epoch [5/30], Train Loss: 3648.5986, Val Loss: 19744.5363


Epoch 6/30: 100%|██████████| 1062/1062 [01:33<00:00, 11.33it/s]


Epoch [6/30], Train Loss: 2474.2604, Val Loss: 216487.7862


Epoch 7/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.27it/s]


Epoch [7/30], Train Loss: 2562.4642, Val Loss: 6507434.5169


Epoch 8/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.28it/s]


Epoch [8/30], Train Loss: 2448.7946, Val Loss: 2513.5395


Epoch 9/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.26it/s]


Epoch [9/30], Train Loss: 2466.7498, Val Loss: 9350.8277


Epoch 10/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.25it/s]


Epoch [10/30], Train Loss: 2397.2535, Val Loss: 5768.4521


Epoch 11/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.27it/s]


Epoch [11/30], Train Loss: 3128.3804, Val Loss: 2324.6520


Epoch 12/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.25it/s]


Epoch [12/30], Train Loss: 2564.3982, Val Loss: 2540.6171


Epoch 13/30: 100%|██████████| 1062/1062 [01:33<00:00, 11.30it/s]


Epoch [13/30], Train Loss: 2913.6970, Val Loss: 341406.7244


Epoch 14/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.21it/s]


Epoch [14/30], Train Loss: 2130.7950, Val Loss: 1840.1344


Epoch 15/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.26it/s]


Epoch [15/30], Train Loss: 2213.8062, Val Loss: 3245.5752


Epoch 16/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.22it/s]


Epoch [16/30], Train Loss: 2288.4901, Val Loss: 3080.3446


Epoch 17/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.30it/s]


Epoch [17/30], Train Loss: 2151.4123, Val Loss: 8939.8710


Epoch 18/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.25it/s]


Epoch [18/30], Train Loss: 2322.1781, Val Loss: 1773.8466


Epoch 19/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.30it/s]


Epoch [19/30], Train Loss: 1951.2338, Val Loss: 4312.4703


Epoch 20/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.22it/s]


Epoch [20/30], Train Loss: 2205.9082, Val Loss: 2790.4600


Epoch 21/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.25it/s]


Epoch [21/30], Train Loss: 2073.7638, Val Loss: 7283.7356


Epoch 22/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.25it/s]


Epoch [22/30], Train Loss: 1842.7869, Val Loss: 2297.0040


Epoch 23/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.16it/s]


Epoch [23/30], Train Loss: 1089.7493, Val Loss: 1501.4615


Epoch 24/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.24it/s]


Epoch [24/30], Train Loss: 1092.3286, Val Loss: 1329.8469


Epoch 25/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.25it/s]


Epoch [25/30], Train Loss: 1088.5070, Val Loss: 1174.1363


Epoch 26/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.22it/s]


Epoch [26/30], Train Loss: 1093.0200, Val Loss: 1167.5946


Epoch 27/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.24it/s]


Epoch [27/30], Train Loss: 5462.8186, Val Loss: 1455.3476


Epoch 28/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.26it/s]


Epoch [28/30], Train Loss: 1092.7055, Val Loss: 1450.7161


Epoch 29/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.24it/s]


Epoch [29/30], Train Loss: 1102.3087, Val Loss: 2189.9543


Epoch 30/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.27it/s]


Epoch [30/30], Train Loss: 1090.2494, Val Loss: 1739.4881


In [16]:
train_one(4)


Training model for X50_mean
Original dataset size: 43363
Filtered dataset size: 42450
Removed 913 samples (2.11%)


Epoch 1/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.24it/s]


Epoch [1/30], Train Loss: 2.0695, Val Loss: 0.6831


Epoch 2/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.26it/s]


Epoch [2/30], Train Loss: 0.4070, Val Loss: 0.6664


Epoch 3/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.26it/s]


Epoch [3/30], Train Loss: 0.3660, Val Loss: 2.8355


Epoch 4/30: 100%|██████████| 1062/1062 [01:33<00:00, 11.31it/s]


Epoch [4/30], Train Loss: 0.3549, Val Loss: 0.9314


Epoch 5/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.21it/s]


Epoch [5/30], Train Loss: 0.3763, Val Loss: 6.2696


Epoch 6/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.24it/s]


Epoch [6/30], Train Loss: 0.3781, Val Loss: 0.4026


Epoch 7/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.26it/s]


Epoch [7/30], Train Loss: 0.3370, Val Loss: 0.2803


Epoch 8/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.28it/s]


Epoch [8/30], Train Loss: 0.3419, Val Loss: 1.6884


Epoch 9/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.29it/s]


Epoch [9/30], Train Loss: 0.3336, Val Loss: 0.4155


Epoch 10/30: 100%|██████████| 1062/1062 [01:33<00:00, 11.32it/s]


Epoch [10/30], Train Loss: 0.3375, Val Loss: 0.5005


Epoch 11/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.29it/s]


Epoch [11/30], Train Loss: 0.3329, Val Loss: 0.3213


Epoch 12/30: 100%|██████████| 1062/1062 [01:33<00:00, 11.31it/s]


Epoch [12/30], Train Loss: 0.2653, Val Loss: 0.2781


Epoch 13/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.28it/s]


Epoch [13/30], Train Loss: 0.2650, Val Loss: 0.2667


Epoch 14/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.23it/s]


Epoch [14/30], Train Loss: 0.2644, Val Loss: 0.2660


Epoch 15/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.27it/s]


Epoch [15/30], Train Loss: 0.2650, Val Loss: 0.2651


Epoch 16/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.25it/s]


Epoch [16/30], Train Loss: 0.2641, Val Loss: 0.2679


Epoch 17/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.28it/s]


Epoch [17/30], Train Loss: 0.2646, Val Loss: 0.2783


Epoch 18/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.11it/s]


Epoch [18/30], Train Loss: 0.2649, Val Loss: 0.3259


Epoch 19/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.13it/s]


Epoch [19/30], Train Loss: 0.2636, Val Loss: 0.3577


Epoch 20/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.16it/s]


Epoch [20/30], Train Loss: 0.2588, Val Loss: 0.2644


Epoch 21/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.14it/s]


Epoch [21/30], Train Loss: 0.2581, Val Loss: 0.2641


Epoch 22/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.08it/s]


Epoch [22/30], Train Loss: 0.2580, Val Loss: 0.2640


Epoch 23/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.12it/s]


Epoch [23/30], Train Loss: 0.2581, Val Loss: 0.2645


Epoch 24/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.10it/s]


Epoch [24/30], Train Loss: 0.2578, Val Loss: 0.2643


Epoch 25/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.15it/s]


Epoch [25/30], Train Loss: 0.2577, Val Loss: 0.2646


Epoch 26/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.22it/s]


Epoch [26/30], Train Loss: 0.2575, Val Loss: 0.2638


Epoch 27/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.24it/s]


Epoch [27/30], Train Loss: 0.2575, Val Loss: 0.2642


Epoch 28/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.27it/s]


Epoch [28/30], Train Loss: 0.2579, Val Loss: 0.2637


Epoch 29/30: 100%|██████████| 1062/1062 [01:33<00:00, 11.32it/s]


Epoch [29/30], Train Loss: 0.2575, Val Loss: 0.2673


Epoch 30/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.29it/s]


Epoch [30/30], Train Loss: 0.2574, Val Loss: 0.2654


In [17]:
train_one(5)


Training model for X3112_mean
Original dataset size: 43363
Filtered dataset size: 42443
Removed 920 samples (2.12%)


Epoch 1/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.16it/s]


Epoch [1/30], Train Loss: 26400946910.3579, Val Loss: 155054777.1429


Epoch 2/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.13it/s]


Epoch [2/30], Train Loss: 5313978.9495, Val Loss: 2123945747.2481


Epoch 3/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.17it/s]


Epoch [3/30], Train Loss: 5450553.1718, Val Loss: 31786470677.1729


Epoch 4/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.22it/s]


Epoch [4/30], Train Loss: 7106870.8263, Val Loss: 1074666788.0902


Epoch 5/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.17it/s]


Epoch [5/30], Train Loss: 4879951.3340, Val Loss: 153784150.7368


Epoch 6/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.21it/s]


Epoch [6/30], Train Loss: 4611177.8200, Val Loss: 282858097.8647


Epoch 7/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.15it/s]


Epoch [7/30], Train Loss: 4549494.7467, Val Loss: 180715950.5564


Epoch 8/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.19it/s]


Epoch [8/30], Train Loss: 4496441.8087, Val Loss: 28031980.4962


Epoch 9/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.16it/s]


Epoch [9/30], Train Loss: 4909796.3141, Val Loss: 59349163.0827


Epoch 10/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.21it/s]


Epoch [10/30], Train Loss: 4607734.1782, Val Loss: 15736347.7970


Epoch 11/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.24it/s]


Epoch [11/30], Train Loss: 4806260.3870, Val Loss: 3352745.0456


Epoch 12/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.15it/s]


Epoch [12/30], Train Loss: 4346813.7127, Val Loss: 3101666654.3158


Epoch 13/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.16it/s]


Epoch [13/30], Train Loss: 4537256.9435, Val Loss: 78253982.5714


Epoch 14/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.13it/s]


Epoch [14/30], Train Loss: 4458989.1476, Val Loss: 15281727.9098


Epoch 15/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.03it/s]


Epoch [15/30], Train Loss: 4803345.8658, Val Loss: 20595381.6165


Epoch 16/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.05it/s]


Epoch [16/30], Train Loss: 2670108.0786, Val Loss: 3413551.2467


Epoch 17/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.04it/s]


Epoch [17/30], Train Loss: 2657986.1730, Val Loss: 2889064.7556


Epoch 18/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.03it/s]


Epoch [18/30], Train Loss: 2664405.0811, Val Loss: 11318501.9154


Epoch 19/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.04it/s]


Epoch [19/30], Train Loss: 2637724.9470, Val Loss: 42422957.6842


Epoch 20/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.01it/s]


Epoch [20/30], Train Loss: 2615343.3164, Val Loss: 2931370.0168


Epoch 21/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.00it/s]


Epoch [21/30], Train Loss: 2656393.9209, Val Loss: 2517223.1921


Epoch 22/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.10it/s]


Epoch [22/30], Train Loss: 2663648.4692, Val Loss: 3961539.3280


Epoch 23/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.19it/s]


Epoch [23/30], Train Loss: 2657476.8835, Val Loss: 3815456.9145


Epoch 24/30: 100%|██████████| 1062/1062 [01:36<00:00, 11.06it/s]


Epoch [24/30], Train Loss: 2649782.0817, Val Loss: 16118182.4549


Epoch 25/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.17it/s]


Epoch [25/30], Train Loss: 2622859.3628, Val Loss: 4437726.7655


Epoch 26/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.10it/s]


Epoch [26/30], Train Loss: 2485412.4137, Val Loss: 2412544.1221


Epoch 27/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.14it/s]


Epoch [27/30], Train Loss: 2486396.4806, Val Loss: 2618313.1826


Epoch 28/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.20it/s]


Epoch [28/30], Train Loss: 2484338.5453, Val Loss: 3823958.6128


Epoch 29/30: 100%|██████████| 1062/1062 [01:35<00:00, 11.13it/s]


Epoch [29/30], Train Loss: 2482299.4992, Val Loss: 2566305.7784


Epoch 30/30: 100%|██████████| 1062/1062 [01:34<00:00, 11.23it/s]


Epoch [30/30], Train Loss: 2479777.0363, Val Loss: 2603236.3610


In [18]:
def predict(model, test_loader, device):
    model.eval()
    predictions = []
    with torch.no_grad():
        for images, ancillary_data, _ in tqdm(test_loader):
            images, ancillary_data = images.to(device), ancillary_data.to(device)
            outputs = model(images, ancillary_data)
            predictions.extend(outputs.cpu().numpy())
    return predictions

# Load and preprocess test data
test_df = pd.read_csv('./content/test.csv')
label_columns = ['X4_mean', 'X11_mean', 'X18_mean', 'X26_mean', 'X50_mean', 'X3112_mean']
features = test_df.drop(columns=['id'])

# Use the scaler fitted on training data
scaler = StandardScaler()
scaler.fit(pd.read_csv('./content/train.csv').drop(columns=['id'] + label_columns))
scaled_features = scaler.transform(features)

# Create test dataset and dataloader
test_dataset = PlantDataset(csv_file='./content/test.csv', img_dir='./content/test_images', transform=transform, is_test=True)
test_dataset.features = pd.DataFrame(scaled_features, columns=features.columns)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Make predictions for each trait
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
predictions = {}
traits = ['X4_mean', 'X11_mean', 'X18_mean', 'X26_mean', 'X50_mean', 'X3112_mean']
# traits = ['X11_mean']

for trait in traits:
    model = PlantTraitPredictor().to(device)
    model.load_state_dict(torch.load(f'./content/best_model_{trait}.pth'))
    predictions[trait] = predict(model, test_loader, device)

# Create submission file
submission_df = pd.DataFrame({'id': test_df['id']})
for trait in traits:
    submission_df[trait.replace('_mean', '')] = predictions[trait]

submission_df.to_csv('./content/submission.csv', index=False)
print("Submission file created: ./content/submission.csv")

print(submission_df.describe())

100%|██████████| 200/200 [00:13<00:00, 15.07it/s]
100%|██████████| 200/200 [00:13<00:00, 14.88it/s]
100%|██████████| 200/200 [00:12<00:00, 16.03it/s]
100%|██████████| 200/200 [00:13<00:00, 15.14it/s]
100%|██████████| 200/200 [00:13<00:00, 15.36it/s]
100%|██████████| 200/200 [00:13<00:00, 15.28it/s]


Submission file created: ./content/submission.csv
                 id
count  6.391000e+03
mean   1.706187e+08
std    4.190301e+07
min    1.215100e+05
25%    1.648002e+08
50%    1.913817e+08
75%    1.950978e+08
max    1.967498e+08
