Imported all necessary libraries

In [15]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import timm
import glob

Checking if CUDA exists

In [16]:
print(f'torch version = {torch.__version__}')
#checking if CUDA is available
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device = {device}')

torch version = 2.5.0+cu118
device = cuda


setting seed for reproducability

In [17]:
seed=42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

defined image and batch size

In [18]:
IMG_SIZE = 224
BATCH_SIZE = 32

defined test data path and transforms

In [19]:
DATASET_PATH = '.'  # Main dataset directory
TEST_PATH = os.path.join(DATASET_PATH, 'test_images')

test_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

defined a class to help with traing and validation set creation

In [20]:
class PaddyLeafDatasetWithMetadata(Dataset):
    def __init__(self, root_dir,transforms):
        self.root_dir = root_dir
        self.transform = transforms

        self.samples = []
        for img_path in glob.glob(os.path.join(self.root_dir, '*.jpg')):
            self.samples.append((img_path))


    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path = self.samples[idx]
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image ,img_path

prepared my test dataset and dataloader

In [21]:
# Load test dataset (without metadata)
test_dataset = PaddyLeafDatasetWithMetadata(
    TEST_PATH,
    transforms=test_transforms,
)

test_loader=DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, pin_memory=True)

In [22]:
print(len(test_dataset))

3469


defined my model

In [23]:
class DeiTWithMetadata(nn.Module):
    def __init__(self, num_classes, num_varieties):
        super().__init__()
        
        # Load Pretrained DeiT Model
        self.vision_model = timm.create_model("deit_base_patch16_224", pretrained=True, num_classes=0)  # No final classifier
        deit_feature_dim = 768  # DeiT output size
        
        # Metadata Processing FCNN
        self.variety_fc = nn.Linear(num_varieties, 64)  # Process variety metadata
        self.age_fc = nn.Linear(1, 16)  # Process age metadata
        
        # Combined feature dimension
        combined_dim = deit_feature_dim + 64 + 16  # 768 (DeiT) + 64 (variety) + 16 (age)
        
        # Final Classifier (Fusion of Image + Metadata)
        self.fc = nn.Sequential(
            nn.Linear(combined_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, image, variety, age):
        # Extract features from DeiT
        img_features = self.vision_model(image)  # (Batch, 768)
        
        # Process metadata
        variety_features = torch.relu(self.variety_fc(variety))  # (Batch, 64)
        age_features = torch.relu(self.age_fc(age))  # (Batch, 16)
        
        # Concatenate image & metadata features
        combined_features = torch.cat((img_features, variety_features, age_features), dim=1)  # (Batch, 768+64+16)
        
        # Final classification
        output = self.fc(combined_features)
        return output
    


class DeiTWithoutMetadata(nn.Module):
    def __init__(self, checkpoint_path,num_classes,):
        super().__init__()
        
        # Load Pretrained DeiT Model
        pretrained_model = DeiTWithMetadata(num_classes=num_classes, num_varieties=10)
        pretrained_model.load_state_dict(torch.load(checkpoint_path, map_location=device,weights_only=True))

        self.vision_model = pretrained_model.vision_model
        deit_feature_dim = 768  # DeiT output size
        
        # Final Classifier (Fusion of Image + Metadata)
        self.fc = nn.Sequential(
            nn.Linear(deit_feature_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, image):
        # Extract features from DeiT
        img_features = self.vision_model(image)  # (Batch, 768)
        
        
        # Final classification
        output = self.fc(img_features)
        return output

Created model

In [24]:
model=DeiTWithoutMetadata('paddy_disease_transformer_with_metadata.pth',num_classes=10)
model = model.to(device)

Loaded weights for model 

In [25]:
checkpoint_path = "paddy_disease_transformer_without_metadata.pth"
model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),weights_only=True))

<All keys matched successfully>

defined a function which predicts my labels and returns a dataframe with columns - image_id and labels

In [26]:
def prediction_model(model, dataloader):
    model.eval()
    all_preds = []
    image_names = []  # To store image names

    with torch.no_grad():
        for inputs, paths in dataloader:
          inputs = inputs.to(device)

          outputs = model(inputs)
          _, preds = torch.max(outputs, 1)

          all_preds.extend(preds.cpu().numpy())
          image_names.extend(paths)  # Collect image names

    # Store predictions in a DataFrame
    df = pd.DataFrame({
        "image_id": [os.path.basename(p) for p in image_names],  # Extract only filename
        "label": all_preds
    })
    return df

did the predictions

In [27]:
predictions_df = prediction_model(model, test_loader)

created a dictionary to store mappings from class id to label name

In [28]:
class_to_idx={'bacterial_leaf_blight': 0, 'bacterial_leaf_streak': 1, 'bacterial_panicle_blight': 2, 'blast': 3, 'brown_spot': 4, 'dead_heart': 5, 'downy_mildew': 6, 'hispa': 7, 'normal': 8, 'tungro': 9}
idx_to_class = {v: k for k, v in class_to_idx.items()}

print(idx_to_class)

{0: 'bacterial_leaf_blight', 1: 'bacterial_leaf_streak', 2: 'bacterial_panicle_blight', 3: 'blast', 4: 'brown_spot', 5: 'dead_heart', 6: 'downy_mildew', 7: 'hispa', 8: 'normal', 9: 'tungro'}


stored the predicteions in a dataframe with columns - image_id and label, sorted according to image_id

In [None]:
for i in range(len(predictions_df)):
  predictions_df['label'][i]=idx_to_class[predictions_df['label'][i]]

df_sorted = predictions_df.sort_values(by="image_id")

# Save the sorted DataFrame to a new CSV file
df_sorted.to_csv("Transformer_Classification.csv", index=False)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  predictions_df['label'][i]=idx_to_class[predictions_df['label'][i]]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-co