In [None]:
#Main Model Computation

In [None]:
import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from collections import Counter
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score

from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import torch
import torch.nn as nn
import torch.optim as optim

from PIL import Image
from PIL import UnidentifiedImageError, Image
from pathlib import Path
from sklearn.metrics import confusion_matrix
import seaborn as sns



from collections import defaultdict

In [None]:
# Hyperparameters shared by the cells below
LR = 1e-4          # learning rate passed to the Adam optimizer
EPOCHS = 8         # number of passes made by the training loop
BATCH_SIZE = 32    # samples per DataLoader batch
IMG_SIZE = 224     # images are resized to IMG_SIZE x IMG_SIZE

In [None]:
# ==== LOAD CSV ==== #
# NOTE(review): `train_csv` and `TRAIN_DIR` are not defined in the cells shown
# here; presumably they come from an earlier setup cell — confirm, otherwise a
# fresh-kernel "Run All" fails on this cell with a NameError.
df = pd.read_csv(train_csv)

# Path of every image, built from the training directory and the image id.
df['filename'] = [os.path.join(TRAIN_DIR, image_id) for image_id in df['image_id']]

# Class name <-> integer index, numbered in order of first appearance in the CSV.
soil_classes = df['soil_type'].unique()
label2idx = {name: position for position, name in enumerate(soil_classes)}
idx2label = {position: name for name, position in label2idx.items()}
df['label_idx'] = df['soil_type'].map(label2idx)

In [None]:
# ==== WEIGHTED SAMPLER ==== #
# Inverse-frequency weighting: each row's weight is 1 / (number of rows in its
# class), so rare classes are drawn as often as common ones.
class_counts = df['label_idx'].value_counts().to_dict()
inverse_freq = {cls: 1.0 / count for cls, count in class_counts.items()}
weights = df['label_idx'].map(lambda cls: inverse_freq[cls])
sampler = WeightedRandomSampler(weights=weights.values, num_samples=len(weights))

# ==== DATASET CLASS ==== #
class SoilDataset(Dataset):
    """Map-style dataset that reads the images listed in a DataFrame.

    Args:
        df: Frame with a 'filename' column (path of each image) and,
            optionally, a 'label_idx' column (class index of each image).
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, df, transform=None):
        # Re-number rows 0..n-1 so positional lookups in __getitem__ are
        # independent of whatever index the incoming frame carried.
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        The label is -1 when the frame has no 'label_idx' column.
        """
        row = self.df.iloc[idx]
        # The context manager closes the underlying file as soon as the pixels
        # have been decoded, instead of leaving the handle to the garbage
        # collector; `convert` returns an independent in-memory copy.
        with Image.open(row['filename']) as source:
            image = source.convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = row.get('label_idx', -1)
        return image, label


In [None]:
# ==== TRANSFORMS ==== #
# Use torchvision's InterpolationMode enum: passing the PIL integer constant
# (Image.BICUBIC) only works through torchvision's deprecated int-to-enum path.
_BICUBIC = transforms.InterpolationMode.BICUBIC

# Per-channel mean/std of 0.5 maps ToTensor's [0, 1] output to [-1, 1].
# NOTE(review): the pretrained ResNet weights were presumably fitted with
# ImageNet mean/std — confirm 0.5/0.5 is intended. Any change here must be
# mirrored wherever the saved model is used for inference.
_NORM_MEAN = [0.5] * 3
_NORM_STD = [0.5] * 3

# Training pipeline: resize, light colour/flip augmentation, tensor, normalize.
transform_train = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE), interpolation=_BICUBIC),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Deterministic pipeline (no augmentation) for evaluation and inference.
transform_test = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE), interpolation=_BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# ==== DATALOADERS ==== #
# Training data: augmented images, drawn through the class-balancing sampler
# (a sampler replaces shuffling, so no `shuffle=` argument is given).
dataset = SoilDataset(df=df, transform=transform_train)
dataloader = DataLoader(dataset, sampler=sampler, batch_size=BATCH_SIZE)

# ==== MODEL ==== #
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ResNet-18 with pretrained weights; the final fully connected layer is
# replaced by one with an output per soil class.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True` — confirm against the installed version.
model = models.resnet18(pretrained=True)
num_classes = len(label2idx)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)


In [None]:

# ==== TRAINING ==== #

train_losses = []       # mean per-sample loss of each epoch
train_accuracies = []   # accuracy over the batches seen in each epoch

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

model.train()
for epoch in range(EPOCHS):
    total_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(dataloader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        n_batch = labels.size(0)
        # `criterion` returns the mean over the batch; weight it by the batch
        # size so a smaller final batch is not over-counted in the epoch mean.
        total_loss += loss.item() * n_batch
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += n_batch

    if total == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise RuntimeError("dataloader produced no samples; nothing to train on")

    avg_loss = total_loss / total
    accuracy = correct / total
    train_losses.append(avg_loss)
    train_accuracies.append(accuracy)

    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")



In [None]:
# ==== TRAINING CURVES ==== #
# Take the x-axis from the recorded history instead of EPOCHS, so the plot
# still works if training stopped early or EPOCHS was edited after the run.
epochs_run = range(1, len(train_losses) + 1)

fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

ax_loss.plot(epochs_run, train_losses, marker='o')
ax_loss.set_title("Training Loss per Epoch")
ax_loss.set_xlabel("Epoch")
ax_loss.set_ylabel("Loss")

ax_acc.plot(epochs_run, train_accuracies, marker='o', color='orange')
ax_acc.set_title("Training Accuracy per Epoch")
ax_acc.set_xlabel("Epoch")
ax_acc.set_ylabel("Accuracy")

fig.tight_layout()
plt.show()


In [None]:
# ==== EVALUATION ==== #
# Score the model through the deterministic test-time transform. Iterating over
# `dataset` here would apply the random training augmentations (ColorJitter,
# horizontal flip) during evaluation, making the metrics noisy and different
# on every run.
# NOTE(review): this still evaluates on the training images — no held-out split
# is made in the visible cells — so the scores below are optimistic.
eval_dataset = SoilDataset(df, transform=transform_test)
eval_loader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False)

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in eval_loader:
        outputs = model(images.to(device))
        _, preds = outputs.max(1)
        all_preds.extend(preds.cpu().tolist())
        # Labels are only collected, never used on the device, so they are
        # not moved there.
        all_labels.extend(labels.tolist())


In [None]:
# Per-class precision / recall / F1, with class names listed in index order.
class_names = [idx2label[i] for i in range(len(idx2label))]
report = classification_report(
    all_labels,
    all_preds,
    target_names=class_names,
    digits=4,
)
print("Classification Report:\n", report)

# Macro F1: unweighted mean of the per-class F1 scores.
macro_f1 = f1_score(all_labels, all_preds, average='macro')
print(f"Macro F1 Score: {macro_f1:.4f}")


In [None]:
# Confusion matrix heatmap: rows are true classes, columns are predictions.
tick_labels = [idx2label[i] for i in range(len(idx2label))]
cm = confusion_matrix(all_labels, all_preds)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=tick_labels,
    yticklabels=tick_labels,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()


In [None]:
from sklearn.metrics import balanced_accuracy_score

# Balanced accuracy of the predictions collected by the evaluation cell.
bal_acc = balanced_accuracy_score(y_true=all_labels, y_pred=all_preds)
print(f"Balanced Accuracy: {bal_acc:.4f}")


In [None]:
# ==== SAVE MODEL ==== #
# Weights only: the state_dict holds the parameters, not the model class.
weights_path = 'soil_challenge_1_resnet18.pth'
torch.save(model.state_dict(), weights_path)

In [None]:
# Saving the complete model object (architecture + weights) in one file.
# NOTE(review): this pickles the whole module, so loading it presumably needs
# the same torchvision model code available — confirm consumers rely on this
# file rather than on the state_dict saved separately.
full_model_path = "best_model_challenge_1_full.pt"
torch.save(model, full_model_path)