<a href="https://colab.research.google.com/github/Vraddhi/soilClassification_annam/blob/main/challenge1/notebooks/Soil_Classification_Part1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive

# Mount Google Drive into the Colab runtime; the CSV and image paths used
# below live under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
pip install numpy pandas matplotlib scikit-learn opencv-python torch torchvision torchaudio


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import pandas as pd

# Training labels: one row per image with its soil type name.
df = pd.read_csv('/content/drive/MyDrive/soil_classification-2025/train_labels.csv')

# Integer class index assigned to each soil type name.
label_mapping = {
    'Alluvial soil': 0,
    'Black Soil': 1,
    'Clay soil': 2,
    'Red soil': 3
}

df['label_enc'] = df['soil_type'].map(label_mapping)

# Series.map() yields NaN for any name missing from label_mapping (typo,
# different capitalisation, new class). Fail here with a clear message
# instead of later inside the dataset's int() conversion.
unmapped = df.loc[df['label_enc'].isna(), 'soil_type'].unique()
if len(unmapped) > 0:
    raise ValueError(
        f"soil_type values missing from label_mapping: {sorted(map(str, unmapped))}"
    )

print(df.head())


           image_id      soil_type  label_enc
0  img_ed005410.jpg  Alluvial soil          0
1  img_0c5ecd2a.jpg  Alluvial soil          0
2  img_ed713bb5.jpg  Alluvial soil          0
3  img_12c58874.jpg  Alluvial soil          0
4  img_eff357af.jpg  Alluvial soil          0


In [None]:
import pandas as pd
import os
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score
from tqdm import tqdm


In [None]:
class SoilDataset(Dataset):
    """Labelled soil-image dataset backed by a DataFrame.

    Args:
        dataframe: Table with an ``image_id`` column (file name inside
            ``img_dir``) and a ``label_enc`` column (integer class index).
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, dataframe, img_dir, transform=None):
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Fetch the row once instead of re-materialising it per column.
        row = self.dataframe.iloc[idx]
        img_path = os.path.join(self.img_dir, row['image_id'])

        # The context manager releases the file handle deterministically;
        # the converted image is kept and used after the block exits.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Explicit None check so a callable that happens to be falsy
        # (e.g. defines __len__ returning 0) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, int(row['label_enc'])


In [None]:
# Per-channel mean/std passed to Normalize; these are the statistics
# torchvision documents for its ImageNet-pretrained models.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: fixed 224x224 size, tensor
# conversion, then channel-wise normalisation.
_preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
transform = transforms.Compose(_preprocess_steps)


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation. Stratifying on the
# encoded label keeps class proportions equal in both parts; the fixed
# random_state makes the split repeatable.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label_enc'],
    random_state=42,
)


In [None]:
# Both splits come from the labelled training images, so they share one folder.
train_img_dir = '/content/drive/MyDrive/soil_classification-2025/train'

train_dataset = SoilDataset(train_df, train_img_dir, transform=transform)
val_dataset = SoilDataset(val_df, train_img_dir, transform=transform)

# Shuffle only the training batches; validation order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=32)


In [None]:
# ResNet-18 initialised with ImageNet weights. `pretrained=True` is
# deprecated in torchvision; the `weights=` enum selects the same
# IMAGENET1K_V1 checkpoint explicitly.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Replace the classification head with one output per soil class
# (4 classes, taken from label_mapping so head and labels cannot drift apart).
model.fc = nn.Linear(model.fc.in_features, len(label_mapping))

# Use the GPU when the runtime provides one, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 77.5MB/s]


In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Fine-tune the whole network for 10 passes over the training split.
for epoch in range(10):
    model.train()
    running_loss = 0.0
    samples_seen = 0

    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch *mean*; weight it by the batch size so
        # a smaller final batch does not count as much as a full one.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # max(..., 1) avoids a ZeroDivisionError if the loader yields no batches.
    print(f"Epoch {epoch+1}, Loss: {running_loss/max(samples_seen, 1)}")


100%|██████████| 31/31 [07:58<00:00, 15.43s/it]


Epoch 1, Loss: 0.44581927202882304


100%|██████████| 31/31 [04:53<00:00,  9.48s/it]


Epoch 2, Loss: 0.07427927352968723


100%|██████████| 31/31 [04:49<00:00,  9.32s/it]


Epoch 3, Loss: 0.031042446998218374


100%|██████████| 31/31 [04:58<00:00,  9.63s/it]


Epoch 4, Loss: 0.017571238380286


100%|██████████| 31/31 [04:50<00:00,  9.38s/it]


Epoch 5, Loss: 0.01982156197810846


100%|██████████| 31/31 [04:55<00:00,  9.55s/it]


Epoch 6, Loss: 0.03947663111161561


100%|██████████| 31/31 [04:51<00:00,  9.40s/it]


Epoch 7, Loss: 0.027441395814680764


100%|██████████| 31/31 [04:57<00:00,  9.61s/it]


Epoch 8, Loss: 0.009992734551610004


100%|██████████| 31/31 [04:50<00:00,  9.36s/it]


Epoch 9, Loss: 0.006851986756608371


100%|██████████| 31/31 [04:52<00:00,  9.43s/it]

Epoch 10, Loss: 0.004039430324243562





In [None]:
# TEST
class TestSoilDataset(torch.utils.data.Dataset):
    """Unlabelled soil-image dataset used for inference.

    Args:
        dataframe: Table with an ``image_id`` column holding file names
            inside ``image_dir``.
        image_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, dataframe, image_dir, transform=None):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image_name, image)`` for the row at position ``idx``."""
        image_name = self.dataframe.iloc[idx]['image_id']
        image_path = os.path.join(self.image_dir, image_name)

        # The context manager releases the file handle deterministically;
        # the converted image is kept and used after the block exits.
        with Image.open(image_path) as img:
            image = img.convert('RGB')

        # Explicit None check so a callable that happens to be falsy
        # (e.g. defines __len__ returning 0) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image_name, image


In [None]:
# Image ids to predict; the inference dataset reads the `image_id` column.
test_ids_csv = '/content/drive/MyDrive/soil_classification-2025/test_ids.csv'
test_df = pd.read_csv(test_ids_csv)


In [None]:
# Test images get the same preprocessing as training; no shuffling, so
# batches follow the row order of test_df.
test_img_dir = '/content/drive/MyDrive/soil_classification-2025/test'
test_dataset = TestSoilDataset(test_df, test_img_dir, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)


In [None]:
# Class index -> soil type name. Derived from label_mapping so the two
# tables cannot drift apart if a class is added or renamed.
label_reverse_mapping = {index: name for name, index in label_mapping.items()}


In [None]:
# Inference on the test set: collect (image_id, predicted soil type) pairs.
model.eval()
predictions = []

with torch.no_grad():  # no gradients needed for prediction
    for batch_names, batch_images in test_loader:
        logits = model(batch_images.to(device))
        # Index of the largest logit per row is the predicted class.
        class_ids = logits.argmax(dim=1).cpu().numpy()

        predictions.extend(
            (image_name, label_reverse_mapping[class_id])
            for image_name, class_id in zip(batch_names, class_ids)
        )


In [None]:
# Write the predictions as a two-column CSV without the DataFrame index.
submission_columns = ['image_id', 'soil_type']
submission_df = pd.DataFrame(predictions, columns=submission_columns)
submission_df.to_csv('submission.csv', index=False)


In [None]:
# Score the trained model on the held-out validation split.
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():  # evaluation only, no gradients
    for batch_inputs, batch_labels in val_loader:
        logits = model(batch_inputs.to(device))
        all_preds.extend(logits.argmax(dim=1).cpu().numpy())
        all_labels.extend(batch_labels.numpy())

# average=None returns one F1 value per class; the minimum is the
# worst-performing class.
f1s = f1_score(all_labels, all_preds, average=None)
print(f"Per-class F1 scores: {f1s}")
print(f"Min F1 score: {min(f1s)}")


Per-class F1 scores: [0.97142857 0.9787234  0.95       1.        ]
Min F1 score: 0.95
