In [1]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; the dataset paths used by the
# later cells all live underneath this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


1. Import Libraries

In [2]:
import json
import os

import numpy as np
import pandas as pd
from google.colab import files
from PIL import Image
from sklearn.ensemble import IsolationForest
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

2. Load Data and Preprocessing

In [4]:
# Single source of truth for the dataset location on the mounted Drive, so
# moving the data means editing exactly one line.
DATA_ROOT = '/content/drive/MyDrive/soil or not'

# Image folders for the two splits.
train_dir = os.path.join(DATA_ROOT, 'train')
test_dir = os.path.join(DATA_ROOT, 'test')

# Index files. The later cells read column 0 of each frame as the image file
# name and column 1 of train_df as the label.
train_df = pd.read_csv(os.path.join(DATA_ROOT, 'train_labels.csv'))
test_df = pd.read_csv(os.path.join(DATA_ROOT, 'test_ids.csv'))


def load_and_resize_image(filepath, target_size=(64, 64)):
    """Load an image file as a resized RGB NumPy array.

    Args:
        filepath: Path of the image file to open.
        target_size: Size tuple forwarded to ``Image.resize``
            (defaults to ``(64, 64)``).

    Returns:
        A NumPy array built from the resized RGB image, or ``None`` when the
        image cannot be loaded. Failures are reported with ``print`` rather
        than raised, so callers must check for ``None``.
    """
    try:
        # The context manager releases the underlying file handle as soon as
        # the pixel data has been converted, instead of leaving it to the
        # garbage collector. convert() returns an independent in-memory image,
        # so `resized` stays valid after the source file is closed.
        with Image.open(filepath) as source:
            resized = source.convert('RGB').resize(target_size)
        return np.array(resized)
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort the
        # whole dataset load.
        print(f"Error loading image {filepath}: {e}")
        return None

In [6]:
def _load_flattened_images(frame, image_dir, label_column=None):
    """Load every image listed in ``frame`` as a flattened pixel vector.

    Args:
        frame: DataFrame whose first column holds the image file names.
        image_dir: Directory the file names are relative to.
        label_column: Optional name of the column holding labels. When given,
            the label of every successfully loaded image is collected too.

    Returns:
        ``(features, image_ids, labels)`` as three parallel lists. Images that
        ``load_and_resize_image`` could not read are skipped in all three, so
        the lists always stay aligned. ``labels`` is empty when
        ``label_column`` is ``None``.
    """
    image_ids = frame[frame.columns[0]]
    if label_column is None:
        labels = [None] * len(frame)
    else:
        labels = frame[label_column]

    features, kept_ids, kept_labels = [], [], []
    for image_id, label in zip(image_ids, labels):
        pixels = load_and_resize_image(os.path.join(image_dir, image_id))
        if pixels is None:
            # Unreadable image: already reported by the loader, drop the row.
            continue
        features.append(pixels.flatten())
        kept_ids.append(image_id)
        if label_column is not None:
            kept_labels.append(label)
    return features, kept_ids, kept_labels


# --- Training set: column 0 = file name, column 1 = label -------------------
train_features, _, train_label_list = _load_flattened_images(
    train_df, train_dir, label_column=train_df.columns[1]
)
if not train_features:
    # Fail here with a readable message instead of a confusing shape error
    # inside train_test_split.
    raise ValueError(f"No training images could be loaded from {train_dir}")

train_images = np.array(train_features)
train_labels = np.array(train_label_list)

# --- Test set ----------------------------------------------------------------
# Kept from the original notebook: a placeholder label column is added when the
# ids file has a single column. NOTE(review): no cell visible in this notebook
# reads this column — confirm whether it can be removed.
if test_df.shape[1] == 1:
    test_df['label'] = 'unknown'

# test_image_ids only contains the ids whose image loaded, so it stays aligned
# row-for-row with test_images (and therefore with the predictions).
test_features, test_image_ids, _ = _load_flattened_images(test_df, test_dir)
test_images = np.array(test_features)

# Hold out 20% of the training data for validation; the fixed seed keeps the
# split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    train_images, train_labels, test_size=0.2, random_state=42
)

3. Build Model

In [8]:
# Expected share of outliers in the training data, used by IsolationForest to
# place its decision threshold.
# NOTE(review): 0.18 is carried over unchanged — confirm how it was chosen.
CONTAMINATION = 0.18
# IsolationForest builds randomized trees; without a fixed seed the predictions
# (and every metric derived from them) change on each run. Same seed as the
# train/validation split.
SEED = 42

model = IsolationForest(contamination=CONTAMINATION, random_state=SEED)
# Unsupervised fit: only the pixel features are used, y_train is not.
model.fit(X_train)

# predict() returns +1 for inliers and -1 for outliers; these raw values are
# translated to class labels in later cells.
predictions = model.predict(test_images)
val_predictions = model.predict(X_val)

In [9]:
# IsolationForest.predict returns +1 (inlier) / -1 (outlier). Translate the
# outlier code to 0 before scoring so the predictions are compared with y_val
# in the same label space that the submission uses.
# NOTE(review): assumes train_labels.csv encodes the classes as 1 / 0 — confirm.
val_pred_labels = np.where(val_predictions == -1, 0, val_predictions)

# Binary metrics (scikit-learn's default positive class is label 1).
f1 = f1_score(y_val, val_pred_labels)
print(f"Validation F1 Score: {f1}")

precision = precision_score(y_val, val_pred_labels)
print(f"Validation Precision: {precision}")

recall = recall_score(y_val, val_pred_labels)
print(f"Validation Recall: {recall}")

Validation F1 Score: 0.8888888888888888
Validation Precision: 1.0
Validation Recall: 0.8


In [11]:
# Score in the same label space as the submission: IsolationForest's outlier
# code -1 becomes class 0, inliers stay 1.
# NOTE(review): assumes y_val uses 1 / 0 labels — confirm against train_labels.csv.
val_pred_labels = np.where(val_predictions == -1, 0, val_predictions)

# Support-weighted averages over the classes. Note that this rebinds the
# f1 / precision / recall names that held the binary scores.
f1 = f1_score(y_val, val_pred_labels, average='weighted')

precision = precision_score(y_val, val_pred_labels, average='weighted')

recall = recall_score(y_val, val_pred_labels, average='weighted')

# Cast to plain floats so the JSON encoder never has to deal with NumPy scalars.
metrics_data = {
    "IF_validation_weighted_f1_score": float(f1),
    "IF_validation_weighted_precision": float(precision),
    "IF_validation_weighted_recall": float(recall)
}

filename = 'metrics_IF.json'

with open(filename, 'w') as f:
    json.dump(metrics_data, f, indent=4)

# Colab-only helper: triggers a browser download of the file just written.
files.download(filename)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [10]:
# Convert the raw test predictions into class labels: every -1 becomes 0 and
# all other values are kept as they are. Work on a copy so `predictions` itself
# is left untouched.
mapped_predictions = predictions.copy()
outlier_mask = mapped_predictions == -1
mapped_predictions[outlier_mask] = 0
mapped_predictions

array([1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,
       0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,
       0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,

4. Save Submission

In [None]:
# Assemble the submission table: one row per test image that was loaded, paired
# with its mapped prediction.
submission_columns = {
    'image_id': test_image_ids,
    'soil_type': mapped_predictions,
}
submission_df = pd.DataFrame(submission_columns)

# Write the CSV without the DataFrame index so only the two columns are saved.
submission_path = '/content/irfoneclass.csv'
submission_df.to_csv(submission_path, index=False)
