In [5]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

# 1. Load training data from CSV (x, y, value columns)
train_samples = pd.read_csv('data.csv')  # Ensure the CSV has columns: x, y, value

# 2. Prepare features (X) and target (y)
X = train_samples[['x', 'y']]  # Coordinates (features)
y = train_samples['value']     # Target variable (value)

# 3. Encode the categorical target labels (value column)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# 4. Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.3, random_state=42)

# 5. Train the model (Random Forest Classifier)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# 6. Evaluate the model on the test set
y_pred = clf.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred))

# 7. Predict on the test data and decode the labels back to their original values
predicted_labels = label_encoder.inverse_transform(y_pred)

# 8. Show predictions for the test set
print("\nPredictions for the test set:")
for idx, label in enumerate(predicted_labels):
    print(f"Point {idx + 1}: (Lat: {X_test.iloc[idx, 1]}, Lon: {X_test.iloc[idx, 0]}) - Predicted: {label}")


Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.63      0.38      0.47       130
           2       0.15      0.05      0.07        66
           3       0.40      0.31      0.35      1006
           4       0.00      0.00      0.00        25
           5       0.72      0.83      0.77      2945
           6       0.17      0.07      0.10        58
           7       0.15      0.08      0.10       144

    accuracy                           0.65      4378
   macro avg       0.28      0.21      0.23      4378
weighted avg       0.61      0.65      0.62      4378


Predictions for the test set:
Point 1: (Lat: 54.70988729502913, Lon: -6.21848240881492) - Predicted: Land Use
Point 2: (Lat: 54.21654497292472, Lon: -6.461627897426398) - Predicted: Land Use
Point 3: (Lat: 54.43581030405246, Lon: -5.47553451276012) - Predicted: Land Use
Point 4: (Lat: 54.64245866030541, Lon: -7.095590852492