In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
import numpy as np

# --- 1. LOAD DATA ---
csv_path = "C:\\SEM7\\Uttarkashi_Flood_Project\\03_Training_Data\\uttarkashi_training_data_6F.csv"
df = pd.read_csv(csv_path)

# Drop any potential leftover non-feature columns (like FID, Shape, etc.)
# We keep only the columns explicitly used in the model.
feature_cols = ['NEAR_DIST', 'DEM_FINAL', 'Slope', 'Aspect', 'Curvature']
df = df[['Flood_poi'] + feature_cols]
df.rename(columns={'Flood_poi': 'Flood_Label_Y'}, inplace=True)

# --- 2. DEFINE FEATURES (X) AND TARGET (Y) ---
Y = df['Flood_Label_Y']
X = df[feature_cols]

print("--- Data Summary ---")
print(f"Total points loaded: {len(df)}")
print(f"Features used: {list(X.columns)}")

# --- 3. SPLIT DATA ---
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

# --- 4. TRAIN AND EVALUATE RANDOM FOREST (RF) ---
print("\n--- Random Forest (RF) Model Results ---")
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, Y_train)

# Predict metrics
rf_proba = rf_model.predict_proba(X_test)[:, 1]
rf_accuracy = accuracy_score(Y_test, rf_model.predict(X_test))
rf_auc = roc_auc_score(Y_test, rf_proba)

print(f"RF Accuracy: {rf_accuracy:.4f}")
print(f"RF ROC AUC Score: {rf_auc:.4f}")

# --- 5. FEATURE IMPORTANCE ---
print("\n--- RF Feature Importance (Impact on Flood Risk) ---")
feature_importance = pd.Series(rf_model.feature_importances_, index=X.columns).sort_values(ascending=False)
print(feature_importance)

--- Data Summary ---
Total points loaded: 452
Features used: ['NEAR_DIST', 'DEM_FINAL', 'Slope', 'Aspect', 'Curvature']

--- Random Forest (RF) Model Results ---
RF Accuracy: 0.9632
RF ROC AUC Score: 0.9804

--- RF Feature Importance (Impact on Flood Risk) ---
NEAR_DIST    0.660724
DEM_FINAL    0.126143
Aspect       0.078425
Slope        0.072855
Curvature    0.061853
dtype: float64
