## HikeSafe Advisor Training Notebook
This notebook trains and evaluates a decision tree classifier on simulated trail risk data and exports the model for the Streamlit app.


In [None]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier


### Load data


In [None]:
# Read the sample trail dataset (path is relative to this notebook's folder)
# and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='../data/trails_sample.csv')
df.head(n=5)


### Feature / label split


In [None]:
# Columns fed to the classifier as inputs; the order here is the column order
# of X and therefore the feature order the fitted model will see.
feature_cols = [
    'distance_km', 'elevation_gain_m', 'max_altitude_m',
    'min_temperature_c',
    'exposed_ridge', 'slippery_surface',
    'estimated_duration_h',
]

# X: feature matrix restricted to the columns above; y: the target labels.
X = df.loc[:, feature_cols]
y = df.loc[:, 'difficulty_label']

X.head(n=5)


### Train / test split


In [None]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the label
# proportions similar in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42, test_size=0.2
)

# Row counts of the two splits, shown as (train, test).
X_train.shape[0], X_test.shape[0]


### Train decision tree


In [None]:
# Build a depth-limited decision tree (at most 4 levels, seeded for
# repeatability) and fit it on the training split. fit() returns the estimator
# itself, so the chained call leaves the fitted model in `clf`.
clf = DecisionTreeClassifier(random_state=42, max_depth=4).fit(X_train, y_train)

# Echo the fitted estimator as the cell output.
clf


### Evaluation


In [None]:
# Score the model on the held-out split.
y_pred = clf.predict(X_test)

# Per-class precision / recall / F1 summary.
print(classification_report(y_test, y_pred))

# Confusion matrix with an explicit label order (the model's own class order)
# and labelled axes, so each row/column can be tied to a class instead of
# being read off an anonymous array. Rows are true labels, columns are
# predicted labels.
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)
pd.DataFrame(
    cm,
    index=pd.Index(clf.classes_, name='actual'),
    columns=pd.Index(clf.classes_, name='predicted'),
)


### Feature importance


In [None]:
# Importance score per feature from the fitted tree, plus the feature indices
# ordered from most to least important.
importances = clf.feature_importances_
indices = np.flip(np.argsort(importances))

# Bar chart of the importances in descending order, one bar per feature,
# labelled with the corresponding column name.
fig, ax = plt.subplots(figsize=(8, 4))
ax.bar(range(len(importances)), importances[indices])
ax.set_xticks(range(len(importances)))
ax.set_xticklabels([feature_cols[i] for i in indices], rotation=45)
ax.set_ylabel('Importance')
ax.set_title('Feature importance for trail risk model')
fig.tight_layout()
plt.show()


### Save model


In [None]:
# Export the fitted tree for the Streamlit app.
# joblib.dump does not create missing parent folders, so make sure the target
# directory exists first; otherwise a fresh checkout without ../models fails
# here with FileNotFoundError after all the training work is done.
model_path = '../models/risk_model.pkl'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(clf, model_path)
