# Model notebook for trail prediction


# Trails to Avoid when weather is bad:

Sweet Connie Trail #77
Cottonwood Creek Trail #27
Old Pen Trail #15A
Table Rock Trail #15
Polecat Loop #81
Big Springs Loop #113
Ridgecrest #20
Bucktail Trail #20A
Central Ridge Spur #22A (north)
Central Ridge Spur #22A (south)
Red Cliffs #39

# Alternative trails when  muddy:

Rocky Canyon Road
Mountain Cove Road
Upper 8th Street Road
The Boise Greenbelt
Boise City Parks
Wilson Creek Trails (south of Nampa)

# All-Weather Trails - Good under almost every weather condition:

Shoshone-Bannock Tribes Trail #19A
Rim Trail
Harrison Hollow Trail #57
Oregon Trail
Upper Portion of Basalt Trail
Red Fox Trail #36
Gold Finch #35
Owl's Roost #37
Hulls Pond Loop #34
The Grove #38
Red-Winged Blackbird #35A
Mountain Cove #22C
Eagle Ridge Loop #25A

# Good bets (due to sandier soils) when conditions are marginal:

Dry Creek Trail #78
Lower Hulls Gulch Trail #29
Camel's Back Trails #40
Toll Road #27A
Freestone Ridge #5

In [16]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import joblib

In [17]:
# df = pd.read_csv("historical_weather.csv")
#used to read the data set
df = pd.read_csv('closure_data.csv')


In [18]:
df.head()

Unnamed: 0,Date,Max Temperature (F),Min Temperature (F),Precipitation (mm),Avoid,Alternatives,All Weather,Good Bets
0,11/15/2023,67,41,0.0,0,0,0,0
1,11/14/2023,61,37,0.0,0,0,0,0
2,11/13/2023,63,39,0.0,0,0,0,0
3,11/12/2023,66,40,0.0,0,0,0,0
4,11/11/2023,59,41,0.0,0,0,0,0


In [19]:
df.describe()

Unnamed: 0,Max Temperature (F),Min Temperature (F),Precipitation (mm),Avoid,Alternatives,All Weather,Good Bets
count,745.0,745.0,745.0,745.0,745.0,745.0,745.0
mean,64.774497,42.319463,0.034913,0.514094,0.0,0.068456,0.195973
std,21.972004,16.212466,0.110646,0.500137,0.0,0.252697,0.397215
min,18.0,-3.0,0.0,0.0,0.0,0.0,0.0
25%,47.0,29.0,0.0,0.0,0.0,0.0,0.0
50%,63.0,41.0,0.0,1.0,0.0,0.0,0.0
75%,84.0,56.0,0.01,1.0,0.0,0.0,0.0
max,106.0,78.0,1.46,1.0,0.0,1.0,1.0


In [20]:
# Convert 'Date' column to datetime and extract features
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day

In [21]:
df['Alternatives'] = df['Alternatives'].replace('Caution', 1).astype(int)

In [22]:
# # function to fill in trail conditions for the new column.  values are examples
# def fill_trail_condition(row):
#     if row['Max Temperature (F)'] > 70 and row['Precipitation (mm)'] < 0.035581:
#         return 'Good'
#     elif 50 <= row['Max Temperature (F)'] <= 70 or 0.035581 <= row['Precipitation (mm)'] < 0.1:
#         return 'Moderate'
#     else:
#         return 'Poor'
#
# # Apply the function to each row
# df['Trail_Condition'] = df.apply(fill_trail_condition, axis=1)

# 2. Model Training

In [23]:
# X = df.drop('Trail_Condition', axis=1)
# y = df['Trail_Condition']
X = df[['Year', 'Month', 'Day', 'Max Temperature (F)', 'Min Temperature (F)', 'Precipitation (mm)']]
y = df[['Avoid', 'Alternatives', 'All Weather', 'Good Bets']]

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.2, random_state=42)

In [25]:
scaler = StandardScaler()
# categorical_transformer = OneHotEncoder(handle_unknown='ignore')

In [26]:
# # preprocessor for numerical and categorical
# preprocessor = ColumnTransformer(
#     transformers=[
#         ('num', numerical_transformer, numerical_cols),
#         ('cat', categorical_transformer, categorical_cols)
#     ])

In [27]:
# pipeline = Pipeline(steps=[('preprocessor', preprocessor),
#                            ('classifier', RandomForestClassifier())])
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('rf', RandomForestClassifier(random_state=42,n_estimators=100,min_samples_split=3,bootstrap=True,max_features='sqrt',min_samples_leaf=1))
])

# Training the pipeline
pipeline.fit(X_train, y_train)

Pipeline(steps=[('scaler', StandardScaler()),
                ('rf',
                 RandomForestClassifier(max_features='sqrt',
                                        min_samples_split=3,
                                        random_state=42))])

In [28]:
data = {
    'Year': '2023',
    'Month' : '01',
    'Day' : '01',
    'Max Temperature (F)': 50,
    'Min Temperature (F)': 40,
    'Precipitation (mm)': 1.0
}
df2 = pd.DataFrame([data])
test_predict = pipeline.predict(df2)
test_predict

array([[1, 0, 0, 1]])

# 3. Evaluation & Deployment

In [29]:
# Predicting and evaluating the model
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy}")

Model accuracy: 0.6879194630872483


In [30]:
joblib.dump(pipeline, 'pipelineFINAL.joblib')

['pipelineFINAL.joblib']