In [None]:
#load dependencies
# Initial imports.
import pandas as pd
import numpy as np
from path import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [None]:
# Read the cleaned trail data CSV into the DataFrame that feeds the model
file_path = "Resources/trail_data_clean.csv"
trail_data_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows
trail_data_df.head(5)

In [None]:
# One-hot encode the categorical route_type column so it becomes numeric
# indicator columns; the remaining columns are carried over as they are.
trail_data_encoded = pd.get_dummies(trail_data_df, columns=["route_type"])

# Preview only the first rows rather than echoing the entire frame
trail_data_encoded.head()

In [None]:
# Drop the columns that are not needed for ML modeling
columns_to_drop = [
    "name",
    "area_name",
    "city_name",
    "state_name",
    "country_name",
    "features",
    "activities",
    "update_name",
    "update_state",
    "trail_url",
    "Lat",
    "Lng",
]

# Rebind the name to the result instead of using inplace=True, so the frame
# object shown by the earlier cell is not silently mutated after the fact.
trail_data_encoded = trail_data_encoded.drop(columns=columns_to_drop)
trail_data_encoded.head()

In [None]:
# Target vector: the avg_rating column (provisional choice of target)
y = trail_data_encoded.loc[:, "avg_rating"]

In [None]:
# Feature matrix: every column of the encoded frame except the avg_rating target
X = trail_data_encoded.drop(columns=["avg_rating"])

In [None]:
# Partition features and target into training and testing subsets;
# the fixed random_state keeps the split repeatable between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=78,
)

In [None]:
# Standardize the feature values

# Build the StandardScaler and fit it on the training split only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training split, then to the testing split
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Configure the random forest classifier: 128 trees, seeded for repeatable results
trail_rf_model = RandomForestClassifier(
    random_state=78,
    n_estimators=128,
)

In [None]:
# Train the forest on the scaled training features and their targets
trail_rf_model = trail_rf_model.fit(X=X_train_scaled, y=y_train)

In [None]:
# Predict a label for every row of the scaled testing features
predictions = trail_rf_model.predict(X=X_test_scaled)

In [None]:
# Model evaluation
# Accuracy: compare the test-set predictions with the true targets
acc_score = accuracy_score(y_true=y_test, y_pred=predictions)

In [None]:
# Calculate the confusion matrix.
# The label order is pinned to the classes the fitted model learned, so the
# row/column names built below always line up with the matrix regardless of
# how many classes the target contains.
class_labels = trail_rf_model.classes_
cm = confusion_matrix(y_test, predictions, labels=class_labels)

# Wrap the matrix in a DataFrame: rows are actual classes, columns are predicted classes
cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {label}" for label in class_labels],
    columns=[f"Predicted {label}" for label in class_labels],
)

In [None]:
# Display model results: confusion matrix table, accuracy, then the
# per-class classification report
print("Confusion Matrix")
display(cm_df)  # rich (HTML) rendering of the DataFrame in the notebook
print(f"Accuracy Score:{acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

In [None]:
# Rank the features: pair each importance score with its column name and
# list the pairs from the largest score to the smallest
sorted(
    (
        (score, column)
        for score, column in zip(trail_rf_model.feature_importances_, X.columns)
    ),
    reverse=True,
)