In [1]:
#load dependencies
# Initial imports.
import pandas as pd
import numpy as np
from path import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
#Load the data set into a modeling DataFrame
file_path = "Resources/trail_data_clean.csv"
trail_data_df = pd.read_csv(file_path)
trail_data_df.head()

Unnamed: 0,name,area_name,city_name,state_name,country_name,length,elevation_gain,difficulty_rating,route_type,avg_rating,...,ice-climbing,snowboarding,hiking,fly-fishing,nature-trips,update_name,update_state,trail_url,Lat,Lng
0,Harding Ice Field Trail,Kenai Fjords National Park,Seward,Alaska,United States,15610.598,1161.8976,5,out and back,5.0,...,0,0,1,0,1,Harding-Ice-Field-Trail,Alaska,https://www.alltrails.com/trail/us/Alaska/Hard...,60.18852,-149.63156
1,Mount Healy Overlook Trail,Denali National Park,Denali National Park,Alaska,United States,6920.162,507.7968,3,out and back,4.5,...,0,0,1,0,1,Mount-Healy-Overlook-Trail,Alaska,https://www.alltrails.com/trail/us/Alaska/Moun...,63.73049,-148.91968
2,Exit Glacier Trail,Kenai Fjords National Park,Seward,Alaska,United States,2896.812,81.9912,1,out and back,4.5,...,0,0,1,0,0,Exit-Glacier-Trail,Alaska,https://www.alltrails.com/trail/us/Alaska/Exit...,60.18879,-149.631
3,Horseshoe Lake Trail,Denali National Park,Denali National Park,Alaska,United States,3379.614,119.7864,1,loop,4.5,...,0,0,1,0,1,Horseshoe-Lake-Trail,Alaska,https://www.alltrails.com/trail/us/Alaska/Hors...,63.73661,-148.915
4,Triple Lakes Trail,Denali National Park,Denali National Park,Alaska,United States,29772.79,1124.712,5,out and back,4.5,...,0,0,1,0,1,Triple-Lakes-Trail,Alaska,https://www.alltrails.com/trail/us/Alaska/Trip...,63.73319,-148.89682


In [3]:
#Convert route_type into numerical values
trail_data_encoded = pd.get_dummies(trail_data_df, columns=["route_type"])
trail_data_encoded

Unnamed: 0,name,area_name,city_name,state_name,country_name,length,elevation_gain,difficulty_rating,avg_rating,num_reviews,...,fly-fishing,nature-trips,update_name,update_state,trail_url,Lat,Lng,route_type_loop,route_type_out and back,route_type_point to point
0,Harding Ice Field Trail,Kenai Fjords National Park,Seward,Alaska,United States,15610.598,1161.8976,5,5.0,423,...,0,1,Harding-Ice-Field-Trail,Alaska,https://www.alltrails.com/trail/us/Alaska/Hard...,60.18852,-149.63156,0,1,0
1,Mount Healy Overlook Trail,Denali National Park,Denali National Park,Alaska,United States,6920.162,507.7968,3,4.5,260,...,0,1,Mount-Healy-Overlook-Trail,Alaska,https://www.alltrails.com/trail/us/Alaska/Moun...,63.73049,-148.91968,0,1,0
2,Exit Glacier Trail,Kenai Fjords National Park,Seward,Alaska,United States,2896.812,81.9912,1,4.5,224,...,0,0,Exit-Glacier-Trail,Alaska,https://www.alltrails.com/trail/us/Alaska/Exit...,60.18879,-149.63100,0,1,0
3,Horseshoe Lake Trail,Denali National Park,Denali National Park,Alaska,United States,3379.614,119.7864,1,4.5,237,...,0,1,Horseshoe-Lake-Trail,Alaska,https://www.alltrails.com/trail/us/Alaska/Hors...,63.73661,-148.91500,1,0,0
4,Triple Lakes Trail,Denali National Park,Denali National Park,Alaska,United States,29772.790,1124.7120,5,4.5,110,...,0,1,Triple-Lakes-Trail,Alaska,https://www.alltrails.com/trail/us/Alaska/Trip...,63.73319,-148.89682,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3308,Silversword Loop Via Halemau'u Trail,Haleakala National Park,Kula,Maui,Hawaii,20116.750,1105.8144,5,4.5,43,...,0,1,Silversword-Loop-Via-Halemau'u-Trail,Maui,https://www.alltrails.com/trail/us/Maui/Silver...,20.75275,-156.22884,1,0,0
3309,Keonehe'ehe'e Trail,Haleakala National Park,Kula,Maui,Hawaii,28324.384,1171.9560,5,5.0,22,...,0,0,Keonehe'ehe'e-Trail,Maui,https://www.alltrails.com/trail/us/Maui/Keoneh...,20.71448,-156.25072,0,1,0
3310,Red Hill Overlook Summit Trail,Haleakala National Park,Kula,Maui,Hawaii,321.868,3.9624,1,4.5,31,...,0,0,Red-Hill-Overlook-Summit-Trail,Maui,https://www.alltrails.com/trail/us/Maui/Red-Hi...,20.71007,-156.25357,0,1,0
3311,Kaupo Trail,Haleakala National Park,Kula,Maui,Hawaii,19312.080,1670.9136,5,4.0,8,...,0,0,Kaupo-Trail,Maui,https://www.alltrails.com/trail/us/Maui/Kaupo-...,20.64981,-156.13700,0,1,0


In [4]:
#change from float to rounded integer - RF cannot use decimals
trail_data_encoded["avg_rating"]=round(trail_data_encoded["avg_rating"]).astype(int)
trail_data_encoded["length"]=round(trail_data_encoded["length"]).astype(int)
trail_data_encoded["elevation_gain"]=round(trail_data_encoded["elevation_gain"]).astype(int)

In [5]:
#Drop unnessary columns for ML modeling
trail_data_encoded.drop(columns=["name",
                                 "area_name",
                                "city_name",
                                "state_name",
                                "country_name",
                                "features",
                                "activities",
                                "update_name",
                                "update_state",
                                "trail_url",
                                "Lat",
                                "Lng"], inplace=True)
trail_data_encoded.head()

Unnamed: 0,length,elevation_gain,difficulty_rating,avg_rating,num_reviews,river,beach,strollers,forest,city-walk,...,cross-country-skiing,snowshoeing,ice-climbing,snowboarding,hiking,fly-fishing,nature-trips,route_type_loop,route_type_out and back,route_type_point to point
0,15611,1162,5,5,423,1,0,0,1,0,...,0,0,0,0,1,0,1,0,1,0
1,6920,508,3,4,260,0,0,0,1,0,...,0,0,0,0,1,0,1,0,1,0
2,2897,82,1,4,224,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
3,3380,120,1,4,237,0,0,0,1,0,...,0,0,0,0,1,0,1,1,0,0
4,29773,1125,5,4,110,0,0,0,0,0,...,0,0,0,0,1,0,1,0,1,0


In [6]:
trail_data_encoded.dtypes

length                       int32
elevation_gain               int32
difficulty_rating            int64
avg_rating                   int32
num_reviews                  int64
river                        int64
beach                        int64
strollers                    int64
forest                       int64
city-walk                    int64
partially-paved              int64
kids                         int64
rails-trails                 int64
dogs                         int64
waterfall                    int64
dogs-leash                   int64
ada                          int64
paved                        int64
hot-springs                  int64
views                        int64
dogs-no                      int64
cave                         int64
historic-site                int64
wildlife                     int64
wild-flowers                 int64
lake                         int64
mountain-biking              int64
surfing                      int64
off-road-driving    

In [None]:
# #Scale and normalize the data
# data_scaler = StandardScaler()

# #Train the scaler adn transform the data
# trail_data_scaled = data_scaler.fit_transform(trail_data_encoded)

# trail_data_scaled

In [7]:
#Define the target set (AVG_RATING FOR NOW)
y = trail_data_encoded["avg_rating"]

In [8]:
#Define the features set
X = trail_data_encoded.drop(columns="avg_rating")

In [9]:
#Split into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 78)

In [10]:
#Scale and Normalize the data

#Create an instance of StandardScaler
scaler = StandardScaler()

#Fit the StandardScale with the training data
X_scaler = scaler.fit(X_train)

#Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [11]:
#Create a random forest classifier
trail_rf_model = RandomForestClassifier(n_estimators = 128, random_state=78)

In [12]:
#Fit the model
trail_rf_model = trail_rf_model.fit(X_train_scaled, y_train)

In [13]:
#Make predictions using the testing data
predictions = trail_rf_model.predict(X_test_scaled)

In [14]:
#Evaluate the model
#Calculate the accuracy score
acc_score = accuracy_score(y_test, predictions)
acc_score

0.8178528347406514

In [None]:
#Calculate the confusion matrix
# cm = confusion_matrix(y_test, predictions)

# #Create DataFrame from confusion matrix
# cm_df = pd.DataFrame(cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

In [None]:
# #Display model results
# print("Confusion Matrix")
# display(cm_df)
# print(f"Accuracy Score:{acc_score}"")
# print("Classification Report")
# print(classificaion_report(y_test,predictions))

In [15]:
#Rank the Importance of Features
sorted(zip(trail_rf_model.feature_importances_,X.columns),reverse=True)

[(0.2445539012703648, 'num_reviews'),
 (0.14202825313575496, 'elevation_gain'),
 (0.13550606023075001, 'length'),
 (0.041587928983998566, 'difficulty_rating'),
 (0.023990968534794124, 'forest'),
 (0.022231321246464986, 'wild-flowers'),
 (0.022107785688580427, 'trail-running'),
 (0.02106715779985434, 'river'),
 (0.020537030613641743, 'wildlife'),
 (0.01998179624865921, 'lake'),
 (0.01988121991020442, 'birding'),
 (0.01978580338727111, 'nature-trips'),
 (0.019390195257037633, 'dogs-no'),
 (0.018909259657676685, 'backpacking'),
 (0.017328532876169086, 'walking'),
 (0.0170358871132583, 'route_type_out and back'),
 (0.016624800703424394, 'camping'),
 (0.015863642319969784, 'kids'),
 (0.015474518658264136, 'route_type_loop'),
 (0.014799775403351983, 'waterfall'),
 (0.013596989788821973, 'route_type_point to point'),
 (0.011740353733257557, 'views'),
 (0.010253957070056444, 'horseback-riding'),
 (0.009242967452799558, 'dogs-leash'),
 (0.008942098382550705, 'fishing'),
 (0.007765887303826105, 