In [2]:
from collections import defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from surprise import accuracy, Dataset, Reader, SVD
from surprise.accuracy import rmse

## Functions

In [114]:
def get_inneruid(ruid, train):
    """Return the inner user id that ``train`` assigns to the raw user id ``ruid``.

    Thin wrapper: forwards ``ruid`` to ``train.to_inner_uid`` and returns the
    result unchanged.
    """
    return train.to_inner_uid(ruid)

def get_inneriid(riid, train):
    """Return the inner item id that ``train`` assigns to the raw item id ``riid``.

    Thin wrapper: forwards ``riid`` to ``train.to_inner_iid`` and returns the
    result unchanged.
    """
    # Use the `train` parameter; the previous body referenced the undefined
    # name `traint`, which raised NameError on every call.
    inner_iid = train.to_inner_iid(riid)
    return inner_iid

def get_rawuid(iuid, train):
    """Return the raw user id that ``train`` maps the inner user id ``iuid`` to.

    Thin wrapper: forwards ``iuid`` to ``train.to_raw_uid`` and returns the
    result unchanged.
    """
    return train.to_raw_uid(iuid)

def get_rawiid(iiid, train):
    """Return the raw item id that ``train`` maps the inner item id ``iiid`` to.

    Thin wrapper: forwards ``iiid`` to ``train.to_raw_iid`` and returns the
    result unchanged.
    """
    return train.to_raw_iid(iiid)

def all_users_ratings(user_n, dataf):
    """Return the ``trail_id`` values of every row in ``dataf`` whose ``user`` equals ``user_n``.

    The result is the underlying array of the selected ``trail_id`` column
    (empty when the user has no rows).
    """
    belongs_to_user = dataf.user == user_n
    rated_ids = dataf.loc[belongs_to_user, 'trail_id']
    return rated_ids.values

def get_n_trail_recs(rating_df, user_name, trail_df, n, model=None):
    """Return the top-``n`` predicted trails that ``user_name`` has not rated yet.

    Parameters
    ----------
    rating_df : DataFrame with ``user`` and ``trail_id`` columns (one row per rating).
    user_name : raw user id to generate recommendations for.
    trail_df : DataFrame of trail details with a ``trail_id`` column.
    n : maximum number of recommendations to return.
    model : optional fitted predictor exposing ``predict(uid, iid)`` whose result
        has ``iid`` and ``est`` attributes. When omitted, the notebook-level
        ``algo`` is used, so existing calls keep working.

    Returns
    -------
    A new DataFrame holding the matching rows of ``trail_df`` plus a
    ``predicted_rating`` column, sorted by that column in descending order.
    ``trail_df`` itself is never modified.
    """
    # Fall back to the globally fitted model so existing call sites are unaffected.
    if model is None:
        model = algo

    # A set gives constant-time membership tests instead of scanning an array per trail.
    already_rated = set(all_users_ratings(user_name, rating_df))
    candidates = [t_id for t_id in rating_df.trail_id.unique() if t_id not in already_rated]

    scored = []
    for trail in candidates:
        pred = model.predict(user_name, trail)
        scored.append((pred.iid, pred.est))

    scored.sort(key=lambda pair: pair[1], reverse=True)
    top_scores = dict(scored[:n])

    # Copy before adding a column: assigning into a slice of trail_df triggers
    # SettingWithCopyWarning and may or may not write through to the caller's frame.
    recs = trail_df.loc[trail_df.trail_id.isin(list(top_scores))].copy()
    # map() always creates the column, so an empty candidate list yields an
    # empty frame instead of a KeyError in sort_values.
    recs['predicted_rating'] = recs.trail_id.map(top_scores)

    return recs.sort_values(by=['predicted_rating'], ascending=False)

def get_n_trail_location_recs(rating_df, user_name, state_name, trail_df, n, model=None):
    """Return the top-``n`` predicted trails in ``state_name`` that ``user_name`` has not rated.

    Parameters
    ----------
    rating_df : DataFrame with ``user`` and ``trail_id`` columns (one row per rating).
    user_name : raw user id to generate recommendations for.
    state_name : value matched against ``trail_df.state`` to restrict candidates.
    trail_df : DataFrame of trail details with ``trail_id`` and ``state`` columns.
    n : maximum number of recommendations to return.
    model : optional fitted predictor exposing ``predict(uid, iid)`` whose result
        has ``iid`` and ``est`` attributes. When omitted, the notebook-level
        ``algo`` is used, so existing calls keep working.

    Returns
    -------
    A new DataFrame holding the matching rows of ``trail_df`` plus a
    ``predicted_rating`` column, sorted by that column in descending order.
    ``trail_df`` itself is never modified.
    """
    # Fall back to the globally fitted model so existing call sites are unaffected.
    if model is None:
        model = algo

    # Sets give constant-time membership tests instead of scanning arrays per trail.
    already_rated = set(all_users_ratings(user_name, rating_df))
    in_state = set(trail_df.loc[trail_df.state == state_name, 'trail_id'].values)
    candidates = [
        t_id for t_id in rating_df.trail_id.unique()
        if t_id not in already_rated and t_id in in_state
    ]

    scored = []
    for trail in candidates:
        pred = model.predict(user_name, trail)
        scored.append((pred.iid, pred.est))

    scored.sort(key=lambda pair: pair[1], reverse=True)
    top_scores = dict(scored[:n])

    # Copy before adding a column: assigning into a slice of trail_df triggers
    # SettingWithCopyWarning and may or may not write through to the caller's frame.
    recs = trail_df.loc[trail_df.trail_id.isin(list(top_scores))].copy()
    # map() always creates the column, so a state with no unrated trails yields
    # an empty frame instead of a KeyError in sort_values.
    recs['predicted_rating'] = recs.trail_id.map(top_scores)

    return recs.sort_values(by=['predicted_rating'], ascending=False)

## Data Loading

In [10]:
# Ratings table; the helper functions above read its `user` and `trail_id` columns.
df = pd.read_csv('trail_ratings_colab.csv')

In [117]:
# Trail details table; the recommendation helpers filter it by `trail_id` and `state`.
trails = pd.read_csv('trail_stats_cleaned.csv')

## Test User Selection

In [11]:
# Pick the user with the most ratings so the collaborative-filtering results can be
# compared and contrasted with the content-based ones.
# The same user is also used to showcase the production results and functions.
df.user.value_counts().head(5)

wisemtnbkr         1286
schillingsworth    1101
lro0001             979
ericfoltz           875
socalstokie         681
Name: user, dtype: int64

In [123]:
# Count, per state, the trails that wisemtnbkr has rated.
users_trails = all_users_ratings('wisemtnbkr', df)
trails.loc[trails.trail_id.isin(users_trails), 'state'].value_counts()

Utah        1197
Colorado      63
Nevada        10
Idaho         10
Wyoming        5
Montana        1
Name: state, dtype: int64

Our top user is probably based in or near Utah, as they have rated far more trails there than anywhere else in the U.S. Since the collaborative filtering method does not take trail details into account, it will be interesting to see whether it still recommends more Utah trails to wisemtnbkr.

## Surprise Data Prep and Model Fit

In [12]:
# Load the pre-prepped ratings file into a Surprise Dataset.
# Ratings are declared to lie on a 1-5 scale, fields are comma-separated, and
# skip_lines=1 skips the first line of the file.
# NOTE(review): presumably that first line is the CSV header — confirm.
# NOTE(review): no line_format is passed, so the Reader's default column order
# applies — confirm the CSV columns are in that order.
reader = Reader(
    rating_scale=(1,5),
    sep=',',
    skip_lines=1
)
data = Dataset.load_from_file('trail_ratings_colab.csv', reader)

In [13]:
# Builds a trainset (not a testset) from the full dataset, with no hold-out split.
trainset = data.build_full_trainset()

In [28]:
# These hyperparameters were tuned in the separate colab (collaborative filtering)
# tuning and testing notebook. random_state is set to a fixed value.
# `algo` is the model the recommendation helpers call via algo.predict.
algo = SVD(n_factors=1, n_epochs=50, lr_all=0.007, reg_all=0.05, random_state=45)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1a178f0f28>

## Global Recommendations

In [119]:
# Returns the top-n trail recommendations across all states, drawn from the trails the user has not rated.
get_n_trail_recs(df, 'wisemtnbkr', trails, 10)

Unnamed: 0,trail_id,trail_name,city,state,riding_area,trail_type,total_rides,difficulty_value,difficulty_symbol,dogs_allowed,...,altitude_change,altitude_start,altitude_end,altitude_max,altitude_min,grade,grade_max,grade_min,description,predicted_rating
49730,trail_13954,Monarch Crest,Salida,Colorado,Salida,Singletrack,581.0,Intermediate,Blue Square,Yes,...,-551.0,11378.0,10827.0,11973.0,10827.0,-1.021,-29.652,25.867,Monarch Crest,4.615215
52000,trail_10643,Jazz Chrome Molly,Vernal,Utah,Red Fleet Reservoir,Singletrack,64.0,Intermediate,Blue Square,Yes,...,54.0,5760.0,5815.0,5760.0,5760.0,0.251,-83.741,48.159,"Fun and flowing singletrack. Mostly smooth, bu...",4.545969
46064,trail_7523,San Juan,Coto De Caza,California,Cleveland National Forest,Singletrack,779.0,Intermediate,Blue Square,Yes,...,-2159.0,2999.0,840.0,3009.0,840.0,-6.363,-36.78,37.42,Nice long intermediate single track. There are...,4.470076
35325,trail_7534,Third Divide,Downieville,California,Downieville,Singletrack,2131.0,Intermediate,Blue Square,Yes,...,-1121.0,4630.0,3509.0,4656.0,3509.0,-10.232,-52.763,38.232,Third Divide makes up part of the classic Down...,4.469892
32661,trail_9634,South Fork Little Deer Creek,Sundance,Utah,American Fork Canyon,Singletrack,1235.0,Intermediate,Blue Square,Yes,...,-619.0,8089.0,7471.0,8089.0,7461.0,-5.67,-90.54,42.269,Maybe the best stretch of singletrack in AF Ca...,4.469843
12769,trail_129879,Flow State Lower,Snoqualmie,Washington,Raging River State Forest,Singletrack,2320.0,Easy,Green Circe,Yes,...,-255.0,1366.0,1111.0,1366.0,1111.0,-8.154,-28.69,11.951,Flow State Lower,4.454577
15880,trail_9080,Hidden Canyon,Hurricane,Utah,Hurricane,Singletrack,669.0,Very Difficult,Black Diamond,Yes,...,-62.0,5329.0,5267.0,5337.0,5229.0,-0.649,-20.647,70.6,This is extreme because it is difficult to rid...,4.447909
33625,trail_162771,Stinger,Burnet,Texas,Spider Mountain Bike Park,Singletrack,0.0,Extremely Difficult,Dbl Black Diamond,Yes,...,-324.0,1426.0,1101.0,1418.0,1094.0,-21.185,-43.71,9.867,This trail is crazy steep Like a few seconds ...,4.43513
1677,trail_12798,Aspen Alley,Breckenridge,Colorado,Breckenridge,Singletrack,567.0,Intermediate,Blue Square,Yes,...,523.0,9843.0,10366.0,10381.0,9842.0,7.719,-18.444,34.473,Fast and mostly smooth berms that throw you th...,4.43095
12703,trail_76566,Flat Rock Ranch Upper Loop,Comfort,Texas,Flat Rock Ranch,Singletrack,50.0,Intermediate,Blue Square,Yes,...,29.0,1549.0,1577.0,1894.0,1546.0,0.05,-31.064,24.125,No Description,4.421216


Three of the user's top 10 recommended trails are in his most-reviewed state (Utah). Going further, 5 of the top 10 recommendations come from the user's two most-reviewed states (Utah and Colorado). From the global recommendation perspective, the SVD model did a good job of finding trails in the areas where the rider commonly rides.

## Location Specific Recommendations

In [118]:
# Returns the top-n trails in the given state that the user has not rated.
get_n_trail_location_recs(df, 'wisemtnbkr', 'Utah', trails, 10)

Unnamed: 0,trail_id,trail_name,city,state,riding_area,trail_type,total_rides,difficulty_value,difficulty_symbol,dogs_allowed,...,altitude_change,altitude_start,altitude_end,altitude_max,altitude_min,grade,grade_max,grade_min,description,predicted_rating
52000,trail_10643,Jazz Chrome Molly,Vernal,Utah,Red Fleet Reservoir,Singletrack,64.0,Intermediate,Blue Square,Yes,...,54.0,5760.0,5815.0,5760.0,5760.0,0.251,-83.741,48.159,"Fun and flowing singletrack. Mostly smooth, bu...",4.545969
32661,trail_9634,South Fork Little Deer Creek,Sundance,Utah,American Fork Canyon,Singletrack,1235.0,Intermediate,Blue Square,Yes,...,-619.0,8089.0,7471.0,8089.0,7461.0,-5.67,-90.54,42.269,Maybe the best stretch of singletrack in AF Ca...,4.469843
15880,trail_9080,Hidden Canyon,Hurricane,Utah,Hurricane,Singletrack,669.0,Very Difficult,Black Diamond,Yes,...,-62.0,5329.0,5267.0,5337.0,5229.0,-0.649,-20.647,70.6,This is extreme because it is difficult to rid...,4.447909
19631,trail_9201,Little Creek Slick,Hurricane,Utah,Little Creek Mountain,Singletrack,173.0,Very Difficult,Black Diamond,Yes,...,11.0,5670.0,5681.0,5690.0,5549.0,0.087,-16.274,24.097,Much like the rest of Little Creek but with a ...,4.398181
43762,trail_9383,Tibble Fork,Sundance,Utah,American Fork Canyon,Singletrack,281.0,Very Difficult,Black Diamond,Yes,...,-1403.0,8087.0,6684.0,8092.0,6684.0,-10.462,-37.927,16.005,The bottom section has seen some work in recen...,4.373867
46802,trail_9543,Ridge 157 (North),Sundance,Utah,American Fork Canyon,Singletrack,48.0,Very Difficult,Black Diamond,Yes,...,-68.0,9437.0,9369.0,9867.0,8771.0,-0.196,-220.859,255.34,"Really fun sections of trail, though often get...",4.344772
33831,trail_10685,Stump Hollow (GWT),Logan,Utah,Logan Canyon,Singletrack,359.0,Intermediate,Blue Square,Yes,...,1426.0,6958.0,8384.0,8387.0,6947.0,6.186,-22.02,51.585,"Amazing trail with a rough, steep and occasion...",4.324861
34351,trail_10681,Syncline North,Logan,Utah,Logan,Singletrack,9.0,Intermediate,Blue Square,Yes,...,-845.0,8812.0,7967.0,8812.0,7859.0,-3.552,-208.668,139.462,"Incredible trail, but hard to access. Either ...",4.222769
51383,trail_51423,Lava Flow,Cedar City,Utah,Iron Hills Trail System (AKA: Southview),Singletrack,611.0,Intermediate,Blue Square,Yes,...,-455.0,6481.0,6025.0,6537.0,6025.0,-4.76,-17.845,13.174,This is a really nice flow trail that has all ...,4.202396
32816,trail_9321,South Rim,Hurricane,Utah,Gooseberry Mesa,Singletrack,893.0,Very Difficult,Black Diamond,Yes,...,205.0,5160.0,5365.0,5442.0,5130.0,0.726,-88.135,156.903,South Rim,4.202363


Wisemtnbkr's recommendations among his unrated Utah trails all show high potential to be rated 4 or higher. None of these Utah trails are in the top 10 for the content-based approach, which is not surprising considering the two approaches use different inputs. All of the recommended trails have a lot of total rides and are on the more difficult end of the spectrum. There seems to be a trend of higher-rated and more-ridden trails being more difficult than their lower-rated counterparts.

I think the best way to approach final recommendations would be to blend the two methods into one engine, with some logic to decide when to recommend one trail over another. This could be done after each model has generated its recommendations, by choosing the top-n recs based on some trail factors.