# Trail Recommendations

List everything that we need to do.

In [None]:
import Trailforks as tf
import TrailforksScraper as tfs
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

## Import all the files

In [None]:
trails_checkins = pd.read_csv('trail-checkins.csv')
trailforks_trails = pd.read_csv('trailforks-trails.csv')
wta_trails = pd.read_csv('ResultsZip.csv')

## Getting data from Trailforks

- 'activitytype=6' for hiking
- Trailstyle = Popularity

Trailforks api documentation: https://www.trailforks.com/about/api/#!/trail

Trailforks popularity heatmap: https://www.trailforks.com/region/united-states/?activitytype=6&z=10.4&lat=48.30699&lon=-120.42421&trailstyle=popularity

Trailforks popularity scores (sorted in descending order): https://www.trailforks.com/region/united-states/trails/?sort=t.popularity_score&order=desc&difficulty=2,3,4,11,9,5,6,8&activitytype=6

In [None]:
trailForksScrapper = tfs.trailforksScrapper()

In [None]:
# This code was used to scrate all the trails in north-carolina and washington 
# from trailforks along with their popularity.

# north_carolina = trailForksScrapper.fetchTrailsByRegionAndPages('north-carolina',23)
# washington = trailForksScrapper.fetchTrailsByRegionAndPages('washington',72)

Convert trail titles to have only words separated by -
This will be used in url for scraping checkins.

In [None]:
# north_carolina['title'] = north_carolina['title'].str.replace('\W', ' ').str.lower()
# north_carolina['title'] = north_carolina['title'].str.replace('[^a-z A-Z]', '').str.strip().str.replace(' ','-')
# washington['title'] = washington['title'].str.replace('\W', ' ').str.lower()
# washington['title'] = washington['title'].str.replace('[^a-z A-Z]', '').str.strip().str.replace(' ','-')

Getting trail stats for all the trails collected previously.

In [None]:
# This code can be used to scrape trails checkins and store them in dataframe.

'''import pandas as pd 

df = pd.DataFrame()
for trail in trails:
    df_trail = trailForksScrapper.fetchTrailStats(trail)
    if df_trail is not None:
        df_trail['trail'] = trail
        df = pd.concat([df,df_trail])

df'''

## Merge all the tables to create one dataset

Cleaning up datasets:
- Removing trails with title unknown and NAN
- Removing unnamed columns
- filtering checkins greater than 0

In [None]:
trailforks_trails = trailforks_trails.dropna(subset='title')
trailforks_trails = trailforks_trails[~trailforks_trails['title'].str.contains('unknown')]
trailforks_trails = trailforks_trails[['title','riding area','rating','distance','descent','climb','popularity_score']]
trailforks_trails['title'] = trailforks_trails['title'].str.replace('--','-')
trailforks_trails

In [None]:
wta_trails['title_wta'] = wta_trails['TITLE']
wta_trails['title_wta'] = wta_trails['title_wta'].str.replace('\W', ' ').str.lower()
wta_trails['title_wta'] = wta_trails['title_wta'].str.replace('[^a-z A-Z]', '').str.strip().str.replace(' ','-')
wta_trails['title_wta'] = wta_trails['title_wta'].str.replace('--','-',regex=True) ## this is not working. Need to check why
wta_trails = wta_trails.drop(columns=['Unnamed: 0'])
wta_trails

In [None]:
trails_checkins = trails_checkins[['Period','Check-Ins','trail']]
trails_checkins = trails_checkins[trails_checkins['Check-Ins']>0]
trails_checkins

Combine trailforks and wta trails data based on titles matching as substrings of each other. This needs to be done because same trails have slightly different names.

In [195]:
dataset_1 = trailforks_trails
dataset_2 = wta_trails
dataset_1['title'] = dataset_1['title'].str.replace('-trail','')
dataset_2['title_wta'] = dataset_2['title_wta'].str.replace('-trail','')
result_dataset = pd.DataFrame()
for i in range(len(dataset_2)):
    row_matched = dataset_1[dataset_1['title'].str.contains(dataset_2.iloc[i]['title_wta'])]
    if len(row_matched) > 0:
        df = dataset_2[dataset_2['title_wta']==dataset_2.iloc[i]['title_wta']]
        row_matched[df.columns] = None
        for j in range(0,len(row_matched)):
            row_matched.loc[row_matched.index[j],df.columns] = df.iloc[0]
        result_dataset = pd.concat([result_dataset,row_matched])
        
result_dataset.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  row_matched[df.columns] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  row_matched[df.columns] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  row_matched[df.columns] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = val

Unnamed: 0.1,title,riding area,rating,distance,descent,climb,popularity_score,Unnamed: 0,TITLE,REGION,...,GAIN,HIGHEST,RATING,RATING_COUNT,LATITUDE,LONGITUDE,REPORT_DATE,REPORT_COUNT,URL,title_wta
545,raven-roost,Norse Peak Wilderness,,4 miles,"-1,008 ft",855 ft,0,0,Raven Roost,Mount Rainier Area,...,,,2.75,4,47.008852,-121.115564,2018-10-07,6,https://www.wta.org/go-hiking/hikes/raven-roost,raven-roost
501,rainbow-ridge-bootpath,Lake Chelan National Rec. Area (North Cascades...,,2 miles,-133 ft,"1,218 ft",0,9,Rainbow Ridge,North Cascades,...,1700.0,5300.0,2.88,8,48.763431,-121.699677,2020-09-28,30,https://www.wta.org/go-hiking/hikes/rainbow-ridge,rainbow-ridge
1706,pyramid-mountain,Port Angeles,,3 miles,-283 ft,"2,671 ft",0,15,Pyramid Mountain,Central Cascades,...,3000.0,8243.0,3.0,4,48.018056,-120.505278,2020-08-01,44,https://www.wta.org/go-hiking/hikes/pyramid-mo...,pyramid-mountain
4862,pyramid-mountain,Mad River - Entiat,,14 miles,"-3,813 ft","4,841 ft",55,15,Pyramid Mountain,Central Cascades,...,3000.0,8243.0,3.0,4,48.018056,-120.505278,2020-08-01,44,https://www.wta.org/go-hiking/hikes/pyramid-mo...,pyramid-mountain
6751,pyramid-mountain-peak,Mad River - Entiat,,3 miles,-274 ft,"1,623 ft",95,15,Pyramid Mountain,Central Cascades,...,3000.0,8243.0,3.0,4,48.018056,-120.505278,2020-08-01,44,https://www.wta.org/go-hiking/hikes/pyramid-mo...,pyramid-mountain


In [201]:
result_dataset = result_dataset.drop_duplicates()
result_dataset = result_dataset.dropna(subset=['TITLE'])
len(result_dataset['title_wta'].unique())

953

In [204]:
dataset_3 = trails_checkins[['Period','Check-Ins','trail']]
dataset_3['trail'] = dataset_3['trail'].str.replace('-trail','')
combined_trails = pd.merge(result_dataset,dataset_3.set_index('trail'),left_on='title',right_on='trail',how='inner')
combined_trails

Unnamed: 0.1,title,riding area,rating,distance,descent,climb,popularity_score,Unnamed: 0,TITLE,REGION,...,RATING,RATING_COUNT,LATITUDE,LONGITUDE,REPORT_DATE,REPORT_COUNT,URL,title_wta,Period,Check-Ins
0,pyramid-mountain,Port Angeles,,3 miles,-283 ft,"2,671 ft",0,15,Pyramid Mountain,Central Cascades,...,3.0,4,48.018056,-120.505278,2020-08-01,44,https://www.wta.org/go-hiking/hikes/pyramid-mo...,pyramid-mountain,11am,2
1,pyramid-mountain,Port Angeles,,3 miles,-283 ft,"2,671 ft",0,15,Pyramid Mountain,Central Cascades,...,3.0,4,48.018056,-120.505278,2020-08-01,44,https://www.wta.org/go-hiking/hikes/pyramid-mo...,pyramid-mountain,3pm,3
2,pyramid-mountain,Port Angeles,,3 miles,-283 ft,"2,671 ft",0,15,Pyramid Mountain,Central Cascades,...,3.0,4,48.018056,-120.505278,2020-08-01,44,https://www.wta.org/go-hiking/hikes/pyramid-mo...,pyramid-mountain,4pm,2
3,pyramid-mountain,Port Angeles,,3 miles,-283 ft,"2,671 ft",0,15,Pyramid Mountain,Central Cascades,...,3.0,4,48.018056,-120.505278,2020-08-01,44,https://www.wta.org/go-hiking/hikes/pyramid-mo...,pyramid-mountain,7pm,1
4,pyramid-mountain,Port Angeles,,3 miles,-283 ft,"2,671 ft",0,15,Pyramid Mountain,Central Cascades,...,3.0,4,48.018056,-120.505278,2020-08-01,44,https://www.wta.org/go-hiking/hikes/pyramid-mo...,pyramid-mountain,May,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1585457,enchanted-forest,Fisher Farm Park,,"1,576 ft",-27 ft,31 ft,95,3903,Enchanted Forest Trail,Puget Sound and Islands,...,0.0,0,48.988488,-123.041787,2019-05-26,1,https://www.wta.org/go-hiking/hikes/enchanted-...,enchanted-forest,2022723,1
1585458,enchanted-forest,Fisher Farm Park,,"1,576 ft",-27 ft,31 ft,95,3903,Enchanted Forest Trail,Puget Sound and Islands,...,0.0,0,48.988488,-123.041787,2019-05-26,1,https://www.wta.org/go-hiking/hikes/enchanted-...,enchanted-forest,2022726,1
1585459,enchanted-forest,Fisher Farm Park,,"1,576 ft",-27 ft,31 ft,95,3903,Enchanted Forest Trail,Puget Sound and Islands,...,0.0,0,48.988488,-123.041787,2019-05-26,1,https://www.wta.org/go-hiking/hikes/enchanted-...,enchanted-forest,2022731,1
1585460,enchanted-forest,Fisher Farm Park,,"1,576 ft",-27 ft,31 ft,95,3903,Enchanted Forest Trail,Puget Sound and Islands,...,0.0,0,48.988488,-123.041787,2019-05-26,1,https://www.wta.org/go-hiking/hikes/enchanted-...,enchanted-forest,202288,1
