# Import Modules

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%run mpFunctions.py

# Import the datasets

In [2]:
# Locations of the pickled tables, relative to the notebook's working directory.
peopleFilepath, voteFilepath, routeFilepath = (
    './Data/pickledPeople.pkl',
    './Data/pickledVotes_unpacked.pkl',
    './Data/pickledRoutes_unpacked.pkl',
)

# Load the three tables in one pass (people, then votes, then routes).
# NOTE: unpickling executes arbitrary code, so only load pickle files you trust.
people, votes, routes = (
    pd.read_pickle(path)
    for path in (peopleFilepath, voteFilepath, routeFilepath)
)

## Function to find common people for an area

In [3]:
def findCommonPeople(areaUrl, breadcrumbTier, nRoutes=10, routes=routes, people=people):
    """Find the people who have star-rated every one of an area's top routes.

    The area's routes are ranked by ``numQualityVotes`` (descending) and the
    first ``nRoutes`` are kept. A person qualifies when each of those routes'
    index labels is contained in that person's ``starRatings`` entry.

    Parameters
    ----------
    areaUrl : value compared against ``routes[breadcrumbTier]`` to select the area.
    breadcrumbTier : name of the ``routes`` column holding the area identifier
        (e.g. ``'bc3'``).
    nRoutes : maximum number of top routes to consider (default 10).
    routes : DataFrame with the ``breadcrumbTier`` and ``numQualityVotes`` columns.
        The default is bound when this cell is executed, so re-run the cell
        after reloading the data.
    people : DataFrame with ``numStarRatings`` and ``starRatings`` columns.
        Same definition-time binding caveat as ``routes``.

    Returns
    -------
    set
        Index labels of ``people`` who rated all selected routes. Empty when
        the area has no routes or nobody rated them all.
    """
    areaRoutes = routes.loc[routes[breadcrumbTier] == areaUrl]
    areaRoutes = areaRoutes.sort_values('numQualityVotes', ascending=False)
    areaRoutes = areaRoutes[:nRoutes]
    # The area may hold fewer than nRoutes routes; report what is really used.
    nFound = len(areaRoutes)

    # Pre-filter to prolific raters only: at least 5 * nRoutes star ratings.
    # This is a speed heuristic and is stricter than "rated nRoutes routes".
    eligible = people.loc[people['numStarRatings'] >= nRoutes*5]
    # Pull the column out once; building a row Series per person via .iloc
    # for every route is what made the original loop so slow.
    ratingsByPerson = eligible['starRatings']

    subPeople = set()
    for i, routeId in enumerate(areaRoutes.index):
        print("Parsing peopleDF for route", i+1, "of", nFound)
        raters = {person for person, rated in ratingsByPerson.items()
                  if routeId in rated}
        # Seed with the first route's raters, then keep narrowing.
        subPeople = raters if i == 0 else subPeople & raters

    print(len(subPeople), "have climbed the", nFound, "most-rated climbs at", areaUrl)

    return subPeople
    

## Isolate the People and Routes to be used for classification

In [54]:
# Isolate the People
eldoUrl = 'https://www.mountainproject.com/area/105744246/eldorado-canyon-sp'
# Single source of truth for how many top routes are used, so the people
# filter and the route slice below cannot drift apart.
nEldoRoutes = 10
subpeople = findCommonPeople(eldoUrl, 'bc3', nEldoRoutes)

# Isolate the Routes: the same area and ranking that findCommonPeople applies.
eldo = (
    routes.loc[routes['bc3'] == eldoUrl]
    .sort_values(by='numQualityVotes', ascending=False)
    .head(nEldoRoutes)
)

Parsing peopleDF for route 1 of 10
Parsing peopleDF for route 2 of 10
Parsing peopleDF for route 3 of 10
Parsing peopleDF for route 4 of 10
Parsing peopleDF for route 5 of 10
Parsing peopleDF for route 6 of 10
Parsing peopleDF for route 7 of 10
Parsing peopleDF for route 8 of 10
Parsing peopleDF for route 9 of 10
Parsing peopleDF for route 10 of 10
95 have climbed the 10 most-rated climbs at https://www.mountainproject.com/area/105744246/eldorado-canyon-sp


## Separate People into Training and Testing datasets

In [138]:
# `subpeople` is a set: it cannot be indexed positionally and its iteration
# order is not reproducible between kernels. Fix an order by taking the
# members in the order they appear in `people`'s index.
subpeopleOrdered = people.index[people.index.isin(subpeople)]
half = len(subpeopleOrdered) // 2

# Make the training dataset (first half). Selecting all labels at once avoids
# growing a frame with concat in a loop and keeps the original column dtypes.
train = people.loc[subpeopleOrdered[:half]]

# Make the test dataset (remaining half; gets the extra person when odd)
test = people.loc[subpeopleOrdered[half:]]

# All together, just in case it is needed
total = pd.concat([test, train])

## Create an array of starValues

In [186]:
# Rating matrix: one row per selected route, one column per training user.
# Size the rows from `eldo` itself rather than a hard-coded 10, so an area
# with fewer routes neither leaves all-zero rows nor breaks the print loop.
npTrain = np.zeros(shape=(len(eldo), len(train)), dtype=int)
for j in range(len(train)):
    # Fetch this user's ratings once instead of once per route.
    userRatings = train.iloc[j]['starRatings']
    for i, routeId in enumerate(eldo.index):
        npTrain[i][j] = userRatings[routeId]

# Print, for demonstration (route URL truncated to 53 chars, first 30 users)
for i in range(len(npTrain)):
    print(eldo.index[i][:53], npTrain[i][0:30])

https://www.mountainproject.com/route/105748490/the-b [3 4 3 4 3 3 4 4 4 3 4 4 3 4 4 3 4 3 3 4 3 3 4 4 2 3 3 4 4 4]
https://www.mountainproject.com/route/105750106/rewri [4 3 4 4 4 4 3 4 4 4 4 4 3 4 4 2 4 3 4 4 4 3 4 4 3 4 3 4 3 4]
https://www.mountainproject.com/route/105748657/the-y [4 3 4 4 4 4 4 4 4 3 4 3 4 3 4 4 4 4 4 3 4 3 3 4 4 4 3 3 4 4]
https://www.mountainproject.com/route/105748361/wind- [2 2 3 3 4 4 3 4 3 3 3 3 3 4 4 3 3 3 3 3 2 3 3 3 3 3 3 3 4 3]
https://www.mountainproject.com/route/105748391/calyp [2 3 2 3 3 3 3 4 2 3 3 2 3 3 2 2 3 3 2 3 3 2 3 4 2 3 3 4 3 3]
https://www.mountainproject.com/route/105749890/werk- [4 3 3 4 4 3 3 3 4 3 4 3 3 2 4 2 3 3 3 4 3 2 4 3 3 3 3 4 3 4]
https://www.mountainproject.com/route/105748774/ruper [4 3 4 4 4 3 3 4 4 4 4 3 4 4 4 3 4 4 3 4 3 4 3 3 3 4 4 4 4 2]
https://www.mountainproject.com/route/105748639/blind [4 3 4 4 4 3 3 4 4 4 3 3 3 3 4 2 4 3 3 4 3 3 4 3 3 4 4 4 4 4]
https://www.mountainproject.com/route/105748924/long- [3 3 3 4 4 4 4 4 4

***
### Visualization to see general star distribution

In [185]:
# Tally how often each star value (0-4) was given across all selected
# users and routes, then list every user's ratings route by route.
d = {stars: 0 for stars in range(5)}
r = list(eldo.index)

# One row per user in `total`, holding that user's rating for each route in `r`.
ratingRows = [
    [userRatings[routeId] for routeId in r]
    for userRatings in total['starRatings']
]

for row in ratingRows:
    for stars in row:
        d[stars] += 1

print()
for stars, count in d.items():
    print(stars, ":", count)
print()

for userNum, row in enumerate(ratingRows):
    print("User", userNum, end=': ')
    for stars in row:
        print(stars, end=' ')
    print()



0 : 0
1 : 0
2 : 67
3 : 419
4 : 464

User 0: 4 4 3 4 2 4 4 3 4 4 
User 1: 4 4 4 3 4 3 4 4 3 4 
User 2: 4 3 4 3 3 4 3 3 3 3 
User 3: 3 4 4 3 3 3 4 4 2 4 
User 4: 4 4 4 4 3 3 3 4 4 4 
User 5: 3 3 4 2 2 3 2 2 2 3 
User 6: 4 4 4 3 3 4 4 4 4 2 
User 7: 3 3 4 4 2 3 3 3 3 4 
User 8: 2 3 4 4 3 4 4 3 3 4 
User 9: 3 3 2 3 2 3 3 2 2 3 
User 10: 4 3 3 4 3 4 4 4 3 3 
User 11: 4 3 4 4 3 3 4 4 4 4 
User 12: 4 3 4 4 2 3 4 3 3 3 
User 13: 3 4 3 3 3 3 4 4 3 4 
User 14: 4 4 4 4 4 4 4 4 4 4 
User 15: 3 3 4 3 3 3 4 4 3 3 
User 16: 2 4 4 3 3 3 4 3 4 2 
User 17: 4 4 4 4 3 3 4 4 3 3 
User 18: 4 4 4 3 3 4 4 4 3 3 
User 19: 3 4 4 2 3 4 4 4 4 4 
User 20: 2 4 4 3 3 2 4 4 4 3 
User 21: 4 4 4 4 3 3 4 4 3 4 
User 22: 4 3 4 4 3 3 4 3 3 2 
User 23: 3 3 4 2 3 3 4 4 3 4 
User 24: 4 3 4 3 3 3 4 4 3 3 
User 25: 3 3 4 2 3 3 4 3 3 3 
User 26: 3 4 4 3 3 4 4 4 4 4 
User 27: 4 4 4 3 3 4 4 4 4 4 
User 28: 4 4 2 3 3 4 4 3 3 4 
User 29: 4 4 4 4 3 3 4 4 4 4 
User 30: 4 3 4 4 4 4 4 4 4 4 
User 31: 4 4 4 3 3 3 3 3 3 3 
User 32: 4 4 

# Looking for a better area

In [20]:
# Isolate the People
avalonUrl = 'https://www.mountainproject.com/area/105745528/avalon'
# NOTE: this rebinds `subpeople`; re-running the earlier train/test cell after
# this one would split the Avalon people rather than the Eldorado ones.
subpeople = findCommonPeople(avalonUrl, 'bc4', 5)

# Isolate the Routes (reuse avalonUrl rather than repeating the literal)
# NOTE(review): people are matched on the top 5 routes while 10 routes are
# kept here. Confirm that the mismatch is intended.
avalon = (
    routes.loc[routes['bc4'] == avalonUrl]
    .sort_values(by='numQualityVotes', ascending=False)
    .head(10)
)

# Report the average rating of the climb
findAvgRating(avalon)

Parsing peopleDF for route 1 of 5
Parsing peopleDF for route 2 of 5
Parsing peopleDF for route 3 of 5
Parsing peopleDF for route 4 of 5
Parsing peopleDF for route 5 of 5
43 have climbed the 5 most-rated climbs at https://www.mountainproject.com/area/105745528/avalon


2.63