# Make Full Recommendation Based on User Input

This notebook makes recommendations based on user input.

Input: 
User ID or User name (str)

Output: Destination suggestions (str), 10 or so suggested climbs (int) in each suggested area, and a description of what kind of climber the user is, based on the latent features of the model.

## Import modules and load in data

In [14]:
import graphlab as gl
import numpy as np
import pandas as pd
import psycopg2, pickle, json
from collections import defaultdict

# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only load pickle files produced by this project, never untrusted files.

#user name to user id dict
# Pickles are binary data: open with 'rb' so loading also works on Windows
# and with binary pickle protocols (text mode 'r' can corrupt the stream).
with open('user_map.p','rb') as f:
    user_map = pickle.load(f)

#star rating and climb data
# df_data: one row per star rating; the code below reads its User and Climb columns.
df_data = pd.read_csv('star5.csv')
# df_raw: per-climb data looked up by climb id via .loc; the code below reads
# its sub_location and rating columns.
with open('df_raw_star5.p','rb') as f:
    df_raw = pickle.load(f)

#load in recommendation models
sim_mod = gl.load_model('sim_mod')                            # item similarity model
rfr_mod = gl.load_model('rfm_mod_15')                         # second ("rfr") recommender
rfr_mod_lf = gl.load_model('rfm_mod_features_extracted')      # model whose latent factors are inspected below

First, we need to determine what the input was. If it's a user id that we have, then we proceed with the recommendation. If it's a user name, we find the user id and proceed as above. If it's a climb, or series of climbs, we make recommendations based on those climbs (climb-based input is not yet handled by `determine_input` below).

In [2]:
def determine_input(input_str):
    """Resolve raw user input to a known user id.

    The input is first interpreted as a numeric user id and accepted if that
    id appears in ``df_data.User``.  Otherwise it is looked up as a user name
    in ``user_map``.  Unlike a plain "numeric means id" rule, a user name made
    up only of digits is still resolved through ``user_map`` when it is not a
    known id.

    Parameters
    ----------
    input_str : str or int
        A user id (e.g. ``'107953067'``) or a user name (e.g. ``'LauraColyer'``).

    Returns
    -------
    The matching user id, or ``None`` when the input matches neither a known
    id nor a known user name.
    """
    # Try the numeric-id interpretation first.  TypeError covers inputs such
    # as None that int() cannot convert at all.
    try:
        id_val = int(input_str)
    except (TypeError, ValueError):
        id_val = None

    if id_val is not None and id_val in df_data.User.values:
        return id_val

    # Fall back to a user-name lookup (also reached for all-digit names).
    if input_str in user_map:
        return user_map[input_str]
    return None

In [3]:
# User id or user name to make recommendations for.
input_str = 'LauraColyer'
user_id = determine_input(input_str)

# Fail fast on an unknown user: a None id would otherwise flow through the
# cells below and silently produce empty recommendations.
if user_id is None:
    raise ValueError('Unknown user id or user name: %r' % (input_str,))

We will suggest destinations and climbs based on two models. For each model, we proceed through the top recommendations until ~3-5 areas in which the user has not yet rated a climb each have at least ~10 suggested climbs that are within the user's climbing ability. We then suggest those areas and the recommended climbs in them.

In [4]:
def user_rated_visited(user_id):
    """Summarise where a user has climbed and how hard they climb.

    Parameters
    ----------
    user_id :
        Id of the user, as found in ``df_data.User``.

    Returns
    -------
    visited : set
        The ``sub_location`` values of every climb the user has rated.
    rating_max : float
        Upper difficulty bound for suggestions: the mean ``rating`` of the
        user's rated climbs plus one standard deviation.  NaN when the user
        has no rated climbs.
    """
    #Find destinations climber has rated climbs from
    climbs_rated = df_data[df_data.User==user_id].Climb
    # Look the rated climbs up once and reuse the rows for both results.
    rated_rows = df_raw.loc[climbs_rated]
    visited = set(rated_rows.sub_location.values)

    #Find user difficulty ceiling from starred climbs
    user_ratings = rated_rows.rating
    user_rating_mean = user_ratings.mean()
    user_rating_std = user_ratings.std()
    # The sample std of a single rating is NaN, which would make rating_max
    # NaN and reject every candidate climb; treat it as zero spread instead.
    if pd.isnull(user_rating_std):
        user_rating_std = 0.0
    rating_max = user_rating_mean + user_rating_std

    return visited, rating_max

In [5]:
# Areas the user has already rated climbs in, plus the upper difficulty bound
# used to filter the suggested climbs below.
visited, rating_max = user_rated_visited(user_id)


In [6]:
#Find climb locations to recommend
def rec_loc_climb(user_id, visited, rating_max,
                  model, verbose=False, n_areas=3, n_climbs=10, k=13000):
    """Pick new areas, and climbs within them, to recommend to a user.

    Walks the model's recommendations for ``user_id`` in the order returned
    and groups climbs whose ``rating`` is below ``rating_max`` by their
    ``sub_location``.  Areas in ``visited`` are skipped.  An area is selected
    once it has collected ``n_climbs`` climbs; the walk stops as soon as
    ``n_areas`` areas have been selected.

    Parameters
    ----------
    user_id :
        User to recommend for.
    visited : iterable
        Areas to exclude (areas the user has already rated climbs in).
    rating_max : float
        Exclusive upper bound on the difficulty rating of suggested climbs.
    model :
        Recommender exposing ``recommend(users=[...], k=...)`` whose rows
        carry a ``'Climb'`` field.
    verbose : bool
        When true, print each selected area followed by its climb ids.
    n_areas : int
        Number of areas to select before stopping.
    n_climbs : int
        Number of climbs an area needs before it is selected.
    k : int
        Number of recommendations requested from the model (default 13000,
        the value previously hard-coded here).

    Returns
    -------
    loc_recs : list
        Selected areas, in the order they were completed.
    loc_climb_recs : defaultdict(list)
        Area -> collected climb ids.  This also contains areas that gathered
        fewer than ``n_climbs`` climbs and were therefore not selected.
    """
    climb_recs = model.recommend(users=[user_id], k=k)
    loc_climb_recs = defaultdict(list)
    loc_recs = []
    # Areas that must not receive (more) climbs: already visited or already
    # completed.  A set gives constant-time membership tests instead of
    # rebuilding a list for every recommendation.
    excluded = set(visited)
    for rec in climb_recs:
        climb = rec['Climb']
        climb_row = df_raw.loc[climb]  # single lookup, reused for both fields
        if not climb_row.rating < rating_max:
            continue
        loc = climb_row.sub_location
        if loc in excluded:
            continue
        loc_climb_recs[loc].append(climb)
        if len(loc_climb_recs[loc]) == n_climbs:
            loc_recs.append(loc)
            excluded.add(loc)
            if len(loc_recs) == n_areas:
                break
    if verbose:
        for loc in loc_recs:
            print(loc)
            print(loc_climb_recs[loc])
    return loc_recs, loc_climb_recs

In [7]:
def climb_recs_sim(user_id, visited, rating_max, verbose=False):
    """Area and climb recommendations from the item similarity model.

    Convenience wrapper around ``rec_loc_climb`` bound to ``sim_mod``.  The
    previous body passed a recommendations frame where ``rec_loc_climb``
    expects the user id and omitted the required ``model`` argument, so it
    raised ``TypeError`` when called.

    Parameters
    ----------
    user_id :
        User to recommend for.
    visited : iterable
        Areas to exclude (areas the user has already rated climbs in).
    rating_max : float
        Exclusive upper bound on the difficulty rating of suggested climbs.
    verbose : bool
        When true, print each selected area followed by its climb ids.

    Returns
    -------
    (loc_recs_sim, loc_climb_recs_sim) exactly as returned by
    ``rec_loc_climb``.
    """
    # rec_loc_climb queries the model itself and handles verbose printing.
    return rec_loc_climb(user_id, visited, rating_max, sim_mod,
                         verbose=verbose)

In [8]:
#Get climb recommendations from the item similarity model (sim_mod);
#verbose=True prints each suggested area followed by its list of climb ids
loc_recs_sim, loc_climb_recs_sim = rec_loc_climb(user_id,
                                                  visited,
                                                  rating_max,
                                                  sim_mod,
                                                  verbose=True)

Yosemite National Park
[105862915, 105840361, 105862896, 105924807, 106154042, 107429493, 105862873, 105912192, 107677399, 105872907]
Custer State Park
[107684661, 108052987, 106053351, 105715232, 105714734, 107775061, 107810730, 108244256, 107517307, 105714761]
Adirondacks
[106532800, 107708237, 106831219, 107564731, 107185274, 106542971, 106092527, 106197345, 106078832, 106594953]


In [9]:
#Get climb recommendations from the rfr model (rfr_mod), not the item
#similarity model; verbose=True prints each suggested area and its climb ids
loc_recs_rfr, loc_climb_recs_rfr = rec_loc_climb(user_id,
                                                  visited,
                                                  rating_max,
                                                  rfr_mod,
                                                  verbose=True)

Yosemite National Park
[105862915, 105924807, 106154042, 105945535, 105862991, 105862873, 105862896, 106167844, 106187777, 105847471]
Joshua Tree National Park
[105721666, 105725788, 105722743, 105722305, 105723325, 105722050, 105725389, 105723478, 105722227, 105722431]
Cathedral Ledge
[105880759, 105919971, 105909672, 105880787, 105922177, 105938087, 105903672, 105920872, 105949212, 105924990]


We also want to say something about the user based on the latent features of the rfr model.

In [17]:
# Fitted coefficients of the latent-feature model.
coefs = rfr_mod_lf.get('coefficients')
user_coefs = coefs['User']

# One row per user, one column per latent factor, labelled by user id.
df_fac_user = pd.DataFrame(np.array(user_coefs['factors']),
                           index=np.array(user_coefs['User']))

In [19]:
# Display the latent-factor values of the selected user (row labelled user_id).
df_fac_user.loc[user_id]

0    0.057407
1   -0.119440
2   -0.098539
3   -0.049110
Name: 107953067, dtype: float64

## Scratch work below

In [144]:
#Find recommended climbs in rating range and in area
# NOTE(review): scratch cell. `loc_recs` and `user_rating_range` are not
# defined in any cell visible in this notebook, so this cell presumably relied
# on leftover kernel state and will raise NameError on a fresh
# Restart & Run All -- confirm or remove.
# NOTE(review): this loads 'rfm_mod_15' again, the same path already loaded
# into `rfr_mod` in the first code cell.
m_rfr = gl.load_model('rfm_mod_15')
rfr_recs = m_rfr.recommend(users=[user_id], k=13000)

# Work on a copy so areas can be removed once they have enough climbs.
loc_recs_copy = list(loc_recs)
rfr_climb_recs = defaultdict(list)
for i,rec in enumerate(rfr_recs):
    climb = df_raw.loc[rec['Climb']]
    # Keep only climbs strictly inside the (lower, upper) rating bounds.
    if (climb.rating > user_rating_range[0]) and \
            (climb.rating < user_rating_range[1]):
        if climb.sub_location in loc_recs_copy:
            rfr_climb_recs[climb.sub_location] += [rec['Climb']]
            #print i,climb.sub_location
            # An area is done after 10 climbs; stop once every area is done.
            if len(rfr_climb_recs[climb.sub_location]) == 10:
                loc_recs_copy.remove(climb.sub_location)
                if not loc_recs_copy:
                    break
print json.dumps(rfr_climb_recs, indent=2)      

{
  "North America": [
    105889511, 
    105947052, 
    105842825, 
    105872943, 
    105941644, 
    105910904, 
    105845816, 
    105910806, 
    106065449, 
    105959460
  ], 
  "Joshua Tree National Park": [
    105721666, 
    105725788, 
    105722743, 
    105722305, 
    105723325, 
    105722050, 
    105725389, 
    105723478, 
    105722227, 
    105722431
  ], 
  "Boulder": [
    108095626, 
    105764211, 
    105748999, 
    105748412, 
    105748265, 
    105751315, 
    105750454, 
    105748093, 
    105749080, 
    105748948
  ], 
  "Moab Area": [
    105930583, 
    105717487, 
    105717562, 
    105718567, 
    105718072, 
    107920861, 
    106789524, 
    105717451, 
    105717649, 
    105926324
  ], 
  "Wasatch Range": [
    105739898, 
    105739895, 
    105740096, 
    105740654, 
    105859843, 
    105740039, 
    105740519, 
    105740879, 
    105741404, 
    105740639
  ]
}
