In [1]:
import turicreate as tc
import pandas as pd
import numpy as np
import random

class RecommenderClient:
    def __init__(self,data):
        # data is what is returned by Vishnu's client
        self.data = data;
        
        #the ref data is the raw data table
        ref_data = self.ref_data = pd.DataFrame.from_dict(data)
        self.num_of_choices = len(ref_data);
        
        #construct data frame that contains only the training categories
        model_data = self.model_data = ref_data[['category', 'tags']]
        model_data['index'] = model_data.index
        
        self.categories = self.model_data['category'].unique()
        
        #create turicreate SFrame
        tc_data = self.tc_data = tc.SFrame(model_data)
        
        #create model
        model = self.model = tc.recommender.item_content_recommender.create(tc_data, item_id = 'index', verbose=False)
        
        
    def cold_start(self,k=10):
        #k is the number of items to suggest. 
        #randomly suggests k items
        # returns list of indices. 
        
        suggestion_id = [random.randrange(self.num_of_choices) for _ in range(k)]
        
        return suggestion_id
        
        
    def suggest(self, k = 10, likes = [], dislikes = [], categories = []):
        #likes is the list of items that have been liked. 
        #categories is either a string matching the category, or a list of strings matching the categories
        #returns pandas dataframe with index and the score

        
        #create list of allowed items
        if categories  == []:
            allowed_index = list(range(self.num_of_choices));
        else:
            allowed_index = [i for i in range(self.num_of_choices) if self.model_data['category'][i] in categories]
            #allowed_items = tc.SFrame(allowed_index)
        
        recommendations = self.model.recommend_from_interactions(observed_items = likes,
                                                                 k=k,
                                                                 exclude=None, #dislikes,#tc.SFrame(self.model_data.iloc[dislikes]),
                                                                 items=allowed_index,
                                                                 new_user_data=None,
                                                                 new_item_data=None,
                                                                 exclude_known=True,
                                                                 diversity=0,
                                                                 random_seed=None,
                                                                 verbose=False)
        
        return recommendations

In [55]:
import pickle
import os
os.chdir('../data-import/')
if 1:
    %run main.py 
else:
    data = pickle.load( open( "london_poi.pickle", "rb" ) )
os.chdir('../recommender/')

In [56]:
data

[{'type': 'location',
  'subType': 'POINT_OF_INTEREST',
  'geoCode': {'latitude': 41.39165, 'longitude': 2.164772},
  'name': 'Casa Batlló',
  'category': 'SIGHTS',
  'tags': ['sightseeing', 'museum', 'sights', 'landmark']},
 {'type': 'location',
  'subType': 'POINT_OF_INTEREST',
  'geoCode': {'latitude': 41.381794, 'longitude': 2.171936},
  'name': 'La Boqueria',
  'category': 'RESTAURANT',
  'tags': ['restaurant']},
 {'type': 'location',
  'subType': 'POINT_OF_INTEREST',
  'geoCode': {'latitude': 41.40359, 'longitude': 2.17436},
  'name': 'La Sagrada Familia',
  'category': 'SIGHTS',
  'tags': ['sightseeing', 'church', 'temple', 'sights']},
 {'type': 'location',
  'subType': 'POINT_OF_INTEREST',
  'geoCode': {'latitude': 41.38507, 'longitude': 2.16799},
  'name': 'Teresa Carles',
  'category': 'RESTAURANT',
  'tags': ['restaurant', 'vegetarian']},
 {'type': 'location',
  'subType': 'POINT_OF_INTEREST',
  'geoCode': {'latitude': 41.395214, 'longitude': 2.161674},
  'name': 'Casa Milà'

In [3]:
rc = RecommenderClient(data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [4]:
rc.cold_start()

[67, 51, 7, 73, 25, 46, 62, 15, 82, 43]

In [5]:
suggestions = rc.suggest(k  = 10 , likes = [0,], dislikes=[83,64],categories=['RESTAURANT','SHOPPING'])

In [6]:
rc.ref_data.iloc[list(suggestions['index'])]

Unnamed: 0,category,geoCode,name,subType,tags,type
83,RESTAURANT,"{'latitude': 51.494324, 'longitude': -0.173618}",Daquise,POINT_OF_INTEREST,"[sightseeing, restaurant]",location
64,SHOPPING,"{'latitude': 51.49227, 'longitude': -0.158959}",Peter Jones,POINT_OF_INTEREST,"[sightseeing, shopping]",location
60,RESTAURANT,"{'latitude': 51.51464, 'longitude': -0.157891}",La Porte des Indes,POINT_OF_INTEREST,"[sightseeing, restaurant]",location
51,RESTAURANT,"{'latitude': 51.506638, 'longitude': -0.187725}",The Orangery,POINT_OF_INTEREST,"[sightseeing, restaurant]",location
36,RESTAURANT,"{'latitude': 51.501553, 'longitude': -0.174984}",Royal Geographical Society,POINT_OF_INTEREST,"[sightseeing, restaurant]",location
33,RESTAURANT,"{'latitude': 51.499073, 'longitude': -0.174051}",Ognisko Restaurant,POINT_OF_INTEREST,"[sightseeing, restaurant]",location
13,SHOPPING,"{'latitude': 51.50159, 'longitude': -0.15972}",Harvey Nichols,POINT_OF_INTEREST,"[sightseeing, shopping]",location
40,SHOPPING,"{'latitude': 51.513874, 'longitude': -0.153936}",Marks & Spencer,POINT_OF_INTEREST,"[sightseeing, shopping, clothing]",location
61,SHOPPING,"{'latitude': 51.513615, 'longitude': -0.155719}",Primark,POINT_OF_INTEREST,"[sightseeing, shopping, clothing, fashion]",location
22,RESTAURANT,"{'latitude': 51.488823, 'longitude': -0.191904}",The Troubadour,POINT_OF_INTEREST,"[sightseeing, restaurant, activities, bar]",location


In [9]:
rc.model_data['category'].unique()

array(['SIGHTS', 'RESTAURANT', 'SHOPPING', 'NIGHTLIFE'], dtype=object)

In [10]:
rc.model_data[(rc.model_data['category'] in ['SIGHTS', 'SHOPPING'])]

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [11]:
any(rc.model_data['category']) in ['SIGHTS', "SHOPPING"]

False

In [12]:
allowed_index = [i for i in range(rc.num_of_choices) if rc.model_data['category'][i] in ['SIGHTS', "SHOPPING"]]

In [13]:
allowed_index

[0,
 1,
 2,
 3,
 4,
 5,
 7,
 13,
 16,
 19,
 23,
 24,
 26,
 27,
 28,
 29,
 31,
 32,
 34,
 38,
 40,
 42,
 45,
 57,
 58,
 59,
 61,
 64,
 66,
 71,
 73,
 74,
 79]

In [14]:
rc.model_data['category']

0         SIGHTS
1         SIGHTS
2         SIGHTS
3         SIGHTS
4         SIGHTS
5         SIGHTS
6     RESTAURANT
7         SIGHTS
8     RESTAURANT
9     RESTAURANT
10    RESTAURANT
11    RESTAURANT
12    RESTAURANT
13      SHOPPING
14    RESTAURANT
15    RESTAURANT
16        SIGHTS
17    RESTAURANT
18    RESTAURANT
19        SIGHTS
20    RESTAURANT
21     NIGHTLIFE
22    RESTAURANT
23        SIGHTS
24        SIGHTS
25    RESTAURANT
26        SIGHTS
27        SIGHTS
28        SIGHTS
29        SIGHTS
         ...    
54    RESTAURANT
55    RESTAURANT
56    RESTAURANT
57        SIGHTS
58        SIGHTS
59        SIGHTS
60    RESTAURANT
61      SHOPPING
62    RESTAURANT
63    RESTAURANT
64      SHOPPING
65    RESTAURANT
66        SIGHTS
67    RESTAURANT
68    RESTAURANT
69    RESTAURANT
70    RESTAURANT
71        SIGHTS
72    RESTAURANT
73      SHOPPING
74        SIGHTS
75    RESTAURANT
76    RESTAURANT
77    RESTAURANT
78    RESTAURANT
79        SIGHTS
80    RESTAURANT
81    RESTAURA

In [15]:
rc.tc_data

category,tags,index
SIGHTS,"[sightseeing, artgallerie, museum, ...",0
SIGHTS,"[sightseeing, sights, landmark, historicplace] ...",1
SIGHTS,"[sightseeing, restaurant, museum, cheap] ...",2
SIGHTS,"[sightseeing, shopping, restaurant] ...",3
SIGHTS,"[sightseeing, sights, landmark, historicplace, ...",4
SIGHTS,"[sightseeing, museum]",5
RESTAURANT,"[restaurant, nightlife]",6
SIGHTS,"[sightseeing, sights, park, landmark] ...",7
RESTAURANT,[restaurant],8
RESTAURANT,"[shopping, restaurant, vegetarian] ...",9


In [16]:
list(range(4))

[0, 1, 2, 3]

In [17]:
disallowed=[]

In [18]:
tc.SFrame(rc.model_data.iloc[disallowed])

category,tags,index


In [19]:
rc.model_data.iloc[[5, 6]]

Unnamed: 0,category,tags,index
5,SIGHTS,"[sightseeing, museum]",5
6,RESTAURANT,"[restaurant, nightlife]",6


In [20]:
rc.categories

array(['SIGHTS', 'RESTAURANT', 'SHOPPING', 'NIGHTLIFE'], dtype=object)

In [21]:
cat = 'SIGHTS'

In [24]:
likes = [0,1,3,4]

In [66]:
rc.model.recommend_from_interactions(observed_items = likes, new_item_data=tc_spain_data)

ToolkitError: Invalid type conversion

In [26]:
rc.tc_data

category,tags,index
SIGHTS,"[sightseeing, artgallerie, museum, ...",0
SIGHTS,"[sightseeing, sights, landmark, historicplace] ...",1
SIGHTS,"[sightseeing, restaurant, museum, cheap] ...",2
SIGHTS,"[sightseeing, shopping, restaurant] ...",3
SIGHTS,"[sightseeing, sights, landmark, historicplace, ...",4
SIGHTS,"[sightseeing, museum]",5
RESTAURANT,"[restaurant, nightlife]",6
SIGHTS,"[sightseeing, sights, park, landmark] ...",7
RESTAURANT,[restaurant],8
RESTAURANT,"[shopping, restaurant, vegetarian] ...",9


In [30]:
rc.tc_data

category,tags,index
SIGHTS,"[sightseeing, artgallerie, museum, ...",0
SIGHTS,"[sightseeing, sights, landmark, historicplace] ...",1
SIGHTS,"[sightseeing, restaurant, museum, cheap] ...",2
SIGHTS,"[sightseeing, shopping, restaurant] ...",3
SIGHTS,"[sightseeing, sights, landmark, historicplace, ...",4
SIGHTS,"[sightseeing, museum]",5
RESTAURANT,"[restaurant, nightlife]",6
SIGHTS,"[sightseeing, sights, park, landmark] ...",7
RESTAURANT,[restaurant],8
RESTAURANT,"[shopping, restaurant, vegetarian] ...",9


In [51]:
items = tc.SFrame({'category':['SIGHTS', 'SIGHTS', 'SIGHTS'], 'tags': [['sightseeing', 'restaurant', 'cheap'], ['cheap', 'museum'], ['landmark','vegetarian']],'index':[0,1,2]})

In [52]:
items

category,index,tags
SIGHTS,0,"[sightseeing, restaurant, cheap] ..."
SIGHTS,1,"[cheap, museum]"
SIGHTS,2,"[landmark, vegetarian]"


In [58]:
spain_data = pd.DataFrame.from_dict(data)[['category', 'tags']]

In [62]:
spain_data['index'] = spain_data.index

In [63]:
tc_spain_data = tc.SFrame(spain_data)

In [64]:
tc_spain_data

category,tags,index
SIGHTS,"[sightseeing, museum, sights, landmark] ...",0
RESTAURANT,[restaurant],1
SIGHTS,"[sightseeing, church, temple, sights] ...",2
RESTAURANT,"[restaurant, vegetarian]",3
SIGHTS,"[sightseeing, museum, sights, landmark] ...",4
RESTAURANT,[restaurant],5
RESTAURANT,"[restaurant, tapas]",6
SIGHTS,"[sightseeing, sights, landmark, historicplace, ...",7
SIGHTS,"[sightseeing, restaurant, sights, landmark] ...",8
RESTAURANT,"[sightseeing, restaurant, tapas] ...",9
