In [53]:
import turicreate as tc
import pandas as pd
import numpy as np
import random
import os
os.chdir('../data-import/')
from AmadeusClient import AmadeusClient

am = AmadeusClient()

os.chdir('../recommender/')


class RecommenderClient:
    def __init__(self):
        # data is what is returned by Vishnu's client
        self.data = [];
        self.cities_added = set();
        
    def add_data(self,newdata, city = '', create_model=True):
        
        city = city.lower()
        
        if city in self.cities_added:
            return
        
        self.data += newdata
        
        #the ref data is the raw data table
        ref_data = self.ref_data = pd.DataFrame.from_dict(self.data)
        self.num_of_choices = len(ref_data);
        
        #if city is not an empty string, add the name of the city to the list of cities.
        if not city:
            self.cities_added.add('city')
        
        if create_model:
            self.create_model()
        return
        
        
    def create_model(self):
        
        #construct data frame that contains only the training categories
        model_data = self.model_data = self.ref_data[['category', 'tags','city']]
        model_data['index'] = model_data.index
        
        #update list of categories
        self.categories = self.model_data['category'].unique()
        
        #create turicreate SFrame
        tc_data = self.tc_data = tc.SFrame(model_data)
        
        #create model
        self.model = tc.recommender.item_content_recommender.create(tc_data, item_id = 'index', verbose=False)
        
        return
        
        
    def cold_start(self,k=10):
        #k is the number of items to suggest. 
        #randomly suggests k items
        # returns list of indices. 
        
        suggestion_id = random.sample(list(range(self.num_of_choices)), k=k)
        
        return suggestion_id
        
        
    def suggest(self, k = 10, likes = [], dislikes = [], categories = [], cities = [], diversity=2):
        #likes is the list of items that have been liked. 
        #categories is either a string matching the category, or a list of strings matching the categories
        #returns pandas dataframe with index and the score

        
        #create list of allowed items based on categories
        if categories  == []:
            allowed_index_cat = list(range(self.num_of_choices));
        else:
            allowed_index_cat = [i for i in range(self.num_of_choices) if self.model_data['category'][i] in categories]
            #allowed_items = tc.SFrame(allowed_index)
        
        #create list of allowed items based on city
        if cities == []:
            allowed_index_city = list(range(self.num_of_choices))
        else:
            allowed_index_city = [i for i in range(self.num_of_choices) if self.model_data['city'][i] in cities]
        
        #take intersection
        allowed_index = list(set(allowed_index_cat) & set(allowed_index_city)) 
        
        recommendations = self.model.recommend_from_interactions(observed_items = likes,
                                                                 k=k,
                                                                 exclude=None, #dislikes,#tc.SFrame(self.model_data.iloc[dislikes]),
                                                                 items=allowed_index,
                                                                 new_user_data=None,
                                                                 new_item_data=None,
                                                                 exclude_known=True,
                                                                 diversity=diversity,
                                                                 random_seed=None,
                                                                 verbose=False)
        
        return recommendations
    
    def recs2data(self,recommendations, df = True):
        #recommendations is a pandas frame including the indicies
        #returns the dictionary of data that is desired
        #if df is true, it returns the data frame, or else it returns the dictionary
        
        indices = list(recommendations['index'])
        recdf = self.ref_data.iloc[indices]
        recdf['score'] = recommendations['score']
        
        if df:
            
            return recdf
        
        else:
            return recdf.to_dict()
        
    def ind2data(self,index, df = True):
        #converts list of indices to recommdation data
        if df:
            return self.ref_data.iloc[index]
        else:
            return [self.data[i] for i in index]

In [54]:
import pickle
import pandas
import os

os.chdir('../data-import/')
from AmadeusClient import AmadeusClient

am = AmadeusClient()

recData = am.build_default_data()



Loaded city: Paris, with 20 POIs
Loaded city: Rome, with 20 POIs
Loaded city: Venice, with 20 POIs
Loaded city: London, with 20 POIs
Loaded city: Barcelona, with 20 POIs
Loaded city: Florence, with 20 POIs
Loaded city: Prague, with 20 POIs
Loaded city: Madrid, with 0 POIs
Loaded city: Vienna, with 20 POIs
Loaded city: Istanbul, with 20 POIs
Loaded city: Amsterdam, with 20 POIs


In [55]:
rc = RecommenderClient()
rc.add_data(recData,create_model=False)
rc.create_model()
cold_guess = rc.cold_start(k=30)
rc.ind2data(cold_guess)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,category,city,geoCode,name,subType,tags,type
120,RESTAURANT,Prague,"{'latitude': 50.089848, 'longitude': 14.432865}",Café Imperial,POINT_OF_INTEREST,"[sightseeing, restaurant]",location
138,SIGHTS,Prague,"{'latitude': 50.08972, 'longitude': 14.417222}",Old Jewish Cemetery,POINT_OF_INTEREST,"[sightseeing, museum, sights]",location
104,SIGHTS,Florence,"{'latitude': 43.763065, 'longitude': 11.264667}",Piazzale Michelangelo,POINT_OF_INTEREST,"[sightseeing, restaurant, sights]",location
61,SIGHTS,London,"{'latitude': 51.50078, 'longitude': -0.124601}",Big Ben,POINT_OF_INTEREST,"[sightseeing, restaurant, sights, landmark, hi...",location
108,RESTAURANT,Florence,"{'latitude': 43.77504, 'longitude': 11.25306}",Cappelle Medicee,POINT_OF_INTEREST,"[restaurant, church, museum]",location
180,SIGHTS,Amsterdam,"{'latitude': 52.37522, 'longitude': 4.883956}",Anne Frank House,POINT_OF_INTEREST,"[sightseeing, museum, sights, historicplace]",location
84,RESTAURANT,Barcelona,"{'latitude': 41.38507, 'longitude': 2.16799}",Teresa Carles,POINT_OF_INTEREST,"[restaurant, vegetarian]",location
49,SIGHTS,Venice,"{'latitude': 45.43378, 'longitude': 12.339088}",Bridge of Sighs,POINT_OF_INTEREST,"[sightseeing, sights, historicplace, historic]",location
37,RESTAURANT,Rome,"{'latitude': 41.9008, 'longitude': 12.484928}",Piccolo Arancio,POINT_OF_INTEREST,[restaurant],location
13,RESTAURANT,Paris,"{'latitude': 48.84991, 'longitude': 2.355101}",La Tour d'Argent,POINT_OF_INTEREST,"[sightseeing, restaurant]",location


In [56]:
likes = [7, 88, 56]

In [57]:
berlinlat, berlinlon = 52.5067614,13.2846511

In [58]:
berlin_data = am.get_poi(lat = berlinlat, lon = berlinlon, name='Berlin')

In [59]:
len(berlin_data)

32

In [60]:
rc.add_data(berlin_data,create_model=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [61]:
rc.categories

array(['SIGHTS', 'RESTAURANT', 'SHOPPING', 'NIGHTLIFE'], dtype=object)

In [62]:
 cats = list(rc.categories) != ['RESTAURANTS']

In [63]:
cats

True

In [64]:
recs = rc.suggest(k = 10, likes = likes,categories= ['SIGHTS', 'SHOPPING', 'NIGHTLIFE'],  cities='Berlin')

In [65]:
recs

index,score,rank
214,0.3505222797393799,1
220,0.3007896343866984,2
222,0.1608015100161234,3
209,0.0,4
210,0.0,5
201,0.0,6
208,0.0,7
218,0.0,8
203,0.0,9
202,0.0,10


In [68]:
df = rc.recs2data(recs, df=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [75]:
[df.iloc[i].to_dict() for i in range(len(df))]

[{'category': 'SIGHTS',
  'city': 'Berlin',
  'geoCode': {'latitude': 52.51889, 'longitude': 13.29534},
  'name': 'Bröhan Museum',
  'subType': 'POINT_OF_INTEREST',
  'tags': ['sightseeing', 'artgallerie', 'museum'],
  'type': 'location',
  'score': 0.3505222797393799},
 {'category': 'SIGHTS',
  'city': 'Berlin',
  'geoCode': {'latitude': 52.52012, 'longitude': 13.299757},
  'name': 'Brauhaus Lemke am Schloss',
  'subType': 'POINT_OF_INTEREST',
  'tags': ['sightseeing', 'restaurant'],
  'type': 'location',
  'score': 0.3007896343866984},
 {'category': 'SIGHTS',
  'city': 'Berlin',
  'geoCode': {'latitude': 52.50503, 'longitude': 13.278189},
  'name': 'Funkturm',
  'subType': 'POINT_OF_INTEREST',
  'tags': ['sightseeing', 'sights', 'landmark', 'historicplace', 'historic'],
  'type': 'location',
  'score': 0.16080151001612344},
 {'category': 'SHOPPING',
  'city': 'Berlin',
  'geoCode': {'latitude': 52.509235, 'longitude': 13.304466},
  'name': 'Wilmersdorfer Arcaden',
  'subType': 'POINT

In [None]:
rc.ref_data

In [None]:

#data = am.get_poi(lat=51.50, lon=-0.177)
data = am.get_poi(lat=41.39715, lon=2.160873)

#pickle.dump(data, open('london_poi.pickle', 'wb'))
pickle.dump(data, open('spain_poi.pickle', 'wb'))



In [None]:
data

In [None]:
rc = RecommenderClient(data)

In [None]:
rc.cold_start()

In [None]:
suggestions = rc.suggest(k  = 10 , likes = [0,], dislikes=[83,64],categories=['RESTAURANT','SHOPPING'])

In [None]:
rc.ref_data.iloc[list(suggestions['index'])]

In [None]:
rc.model_data['category'].unique()

In [None]:
rc.model_data[(rc.model_data['category'] in ['SIGHTS', 'SHOPPING'])]

In [None]:
any(rc.model_data['category']) in ['SIGHTS', "SHOPPING"]

In [None]:
allowed_index = [i for i in range(rc.num_of_choices) if rc.model_data['category'][i] in ['SIGHTS', "SHOPPING"]]

In [None]:
allowed_index

In [None]:
rc.model_data['category']

In [None]:
rc.tc_data

In [None]:
list(range(4))

In [None]:
disallowed=[]

In [None]:
tc.SFrame(rc.model_data.iloc[disallowed])

In [None]:
rc.model_data.iloc[[5, 6]]

In [None]:
rc.categories

In [None]:
cat = 'SIGHTS'

In [None]:
likes = [0,1,3,4]

In [None]:
rc.model.recommend_from_interactions(observed_items = likes, new_item_data=tc_spain_data)

In [None]:
rc.tc_data

In [None]:
rc.tc_data

In [None]:
items = tc.SFrame({'category':['SIGHTS', 'SIGHTS', 'SIGHTS'], 'tags': [['sightseeing', 'restaurant', 'cheap'], ['cheap', 'museum'], ['landmark','vegetarian']],'index':[0,1,2]})

In [None]:
items

In [None]:
spain_data = pd.DataFrame.from_dict(data)[['category', 'tags']]

In [None]:
spain_data['index'] = spain_data.index

In [None]:
tc_spain_data = tc.SFrame(spain_data)

In [None]:
tc_spain_data