In [1]:
import turicreate as tc

In [2]:
# Load the three CSV inputs (quietly, without parser progress output):
#   user_tour_data - one row per (userId, tourId) visit with its rating
#   user_data      - per-user attributes (age, gender)
#   tour_data      - per-tour attributes (lengthDist, lengthTime, genre)
user_tour_data, user_data, tour_data = [
    tc.SFrame.read_csv(csv_path, verbose=False)
    for csv_path in (
        './dataset/user_tour_data.csv',
        './dataset/user_data.csv',
        './dataset/tour_data.csv',
    )
]

In [3]:
# Show the complete interaction table: which users went on which tours, and the rating given.
user_tour_data.print_rows(num_rows=user_tour_data.num_rows())

+--------+--------+--------+
| userId | tourId | rating |
+--------+--------+--------+
|   1    |   1    |   5    |
|   2    |   3    |   4    |
|   3    |   1    |   2    |
|   3    |   2    |   3    |
|   1    |   3    |   4    |
|   4    |   2    |   2    |
|   4    |   4    |   3    |
|   5    |   2    |   5    |
|   6    |   3    |   3    |
|   7    |   3    |   4    |
|   6    |   2    |   4    |
+--------+--------+--------+
[11 rows x 3 columns]



In [4]:
# Show the complete user table: one row of attributes (age, gender) per userId.
user_data.print_rows(num_rows=user_data.num_rows())

+--------+-----+--------+
| userId | age | gender |
+--------+-----+--------+
|   1    |  21 |   M    |
|   2    |  80 |   M    |
|   3    |  35 |   F    |
|   4    |  35 |   M    |
|   5    |  22 |   F    |
|   6    |  60 |   F    |
|   7    |  70 |   M    |
+--------+-----+--------+
[7 rows x 3 columns]



In [5]:
# Show the complete tour table: one row of attributes (distance, duration, genre) per tourId.
tour_data.print_rows(num_rows=tour_data.num_rows())

+--------+------------+------------+----------+
| tourId | lengthDist | lengthTime |  genre   |
+--------+------------+------------+----------+
|   1    |     5      |     20     | religion |
|   2    |     15     |     45     |  nature  |
|   3    |     10     |     60     | religion |
|   4    |     25     |     90     |  sports  |
+--------+------------+------------+----------+
[4 rows x 4 columns]



In [6]:
# Train/validation split of the interaction table, keyed on the userId and tourId columns.
# NOTE(review): validation_data is not used by any cell visible in this notebook;
# model validation is still pending.
training_data, validation_data = tc.recommender.util.random_split_by_user(
    user_tour_data,
    user_id='userId',
    item_id='tourId',
)

In [7]:
# Fit a ranking factorization recommender on the training split only.
# 'rating' is the prediction target; the user and tour attribute tables are
# supplied as side data alongside the interactions.
model = tc.recommender.ranking_factorization_recommender.create(
    training_data,
    user_id='userId',
    item_id='tourId',
    target='rating',
    user_data=user_data,
    item_data=tour_data,
)

In [8]:
# Produce ranked tour recommendations for each user and display every row.
# NOTE(review): the model was fit on training_data, so recommend() presumably only
# leaves out tours seen in that split (confirm against the exclude_known default).
# Tours held out for validation can therefore still be recommended to a user who
# actually went on them, e.g. user 4 / tour 4 in the output below.
results = model.recommend()
results.print_rows(num_rows=results.num_rows())

+--------+--------+---------------------+------+
| userId | tourId |        score        | rank |
+--------+--------+---------------------+------+
|   1    |   2    |  1.3811455553687906 |  1   |
|   1    |   4    |  1.2757776626486006 |  2   |
|   2    |   1    |  1.6829483048298042 |  1   |
|   2    |   2    |  1.604869236253153  |  2   |
|   2    |   4    | 0.44283097232276036 |  3   |
|   3    |   4    |  1.4659470978022808 |  1   |
|   3    |   3    |  1.3830893715364283 |  2   |
|   4    |   3    |  1.4552188110989874 |  1   |
|   4    |   1    |  0.6465770651231102 |  2   |
|   4    |   4    |  0.5241770210505718 |  3   |
|   5    |   3    |  1.6874135788168907 |  1   |
|   5    |   4    |  1.6321558282658506 |  2   |
|   5    |   1    |  1.527417584019278  |  3   |
|   6    |   1    |  1.4050291318938686 |  1   |
|   6    |   4    |  1.3080779391086952 |  2   |
|   7    |   3    |  5.436029652176663  |  1   |
|   7    |   1    |  5.2453893463514785 |  2   |
|   7    |   2    | 

### <i>(Analysis / validation of model pending.)</i>

- **Model choice:** Ranking Factorization Recommender
- **Reasoning:** Implicit data, lack of ratings, additional relevant user & tour data
- **Info on TC Recommender models:** https://turi.com/learn/userguide/recommender/choosing-a-model.html

- **Validation strategy:** Update the mock data to create a set of distinct user/tour pairs such that the predictions for new users fitting each profile should be obvious.