In [105]:
import pandas as pd
import numpy as np
from itertools import permutations
from itertools import combinations_with_replacement

In [106]:
def create_main_table():

    # Loading datasets
    locations = pd.read_csv("https://raw.githubusercontent.com/gabriellecastilho/datasets/master/indonesia_tourism.csv")
    ratings = pd.read_csv("https://raw.githubusercontent.com/gabriellecastilho/datasets/master/indonesia_tourism_rating.csv")
    users = pd.read_csv("https://raw.githubusercontent.com/gabriellecastilho/datasets/master/indonesia_tourism_user.csv")

    # Merging / Joining datasets
    main_table = pd.merge(ratings, users, on='User_Id', how='inner')
    main_table = pd.merge(main_table, locations, on="Place_Id", how="inner")

    # Creating column "Age Range"
    main_table["Age_Range"] = pd.cut(main_table['Age'], bins=[0, 17, 25, 35, 50, 65, 100], labels=['0-17', '18-25', '26-35', '36-50', '51-65', '65+'])

    # Selecting only relevant attributes
    main_table = main_table[["User_Id", "Age", "Age_Range", "Place_Id", "Place_Name", "Category", "City", "Rating", "Place_Ratings"]]

    # Sorting values by user id and ratings
    main_table = main_table.sort_values(by=["User_Id", "Rating", "Place_Ratings"], ascending=[True, False, False])

    # Reordering table index
    main_table = main_table.reset_index(drop=True)

    # Translating categories to English
    def translate_category(item):
        if item == "Taman Hiburan": return "Amusement Park"
        elif item == "Tempat Ibadah": return "Place of Worship"
        elif item == "Budaya": return "Culture"
        elif item == "Cagar Alam": return "Natural Reserve"
        elif item == "Bahari": return "Nautical"
        elif item == "Pusat Perbelanjaan": return "Shopping Center"

    main_table["Category"] = main_table[["Category"]].applymap(translate_category)

    return main_table

In [107]:
# Output
main_table = create_main_table()
main_table.head(3)

Unnamed: 0,User_Id,Age,Age_Range,Place_Id,Place_Name,Category,City,Rating,Place_Ratings
0,1,20,18-25,103,Tugu Pal Putih Jogja,Amusement Park,Yogyakarta,4.7,3
1,1,20,18-25,302,Masjid Pusdai,Place of Worship,Bandung,4.7,2
2,1,20,18-25,258,Museum Gedung Sate,Culture,Bandung,4.6,5


In [108]:
def create_probability_table(user, main_table):

    # Filtering main table by target user
    user_table = main_table[main_table["User_Id"] == user]

    # Creating dataframe with user history category count
    prob_table = pd.DataFrame()
    prob_table["Count"] = user_table["Category"].value_counts()
    prob_table.reset_index(inplace=True)
    prob_table = prob_table.rename(columns = {'index':'Category'})

    # Creating column "Total" with total number of places visited by the user
    prob_table["Total"] = prob_table["Count"].sum()

    # Creating and calculating column "Probability" for each category
    prob_table["Probability"] = prob_table["Count"] / prob_table["Total"]

    return prob_table

In [109]:
# Output for user 1
prob_table = create_probability_table(1, main_table)
prob_table

Unnamed: 0,Category,Count,Total,Probability
0,Culture,12,30,0.4
1,Natural Reserve,9,30,0.3
2,Amusement Park,5,30,0.166667
3,Nautical,2,30,0.066667
4,Place of Worship,1,30,0.033333
5,Shopping Center,1,30,0.033333


In [110]:
def create_transition_matrix(user, main_table):

    # Calling the function create_probability_table for the categories the user has visited
    prob = create_probability_table(user, main_table)

    # Creating the categories transitions for the transition matrix
    perm = list(permutations(prob["Category"].unique(), 2))
    comb = list(combinations_with_replacement(prob["Category"].unique(), 2))
    transition_matrix = pd.DataFrame(data = perm + comb, columns = ("Category_1", "Category_2"))
    transition_matrix = transition_matrix.drop_duplicates().reset_index(drop=True)

    # Calculating the probability of a user to visit a category based on the category of the page he is seeing at the moment
    def probability(row):
        prob1 = prob["Probability"][prob["Category"] == row[0]].iloc[0]
        prob2 = prob["Probability"][prob["Category"] == row[1]].iloc[0]
        return prob1 * prob2 / prob1

    transition_matrix["Probability"] = transition_matrix.apply(probability, axis=1)

    return transition_matrix

In [111]:
# Output example for user 1 visiting a page from category "Culture"
transition_matrix = create_transition_matrix(1, main_table)
transition_matrix[transition_matrix["Category_1"] == "Culture"]

Unnamed: 0,Category_1,Category_2,Probability
0,Culture,Natural Reserve,0.3
1,Culture,Amusement Park,0.166667
2,Culture,Nautical,0.066667
3,Culture,Place of Worship,0.033333
4,Culture,Shopping Center,0.033333
30,Culture,Culture,0.4


In [112]:
def recommend_category(user, category, transition_matrix):

    # Selecting a new category based on the transition matrix probabilities
    new_category = np.random.choice(transition_matrix["Category_2"][transition_matrix["Category_1"] == category].values,
                                replace=True,
                                p=transition_matrix["Probability"][transition_matrix["Category_1"] == category].values)
    return new_category

In [122]:
# Output
category = recommend_category(1, "Culture", transition_matrix)
category

'Culture'

In [123]:
def create_rating_age_table(city, main_table):

    # Filtering places by target city
    places_by_city = main_table[main_table["City"] == city]

    # Selecting the necessary columns and calculating the average rating by age range
    rating_age = places_by_city[["Age_Range", "Place_Name", "Place_Id", "Category", "Rating"]].groupby(["Age_Range", "Category", "Place_Name", "Place_Id"]).mean().dropna()

    # Ranking best places by rating
    rating_age = rating_age.sort_values(["Age_Range", "Rating"], ascending=[True, False])
    rating_age.reset_index(inplace=True)

    return rating_age

In [124]:
# Output example in Jakarta
rating_age = create_rating_age_table("Jakarta", main_table)
rating_age.head(3)

Unnamed: 0,Age_Range,Category,Place_Name,Place_Id,Rating
0,18-25,Culture,Freedom Library,69,5.0
1,18-25,Shopping Center,Wisata Kuliner Pecenongan,29,5.0
2,18-25,Culture,Galeri Indonesia Kaya,49,4.8


In [125]:
def create_user_history(user, main_table):

    # Listing the id of places visited by users
    def list_locations(row):
        return list(row['Place_Id'])

    user_history = main_table.groupby('User_Id').apply(list_locations)

    # Returning only the id of places visited by target user
    return user_history[user]

In [117]:
# Output
user_history = create_user_history(1, main_table)
user_history[:15]

[103, 302, 258, 208, 179, 292, 222, 428, 48, 5, 20, 373, 312, 154, 76]

In [118]:
def recommend_place(user, category, city, main_table, rating_age, user_history):

    # Finding the user's age range
    age_range = main_table["Age_Range"][main_table["User_Id"] == user].iloc[0]

    # Selecting options from the same age range
    options = rating_age[rating_age["Age_Range"] == age_range]

    # Selecting best places from the category recommended
    options = options[options["Category"] == category]
    options = list(options["Place_Id"])

    # Recommending best ranked place from that category and age range
    place_id = options[0]

    # Checking if the user already visited the place, if so, recommends the next place
    while place_id in user_history:
        options.pop(0)
        place_id = options[0]

    # Find place name
    place_name = main_table["Place_Name"][main_table["Place_Id"] == place_id].iloc[0]

    # Returning id and name for recommended place
    return(place_id, place_name)

In [120]:
# Output for user 1
place = recommend_place(1, category, city, main_table, rating_age, user_history)
place

(71, 'Cibubur Garden Diary (Cibugary)')

In [141]:
if __name__ == "__main__":

    # Defining target user, city, and category of the page originating the recommendation
    # Example: User 1 is visiting a page of a museum (culture) in Jakarta
    user = 1
    category = "Culture"
    city = "Jakarta"

    # Creating main table
    main_table = create_main_table()

    # Creating transition matrix for target user
    transition_matrix = create_transition_matrix(user, main_table)

    # Recommending a category for target user
    category = recommend_category(user, category, transition_matrix)

    # Creating table with places ranking by age range
    rating_age = create_rating_age_table(city, main_table)

    # Creating list with places visited by target user
    user_history = create_user_history(user, main_table)

    # Recommending best ranked places from category, given that user hasn't visited it yet
    place_id, place_name  = recommend_place(user, category, city, main_table, rating_age, user_history)

    # Printing recomendation category and place name
    print(f"""{category}:  {place_name}""")

Culture:  Kauman Pakualaman Yogyakarta


##To Do

In [135]:
%%capture
"""

Source code improvements by priority:

1. Comment code;
2. Manage errors: implement try/except (e.g., new user, no user history);
3. Transition matrix weighted by user's ratings;
4. Recommendation place based on average price spent by user;


Analysis and results to-do list:

- Perform exploratory data analysis;
- Create graphs and charts;


Important:

- I forgot to filter the places by city, need to do that asap


Nice to have (lowest priority):

- Storytelling for application/use;
- Mock page visualization;


Future developments:

- Add description similarity analysis to recommend place;
"""