In [11]:
import pandas as pd
import numpy as np

# Load the cleaned workspace table; index_col=0 uses the first CSV column as the row index.
# NOTE(review): later cells read the columns Workspace_Id, Name, Category, Address,
# Price_range, Next_status and Rating from this frame — confirm the CSV provides them.
clean_df = pd.read_csv('../Reco/Data/workspaces_clean.csv', index_col=0)

In [12]:
# Labels for the 100 synthetic users: User_1, User_2, User_3 ... User_100
user_row_indices = [f"User_{user_number}" for user_number in range(1, 101)]

# Columns of the ratings table; starts with the identifying workspace columns,
# one rating column per user is appended below
data = dict(
    Workspace_Id=clean_df["Workspace_Id"],
    Workspace=clean_df["Name"],  # workspace locations
    Category=clean_df["Category"],
)

# every user gets one rating per workspace row
num_rows = len(clean_df)

# fixed starting seed so the generated ratings are the same on every run
random_seed = 2023
for user_label in user_row_indices:
    # re-seed the global NumPy generator before drawing this user's column
    np.random.seed(random_seed)

    # uniform draws between 1 and 5, rounded to one decimal place
    data[user_label] = np.round(np.random.uniform(1, 5, num_rows), 1)

    # bump the seed so the next user gets a different set of random values
    random_seed += 1

# assemble the workspace columns and the per-user rating columns into one frame
weighted_clean_df = pd.DataFrame(data=data)
print("\nWeighted Average User Rating for each Workspace")
weighted_clean_df


Weighted Average User Rating for each Workspace


Unnamed: 0,Workspace_Id,Workspace,Category,User_1,User_2,User_3,User_4,User_5,User_6,User_7,...,User_91,User_92,User_93,User_94,User_95,User_96,User_97,User_98,User_99,User_100
0,1,Eugenio Trias Municipal Public Library,Public library,2.3,3.4,1.5,1.9,4.2,1.3,1.6,...,2.8,4.4,2.9,2.3,1.9,3.8,4.1,3.6,2.0,4.9
1,2,Iván de Vargas Library,Public library,4.6,3.8,4.6,2.7,4.6,1.7,2.1,...,1.3,2.9,4.2,2.4,1.5,4.0,4.6,1.8,4.9,3.9
2,3,Biblioteca Mario Vargas Llosa,Public library,3.4,1.8,4.7,4.9,4.1,3.7,4.5,...,1.4,2.9,1.5,3.5,3.5,4.3,1.9,2.2,4.3,3.2
3,4,Pedro Salinas Library,Public library,1.5,1.2,2.8,1.4,4.5,2.1,3.5,...,4.4,2.5,1.9,3.9,2.4,4.9,4.4,4.8,4.6,1.7
4,5,Acuna Public Library,Public library,1.6,1.8,2.6,2.9,3.1,3.4,2.8,...,1.8,3.1,2.2,3.3,1.6,2.7,4.8,2.3,3.5,3.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,261,Harina,Coffee shop,4.1,3.7,3.1,4.3,1.6,4.6,2.9,...,4.2,4.4,2.2,4.0,4.5,1.2,2.2,3.7,4.3,3.6
114,262,The Coffee Corner,Coffee shop,1.4,2.0,2.3,3.4,1.7,2.6,2.2,...,4.9,2.5,4.1,4.0,4.9,2.5,1.9,3.5,1.0,4.8
115,263,The Bear and the Madroño,Espresso bar,4.3,4.7,4.7,5.0,2.9,4.5,1.6,...,4.0,2.6,3.2,4.0,1.4,1.2,4.0,1.5,1.3,1.5
116,264,Cafés Pozo,Coffee store,1.4,2.0,4.0,1.6,2.2,2.1,4.3,...,2.0,1.3,3.0,1.6,1.1,2.3,4.2,1.9,3.1,4.4


In [13]:
# Average every numeric column per workspace category, round to one decimal, then
# transpose so each row is a user (plus Workspace_Id) and each column a category.
# numeric_only=True is required because the frame also holds the string column
# "Workspace": without it pandas warns about dropping it (older versions) or raises
# TypeError (pandas >= 2.0).
# Note: the numeric "Workspace_Id" column is still averaged here, so the result
# contains a "Workspace_Id" row that carries no meaning as a rating.
category_averages_df = (
    weighted_clean_df.groupby("Category").mean(numeric_only=True).round(1).T
)
print("\nUser Average Rating for Workspace Categories")
category_averages_df


User Average Rating for Workspace Categories


  category_averages_df = weighted_clean_df.groupby("Category").mean().round(1).T


Category,Bakery,Brunch,Business center,Cafe,Cafeteria,Coffee roasters,Coffee shop,Coffee store,Coworking space,Dog cafe,Donuts,Espresso bar,Library,Public library,Records storage facility,Restaurant,Tea store,University library
Workspace_Id,245.0,236.5,171.0,198.1,207.5,173.0,196.6,248.6,139.0,173.0,238.0,244.2,64.2,36.9,81.0,256.0,251.0,74.6
User_1,3.6,2.3,2.7,3.4,3.1,3.2,3.1,2.6,2.9,2.0,3.7,2.7,2.9,2.9,4.3,3.0,2.0,3.2
User_2,3.4,3.3,2.4,3.5,3.4,4.5,3.0,3.3,3.1,4.1,3.0,3.5,3.0,3.0,3.0,2.9,1.1,3.3
User_3,2.2,3.1,2.5,3.1,3.4,2.2,3.2,3.3,2.8,2.8,2.5,4.0,3.0,3.1,4.4,2.3,1.9,2.3
User_4,3.4,2.2,2.4,2.6,3.0,3.0,3.1,3.3,3.2,4.9,2.9,2.6,3.0,2.8,2.1,4.4,2.2,4.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_96,1.5,4.3,3.9,3.2,3.4,3.1,2.7,2.8,3.1,2.3,1.9,2.3,3.3,3.1,1.3,3.8,4.4,2.9
User_97,3.3,1.3,3.4,2.7,2.8,1.2,3.0,2.7,2.9,1.9,1.2,3.2,3.0,2.9,1.9,1.0,2.1,3.0
User_98,1.1,2.9,1.2,3.0,3.2,2.6,3.1,2.8,3.0,3.0,4.3,3.2,2.6,3.0,3.2,1.1,2.6,3.1
User_99,4.7,1.8,4.5,3.3,3.3,3.7,3.0,2.5,2.7,4.2,2.7,3.1,3.0,3.0,4.9,2.1,1.9,2.9


In [14]:
# Drop the "Workspace_Id" row (an average of identifiers, not a rating).
# The result is reassigned instead of dropped in place, and errors="ignore" skips a
# missing label, so re-running this cell is harmless instead of raising KeyError
# once the row has already been removed.
category_averages_df = category_averages_df.drop(index="Workspace_Id", errors="ignore")
category_averages_df

Category,Bakery,Brunch,Business center,Cafe,Cafeteria,Coffee roasters,Coffee shop,Coffee store,Coworking space,Dog cafe,Donuts,Espresso bar,Library,Public library,Records storage facility,Restaurant,Tea store,University library
User_1,3.6,2.3,2.7,3.4,3.1,3.2,3.1,2.6,2.9,2.0,3.7,2.7,2.9,2.9,4.3,3.0,2.0,3.2
User_2,3.4,3.3,2.4,3.5,3.4,4.5,3.0,3.3,3.1,4.1,3.0,3.5,3.0,3.0,3.0,2.9,1.1,3.3
User_3,2.2,3.1,2.5,3.1,3.4,2.2,3.2,3.3,2.8,2.8,2.5,4.0,3.0,3.1,4.4,2.3,1.9,2.3
User_4,3.4,2.2,2.4,2.6,3.0,3.0,3.1,3.3,3.2,4.9,2.9,2.6,3.0,2.8,2.1,4.4,2.2,4.2
User_5,1.5,3.1,3.4,2.8,2.8,3.6,3.2,2.4,3.2,4.4,1.0,2.8,3.2,3.0,1.7,4.1,3.8,2.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_96,1.5,4.3,3.9,3.2,3.4,3.1,2.7,2.8,3.1,2.3,1.9,2.3,3.3,3.1,1.3,3.8,4.4,2.9
User_97,3.3,1.3,3.4,2.7,2.8,1.2,3.0,2.7,2.9,1.9,1.2,3.2,3.0,2.9,1.9,1.0,2.1,3.0
User_98,1.1,2.9,1.2,3.0,3.2,2.6,3.1,2.8,3.0,3.0,4.3,3.2,2.6,3.0,3.2,1.1,2.6,3.1
User_99,4.7,1.8,4.5,3.3,3.3,3.7,3.0,2.5,2.7,4.2,2.7,3.1,3.0,3.0,4.9,2.1,1.9,2.9


In [15]:
# Get list of Workspace Ids, in the row order of weighted_clean_df
workspace_ids = weighted_clean_df["Workspace_Id"].tolist()

# User_1's rating of every workspace, in the same row order as workspace_ids.
# NOTE(review): pairing ids and ratings by position assumes each Workspace_Id
# appears only once in weighted_clean_df — confirm.
user_1_ratings = weighted_clean_df["User_1"].to_numpy(dtype=float)

# Workspace-to-workspace affinity: entry [i, j] is the mean of User_1's ratings for
# workspace i and workspace j, rounded to one decimal place. Broadcasting a column
# vector against a row vector builds all pairs at once, replacing the nested loop
# that re-filtered the whole frame and wrote one cell at a time.
pairwise_average_ratings = (
    (user_1_ratings[:, np.newaxis] + user_1_ratings[np.newaxis, :]) / 2
).round(1)

# Affinity matrix with rows and columns labelled by workspace Id.
# The values are stored as float64 (the cell-by-cell fill produced object dtype).
workspace_workspace_df = pd.DataFrame(
    pairwise_average_ratings, index=workspace_ids, columns=workspace_ids
)

In [16]:
# Score every workspace for User_1: matrix-vector product of the
# workspace-to-workspace affinity matrix with User_1's rating vector
affinity_matrix = workspace_workspace_df.values
user_1_affinity = weighted_clean_df["User_1"].values
rec_scores = affinity_matrix.dot(user_1_affinity)

# One-column frame of User_1's recommendation scores, indexed by workspace Id
# and ordered from highest to lowest score
data = {"User_1_Recommendations": rec_scores}
user_1_rec = pd.DataFrame(data=data, index=workspace_workspace_df.index).sort_values(
    "User_1_Recommendations", ascending=False
)
user_1_rec

Unnamed: 0,User_1_Recommendations
133,3304.14
69,3304.14
105,3269.81
36,3269.81
46,3269.81
...,...
7,1765.4
220,1765.4
83,1765.4
20,1765.4


Print Recommendations

In [17]:
# user_1_rec is sorted by score (descending), so its first five rows are the
# five best-scoring workspaces
top_5_workspaces = user_1_rec.iloc[:5]
print("\nTop 5 Workspaces for User 1")
top_5_workspaces


Top 5 Workspaces for User 1


Unnamed: 0,User_1_Recommendations
133,3304.14
69,3304.14
105,3269.81
36,3269.81
46,3269.81


In [18]:
def print_workspace(workspace_id):
    """Print a short summary of one workspace from the module-level ``clean_df``.

    Selects the rows of ``clean_df`` whose ``Workspace_Id`` equals
    ``workspace_id`` and prints, for the first match: name, address, category,
    price range (only when the price code is 1, 2 or 3), next status and
    overall rating.

    Args:
        workspace_id: Value compared against ``clean_df["Workspace_Id"]``.

    Raises:
        IndexError: If no row of ``clean_df`` has this ``Workspace_Id``.
    """
    # Price codes with a printable symbol. Any other value (0, NaN, an
    # unexpected code) maps to None, so the price line is skipped rather than
    # leaving ``price_range`` unbound.
    price_symbols = {1: "€", 2: "€€", 3: "€€€"}

    # Get workspace row based on id
    workspace = clean_df[clean_df["Workspace_Id"] == workspace_id]
    if workspace.empty:
        # Same exception type the bare ``.values[0]`` lookup would raise,
        # but with a message that names the missing id.
        raise IndexError(f"No workspace found with Workspace_Id {workspace_id!r}")

    def first_value(column):
        # Value of ``column`` in the first matching row
        return workspace[column].values[0]

    price_range = price_symbols.get(first_value("Price_range"))

    # Print workspace details
    print(first_value("Name"))
    print(first_value("Address"))
    print(first_value("Category"))
    if price_range is not None:
        print(f"Price range: {price_range}")
    print(first_value("Next_status"))
    print(f"Overall Rating: {first_value('Rating')}")

In [19]:
# Show the details of each of the top 5 recommended workspaces for User_1

print("\nWorkspace Recommendations for User 1\n")

# The index of top_5_workspaces holds the workspace Ids; number them from 1
# so each entry is labelled with its position in the list
for rank, workspace_id in enumerate(top_5_workspaces.index, start=1):
    # Heading followed by the workspace details and blank-line spacing
    print(f"Top {rank} Choice\n")
    print_workspace(workspace_id)
    print("\n\n")


Workspace Recommendations for User 1

Top 1 Choice

Sharing CO
C. de Caracas, 6
Coworking space
Closes 6 PM
Overall Rating: 4.7



Top 2 Choice

Biblioteca Tomás Navarro Tomás
Centro de Ciencias Humanas y Sociales del CSIC, C. de Albasanz, 26-28
Library
Opens 9 AM Mon
Overall Rating: 3.5



Top 3 Choice

FELIZ COFFEE Specialty Coffee
C. de Lope de Vega, 2
Cafe
Price range: €
Closes 7:30 PM
Overall Rating: 4.7



Top 4 Choice

Study Room Luis García Berlanga
C. Viña Virgen, 2
Public library
Closes 9 PM
Overall Rating: 3.0



Top 5 Choice

Gerardo Diego Municipal Public Library
C. del Monte Aya, 12
Public library
Opens 8:30 AM Mon
Overall Rating: 4.0



