In [16]:
import pandas as pd
import random
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity

In [17]:
hotelFeatures_df = pd.read_csv('hotel_features.csv')
hotelFeatures_df = hotelFeatures_df.sort_values('property_feature', ascending=True)
hotelFeatures_df

Unnamed: 0,hotel_ID,property_feature
90,Seda Bonifacio Global City,A la carte breakfast
1390,mySTAY Hotel BGC West,A la carte breakfast
1199,Seda Bonifacio Global City,A la carte breakfast
5436,Exquisite 1BR in Uptown BGC,Accessible by elevator
1421,mySTAY Hotel BGC West,Accessible by elevator
...,...,...
210,F1 Hotel Manila,Xerox/fax in business center
1853,MySpace Hotel @BGC,Xerox/fax in business center
352,MySpace Hotel @BGC,Xerox/fax in business center
555,"Shangri-La The Fort, Manila",Yoga room


In [18]:
hotelFeatures_df['property_feature'].value_counts()

Air conditioning               183
Towels                         151
Elevator                       148
Linens                         139
Toiletries                     130
                              ... 
Humidifier                       1
Shared bathroom                  1
Sewing kit                       1
Service animals are welcome      1
iPod docking station             1
Name: property_feature, Length: 326, dtype: int64

In [19]:
hotelFeatures_df['hotel_ID'].value_counts()

Seda Bonifacio Global City                         214
MySpace Hotel @BGC                                 188
F1 Hotel Manila                                    178
Ascott Bonifacio Global City Manila                164
Exquisite 1BR in Uptown BGC                        122
                                                  ... 
Ridgewood Towers 1701 Condo                          5
Spacious Loft in BGC with Golf and Sunset View       5
Azure Luxury Beach Suites by Vacationsph             5
StayHere Burgos Circle 1 Bedroom Avant@Fort Gym      4
Blue Nest Apartment                                  2
Name: hotel_ID, Length: 193, dtype: int64

In [20]:
# Create a new DataFrame with hotel_ID as index and property features as columns
item_profile = pd.DataFrame(index=hotelFeatures_df['hotel_ID'].unique(), columns=hotelFeatures_df['property_feature'].unique())

# Fill in the values
for index, row in hotelFeatures_df.iterrows():
    item_profile.loc[row['hotel_ID'], row['property_feature']] = 1

# Fill NaN values with 0
item_profile.fillna(0, inplace=True)
item_profile

Unnamed: 0,A la carte breakfast,Accessible by elevator,Accessible by stairs,Accessible vanities,Adapter,Additional toilet,Adjustable height hand-held shower wand,Adult-only property,Air conditioning,Air conditioning in public area,...,Wi-Fi,Wi-Fi [free],Wi-Fi [portable rental],Wi-Fi in public areas,Window,Wine glasses,Wooden/parqueted flooring,Xerox/fax in business center,Yoga room,iPod docking station
Seda Bonifacio Global City,1,0,0,0,0,0,0,0,1,1,...,0,1,0,1,0,0,0,0,0,0
mySTAY Hotel BGC West,1,1,0,0,0,0,0,0,1,1,...,0,0,0,1,0,0,0,0,0,0
Exquisite 1BR in Uptown BGC,0,1,0,0,1,1,0,0,1,1,...,0,0,0,1,0,0,0,0,0,0
Forbes Area BGC Taguig Manila,0,1,0,0,0,0,0,0,1,1,...,0,0,0,1,0,0,0,0,0,0
Ascott Bonifacio Global City Manila,0,1,0,1,1,0,1,0,1,1,...,0,1,0,1,1,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Azure Residences Condominium Daily Rental,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Azure Luxury Beach Suites by Vacationsph,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SMDC Grace Taguig,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
StayHere Burgos Circle 1 Bedroom Avant@Fort Gym,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [41]:
#Sample user matrix
user_random_rating = pd.DataFrame(np.random.randint(0,2,size=(193, 326)))
user_random_rating

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,316,317,318,319,320,321,322,323,324,325
0,1,0,1,0,0,0,0,0,1,1,...,1,1,1,0,1,1,1,0,1,0
1,0,1,1,0,0,0,0,1,1,1,...,0,1,0,0,0,0,1,1,1,1
2,1,0,0,0,1,1,0,0,0,0,...,1,0,1,1,1,0,1,1,1,0
3,1,1,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,1,1,0,1
4,1,0,1,0,1,0,0,0,0,0,...,1,1,0,1,0,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,0,1,0,0,1,1,0,1,0,0,...,1,0,0,0,0,1,0,1,1,0
189,1,1,0,1,1,0,1,0,0,0,...,0,1,1,0,1,0,1,1,1,0
190,1,0,0,0,1,0,1,0,1,0,...,1,0,1,1,0,1,0,0,0,1
191,1,1,1,1,1,1,1,1,1,1,...,1,0,0,0,1,0,0,0,0,1


In [42]:
user_profile = user_random_rating.mean(axis=0)
user_profile

0      0.476684
1      0.502591
2      0.471503
3      0.492228
4      0.492228
         ...   
321    0.466321
322    0.497409
323    0.518135
324    0.528497
325    0.466321
Length: 326, dtype: float64

In [44]:
scores_df = item_profile.copy()
scores = cosine_similarity(item_profile, user_profile.values.reshape(1,-1)).reshape(-1)
scores_df['similarity'] = scores
df_scores_sorted = df_scores.sort_values('similarity', ascending=False)

Unnamed: 0,A la carte breakfast,Accessible by elevator,Accessible by stairs,Accessible vanities,Adapter,Additional toilet,Adjustable height hand-held shower wand,Adult-only property,Air conditioning,Air conditioning in public area,...,Wi-Fi [free],Wi-Fi [portable rental],Wi-Fi in public areas,Window,Wine glasses,Wooden/parqueted flooring,Xerox/fax in business center,Yoga room,iPod docking station,similarity
Seda Bonifacio Global City,1,0,0,0,0,0,0,0,1,1,...,1,0,1,0,0,0,0,0,0,0.573992
mySTAY Hotel BGC West,1,1,0,0,0,0,0,0,1,1,...,0,0,1,0,0,0,0,0,0,0.398202
Exquisite 1BR in Uptown BGC,0,1,0,0,1,1,0,0,1,1,...,0,0,1,0,0,0,0,0,0,0.606454
Forbes Area BGC Taguig Manila,0,1,0,0,0,0,0,0,1,1,...,0,0,1,0,0,0,0,0,0,0.288775
Ascott Bonifacio Global City Manila,0,1,0,1,1,0,1,0,1,1,...,1,0,1,1,0,1,0,0,1,0.707547


In [None]:
users_df = pd.read_csv('user_ratings.csv')
users_df

In [None]:
users_df['userID'].value_counts()

In [None]:
users_df.loc[users_df['userID']== "Maria"]
#Judging below Maria could be possibly different people