In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
import re
import random

In [2]:
# Remote CSV holding the Bandung hotel names, addresses and English descriptions.
DATA_URL = 'https://raw.githubusercontent.com/novindrap07/recom_system/master/dataset/hotel_bandung_english.csv'
df = pd.read_csv(DATA_URL)

In [3]:
# Preview the first rows to confirm the CSV parsed into name/address/description columns.
df.head()

Unnamed: 0,name,address,description
0,Capital O 253 Topas Galeria Hotel,"Jl. Dr. Djundjunan No. 153, 40173 Bandung, Ind...","A 10-minute drive from Bandung Airport, Topas ..."
1,Sheraton Bandung Hotel & Towers,"Jl. Ir H Juanda 390, 40135 Bandung, Indonesia",Sheraton Hotel & Towers offers 5-star accommod...
2,OYO 794 Ln 9 Bandung Residence,"Jalan Lemahnendeut No 9, Sukajadi, 40164 Bandu...","Conveniently located in Sukajadi, Bandung, OYO..."
3,OYO 226 LJ hotel,"Jl. Malabar No.2, Malabar, Lengkong, Dago, Asi...","Featuring a shared lounge, OYO 226 LJ hotel is..."
4,OYO 230 Maleo Residence,"JI. Dangeur Indah II No. 15, Sukagalih, Sukaja...",Attractively set in the Sukajadi district of B...


In [4]:
# Summarise each column; a 'unique' count below 'count' means some hotels are listed more than once.
df.describe()

Unnamed: 0,name,address,description
count,105,105,105
unique,101,102,103
top,OYO 794 Ln 9 Bandung Residence,"Jalan Lemahnendeut No 9, Sukajadi, 40164 Bandu...","Conveniently located in Sukajadi, Bandung, OYO..."
freq,3,3,2


In [5]:
def print_description(index, data=None):
    """Print the description, name and address stored under a row label.

    Parameters
    ----------
    index : hashable
        Row label to look up in the frame's index.
    data : pandas.DataFrame, optional
        Frame to read from; it must contain the 'description', 'name' and
        'address' columns. Defaults to the notebook-level ``df``.

    Returns
    -------
    None
        The three fields are printed. Nothing is printed when no row
        carries the requested label.
    """
    frame = df if data is None else data
    rows = frame[frame.index == index][['description', 'name', 'address']]
    # Check for a match BEFORE taking the first row: indexing `.values[0]` on
    # an empty selection raises IndexError, which made the old guard useless.
    if len(rows) == 0:
        return
    example = rows.values[0]
    print(example[0])
    print('Nama:', example[1])
    print('Alamat:', example[2])

In [6]:
# Show the full record stored under row label 4 as a quick check of the helper.
print_description(4)

Attractively set in the Sukajadi district of Bandung, OYO 230 Maleo Residence is located 3.7 km from Villa Isola, 4.7 km from Cihampelas Walk and 5.8 km from Stasiun Hall Bus Terminal. The property is around 6 km from Gedung Sate, 8 km from Riau Street and 8 km from Braga City Walk. Free WiFi and a 24-hour front desk are offered. At the hotel, rooms include a desk. Complete with a private bathroom equipped with a bath and free toiletries, the rooms at OYO 230 Maleo Residence have a flat-screen TV and air conditioning, and certain rooms have a balcony. The nearest airport is Husein Sastranegara Airport, 2.7 km from the accommodation. Sukajadi is a great choice for travelers interested in culturally diverse food, scenery and food shopping.
Nama: OYO 230 Maleo Residence
Alamat: JI. Dangeur Indah II No. 15, Sukagalih, Sukajadi, Pasteur, Sukajadi, 40163 Bandung, Indonesia


In [7]:
# Punctuation that is replaced by a space. A raw string is used so that the
# regex escapes (\[ \] \|) are not parsed as invalid Python string escapes.
clean_spcl = re.compile(r'[/(){}\[\]\|@,;]')
# Any character that is not a digit, lowercase letter, space, '#', '+' or '_' is deleted.
clean_symbol = re.compile('[^0-9a-z #+_]')
# NLTK's English stop-word list, held as a set for fast membership tests.
stopworda = {*stopwords.words('english')}

def clean_text(text):
    """Normalise a description string for vectorisation.

    The text is lowercased, characters matched by ``clean_spcl`` become
    spaces, characters matched by ``clean_symbol`` are deleted, and tokens
    found in ``stopworda`` are dropped. The remaining tokens are returned
    joined by single spaces.
    """
    lowered = text.lower()
    spaced = clean_spcl.sub(' ', lowered)
    stripped = clean_symbol.sub('', spaced)
    # Remove stop words from the description.
    kept_tokens = [token for token in stripped.split() if token not in stopworda]
    return ' '.join(kept_tokens)

# Add a column holding the cleaned version of every description.
df['desc_clean'] = df['description'].map(clean_text)

In [8]:
# Check that desc_clean now sits next to the raw description column.
df.head()

Unnamed: 0,name,address,description,desc_clean
0,Capital O 253 Topas Galeria Hotel,"Jl. Dr. Djundjunan No. 153, 40173 Bandung, Ind...","A 10-minute drive from Bandung Airport, Topas ...",10minute drive bandung airport topas galeria h...
1,Sheraton Bandung Hotel & Towers,"Jl. Ir H Juanda 390, 40135 Bandung, Indonesia",Sheraton Hotel & Towers offers 5-star accommod...,sheraton hotel towers offers 5star accommodati...
2,OYO 794 Ln 9 Bandung Residence,"Jalan Lemahnendeut No 9, Sukajadi, 40164 Bandu...","Conveniently located in Sukajadi, Bandung, OYO...",conveniently located sukajadi bandung oyo 794 ...
3,OYO 226 LJ hotel,"Jl. Malabar No.2, Malabar, Lengkong, Dago, Asi...","Featuring a shared lounge, OYO 226 LJ hotel is...",featuring shared lounge oyo 226 lj hotel locat...
4,OYO 230 Maleo Residence,"JI. Dangeur Indah II No. 15, Sukagalih, Sukaja...",Attractively set in the Sukajadi district of B...,attractively set sukajadi district bandung oyo...


In [9]:
# Index the frame by hotel name. The guard keeps this cell re-runnable:
# once 'name' is the index it is no longer a column, so a second
# set_index('name') would raise KeyError.
if df.index.name != 'name':
    df = df.set_index('name')

# TF-IDF over word uni-, bi- and tri-grams of the cleaned descriptions.
# min_df=1 keeps every term that occurs in at least one document, which is
# what min_df=0 meant; NOTE(review): newer scikit-learn releases are believed
# to reject an integer min_df below 1 - confirm against the installed version.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(df['desc_clean'])

# Pairwise cosine similarity between all description vectors (rows follow df order).
cos_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
cos_sim

array([[1.        , 0.02250818, 0.01254879, ..., 0.01044102, 0.04017144,
        0.03531754],
       [0.02250818, 1.        , 0.01040992, ..., 0.01269843, 0.02856891,
        0.01847406],
       [0.01254879, 0.01040992, 1.        , ..., 0.12575247, 0.01082423,
        0.02511644],
       ...,
       [0.01044102, 0.01269843, 0.12575247, ..., 1.        , 0.01065003,
        0.02392556],
       [0.04017144, 0.02856891, 0.01082423, ..., 0.01065003, 1.        ,
        0.03826221],
       [0.03531754, 0.01847406, 0.02511644, ..., 0.02392556, 0.03826221,
        1.        ]])

In [10]:
# Position -> hotel name lookup, in the same row order as the similarity matrix.
indices = pd.Series(data=df.index)
indices.iloc[:10]

0               Capital O 253 Topas Galeria Hotel
1                 Sheraton Bandung Hotel & Towers
2                  OYO 794 Ln 9 Bandung Residence
3                                OYO 226 LJ hotel
4                         OYO 230 Maleo Residence
5                       OYO 167 Dago's Hill Hotel
6                  OYO 794 Ln 9 Bandung Residence
7                      OYO 196 Horizone Residence
8    OYO 483 Flagship Tamansari Panoramic Bandung
9              OYO 295 Grha Ciumbuleuit Residence
Name: name, dtype: object

In [11]:
def recommendations(name, cos_sim = cos_sim, top_n = 10):
    """Return the hotels whose descriptions are most similar to `name`.

    Parameters
    ----------
    name : str
        Hotel name exactly as stored in ``indices``.
    cos_sim : array-like, optional
        Similarity matrix indexed by row position; row ``i`` is read as the
        scores of hotel ``indices[i]`` against every hotel. Defaults to the
        matrix bound when this cell is executed.
    top_n : int, optional
        Maximum number of hotels to return (default 10).

    Returns
    -------
    list
        Up to `top_n` distinct hotel names, most similar first. The queried
        hotel itself is never included.

    Raises
    ------
    IndexError
        If `name` does not occur in ``indices``.
    """
    # Position of the first row that carries the requested hotel name.
    matches = indices[indices == name].index
    if len(matches) == 0:
        raise IndexError('Hotel {!r} was not found in the dataset'.format(name))
    idx = matches[0]

    # Similarity of every hotel to the query, best first. The query row is
    # removed by position instead of assuming it sorts to the top: a row with
    # an identical description ties at the same score and may be ordered first.
    scores = pd.Series(cos_sim[idx])
    scores = scores[scores.index != idx].sort_values(ascending = False)

    recommended_hotel = []
    for i in scores.index:
        if len(recommended_hotel) >= top_n:
            break
        candidate = indices.iloc[i]
        # Some hotels are listed more than once in the data; skip repeats of
        # the query and of names that were already recommended.
        if candidate == name or candidate in recommended_hotel:
            continue
        recommended_hotel.append(candidate)

    return recommended_hotel

In [12]:
# Example query: hotels most similar to 'Benua Hotel'.
recommendations('Benua Hotel')

['FOX Lite Hotel Metro Indah Bandung',
 'InterContinental Bandung Dago Pakar',
 'Zest Sukajadi Hotel Bandung',
 'M Premiere Hotel Dago Bandung',
 'Ibis Bandung Pasteur',
 'Serela Cihampelas Hotel',
 'Grand Cordela Hotel Bandung ',
 'Favehotel Hyper Square',
 'HARRIS Hotel & Conventions Ciumbuleuit - Bandung',
 'Hemangini Hotel Bandung']

In [13]:
# Second example, using a hotel that appeared in the previous result list.
recommendations("Serela Cihampelas Hotel")

['Vio Cihampelas',
 'Grand Sovia Hotel',
 'Neo Dipatiukur Bandung',
 'Grand Tjokro Bandung',
 'HARRIS Hotel & Conventions Ciumbuleuit - Bandung',
 'InterContinental Bandung Dago Pakar',
 'Ibis Bandung Pasteur',
 'Tebu Hotel Bandung',
 'Aryaduta Bandung',
 'Benua Hotel']

In [14]:
# Third example query, for 'Hemangini Hotel Bandung'.
recommendations('Hemangini Hotel Bandung')

['UTC Bandung',
 'Amaris Hotel Setiabudhi - Bandung',
 'M Premiere Hotel Dago Bandung',
 'Aston Pasteur',
 'YELLO Hotel Paskal Bandung',
 'Green Batara Hotel',
 'Grand Cordela Hotel Bandung ',
 'El Cavana Bandung',
 'Zest Sukajadi Hotel Bandung',
 'Meize Hotel Bandung']