In [300]:
from sklearn.metrics.pairwise import cosine_similarity, cosine_distances, euclidean_distances, rbf_kernel, laplacian_kernel
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [255]:
# bad pairwise metrics: linear_kernel, polynomial_kernel, sigmoid_kernel
# good pairwise metrics: cosine_similarity / cosine_distances, euclidean_distances, rbf_kernel, laplacian_kernel

In [301]:
# Load the city table from disk.
DATA_PATH = "numbeo.csv"
df = pd.read_csv(DATA_PATH)

# Build a "<CountryName>#<CityName>" key for every row.
df['id'] = df['CountryName'] + "#" + df['CityName']

# Standardise every column that is left once the three label columns are dropped.
m = preprocessing.StandardScaler().fit_transform(
    df.drop(columns=['CountryName', 'CityName', 'id']).values
)

m.shape

(764, 30)

In [302]:
# Distance-based measures: divide by the largest distance, then flip
# (1 - d) so that a larger value means "more alike".
cos_dis = cosine_distances(m, m)
cos_dis = 1.0 - cos_dis / cos_dis.max()

euc_dis = euclidean_distances(m, m)
euc_dis = 1.0 - euc_dis / euc_dis.max()

# Kernel-based measures: only divided by their largest entry.
rbf_ker = rbf_kernel(m, m)
rbf_ker = rbf_ker / rbf_ker.max()

lap_ker = laplacian_kernel(m, m)
lap_ker = lap_ker / lap_ker.max()

In [303]:
# Element-wise mean of the four similarity matrices.
# One vectorized NumPy expression replaces the per-cell Python double loop;
# the addition order is unchanged, so every entry has the same value as before.
mean_matr = (cos_dis + rbf_ker + lap_ker + euc_dis) / 4.0

# Name under which the recommender looks the scores up.
similarity_matrix = mean_matr

In [305]:
# Lookup table: "<CountryName>#<CityName>" key -> row label of that city in df.
mapping = pd.Series(data=df.index, index=df["id"])
print(mapping)

id
Albania#Tirana                0
Algeria#Algiers               1
Angola#Luanda                 2
Argentina#Buenos Aires        3
Argentina#Cordoba             4
                           ... 
Venezuela#Caracas           759
Vietnam#Da Nang             760
Vietnam#Hanoi               761
Vietnam#Ho Chi Minh City    762
Zimbabwe#Harare             763
Length: 764, dtype: int64


In [306]:
def recsys(id, top_n=24):
    """Return the cities most similar to the city identified by ``id``.

    Parameters
    ----------
    id : str
        Key of the query city as stored in ``mapping`` (the notebook builds
        these as ``"<CountryName>#<CityName>"``).
    top_n : int, optional
        Maximum number of neighbours to return. Defaults to 24, the number
        of results the previous hard-coded slice produced.

    Returns
    -------
    tuple
        ``(ids, scores)`` where ``ids`` is the slice of ``df['id']`` for the
        selected rows and ``scores`` is a list of ``(row_position, score)``
        tuples, both ordered from most to least similar.

    Raises
    ------
    KeyError
        If ``id`` is not present in ``mapping``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    index = mapping[id]

    # Drop the query city by position instead of assuming it sorts first:
    # when another row ties with it, the stable sort may rank that row ahead,
    # and slicing off element 0 would discard a real neighbour while keeping
    # the query city in its own recommendations.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_matrix[index])
        if position != index
    ]

    # sorted() is stable, so equally scored cities keep their row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    similarity_score = candidates[:top_n]

    city_indices = [position for position, _ in similarity_score]

    return (df['id'].iloc[city_indices], similarity_score)

In [307]:
# Example query: the cities ranked most similar to Moscow, with their scores.
recsys('Russia#Moscow')

(509    Russia#Saint Petersburg
 511               Russia#Sochi
 500           Russia#Krasnodar
 498               Russia#Kazan
 510              Russia#Samara
 513         Russia#Vladivostok
 111             China#Hangzhou
 319                 Italy#Rome
 469              Poland#Warsaw
 506                Russia#Perm
 504         Russia#Novosibirsk
 120               China#Xiamen
 121               China#Zhuhai
 499          Russia#Khabarovsk
 515       Russia#Yekaterinburg
 448         Panama#Panama City
 118              China#Tianjin
 507      Russia#Rostov-na-donu
 110            China#Guangzhou
 594           Thailand#Bangkok
 514            Russia#Voronezh
 380         Mexico#Mexico City
 482                 Qatar#Doha
 114             China#Shanghai
 Name: id, dtype: object,
 [(509, 0.8678341134574866),
  (511, 0.7992148345002578),
  (500, 0.7524996042717704),
  (498, 0.7426671531056478),
  (510, 0.7371541580031618),
  (513, 0.7349797890743488),
  (111, 0.721555804011487),
  (3