In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the events table from a CSV file addressed relative to the working directory.
data = pd.read_csv("./book50.csv")

In [3]:
# Preview the first rows to check which columns were parsed from the CSV.
data.head()

Unnamed: 0,index,city,country,continent,latitude,longitude,overall_rate,genres,keywords,event_cost,event_feedback,city_condition,weather_condition,facilities_feedback
0,0,London,United Kingdom,Europe,51.5074,-0.1278,4.7,Finance,"Fintech, banking, innovation",6500,The event provided valuable insights into the ...,London was vibrant and offered a mix of histor...,The weather was mild with occasional rain.,"The facilities provided were excellent, with s..."
1,1,New York City,United States,North America,40.7128,-74.006,4.6,Finance,"Fintech, digital innovation, startups",6500,The event provided valuable networking opportu...,New York City was lively with a diverse range ...,"The weather was sunny and warm, perfect for ou...","The facilities provided were excellent, with s..."
2,2,Shanghai,China,Asia,31.2304,121.4737,4.1,E-commerce,"Online marketplace, digital payments, globaliz...",6000,The event showcased the latest trends in e-com...,Shanghai was a vibrant metropolis with a blend...,The weather was humid with occasional rainfall.,"The facilities provided were satisfactory, but..."
3,3,Paris,France,Europe,48.8566,2.3522,4.7,Fashion,"Haute couture, luxury brands, fashion trends",7000,The event celebrated the world of fashion and ...,"Paris was romantic and offered a blend of art,...",The weather was pleasant with mild temperatures.,"The facilities provided were excellent, with l..."
4,4,Tokyo,Japan,Asia,35.6895,139.6917,4.5,Technology,"Robotics, AI, future technology",6000,The event showcased cutting-edge technology an...,Tokyo was a bustling metropolis with a mix of ...,The weather was pleasant and mild.,"The facilities provided were excellent, with a..."


In [4]:
# (number of rows, number of columns) of the loaded table.
data.shape

(31, 14)

In [5]:
# combining all the selected features

# Free-text columns that are merged into a single document per row.
TEXT_COLUMNS = ['genres', 'keywords', 'event_feedback', 'city_condition',
                'weather_condition', 'facilities_feedback']

# With plain `+` concatenation a single missing value turns the whole row into NaN,
# and the TF-IDF vectorizer rejects NaN documents. Blank out missing values first,
# then join the columns with single spaces (same text as before when nothing is missing).
combined_features = data[TEXT_COLUMNS].fillna('').astype(str).agg(' '.join, axis=1)

In [6]:
# Inspect the concatenated text built for each row.
print(combined_features)

0     Finance Fintech, banking, innovation The event...
1     Finance Fintech, digital innovation, startups ...
2     E-commerce Online marketplace, digital payment...
3     Fashion Haute couture, luxury brands, fashion ...
4     Technology Robotics, AI, future technology The...
5     Technology Innovation, entrepreneurship, tech ...
6     Business Business networking, luxury hospitali...
7     Business Business innovation, tech startups, d...
8     Digital Marketing Digital advertising, marketi...
9     Technology Tech startups, artificial intellige...
10    Technology K-pop, gaming industry, technology ...
11    Technology Tech innovation, entrepreneurship, ...
12    Sustainable Development Green technologies, su...
13    Finance Financial hub, fintech, innovation The...
14    Film Bollywood, film production, entertainment...
15    Finance Cryptocurrency, banking, financial tec...
16    Sports Olympic Games, sports events, tourism T...
17    Technology AI development, tech innovation

In [7]:
# converting the text data to feature vectors
# The vectorizer is created with its default settings (no arguments are passed).

vectorizer = TfidfVectorizer()

In [8]:
# Fit the vectorizer on the combined text and encode every row as a TF-IDF vector.
feature_vectors = vectorizer.fit_transform(combined_features)

In [9]:
# Printed as "(row, column)  weight" entries, one per stored value.
print(feature_vectors)

  (0, 247)	0.2027048497128504
  (0, 53)	0.1302242537124601
  (0, 13)	0.1516965099147576
  (0, 254)	0.2027048497128504
  (0, 99)	0.10581477154853694
  (0, 292)	0.06020119037569938
  (0, 104)	0.06020119037569938
  (0, 216)	0.1302242537124601
  (0, 194)	0.1244864674333178
  (0, 293)	0.1281729723465368
  (0, 180)	0.10581477154853694
  (0, 291)	0.06020119037569938
  (0, 21)	0.185386046501505
  (0, 184)	0.10192947575096793
  (0, 128)	0.1436577611262365
  (0, 195)	0.1655163641160985
  (0, 182)	0.1436577611262365
  (0, 196)	0.2027048497128504
  (0, 288)	0.08555874237556593
  (0, 290)	0.12040238075139877
  (0, 165)	0.22711433187677355
  (0, 269)	0.1965596027030839
  (0, 8)	0.24080476150279753
  (0, 139)	0.1436577611262365
  (0, 280)	0.16097656433758187
  :	:
  (30, 27)	0.07018888655286577
  (30, 61)	0.07018888655286577
  (30, 138)	0.07018888655286577
  (30, 245)	0.07018888655286577
  (30, 40)	0.07018888655286577
  (30, 229)	0.07018888655286577
  (30, 237)	0.0754366003302059
  (30, 114)	0.127259

In [10]:
# getting the similarity scores using cosine similarity
# Only one matrix is passed, so the rows are compared against each other and the
# result has one row and one column per input row.

similarity = cosine_similarity(feature_vectors)

In [11]:
# Inspect the pairwise similarity scores (each row's score against itself is 1).
print(similarity)

[[1.         0.49824985 0.32628105 0.24979037 0.37979309 0.26019175
  0.15190305 0.16810179 0.27211388 0.19427455 0.33228683 0.24728507
  0.12091986 0.37804076 0.18408764 0.49107489 0.15124519 0.3537179
  0.24116081 0.30246339 0.19296954 0.23307734 0.25008815 0.2322353
  0.1426366  0.28362782 0.2179239  0.13751079 0.3211498  0.20222736
  0.20695678]
 [0.49824985 1.         0.18552897 0.16157959 0.22658926 0.17014925
  0.20985923 0.1961956  0.18557405 0.21902305 0.21675911 0.20914277
  0.10004082 0.28104549 0.11711801 0.2377683  0.19785752 0.17250748
  0.23244901 0.23478448 0.30008941 0.21532039 0.25385536 0.19195999
  0.09919524 0.20027024 0.26281259 0.11402533 0.20795528 0.15046009
  0.19287066]
 [0.32628105 0.18552897 1.         0.17921935 0.22786224 0.21685136
  0.17760357 0.29108295 0.22179578 0.17851502 0.15532565 0.14907715
  0.14117756 0.18001849 0.18663538 0.18990719 0.17262335 0.30717977
  0.16557195 0.18138034 0.32528871 0.18050689 0.46502938 0.20695197
  0.14969594 0.1789977

In [12]:
# Sanity check: the matrix should be square, one row/column per row of `data`.
print(similarity.shape)

(31, 31)


In [13]:
def Continent(cities, top_n=5, events=None, similarity_matrix=None):
    """Print the events most similar to the first event listed for a continent.

    The first row whose ``continent`` column equals ``cities`` is used as the
    anchor. All rows are then ranked by their similarity score against that
    anchor (highest first; ties keep table order) and the ``country``,
    ``city``, ``keywords`` and ``event_cost`` of the best ``top_n`` rows are
    printed, each record followed by a blank separator. The anchor row itself
    is part of the ranking.

    Parameters
    ----------
    cities : str
        Continent name to look up. The parameter name is historical; the value
        is compared against the ``continent`` column.
    top_n : int, default 5
        Number of rows to print.
    events : pandas.DataFrame, optional
        Table to search. Defaults to the notebook-level ``data``.
    similarity_matrix : array-like, optional
        Pairwise similarity scores, indexed by row position. Defaults to the
        notebook-level ``similarity``.

    Raises
    ------
    IndexError
        If no row belongs to the requested continent.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Fall back to the notebook globals only when no explicit inputs are given.
    events = data if events is None else events
    similarity_matrix = similarity if similarity_matrix is None else similarity_matrix

    # Work with row *positions*: the similarity matrix and .iloc are positional,
    # so index labels must not be used here (they differ after filtering/sorting).
    matches = np.flatnonzero((events['continent'] == cities).to_numpy())
    if matches.size == 0:
        raise IndexError(f"no event found for continent {cities!r}")
    anchor = int(matches[0])

    scores = similarity_matrix[anchor]
    # sorted() is stable, so equally scored rows stay in table order.
    ranked = sorted(range(len(scores)), key=lambda position: scores[position], reverse=True)

    for position in ranked[:top_n]:
        event = events.iloc[position]  # look the row up once instead of per field
        print(event['country'])
        print(event['city'])
        print(event['keywords'])
        print(event['event_cost'])
        print("\n")

In [14]:
import joblib

In [15]:
# NOTE(review): pickling stores a plain function by reference (module + name), not its
# code. Loading Continent.pkl from another script will presumably fail unless a
# `Continent` function is defined there as well — confirm, and consider moving the
# function into an importable .py module instead of pickling it.
joblib.dump(Continent,'Continent.pkl')

['Continent.pkl']

In [17]:
# Persist the similarity matrix and the source table to disk.
# Each file is opened in a `with` block so its handle is flushed and closed as soon
# as the dump finishes, rather than staying open until garbage collection.
with open('./similarity.pkl','wb') as similarity_file:
    joblib.dump(similarity, similarity_file)
with open('./data.pkl','wb') as data_file:
    joblib.dump(data, data_file)