In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the events table from the CSV in the working directory into `df`,
# then preview the first five rows as the cell's rendered output.
df = pd.read_csv(
    filepath_or_buffer="Thailand_Final_Realistic_Sports_Names_Corrected.csv",
)
df.head(n=5)

Unnamed: 0,Event Name,Event Type,Event Description,Date & Time,Duration (hrs),City,Country,Latitude,Longitude,Attendees,Average Rating,Review Count,Cost,Target Audience
0,Traditional Thai Sports Carnival in Hat Yai,Sports,Experience competitive spirit and high energy ...,1/15/2025 7:43,2,Hat Yai,Thailand,6.224497,103.106054,3068,3.4,157,Low,Nomads
1,Songkran Island Street Feast of Food in Phuket,Food,Experience the taste of Thailand at the Songkr...,1/1/2025 23:24,6,Phuket,Thailand,8.413457,99.565992,1831,3.6,370,High,Locals
2,Highland Marathon in Rayong,Sports,Experience competitive spirit and high energy ...,3/8/2025 23:38,2,Rayong,Thailand,14.480564,98.125798,2085,3.1,965,High,Nomads
3,Thai Festival of Food in Pattaya,Food,Experience the taste of Thailand at the Thai F...,1/3/2025 22:45,4,Pattaya,Thailand,12.989783,97.477392,1564,4.8,668,Free,Everyone
4,FutureCode Conference in Khon Kaen,Technology,Join leading tech minds and innovators at the ...,3/6/2025 13:57,4,Khon Kaen,Thailand,14.906898,101.237941,4057,3.1,79,Free,Locals


In [2]:
# Build one text document per event from its name, type and target audience.
# `str.cat(..., na_rep="")` is used instead of `+` so that a missing value in
# any of the three fields becomes an empty string; with plain concatenation a
# single NaN would turn the whole document into NaN, which TfidfVectorizer
# refuses to fit.
df['Combined_Text'] = df['Event Name'].str.cat(
    [df['Event Type'], df['Target Audience']],
    sep=" ",
    na_rep="",
)

# Fit the TF-IDF vocabulary on the combined text (English stop words removed).
# `vectorizer` and `tfidf_matrix` are reused by later cells: rows of
# `tfidf_matrix` are in the same order as the rows of `df`.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['Combined_Text'])

print("TF-IDF matrix shape:", tfidf_matrix.shape)


TF-IDF matrix shape: (700, 195)


In [3]:
def get_tfidf_recommendations(user_interests, location, top_n=5):
    """Recommend events in a city ranked by TF-IDF similarity to the interests.

    Relies on the notebook-level `df`, `vectorizer` and `tfidf_matrix`, where
    row i of `tfidf_matrix` corresponds to row i (by position) of `df`.

    Parameters
    ----------
    user_interests : iterable of str, or str
        Interest keywords. They are joined with spaces into a single query.
        A bare string is treated as one keyword rather than being split into
        characters.
    location : str
        City name, matched case-insensitively against the `City` column.
    top_n : int, default 5
        Maximum number of events to return. Values <= 0 yield an empty table.

    Returns
    -------
    pandas.DataFrame or list of str
        A frame with columns "Event Name", "Event Type" and "City", ordered
        from most to least similar and re-indexed from 0. If no event matches
        `location`, a one-element list holding an explanatory message is
        returned instead. Events are ranked only; there is no minimum score,
        so events with zero similarity can appear in the result.
    """
    # " ".join("abc") would yield "a b c"; wrap a lone string so it stays whole.
    if isinstance(user_interests, str):
        user_interests = [user_interests]

    # Work with row *positions* rather than index labels: the TF-IDF matrix is
    # positional, so labels are only valid row numbers for a default RangeIndex.
    city_mask = (df['City'].str.lower() == location.lower()).to_numpy(
        dtype=bool, na_value=False
    )
    positions = city_mask.nonzero()[0]
    if positions.size == 0:
        return ["No events found for the specified location."]

    location_filtered = df.iloc[positions]

    # Combine user interests into a single query document and vectorize it
    interest_query = " ".join(user_interests)
    query_vec = vectorizer.transform([interest_query])

    # Only compute similarity against events in the requested city
    location_tfidf = tfidf_matrix[positions]
    similarities = cosine_similarity(query_vec, location_tfidf).flatten()

    # Reverse first, then truncate: identical to `[-top_n:][::-1]` for
    # top_n > 0, but `[-0:]` would select everything when top_n == 0.
    top_indices = similarities.argsort()[::-1][:max(top_n, 0)]

    recommendations = location_filtered.iloc[top_indices][["Event Name", "Event Type", "City"]]
    return recommendations.reset_index(drop=True)


In [4]:
# Example query: three interest keywords and the city to search in
user_interests = ["Technology", "Startup", "Networking"]
location = "Bangkok"

# Left as the cell's last expression so the notebook renders the result table
get_tfidf_recommendations(user_interests=user_interests, location=location)


Unnamed: 0,Event Name,Event Type,City
0,TechSphere in Bangkok,Technology,Bangkok
1,Smart Nation Conference in Bangkok,Technology,Bangkok
2,NextGen Tech Expo in Bangkok,Technology,Bangkok
3,Digital Futures Forum in Bangkok,Technology,Bangkok
4,AI Summit by AIS in Bangkok,Technology,Bangkok
