In [27]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [28]:
# Load the scraped event listings into a DataFrame.
# NOTE(review): the path is absolute and hard-coded (looks like a Colab runtime path) — adjust when running elsewhere.
events_data = pd.read_csv(filepath_or_buffer='/content/updated_eventbrite_prices.csv')

In [29]:
# Preview the first five rows to check the columns that were loaded
events_data.head(n=5)

Unnamed: 0.1,Unnamed: 0,event_name,event_date,event_location,event_price,event_details,geolocation,category
0,0,DwellFi PropTech and Web3 Event,"Thu, February 23, 2025, 5:30 PM – 8:30 PM PST","Celo 500 Treat Avenue San Francisco, CA 94110",276.66,T﻿his event is focused on PropTech and Web3.Pa...,POINT (37.735286 -122.408126),Entertainment - Theater
1,1,ABBA DANCING QUEEN DISCO NIGHT,"Sat, Feb 25, 2025, 10:00 PM – Sun, Feb 26, 202...","The Record Bar 163 Eddy Street San Francisco, ...",359.02,ABBA DANCING QUEEN DISCO NIGHTA Disco dance pa...,POINT (37.784144 -122.410313),Entertainment - Music
2,2,The Unreal Garden - San Francisco,"Multiple Dates Sun, Feb 26, 2025, 6:00 PM PDT",Palace of Fine Arts 3601 Lyon Street San Franc...,303.37,"This fall only, experience the hit immersive e...",POINT (37.80408 -122.448179),Entertainment - Theater
3,3,No Scrubs: SF's Best '90s Hip Hop and R&B Nigh...,Multiple Dates PDT,Neck of the Woods 406 Clement Street San Franc...,274.72,One of SF's most popular weekly dance parties ...,POINT (37.7831255 -122.4637116),Entertainment - Dance
4,4,45th Stanford Viennese Ball,"Sat, Feb 25, 2025, 8:30 PM – Sun, Feb 26, 2025...",Hilton San Francisco Union Square 333 O'Farrel...,214.71,Stanford Viennese Ball is an annual event mode...,POINT (37.7857343 -122.4104369),Entertainment - Music


In [30]:
# (number of rows, number of columns) of the loaded table
events_data.shape


(768, 8)

In [31]:
# Columns whose contents are merged into one text document per event
# and used as the basis for the similarity computation.
selected_features = [
    'event_name',
    'event_location',
    'category',
    'geolocation',
    'event_details',
    'event_price',
]
print(selected_features)

['event_name', 'event_location', 'category', 'geolocation', 'event_details', 'event_price']


In [32]:
# Replace missing values with '' so the text columns can be concatenated later.
# Numeric columns (event_price) are deliberately skipped: writing '' into a float
# column turns it into a mixed str/float object column, and the numeric budget
# comparisons on event_price further down would then raise a TypeError.
# A missing price is simply rendered as 'nan' by astype(str) when features are combined.
for feature in selected_features:
    if pd.api.types.is_numeric_dtype(events_data[feature]):
        continue
    events_data[feature] = events_data[feature].fillna('')

In [33]:
# Build one text document per event by joining the six selected features
# with single spaces; the numeric price is converted to text first.
separator = ' '
combined_features = (
    events_data['event_name'] + separator
    + events_data['event_location'] + separator
    + events_data['category'] + separator
    + events_data['geolocation'] + separator
    + events_data['event_details'] + separator
    + events_data['event_price'].astype(str)
)

In [34]:
# Show the combined text built for each event (pandas truncates long strings and long Series when printing)
print(combined_features)

0      DwellFi PropTech and Web3 Event Celo 500 Treat...
1      ABBA DANCING QUEEN DISCO NIGHT The Record Bar ...
2      The Unreal Garden - San Francisco Palace of Fi...
3      No Scrubs: SF's Best '90s Hip Hop and R&B Nigh...
4      45th Stanford Viennese Ball Hilton San Francis...
                             ...                        
763    Saturday Night Speed Dating Oakland | Ages 24-...
764    Throwback Thursday at Hometown Bowl Hometown B...
765    How to Start a Personal Finance Business - Oak...
766    San Francisco Barbary Coast Trail History Hike...
767    Boba Happy Hour - Japantown, San Francisco Jap...
Length: 768, dtype: object


In [35]:
# Create a TF-IDF vectorizer with scikit-learn's default settings.
# It is only constructed here; fitting on the combined event text happens in the next cell.

vectorizer = TfidfVectorizer()

In [36]:
# Learn the vocabulary from the combined text and encode every event as one row of a TF-IDF matrix
feature_vectors = vectorizer.fit_transform(combined_features)

In [37]:
# Inspect the matrix: each printed entry is a (row, column) position followed by its stored weight
print(feature_vectors)

  (0, 6516)	0.11989309644158717
  (0, 13058)	0.19982182740264529
  (0, 2879)	0.171183053387474
  (0, 17165)	0.13105062535762543
  (0, 7067)	0.04689271922931777
  (0, 4520)	0.19982182740264529
  (0, 1423)	0.02877798843979767
  (0, 16400)	0.03364881628182195
  (0, 3346)	0.0177101446833872
  (0, 14203)	0.01579849828750963
  (0, 7728)	0.016241609822899343
  (0, 4263)	0.005793689521833039
  (0, 2205)	0.02181584148733358
  (0, 6911)	0.010491351842861561
  (0, 15999)	0.01541932389914855
  (0, 12658)	0.005801228749954107
  (0, 943)	0.005839073920332797
  (0, 1615)	0.039964365480529056
  (0, 118)	0.005816336933731245
  (0, 1083)	0.039964365480529056
  (0, 8704)	0.018830560586705205
  (0, 9466)	0.031947397907352675
  (0, 7608)	0.030712253135631827
  (0, 11849)	0.01755633791698003
  (0, 12149)	0.027333267083114853
  :	:
  (767, 14631)	0.07300280044562839
  (767, 9284)	0.17249622889815888
  (767, 16607)	0.07859521829754917
  (767, 4175)	0.0717332929139219
  (767, 15786)	0.24514292056706308
  (767,

In [38]:
# Compute the cosine similarity between every pair of event vectors.
# Only one matrix is passed, so its rows are compared against each other.

similarity = cosine_similarity(feature_vectors)

In [39]:
# Inspect the pairwise similarity matrix (entry [i, j] is the score between event i and event j)
print(similarity)

[[1.         0.04036367 0.11304002 ... 0.08855963 0.08606636 0.04774834]
 [0.04036367 1.         0.06518077 ... 0.03141116 0.0587507  0.04225926]
 [0.11304002 0.06518077 1.         ... 0.14146878 0.14748179 0.06881051]
 ...
 [0.08855963 0.03141116 0.14146878 ... 1.         0.08743642 0.05424636]
 [0.08606636 0.0587507  0.14748179 ... 0.08743642 1.         0.08052288]
 [0.04774834 0.04225926 0.06881051 ... 0.05424636 0.08052288 1.        ]]


In [40]:
# Sanity check: the matrix should be square, with one row and one column per event
print(similarity.shape)

(768, 768)


In [44]:
# Despite its name, this list holds the 'category' value of every event
# (one entry per row, so duplicates are included); later cells search it for the user's input.
list_of_all_titles = events_data['category'].tolist()
print(list_of_all_titles)

['Entertainment - Theater', 'Entertainment - Music', 'Entertainment - Theater', 'Entertainment - Dance', 'Entertainment - Music', 'Technology & Business - Tech', 'Entertainment - Dance', 'Technology & Business - Tech', 'Entertainment - Music', 'Entertainment - Music', 'Other', 'Technology & Business - Tech', 'Entertainment - Music', 'Entertainment - Music', 'Art & Culture - Art Exhibit', 'Entertainment - Theater', 'Entertainment - Music', 'Entertainment - Theater', 'Technology & Business - Tech', 'Entertainment - Music', 'Entertainment - Dance', 'Entertainment - Dance', 'Technology & Business - Tech', 'Technology & Business - Tech', 'Art & Culture - Art Exhibit', 'Entertainment - Theater', 'Entertainment - Music', 'Entertainment - Theater', 'Entertainment - Theater', 'Entertainment - Theater', 'Technology & Business - Tech', 'Technology & Business - Tech', 'Entertainment - Music', 'Technology & Business - Tech', 'Entertainment - Dance', 'Technology & Business - Tech', 'Entertainment - 

In [43]:
category_input = input('Enter your favourite category (or part of it): ')

# Normalise once: surrounding whitespace and letter case should not affect matching.
query = category_input.strip().lower()

# Case-insensitive substring match over the distinct categories, keeping first-seen order.
# An empty query is treated as "no match"; otherwise '' would match every category
# and the first one in the data would be picked arbitrarily.
matching_categories = [
    cat for cat in dict.fromkeys(list_of_all_titles)
    if query and query in cat.lower()
]

if matching_categories:
    print("Matching categories:", matching_categories)  # Display found categories
    category = matching_categories[0]  # Select the first matching category
    print(f"Using category: {category}")
else:
    # Reset explicitly so later cells cannot silently reuse a category left over
    # from a previous run of this cell (or fail with an unrelated NameError).
    category = None
    print(f"No categories found containing '{category_input}'.")

Enter your favourite category (or part of it): music
Matching categories: ['Entertainment - Music']
Using category: Entertainment - Music


In [45]:
# Locate the rows that belong to the chosen category. Row positions (not index labels)
# are used because `similarity` is a NumPy array that is addressed by row position.
category_positions = np.flatnonzero((events_data['category'] == category).to_numpy())
if category_positions.size == 0:
    # Covers both an unknown category and category=None (no selection made)
    raise ValueError(f"No events found for category {category!r}; choose a valid category first.")

# The first event of the category serves as the reference event for the ranking
category_index = category_positions[0]

# Similarity of every event to that single reference event (one row of the matrix),
# i.e. the scores are relative to one event, not to the category as a whole.
similarity_scores = similarity[category_index]

# Pair each event's name and category with its score
events_with_scores = events_data[['event_name', 'category']].copy()
events_with_scores['similarity_score'] = similarity_scores

In [46]:
# How many of the best-scoring events to show
N = 10

# Rank all events from highest to lowest similarity score
sorted_events = events_with_scores.sort_values(by='similarity_score', ascending=False)

# Print the N highest-ranked rows together with their scores
print(sorted_events.iloc[:N])

                                            event_name  \
1                       ABBA DANCING QUEEN DISCO NIGHT   
660   SpaceGodz: Dyloot, Gizma, Michael Liu, Seoulwave   
437  Illusions The Drag Queen Show San Francisco - ...   
367  Illusions The Drag Brunch San Francisco-Drag Q...   
528  Illusions The Drag Queen Show Oakland - Drag Q...   
530  Illusions The Drag Brunch Oakland-Drag Queen B...   
25          Fog City Pack Presents: ALPHA w/ Sister Zo   
96                           RENAISSANCE / RENAIDDANCE   
46            SF Pitch Night 2025: Compete and Connect   
168                The Elephant 6 Recording Co. (2022)   

                         category  similarity_score  
1           Entertainment - Music          1.000000  
660                         Other          0.169036  
437  Technology & Business - Tech          0.161154  
367  Technology & Business - Tech          0.156333  
528  Technology & Business - Tech          0.140210  
530  Technology & Business - Tech    

In [50]:
import difflib
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def get_matching_categories(category_input, all_categories):
    """Return the categories that contain ``category_input``, ignoring case.

    Each category appears at most once in the result, in the order in which it
    first occurs in ``all_categories``.
    """
    return [
        candidate
        for candidate in dict.fromkeys(all_categories)  # de-duplicate, keep first-seen order
        if category_input.lower() in candidate.lower()
    ]

def get_events_by_category(category, events_data):
    """Return, as a list, the index labels of the rows whose 'category' equals ``category``."""
    in_category = events_data['category'] == category
    return events_data.index[in_category.to_numpy()].tolist()

def display_events(event_indices, events_data, start_index=0, num_events=5):
    """Print name, location, details, price and date for a slice of events.

    ``event_indices`` holds index labels of ``events_data``. Up to ``num_events``
    of them are printed, starting at position ``start_index`` in that list; each
    event is followed by a line of 20 dashes.
    """
    wanted_columns = ['event_name', 'event_location', 'event_details', 'event_price','event_date']
    stop = min(start_index + num_events, len(event_indices))
    divider = "-" * 20
    for row_label in event_indices[start_index:stop]:
        row = events_data.loc[row_label, wanted_columns]
        # One print call, one field per line
        print(
            f"Event Name: {row['event_name']}",
            f"Location: {row['event_location']}",
            f"Details: {row['event_details']}",
            f"Price: {row['event_price']}",
            f"Date: {row['event_date']}",
            divider,
            sep="\n",
        )

def _prompt_budget_choice(budget_ranges):
    """Keep asking until the user enters one of the keys of ``budget_ranges``; return that key."""
    while True:
        budget_choice = input("Enter your budget range:\n"
                              "1: $0 - $50\n"
                              "2: $50 - $100\n"
                              "3: $100 - $200\n"
                              "4: Above $200\n"
                              "Your choice: ").strip()
        if budget_choice in budget_ranges:
            return budget_choice
        print("Invalid budget choice. Please select a valid option.")


def _select_event_indices(events_data, category, min_price, max_price):
    """Return index labels of ``category`` events priced in [min_price, max_price] (both ends inclusive)."""
    # Coerce defensively: a price column containing '' or other text would make the
    # comparisons raise. Unparseable prices become NaN, which never satisfies a comparison.
    prices = pd.to_numeric(events_data['event_price'], errors='coerce')
    selected = (events_data['category'] == category) & (prices >= min_price) & (prices <= max_price)
    return events_data[selected].index.tolist()


def _page_through_events(event_indices, events_data, page_size=5):
    """Show the first page of events, then let the user load more pages or re-show the earlier one.

    The prompt is repeated until every event has been shown or the user enters 'q'.
    """
    display_events(event_indices, events_data, 0, page_size)
    num_events_displayed = page_size
    while num_events_displayed < len(event_indices):
        user_input = input("See more events (m), previous events (p), or quit (q): ").lower()

        if user_input == 'm':
            display_events(event_indices, events_data, num_events_displayed, page_size)
            num_events_displayed += page_size
        elif user_input == 'p':
            # Re-show the page before the most recently loaded one (or the first page if only one was shown)
            start_index = max(0, num_events_displayed - 2 * page_size)
            display_events(event_indices, events_data, start_index, page_size)
        elif user_input == 'q':
            break  # User is done with this result list
        else:
            print("Invalid input. Please enter 'm', 'p', or 'q'.")


def main_interaction_loop(events_data):
    """Interactively browse events by category and budget until the user types 'q'.

    Each round asks for a (partial) category name, then a budget range, and pages
    through the matching events. If nothing falls inside the chosen range, events of
    that category costing at most the range's upper bound are offered instead.

    Parameters:
        events_data: DataFrame with at least the columns 'category', 'event_price',
            'event_name', 'event_location', 'event_details' and 'event_date'.
    """
    all_categories = events_data['category'].unique().tolist()
    budget_ranges = {
        "1": (0, 50),
        "2": (50, 100),
        "3": (100, 200),
        "4": (200, float('inf'))
    }

    # The prompt text does not change between rounds, so build it once
    category_prompt = (
        "Enter your favourite category (or part of it), or type 'q' to quit.\n"
        f"Available categories: {', '.join(all_categories)}\n"
        "Your input: "
    )

    while True:
        category_input = input(category_prompt).strip()

        if category_input.lower() == 'q':
            break

        # Empty input would match every category, so treat it as "no match"
        matching_categories = get_matching_categories(category_input, all_categories) if category_input else []
        if not matching_categories:
            print(f"No categories found containing '{category_input}'.")
            continue

        category = matching_categories[0]
        print(f"Using category: {category}")

        min_price, max_price = budget_ranges[_prompt_budget_choice(budget_ranges)]
        filtered_event_indices = _select_event_indices(events_data, category, min_price, max_price)

        if not filtered_event_indices:
            print(f"No events found in the '{category}' category within the selected budget range.")
            # Fall back to anything in the category that costs at most the upper bound
            filtered_event_indices = _select_event_indices(events_data, category, float('-inf'), max_price)
            if not filtered_event_indices:
                # Nothing to show: return to the category prompt instead of ending the session
                continue
            print(f"Displaying all events in the '{category}' category up to the selected budget:")

        _page_through_events(filtered_event_indices, events_data)

# Launch the interactive prompt loop on the loaded events (blocks while waiting for keyboard input)
main_interaction_loop(events_data)

Enter your favourite category (or part of it), or type 'q' to quit.
Available categories: Entertainment - Theater, Entertainment - Music, Entertainment - Dance, Technology & Business - Tech, Other, Art & Culture - Art Exhibit, Social & Lifestyle - Networking, Formal Events - Gala/Ball, Education & Professional - Conference, Education & Professional - Workshop, Sports & Fitness - Outdoor Activities
Your input: theater
Using category: Entertainment - Theater
Enter your budget range:
1: $0 - $50
2: $50 - $100
3: $100 - $200
4: Above $200
Your choice: 3
Event Name: The Hand That Rocks the Crawford
Location: OASIS 298 11th Street San Francisco, CA 94103
Details: Basic Instinct, Fatal Attraction, Single White Female— what do these films have in common? None of them had Joan Crawford in them… UNTIL NOW! From the award-winning team that brought you Trog! Live comes a world premiere comedy that puts everybody’s favorite Mommie Dearest into all the female-driven thrillers she was born to play. M