In [3]:
# Clear all user-defined variables from the current kernel namespace.
# This is the programmatic form of the `%reset -f` line magic. Unlike
# `globals().clear()`, it leaves interpreter/IPython-managed names such as
# `__name__`, `__builtins__` and `get_ipython` in place, so later cells
# (class definitions, history, magics) keep working.
get_ipython().run_line_magic("reset", "-f")

In [4]:
import pickle
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load pickles you produced yourself or obtained from a trusted source.
with open('../input/business.pkl', 'rb') as f:
    businesses = pickle.load(f)
with open('../input/review.pkl', 'rb') as f:
    all_reviews = pickle.load(f)

# Cap the number of reviews so the demo runs quickly. `.head()` keeps the
# FIRST rows in stored order (not a random sample); raise MAX_REVIEWS or use
# `all_reviews` directly for a full run.
MAX_REVIEWS = 1000
reviews = all_reviews.head(MAX_REVIEWS)

In [7]:
import html
from pathlib import Path

import folium
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from textblob import TextBlob
from tqdm.auto import tqdm

# Filter businesses in Philadelphia
philadelphia_businesses = businesses[businesses['city'] == 'Philadelphia']

# Merge reviews and businesses dataframes.
# Inner join: only reviews whose business_id is in the Philadelphia subset
# survive. Columns present in both frames receive pandas' default suffixes
# (`_x` for the left/reviews frame, `_y` for the right/businesses frame).
merged_data = pd.merge(reviews, philadelphia_businesses, on='business_id', how='inner')

# Perform sentiment analysis.
# Missing review text is replaced with '' first, because TextBlob raises a
# TypeError on non-string input such as NaN.
tqdm.pandas(desc="Sentiment Analysis")
merged_data['sentiment'] = (
    merged_data['text']
    .fillna('')
    .progress_apply(lambda text: TextBlob(text).sentiment.polarity)
)

# Prepare data for clustering.
# NOTE(review): latitude/longitude (degrees) and sentiment (polarity score)
# are clustered on their raw scales, so whichever feature has the largest
# spread dominates the Euclidean distance. Consider standardizing the
# features if the clusters are meant to reflect geography as well.
X = merged_data[['latitude', 'longitude', 'sentiment']]

# Perform KMeans clustering.
# n_init is set explicitly: its default differs between scikit-learn releases
# (10 historically, 'auto' in newer versions), which would otherwise change
# the resulting clusters and emit a FutureWarning on some versions.
n_clusters = 5
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
merged_data['cluster'] = kmeans.fit_predict(X)

# Create a function to visualize clusters on a map
def plot_clusters_on_map(df, n_clusters, center=(39.952583, -75.165222), zoom_start=12):
    """Draw one colored marker per row of ``df`` on a folium map.

    Args:
        df: DataFrame providing the columns ``latitude``, ``longitude``,
            ``cluster``, ``name``, ``stars_x``, ``review_count`` and
            ``address``. (``stars_x`` is presumably the review-side ``stars``
            column produced by the merge suffixing -- verify against the
            caller's merge.)
        n_clusters: Accepted for interface compatibility; the body does not
            use it. Marker colors are derived from each row's ``cluster``
            value instead.
        center: ``(latitude, longitude)`` the map is initially centered on.
            Defaults to the coordinates previously hard-coded here.
        zoom_start: Initial zoom level passed to ``folium.Map``.

    Returns:
        The ``folium.Map`` with one marker added per row.

    Raises:
        KeyError: If a row lacks one of the required columns.
    """
    # Named `cluster_map` rather than `map` to avoid shadowing the builtin.
    cluster_map = folium.Map(location=list(center), zoom_start=zoom_start)

    # Define cluster colors
    colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']

    # Plot businesses on the map with cluster colors and a progress bar
    for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Plotting Clusters"):
        # iterrows() may upcast values to a common dtype; cast so the label is
        # always a valid list index. Labels beyond the palette wrap around.
        color = colors[int(row['cluster']) % len(colors)]

        # Name and address are free text from the dataset: escape them so
        # characters like '&', '<' or quotes cannot break the popup markup.
        name = html.escape(str(row['name']))
        address = html.escape(str(row['address']))
        popup_html = (
            f"<b>Name:</b> {name}<br>"
            f"<b>Stars:</b> {row['stars_x']}<br>"
            f"<b>Reviews:</b> {row['review_count']}<br>"
            f"<b>Address:</b> {address}"
        )

        folium.Marker(
            location=[row['latitude'], row['longitude']],
            icon=folium.Icon(color=color),
            popup=popup_html,
        ).add_to(cluster_map)

    return cluster_map

# Visualize the clusters on a map
cluster_map = plot_clusters_on_map(merged_data, n_clusters)

# Make sure the output directory exists; saving would otherwise fail with
# FileNotFoundError on a fresh checkout.
output_path = Path("../result/clusters_map_demo.html")
output_path.parent.mkdir(parents=True, exist_ok=True)
cluster_map.save(str(output_path))

Sentiment Analysis:   0%|          | 0/186 [00:00<?, ?it/s]



Plotting Clusters:   0%|          | 0/186 [00:00<?, ?it/s]