Data Preprocessing:
- Remove/ replace missing values

In [1]:
pip install requests pandas

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import requests
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LinearRegression
# from sklearn.metrics import mean_squared_error
# from sklearn.metrics import r2_score

Scraping from Google Reviews:

In [38]:
import time

In [51]:
API_KEY = "AIzaSyC6M-BJbnUubBk1CvFVoxdC21_zxdsaCSE"
SEARCH_URL = "https://maps.googleapis.com/maps/api/place/textsearch/json"
DETAILS_URL = "https://maps.googleapis.com/maps/api/place/details/json"

locations = [
    (1.3521, 103.8198),  # Central
    (1.2800, 103.8500),  # Downtown
    (1.3000, 103.9000),  # East
    (1.3400, 103.7000),  # West
    (1.4000, 103.9000),  # North
    (1.3700, 103.7700),  # Northwest
    (1.3100, 103.8400),  # Central South
    (1.3300, 103.9300),  # East Coast
]

# === FETCH ALL OUTLETS ===
def fetch_nearby_anytime_fitness(lat, lng, radius=10000):
    """Fetch Anytime Fitness gyms near given lat/lng."""
    params = {
        "location": f"{lat},{lng}",
        "radius": radius,
        "keyword": "Anytime Fitness",
        "type": "establishment",
        "key": API_KEY
    }

    gyms = []
    while True:
        response = requests.get(SEARCH_URL, params=params)
        data = response.json()

        for r in data.get("results", []):
            name = r.get("name", "")
            if "anytime fitness" not in name.lower():
                continue  # skip non-AF gyms

        
            gyms.append({
                "name": r.get("name"),
                "place_id": r.get("place_id"),
                "address": r.get("vicinity"),
                "rating": r.get("rating"),
                "user_ratings_total": r.get("user_ratings_total")
            })

        token = data.get("next_page_token")
        if not token:
            break

        time.sleep(2)
        params = {"pagetoken": token, "key": API_KEY}

    return gyms


# === FETCH REVIEWS FOR EACH OUTLET ===
def fetch_reviews_for_outlet(place_id, name):
    """Fetch up to 5 reviews for a specific outlet."""
    params = {
        "place_id": place_id,
        "fields": "name,reviews",
        "key": API_KEY
    }
    response = requests.get(DETAILS_URL, params=params)
    data = response.json()
    reviews = data.get("result", {}).get("reviews", [])

    review_list = []
    for rev in reviews:
        review_list.append({
            "outlet_name": name,
            "author": rev.get("author_name"),
            "rating": rev.get("rating"),
            "text": rev.get("text"),
            "time": rev.get("relative_time_description")
        })
    return review_list


# === MAIN WORKFLOW ===
print("🔍 Fetching Anytime Fitness outlets across Singapore...")

all_gyms = []
for lat, lng in locations:
    print(f"📍 Searching around {lat}, {lng}...")
    all_gyms.extend(fetch_nearby_anytime_fitness(lat, lng))
    time.sleep(1)

# Remove duplicates by place_id
df_gyms = pd.DataFrame(all_gyms).drop_duplicates(subset=["place_id"])
print(f"✅ Found {len(df_gyms)} unique outlets.\n")

# Fetch reviews for each outlet
all_reviews = []
for _, row in df_gyms.iterrows():
    print(f"💬 Fetching reviews for: {row['name']}")
    reviews = fetch_reviews_for_outlet(row["place_id"], row["name"])
    all_reviews.extend(reviews)
    time.sleep(1)  # avoid API rate limit

# Step 3: Combine into DataFrame
df_reviews = pd.DataFrame(all_reviews)

print(df_reviews.head())

🔍 Fetching Anytime Fitness outlets across Singapore...
📍 Searching around 1.3521, 103.8198...
📍 Searching around 1.28, 103.85...
📍 Searching around 1.3, 103.9...
📍 Searching around 1.34, 103.7...
📍 Searching around 1.4, 103.9...
📍 Searching around 1.37, 103.77...
📍 Searching around 1.31, 103.84...
📍 Searching around 1.33, 103.93...
✅ Found 0 unique outlets.

Empty DataFrame
Columns: []
Index: []


In [40]:
# Step 4: Save to CSV
df_reviews.to_csv("anytime_reviews_singapore.csv", index=False, encoding="utf-8-sig")

Sentiment Analysis:

In [41]:
!pip install textblob
import nltk
nltk.download('brown')
nltk.download('punkt')
nltk.download('wordnet')



[nltk_data] Downloading package brown to /Users/breann/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt to /Users/breann/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/breann/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [59]:
from textblob import TextBlob

df = pd.read_csv("anytime_reviews_singapore.csv")
df["sentiment"] = df["text"].apply(lambda x: TextBlob(str(x)).sentiment.polarity)
df["sentiment_label"] = df["sentiment"].apply(
    lambda x: "Positive" if x > 0 else "Negative" if x < 0 else "Neutral"
)

print(df.head())

# Example summary
print(df["sentiment_label"].value_counts())

                   outlet_name          author  rating  \
0  Anytime Fitness Clarke Quay  In2rovert Life       5   
1  Anytime Fitness Clarke Quay   Hisyam Othman       5   
2  Anytime Fitness Clarke Quay   Corentin Roux       5   
3  Anytime Fitness Clarke Quay         SC Hung       5   
4  Anytime Fitness Clarke Quay         Micky P       4   

                                                text              time  \
0  Visited tons of AF Worldwide and this is one o...  in the last week   
1  I'm so lucky to try out this brand new awesome...        a week ago   
2  An excellent new opening filling the AF desert...        a week ago   
3  I was lucky to use this place on just their se...        a week ago   
4  Has that brand new car smell! Awesome gym dens...  in the last week   

   sentiment sentiment_label  
0   0.020068        Positive  
1   0.496656        Positive  
2   0.147430        Positive  
3   0.371667        Positive  
4   0.451705        Positive  
sentiment_label
Posi

In [43]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 285 entries, 0 to 284
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   outlet_name      285 non-null    object 
 1   author           285 non-null    object 
 2   rating           285 non-null    int64  
 3   text             284 non-null    object 
 4   time             285 non-null    object 
 5   sentiment        285 non-null    float64
 6   sentiment_label  285 non-null    object 
dtypes: float64(1), int64(1), object(5)
memory usage: 15.7+ KB


In [44]:
df.isnull().sum()

outlet_name        0
author             0
rating             0
text               1
time               0
sentiment          0
sentiment_label    0
dtype: int64

In [45]:
df.dropna(inplace=True)

In [46]:
df.isnull().sum()

outlet_name        0
author             0
rating             0
text               0
time               0
sentiment          0
sentiment_label    0
dtype: int64

In [47]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 284 entries, 0 to 284
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   outlet_name      284 non-null    object 
 1   author           284 non-null    object 
 2   rating           284 non-null    int64  
 3   text             284 non-null    object 
 4   time             284 non-null    object 
 5   sentiment        284 non-null    float64
 6   sentiment_label  284 non-null    object 
dtypes: float64(1), int64(1), object(5)
memory usage: 17.8+ KB


In [62]:
df["sentiment_label"].value_counts()

sentiment_label
Positive    258
Negative     24
Neutral       3
Name: count, dtype: int64

In [61]:
df.head()

Unnamed: 0,outlet_name,author,rating,text,time,sentiment,sentiment_label
0,Anytime Fitness Clarke Quay,In2rovert Life,5,Visited tons of AF Worldwide and this is one o...,in the last week,0.020068,Positive
1,Anytime Fitness Clarke Quay,Hisyam Othman,5,I'm so lucky to try out this brand new awesome...,a week ago,0.496656,Positive
2,Anytime Fitness Clarke Quay,Corentin Roux,5,An excellent new opening filling the AF desert...,a week ago,0.14743,Positive
3,Anytime Fitness Clarke Quay,SC Hung,5,I was lucky to use this place on just their se...,a week ago,0.371667,Positive
4,Anytime Fitness Clarke Quay,Micky P,4,Has that brand new car smell! Awesome gym dens...,in the last week,0.451705,Positive


Getting Ratings and outlet information:

In [53]:
import requests
import time

API_KEY = "AIzaSyC6M-BJbnUubBk1CvFVoxdC21_zxdsaCSE"
query = "Anytime Fitness Singapore"

url = f"https://maps.googleapis.com/maps/api/place/textsearch/json?query={query}&key={API_KEY}"

places = []

while True:
    response = requests.get(url)
    data = response.json()
    
    for place in data.get("results", []):
        name = place.get("name")
        rating = place.get("rating")
        address = place.get("formatted_address")
        places.append((name, rating, address))
    
    # Check for next page
    next_page_token = data.get("next_page_token")
    if not next_page_token:
        break

    # Wait before next request
    time.sleep(2)
    url = f"https://maps.googleapis.com/maps/api/place/textsearch/json?pagetoken={next_page_token}&key={API_KEY}"

# Convert to DataFrame (optional)
import pandas as pd
df = pd.DataFrame(places, columns=["Outlet", "Rating", "Address"])
print(df)

                                   Outlet  Rating  \
0             Anytime Fitness Tekka Place     4.9   
1                   Anytime Fitness Bugis     4.7   
2             Anytime Fitness Clarke Quay     4.9   
3             Anytime Fitness Jalan Besar     4.2   
4           Anytime Fitness The Concourse     3.2   
5               Anytime Fitness City Hall     4.3   
6        Anytime Fitness City Square Mall     4.9   
7           Anytime Fitness Raffles Place     4.7   
8                 Anytime Fitness Orchard     4.7   
9     Anytime Fitness Cineleisure Orchard     4.8   
10        Anytime Fitness Havelock Outram     4.9   
11             Anytime Fitness Kaki Bukit     4.7   
12               Anytime Fitness Balmoral     4.6   
13                 Anytime Fitness Dakota     5.0   
14                 Anytime Fitness Novena     4.4   
15            Anytime Fitness Bukit Merah     4.4   
16          Anytime Fitness Tanjong Pagar     4.3   
17              Anytime Fitness Boon Keng     

In [55]:
df.sort_values(by="Rating", ascending=True).head(5)
# worst outlets

Unnamed: 0,Outlet,Rating,Address
60,Anytime Fitness The Concourse,3.2,"300 Beach Rd, #02-01A/02, Singapore"
136,Anytime Fitness hillV2,3.2,"4 Hillview Rise, #02-18/19, Singapore"
117,Anytime Fitness Hwi Yoh CC,3.3,"23 Serangoon North Ave 4, #01-01 Hwi Yoh Commu..."
35,Anytime Fitness Upper Cross Street,3.5,"531 Upper Cross Street #01-07, Hong Lim Complex"
114,Anytime Fitness Kovan,3.6,"JForte Sportainment Centre, 50 Hougang Ave 1, ..."


In [54]:
import requests
import time
import pandas as pd

API_KEY = "AIzaSyC6M-BJbnUubBk1CvFVoxdC21_zxdsaCSE"

# Coordinates roughly covering Singapore’s main regions
locations = [
    (1.3521, 103.8198),  # Central
    (1.2800, 103.8500),  # Downtown
    (1.3000, 103.9000),  # East
    (1.3400, 103.7000),  # West
    (1.4000, 103.9000),  # North
    (1.3700, 103.8000),  # North-Central
]

places = []

for lat, lng in locations:
    url = f"https://maps.googleapis.com/maps/api/place/nearbysearch/json?location={lat},{lng}&radius=5000&keyword=Anytime+Fitness&key={API_KEY}"
    
    while True:
        response = requests.get(url)
        data = response.json()
        
        for place in data.get("results", []):
            name = place.get("name")
            rating = place.get("rating")
            address = place.get("vicinity")
            places.append((name, rating, address))
        
        token = data.get("next_page_token")
        if not token:
            break
        time.sleep(2)
        url = f"https://maps.googleapis.com/maps/api/place/nearbysearch/json?pagetoken={token}&key={API_KEY}"

# Convert to DataFrame and remove duplicates
df = pd.DataFrame(places, columns=["Outlet", "Rating", "Address"]).drop_duplicates("Outlet")
print("Total outlets found:", len(df))
print(df.sort_values("Rating"))


Total outlets found: 101
                                 Outlet  Rating  \
60        Anytime Fitness The Concourse     3.2   
136              Anytime Fitness hillV2     3.2   
117          Anytime Fitness Hwi Yoh CC     3.3   
35   Anytime Fitness Upper Cross Street     3.5   
114               Anytime Fitness Kovan     3.6   
..                                  ...     ...   
108              Anytime Fitness Jurong     5.0   
74            Anytime Fitness Joo Chiat     5.0   
73        Anytime Fitness Simpang Bedok     5.0   
28               Anytime Fitness Dakota     5.0   
92      Anytime Fitness MacPherson Mall     5.0   

                                               Address  
60                 300 Beach Rd, #02-01A/02, Singapore  
136              4 Hillview Rise, #02-18/19, Singapore  
117  23 Serangoon North Ave 4, #01-01 Hwi Yoh Commu...  
35     531 Upper Cross Street #01-07, Hong Lim Complex  
114  JForte Sportainment Centre, 50 Hougang Ave 1, ...  
..                  

In [57]:
import requests
import time
import pandas as pd

API_KEY = "AIzaSyC6M-BJbnUubBk1CvFVoxdC21_zxdsaCSE"

# Expanded grid across Singapore
locations = [
    (1.3521, 103.8198),  # Central
    (1.2800, 103.8500),  # Downtown
    (1.3000, 103.9000),  # East
    (1.3400, 103.7000),  # West
    (1.4000, 103.9000),  # North
    (1.3700, 103.8000),  # North-Central
    (1.3100, 103.7700),  # Southwest
    (1.4200, 103.7600),  # Northwest
    (1.2900, 103.8200),  # Southeast
]

places = []

for lat, lng in locations:
    url = f"https://maps.googleapis.com/maps/api/place/nearbysearch/json?location={lat},{lng}&radius=5000&keyword=Anytime+Fitness&key={API_KEY}"
    
    while True:
        response = requests.get(url)
        data = response.json()
        
        for place in data.get("results", []):
            name = place.get("name")
            rating = place.get("rating")
            address = place.get("vicinity")
            maps_link = f"https://www.google.com/maps/search/?api=1&query={name.replace(' ', '+')}+{address.replace(' ', '+')}"
            places.append((name, rating, address, maps_link))
        
        token = data.get("next_page_token")
        if not token:
            break
        time.sleep(2)
        url = f"https://maps.googleapis.com/maps/api/place/nearbysearch/json?pagetoken={token}&key={API_KEY}"

# Convert to DataFrame and remove duplicates
df = pd.DataFrame(places, columns=["Outlet", "Rating", "Address", "MapLink"]).drop_duplicates("Outlet")
print("Total outlets found:", len(df))
(df.sort_values("Rating")).head()

Total outlets found: 120


Unnamed: 0,Outlet,Rating,Address,MapLink
57,Anytime Fitness The Concourse,3.2,"300 Beach Rd, #02-01A/02, Singapore",https://www.google.com/maps/search/?api=1&quer...
135,Anytime Fitness hillV2,3.2,"4 Hillview Rise, #02-18/19, Singapore",https://www.google.com/maps/search/?api=1&quer...
116,Anytime Fitness Hwi Yoh CC,3.3,"23 Serangoon North Ave 4, #01-01 Hwi Yoh Commu...",https://www.google.com/maps/search/?api=1&quer...
32,Anytime Fitness Upper Cross Street,3.5,"531 Upper Cross Street #01-07, Hong Lim Complex",https://www.google.com/maps/search/?api=1&quer...
89,Anytime Fitness Guillemard,3.6,"100 Guillemard Rd, #01-08, Singapore",https://www.google.com/maps/search/?api=1&quer...


In [None]:
(df.sort_values("Rating")).head()