In [14]:
import requests
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()
API_KEY = os.getenv('API_KEY')

print(API_KEY)


AIzaSyDJvY6rfaPQznVZY8uUPggLbULKIYLFha4


In [9]:
import requests
import time
import pandas as pd
import json
from datetime import datetime

# Set your API key
API_KEY = "AIzaSyDypG-8_jEmrcd6JJmcUWGbbYjLY32kM2s"


In [13]:

def get_mall_info(mall_name, location=""):
    """Finds the mall using Google Places API."""
    search_url = f"https://maps.googleapis.com/maps/api/place/textsearch/json"
    params = {
        "query": mall_name + " mall",
        "location": location,
        "key": API_KEY
    }
    response = requests.get(search_url, params=params)
    data = response.json()

    print(data)
    if "results" in data and data["results"]:
        mall = data["results"][0]  # Take the first result
        return {
            "name": mall["name"],
            "place_id": mall["place_id"],
            "address": mall["formatted_address"],
            "lat": mall["geometry"]["location"]["lat"],
            "lng": mall["geometry"]["location"]["lng"]
        }
    else:
        print(f"No results found for {mall_name}")
        return None


In [15]:
get_mall_info(mall_name='Dubai Mall', location='')

{'error_message': 'This API project is not authorized to use this API.', 'html_attributions': [], 'results': [], 'status': 'REQUEST_DENIED'}
No results found for Dubai Mall


In [3]:

def get_shops_in_mall(lat, lng, radius=300, next_page_token=None):
    """Finds shops inside the mall using Nearby Search API."""
    nearby_url = f"https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    params = {
        "location": f"{lat},{lng}",
        "radius": radius,  # Adjust radius based on mall size
        "type": "store",  # Filter by store/shop types
        "key": API_KEY
    }
    
    if next_page_token:
        params = {"pagetoken": next_page_token, "key": API_KEY}
        
    response = requests.get(nearby_url, params=params)
    data = response.json()
    shops = []
    
    if "results" in data:
        for shop in data["results"]:
            shops.append({
                "name": shop["name"],
                "place_id": shop["place_id"],
                "address": shop.get("vicinity", "N/A"),
                "rating": shop.get("rating", "N/A"),
                "user_ratings_total": shop.get("user_ratings_total", 0),
                "types": shop.get("types", [])
            })
    
    # Check if there are more results
    next_page_token = data.get("next_page_token")
    
    return shops, next_page_token


In [4]:

def get_shop_details(place_id):
    """Gets detailed information about a shop using Place Details API."""
    details_url = f"https://maps.googleapis.com/maps/api/place/details/json"
    params = {
        "place_id": place_id,
        "fields": "name,formatted_address,formatted_phone_number,website,opening_hours,price_level,rating,review",
        "key": API_KEY
    }
    
    response = requests.get(details_url, params=params)
    data = response.json()
    
    if "result" in data:
        result = data["result"]
        
        # Get opening hours if available
        opening_hours = {}
        if "opening_hours" in result and "weekday_text" in result["opening_hours"]:
            opening_hours = result["opening_hours"]["weekday_text"]
        
        return {
            "name": result.get("name", "N/A"),
            "address": result.get("formatted_address", "N/A"),
            "phone": result.get("formatted_phone_number", "N/A"),
            "website": result.get("website", "N/A"),
            "opening_hours": opening_hours,
            "price_level": result.get("price_level", "N/A"),
            "rating": result.get("rating", "N/A"),
            "details_available": True
        }
    else:
        return {"details_available": False}


In [5]:

def get_shop_reviews(place_id, max_reviews=5):
    """Gets reviews for a shop using Place Details API."""
    reviews_url = f"https://maps.googleapis.com/maps/api/place/details/json"
    params = {
        "place_id": place_id,
        "fields": "reviews",
        "key": API_KEY,
        "language": "en"  # Get reviews in English
    }
    
    response = requests.get(reviews_url, params=params)
    data = response.json()
    
    reviews = []
    if "result" in data and "reviews" in data["result"]:
        for review in data["result"]["reviews"][:max_reviews]:
            reviews.append({
                "author_name": review.get("author_name", "Anonymous"),
                "rating": review.get("rating", 0),
                "time": datetime.fromtimestamp(review.get("time", 0)).strftime('%Y-%m-%d'),
                "text": review.get("text", "No comment"),
                "language": review.get("language", "unknown")
            })
    
    return reviews


In [6]:

def scrape_mall_and_shops(mall_name, location="", radius=300, get_details=True, get_reviews=True, max_reviews=5):
    """Main function to scrape mall information and all shops with reviews."""
    print(f"Finding information for {mall_name}...")
    
    # Get mall information
    mall_info = get_mall_info(mall_name, location)
    if not mall_info:
        print(f"Could not find mall: {mall_name}")
        return None
    
    print(f"Found mall: {mall_info['name']} at {mall_info['address']}")
    
    # Get shops in the mall
    all_shops = []
    next_page_token = None
    page_count = 1
    
    while True:
        print(f"Fetching shops page {page_count}...")
        shops, next_page_token = get_shops_in_mall(
            mall_info["lat"], 
            mall_info["lng"], 
            radius=radius, 
            next_page_token=next_page_token
        )
        
        all_shops.extend(shops)
        
        if not next_page_token:
            break
            
        # Need to wait a short time before using the next_page_token
        time.sleep(2)
        page_count += 1
    
    print(f"Found {len(all_shops)} shops in {mall_info['name']}")
    
    # Get detailed information and reviews for each shop
    for i, shop in enumerate(all_shops):
        print(f"Processing shop {i+1}/{len(all_shops)}: {shop['name']}")
        
        # Get shop details if requested
        if get_details:
            details = get_shop_details(shop["place_id"])
            if details["details_available"]:
                for key, value in details.items():
                    if key != "details_available":
                        shop[key] = value
            time.sleep(0.5)  # Avoid hitting rate limits
        
        # Get shop reviews if requested
        if get_reviews:
            shop["reviews"] = get_shop_reviews(shop["place_id"], max_reviews)
            time.sleep(0.5)  # Avoid hitting rate limits
    
    # Create result dictionary
    result = {
        "mall": mall_info,
        "shops": all_shops,
        "total_shops": len(all_shops)
    }
    
    return result


In [7]:

def save_results_to_csv(result, base_filename):
    """Saves the scraped data to CSV files."""
    if not result:
        print("No data to save")
        return
    
    # Save mall information
    mall_df = pd.DataFrame([result["mall"]])
    mall_df.to_csv(f"{base_filename}_mall_info.csv", index=False)
    
    # Save shop information (excluding reviews)
    shops = []
    for shop in result["shops"]:
        shop_data = shop.copy()
        if "reviews" in shop_data:
            del shop_data["reviews"]
        if "types" in shop_data:
            shop_data["types"] = ", ".join(shop_data["types"])
        if "opening_hours" in shop_data and isinstance(shop_data["opening_hours"], list):
            shop_data["opening_hours"] = "; ".join(shop_data["opening_hours"])
        shops.append(shop_data)
    
    shops_df = pd.DataFrame(shops)
    shops_df.to_csv(f"{base_filename}_shops.csv", index=False)
    
    # Save reviews as a separate CSV
    all_reviews = []
    for shop in result["shops"]:
        if "reviews" in shop and shop["reviews"]:
            for review in shop["reviews"]:
                review_data = review.copy()
                review_data["shop_name"] = shop["name"]
                review_data["shop_place_id"] = shop["place_id"]
                all_reviews.append(review_data)
    
    if all_reviews:
        reviews_df = pd.DataFrame(all_reviews)
        reviews_df.to_csv(f"{base_filename}_reviews.csv", index=False)
    
    # Save the entire dataset as JSON for backup
    with open(f"{base_filename}_full_data.json", 'w') as f:
        json.dump(result, f, indent=2)
    
    print(f"Data saved to {base_filename}_*.csv files")


In [8]:

def main():
    """Main function to run the scraper."""
    mall_name = input("Enter mall name (e.g., 'Sunway Pyramid'): ")
    location = input("Enter location coordinates (optional, format: lat,lng): ")
    radius = input("Enter search radius in meters (default: 300): ")
    
    if not radius:
        radius = 300
    else:
        radius = int(radius)
        
    max_reviews = input("Enter maximum number of reviews per shop (default: 5): ")
    if not max_reviews:
        max_reviews = 5
    else:
        max_reviews = int(max_reviews)
    
    # Generate a filename based on mall name
    base_filename = mall_name.lower().replace(" ", "_")
    
    # Start scraping
    print(f"Starting to scrape {mall_name}...")
    result = scrape_mall_and_shops(
        mall_name=mall_name,
        location=location,
        radius=radius,
        get_details=True,
        get_reviews=True,
        max_reviews=max_reviews
    )
    
    if result:
        # Save the results
        save_results_to_csv(result, base_filename)
        print(f"Successfully scraped information for {result['total_shops']} shops in {mall_name}")
    else:
        print("Scraping failed")


In [10]:
main()

Enter mall name (e.g., 'Sunway Pyramid'):  Emporium Mall
Enter location coordinates (optional, format: lat,lng):  
Enter search radius in meters (default: 300):  
Enter maximum number of reviews per shop (default: 5):  


Starting to scrape Emporium Mall...
Finding information for Emporium Mall...
No results found for Emporium Mall
Could not find mall: Emporium Mall
Scraping failed


In [8]:
import requests

API_KEY = "AIzaSyDypG-8_jEmrcd6JJmcUWGbbYjLY32kM2s"
place_name = "Dubai Mall"
location = "25.1972° N, 55.2797° E"

url = f"https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input={place_name}&inputtype=textquery&locationbias=point:{location}&key={API_KEY}"
response = requests.get(url)
place_id = response.json()
print("Place ID:", place_id)


Place ID: {'candidates': [], 'error_message': 'This API project is not authorized to use this API.', 'status': 'REQUEST_DENIED'}


In [4]:
print(response)

<Response [200]>
