# Installation

In [21]:
! pip install outscraper pandas

Collecting pandas
  Obtaining dependency information for pandas from https://files.pythonhosted.org/packages/16/c6/75231fd47afd6b3f89011e7077f1a3958441264aca7ae9ff596e3276a5d0/pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl.metadata
  Downloading pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (19 kB)
Collecting numpy>=1.23.2 (from pandas)
  Obtaining dependency information for numpy>=1.23.2 from https://files.pythonhosted.org/packages/1a/2e/151484f49fd03944c4a3ad9c418ed193cfd02724e138ac8a9505d056c582/numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl.metadata
  Using cached numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl.metadata (114 kB)
Collecting pytz>=2020.1 (from pandas)
  Obtaining dependency information for pytz>=2020.1 from https://files.pythonhosted.org/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl.metadata
  Using cached pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
 

# Initialization

In [4]:
from getpass import getpass
import os
from outscraper import ApiClient

# Reuse a key that is already present in the environment; only prompt when it
# is missing. The secret is never hardcoded, and re-running this cell (or
# running the notebook unattended with the variable exported) does not block
# on an input prompt or overwrite a configured key.
if not os.environ.get("OUTSCRAPER_API_KEY"):
    os.environ["OUTSCRAPER_API_KEY"] = getpass("Outscraper API Key:")

# Build the API client from the key stored in the environment variable.
client = ApiClient(api_key=os.environ["OUTSCRAPER_API_KEY"])

# Scrape Google Search

In [None]:
# Google Search: keep the web results under their own name so the news call
# below does not immediately overwrite (and discard) them.
search_results = client.google_search('bitcoin')

# Google Search News
news_results = client.google_search_news('election', language='en')

# Scrape Google Maps (Places)


In [5]:
# Find businesses with a free-text query that names the location;
# limit=3 bounds how many places come back.
results = client.google_maps_search(
    'pediatric dentist novato ca usa',
    language='en',
    limit=3,
)

In [None]:
# Print the raw search response to inspect its overall structure.
print(results)

In [None]:
# Display the first element of the search response.
results[0]

## How to get the name and place_id from the returned list?
The returned data is a list of lists of dictionaries. To access the 'name' and 'place_id' of each dictionary, use a nested loop: the outer loop walks the sublists and the inner loop walks the dictionaries in each sublist.

In [11]:
# `results` is a list of lists of place dictionaries: walk every place in
# every inner list and report its name together with its place ID.
for places in results:
    for place in places:
        name, place_id = place['name'], place['place_id']
        print(f"Name: {name}, Place ID: {place_id}")

Name: Novato Children's Dentistry, Place ID: ChIJixa-83y7hYARYPidwgCrF7A
Name: Novato Pediatric Dentistry, Place ID: ChIJJ0DU9nu7hYAR12EPlrPOHjw
Name: Mr. Benjamin Robinson, Place ID: ChIJXWmY8Hu7hYARtTzZM2vEXho


In [18]:
# Look up one specific place (Poppy) directly by its place ID.
result = client.google_maps_search(
    'ChIJLdUfAim9hYARqRFWA5LwoAI',
    language='en',
)

In [None]:
# Display the first element of the place lookup response.
result[0]

# Scrape Google Maps Reviews


In [None]:
# Request up to 20 reviews for a single place, identified by its place ID.
results = client.google_maps_reviews(
    'ChIJLdUfAim9hYARqRFWA5LwoAI',
    language='en',
    reviews_limit=20,
)

In [None]:
import pandas as pd

# Target place (Poppy) and the number of reviews to request for it.
place_id = 'ChIJLdUfAim9hYARqRFWA5LwoAI'
num_reviews = 80

# Ask the API for up to `num_reviews` reviews of that place.
reviews = client.google_maps_reviews(place_id, reviews_limit=num_reviews)

# The first response entry holds the review rows under 'reviews_data':
# load them into a DataFrame, then save it as CSV without the index column.
df = pd.DataFrame(reviews[0]['reviews_data'])
df.to_csv('poppy_reviews_all.csv', index=False)

### Use the address to pull the reviews without the place ID — this will be a more general function

In [None]:
# Fetch reviews by free-text search query instead of by place ID.
# `limit` is the maximum number of places returned and should stay at 1;
# `reviews_limit` bounds the number of reviews requested (7 here).
reviews = client.google_maps_reviews(
    'Poppy Kids pediatric dentistry Novato CA usa',
    limit=1,
    reviews_limit=7,
    language='en',
)
print(reviews)

In [None]:
import json

# Assumes `reviews` holds JSON-serializable data: the built-in json module
# renders it with a four-space indent so the structure is easy to read.
pretty_reviews = json.dumps(reviews, indent=4)
print(pretty_reviews)

# Alles Zusammen!!!
### Question: do I need to convert the data to a DataFrame? I think not — the API already returns JSON, which should work well with the Assistants API.

In [None]:
from getpass import getpass
import os
from outscraper import ApiClient
import json

# Only prompt for the API key when it is not already in the environment, so a
# key entered earlier in this session (or exported before launch) is reused
# instead of being asked for, and overwritten, a second time.
if not os.environ.get("OUTSCRAPER_API_KEY"):
    os.environ["OUTSCRAPER_API_KEY"] = getpass("Outscraper API Key:")

# Build the API client from the key stored in the environment variable.
client = ApiClient(api_key=os.environ["OUTSCRAPER_API_KEY"])

In [25]:
# Request up to 50 reviews for at most one place matching the search query.
reviews = client.google_maps_reviews(
    'Poppy Kids pediatric dentistry Novato CA usa',
    limit=1,
    reviews_limit=50,
    language='en',
)

# Persist the response to disk as JSON.
with open('poppy_reviews_fifty.json', 'w') as json_file:
    json.dump(reviews, json_file)

### Once I have the JSON document saved, I open it using the JSON formatter (set as the default formatter — I also have Prettify) and right-click to format it to see the details.

In [None]:
# Get reviews for places found by search query (use the proper business name
# to fetch the correct results); limit=1 keeps the request to a single place.
reviews = client.google_maps_reviews('Poppy Kids pediatric dentistry Novato CA usa', reviews_limit=7, limit=1, language='en')

# Convert the review rows of the first response entry to a DataFrame.
df = pd.DataFrame(reviews[0]['reviews_data'])

# Export to a dedicated file. The earlier 80-review export already writes
# 'poppy_reviews_all.csv'; saving these 7 reviews under the same name would
# silently overwrite that larger export.
df.to_csv('poppy_reviews_by_query.csv', index=False)

# Other useful functions

In [None]:
from datetime import datetime, timedelta

# Batching: several search queries can be submitted in a single call.
results = client.google_maps_search(
    ['restaurants brooklyn usa', 'bars brooklyn usa'],
    language='en',
)

# Reviews for places found by a search query.
# NOTE(review): `limit` caps the number of places matched by the query (500
# here) — confirm this is intended for a query that names a single business.
results = client.google_maps_reviews(
    'Memphis Seoul brooklyn usa',
    limit=500,
    reviews_limit=20,
    language='en',
)

# Only the new reviews from the last 24 hours.
# (Jay - this is what I need to create a pipeline for taking in the reviews
# every day. This can easily run in Snowflake and get data - python file.)
# The cutoff is the timestamp of "now minus one day", truncated to whole seconds.
one_day_ago = datetime.now() - timedelta(1)
yesterday_timestamp = int(one_day_ago.timestamp())

results = client.google_maps_reviews(
    'ChIJrc9T9fpYwokRdvjYRHT8nI4',
    sort='newest',
    cutoff=yesterday_timestamp,
    reviews_limit=100,
    language='en',
)

# Business photos for a place found by search query.
results = client.google_maps_photos('Trump Tower, NY, USA', photosLimit=20, language='en')

# Directions: each string carries two coordinate pairs separated by spaces
# (presumably origin then destination — confirm against the API docs).
results = client.google_maps_directions([
    '29.696596, 76.994928    30.7159662444353, 76.8053887016268',
    '29.696596, 76.994928    30.723065, 76.770169',
])