In [1]:
import os
from typing import Iterable, Union
from itertools import product
from numpy import add, multiply
import pandas as pd
import serpapi
from serpapi import GoogleSearch

In [2]:
starting_latitude: float = 57.685
starting_longtitde: float = 11.905

increment_lat: float = 0.01
increment_long: float = 0.04


def addition_loop(
    start_value: float, increment: float, add_n_elements: int
) -> list[float]:
    """Function to add the needed increments to the cooridnates

    Args:
        start_value (float): start value for lat or long
        increment (float): The amount to increase with
        add_n_element (int): Number of additional elements required

    Returns:
        list[float]: List with the coordiates
    """
    empty_list: list[float] = [start_value]
    for ind in range(add_n_elements):
        element = round(add(start_value, (multiply(increment, (add(ind, 1))))), 3)
        empty_list.append(element)
    return empty_list


lat_list: list[float] = addition_loop(starting_latitude, increment_lat, 3)
long_list: list[float] = addition_loop(starting_longtitde, increment_long, 3)

In [3]:
print(lat_list)
print(long_list)

[57.685, 57.695, 57.705, 57.715]
[11.905, 11.945, 11.985, 12.025]


In [4]:
# Let's create tuples with lat,long values
combos_coord: Iterable = product(lat_list, long_list)
combos_coord_list: list[tuple[float, ...]] = list(combos_coord)

In [5]:
print(combos_coord_list)

[(57.685, 11.905), (57.685, 11.945), (57.685, 11.985), (57.685, 12.025), (57.695, 11.905), (57.695, 11.945), (57.695, 11.985), (57.695, 12.025), (57.705, 11.905), (57.705, 11.945), (57.705, 11.985), (57.705, 12.025), (57.715, 11.905), (57.715, 11.945), (57.715, 11.985), (57.715, 12.025)]


In [6]:
print(f"We'll search across {len(combos_coord_list)} grids in Google maps in Göteborg, Sweden")

We'll search across 16 grids in Google maps in Göteborg, Sweden


In [13]:
params: dict[str, Union[str, int]] = {
    "api_key": os.getenv("SERP_API_KEY"),
    "device": "desktop",
    "engine": "google_maps",
    "type": "search",
    "google_domain": "google.se",
    "q": "Retauranger göteborg",
    "hl": "sv",
    "ll": "@57.685,11.905,16z", # This is what we'll modify in the loops below
    "gl": "se",
    "start": 0, # As well as this
}

In [14]:
params

{'api_key': '558eaada5daf3e922befa697a766112b5618cea63d0c00eac0ba8450415ee399',
 'device': 'desktop',
 'engine': 'google_maps',
 'type': 'search',
 'google_domain': 'google.se',
 'q': 'Retauranger göteborg',
 'hl': 'sv',
 'll': '@57.685,11.905,16z',
 'gl': 'se',
 'start': 0}

In [15]:
# Don't delete....!!
search = GoogleSearch(params)
results = search.get_dict()

https://serpapi.com/search


In [19]:
results.keys()

dict_keys(['search_metadata', 'search_parameters', 'search_information', 'local_results', 'serpapi_pagination'])

In [20]:
results.get('serpapi_pagination')

{'next': 'https://serpapi.com/search.json?engine=google_maps&google_domain=google.se&hl=sv&ll=%4057.685%2C11.905%2C16z&q=Retauranger+g%C3%B6teborg&start=20&type=search'}

In [21]:
"serpapi_pagination" in results and "next" in results[
    "serpapi_pagination"
] and "local_results" in results and results.get("local_results") is not None

True

In [26]:
params: dict[str, Union[str, int]] = {
    "api_key": os.getenv("SERP_API_KEY"),
    "device": "desktop",
    "engine": "google_maps",
    "type": "search",
    "google_domain": "google.se",
    "q": "Retauranger göteborg",
    "hl": "sv",
    "ll": "@57.685,11.905,16z", # This is what we'll modify in the loops below
    "gl": "se",
    "start": 0, # As well as this
}

df = pd.DataFrame()

for grid, coord in enumerate(combos_coord_list):
    print(f"Running grid: {grid + 1}")
    params["ll"]: str = f"@{coord[0]},{coord[1]},16z"

    for start_num in range(0, 120, 20):
        print(f"page: {int(start_num/20)}")
        if start_num == 0:
            params["start"]: int = start_num
            search: serpapi.google_search.GoogleSearch = GoogleSearch(params)
            results: dict = search.get_dict()
            try:
                df: pd.DataFrame = pd.concat(
                    [df, pd.DataFrame(search.get_dict()["local_results"])]
                )
            except Exception:
                print("An exception was raised")
                continue

        elif (
            "serpapi_pagination" in results
            and "next" in results["serpapi_pagination"]
            and "local_results" in results
            and results.get("local_results") is not None
        ):
            params["start"]: int = start_num
            search: serpapi.google_search.GoogleSearch = GoogleSearch(params)
            results: dict = search.get_dict()
            try:
                df: pd.DataFrame = pd.concat(
                    [df, pd.DataFrame(search.get_dict()["local_results"])]
                )
            except Exception:
                print("An exception was raised")
                continue
        else:
            print("loop was broken")
            break

Running grid: 1
page: 0
https://serpapi.com/search
https://serpapi.com/search
page: 1
https://serpapi.com/search
https://serpapi.com/search
page: 2
https://serpapi.com/search
https://serpapi.com/search
page: 3
https://serpapi.com/search
https://serpapi.com/search
page: 4
https://serpapi.com/search
https://serpapi.com/search
page: 5
https://serpapi.com/search
https://serpapi.com/search
Running grid: 2
page: 0
https://serpapi.com/search
https://serpapi.com/search
page: 1
https://serpapi.com/search
https://serpapi.com/search
page: 2
https://serpapi.com/search
https://serpapi.com/search
page: 3
https://serpapi.com/search
https://serpapi.com/search
page: 4
https://serpapi.com/search
https://serpapi.com/search
page: 5
https://serpapi.com/search
https://serpapi.com/search
Running grid: 3
page: 0
https://serpapi.com/search
https://serpapi.com/search
page: 1
https://serpapi.com/search
https://serpapi.com/search
page: 2
https://serpapi.com/search
https://serpapi.com/search
page: 3
https://serpap

In [35]:
df.drop_duplicates(subset=["title", "place_id"]).shape

(576, 26)

In [38]:
"Jaipur" in df.title

False

In [40]:
# check if a value is in a column

df: pd.DataFrame = df.drop_duplicates(subset=["title", "place_id"])

In [43]:
df_sorted = df.sort_values(by=["rating", "reviews"], ascending=False)