In [3]:
%pip install asyncio aiohttp pandas

import aiohttp
import asyncio
import pandas as pd

async def fetch_data(session, url, datakey=None):
    async with session.get(url) as response:
        print(f"Fetched {url} with status {response.status}")
        data = await response.json()
        if datakey and datakey in data:
            data = data[datakey]
        return response.status, data

async def fetch_guide_data(session, base_url):
    page = 1
    all_data = []
    while True:
        url = f"{base_url}?page={page}&page_size=1000"
        status, data = await fetch_data(session, url, datakey='results')
        # all_data.extend(data)
        # break
        if status == 404 or not data:
            break
        all_data.extend(data)
        page += 1
    return all_data




async def main():
    # Define your segments here
    # Define the size of each segment
    lat_size = 10
    lon_size = 10

    # Generate segments
    segments = []
    for lat in range(-90, 90, lat_size):
        for lon in range(-340, 340, lon_size):
            top_right = f"{lat+lat_size},{lon+lon_size}"
            bottom_left = f"{lat},{lon}"
            segments.append((top_right, bottom_left))
    # segments=[('88,340', '-88,-340')]
    async with aiohttp.ClientSession() as session:
        # Fetch guide data only once
        guide_base_url = "https://travel.padi.com/api/v2/travel/dive-guide/world/all/dive-sites/"
        guide_data = await fetch_guide_data(session, guide_base_url)

        # Fetch map data for each segment
        tasks = [fetch_data(session, f"https://travel.padi.com/api/v2/travel/dsl/dive-sites/map/?top_right={segment[0]}&bottom_left={segment[1]}") for segment in segments]
        map_results = await asyncio.gather(*tasks)
        # Combining data into a single dictionary by ID
        all_data = {}
        print(map_results)
        print(guide_data)
        for batch in map_results:
            for item in batch[1]:
                if item['id'] in all_data:
                    all_data[item['id']].update(item)
                else:
                    all_data[item['id']] = item
        for item in guide_data:
            if item['id'] in all_data:
                all_data[item['id']].update(item)
            else:
                all_data[item['id']] = item

        # Convert the dictionary to a pandas DataFrame
        df = pd.DataFrame.from_dict(all_data, orient='index')
        return df

# Running the async main function
df = await main()
df.to_csv('dive_sites.csv.gz', index=False, compression='gzip')
# The resulting DataFrame 'df' contains your combined data


Note: you may need to restart the kernel to use updated packages.
Fetched https://travel.padi.com/api/v2/travel/dive-guide/world/all/dive-sites/?page=1&page_size=1000 with status 200
Fetched https://travel.padi.com/api/v2/travel/dive-guide/world/all/dive-sites/?page=2&page_size=1000 with status 200
Fetched https://travel.padi.com/api/v2/travel/dive-guide/world/all/dive-sites/?page=3&page_size=1000 with status 200
Fetched https://travel.padi.com/api/v2/travel/dive-guide/world/all/dive-sites/?page=4&page_size=1000 with status 200
Fetched https://travel.padi.com/api/v2/travel/dive-guide/world/all/dive-sites/?page=5&page_size=1000 with status 200
Fetched https://travel.padi.com/api/v2/travel/dive-guide/world/all/dive-sites/?page=6&page_size=1000 with status 404
Fetched https://travel.padi.com/api/v2/travel/dsl/dive-sites/map/?top_right=-80,-330&bottom_left=-90,-340 with status 200
Fetched https://travel.padi.com/api/v2/travel/dsl/dive-sites/map/?top_right=-80,-320&bottom_left=-90,-330 with

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [4]:
divesites = pd.read_csv('dive_sites.csv.gz', compression='gzip')
divesites.columns

Index(['id', 'latitude', 'longitude', 'images', 'marineLife', 'maximumDepth',
       'title', 'travelUrl', 'types'],
      dtype='object')

In [22]:
not_null_df = divesites[divesites['title'] == 'Hole in the Wall']
not_null_df

Unnamed: 0,id,latitude,longitude,images,marineLife,maximumDepth,title,travelUrl,types
1718,2618,12.461909,-69.966742,[],[],,Hole in the Wall,/dive-site/aruba/hole-in-the-wall-2/,"['Beach', 'Reef']"
2171,4242,13.522,120.9,[{'800x800': 'https://d2p1cf6997m1ir.cloudfron...,[],"{'FEET': {'value': 49.2126, 'title': 'Feet'}, ...",Hole in the Wall,/dive-site/philippines/hole-in-the-wall-3/,"['Reef', 'Wall', 'Cavern']"
2783,699,25.340846,56.378188,[{'800x800': 'https://d2p1cf6997m1ir.cloudfron...,[],,Hole in the Wall,/dive-site/united-arab-emirates/hole-in-the-wall/,['Reef']


Unnamed: 0,id,latitude,longitude,images,marineLife,maximumDepth,title,travelUrl,types
4,1193,-42.734531,-65.03078,[{'800x800': 'https://d2p1cf6997m1ir.cloudfron...,[],,Parque las Piedras,/dive-site/argentina/parque-las-piedras/,['Reef']
6,1194,-42.765446,-65.022883,[{'800x800': 'https://d2p1cf6997m1ir.cloudfron...,[],,Naufragio Emma,/dive-site/argentina/naufragio-emma/,['Reef']
7,1195,-42.770361,-65.021617,[{'800x800': 'https://d2p1cf6997m1ir.cloudfron...,[],,Parque Nuevo,/dive-site/argentina/parque-nuevo/,"['Beach', 'Reef']"
8,1189,-42.772156,-65.00988,[{'800x800': 'https://d2p1cf6997m1ir.cloudfron...,[],,Hundimiento Albatros,/dive-site/argentina/hundimiento-albatros/,['Reef']
9,1187,-42.774109,-65.009708,[{'800x800': 'https://d2p1cf6997m1ir.cloudfron...,[],,Hundimiento Urabain,/dive-site/argentina/hundimiento-urabain/,['Reef']
