In [7]:
import os
import requests
import time
import asyncio
import aiohttp
import pandas as pd
import geopandas as gpd
from urllib.parse import urlencode
import nest_asyncio
nest_asyncio.apply()


# Query endpoint of the ArcGIS feature layer; the trailing '?' lets the
# url-encoded query string be appended directly.
url = r'https://services2.arcgis.com/FiaPA4ga0iQKduv3/arcgis/rest/services/Tornado_Tracks_1950_2017_1/FeatureServer/0/query?'

# Base query parameters shared by the count request and every page request.
# 'resultOffset' is the only value that changes from page to page.
params = {
    'where': '1=1',
    'geometryType': 'esriGeometryPolygon',
    'returnExceededLimitFeatures': 'false',
    'inSR': '4326',
    'units': 'esriSRUnit_Meter',
    'returnGeometry': 'true',
    'outFields': '*',
    'f': 'pgeojson',
    'resultOffset': '0',
    'resultRecordCount': '2000'
}

# Upper bound on how many records will be downloaded, regardless of how many
# the service reports.
max_records = 660000

# Get the total number of records
count_params = {**params, 'returnCountOnly': 'true'}
count_url = url + urlencode(count_params)
# A timeout keeps a stalled connection from hanging the script forever, and
# raise_for_status() turns an HTTP error into a clear exception instead of a
# confusing JSON/KeyError on the next line.
count_response = requests.get(count_url, timeout=30)
count_response.raise_for_status()
total_records = count_response.json()['properties']['count']

output_directory = "data"

# Create the output directory if it doesn't exist
os.makedirs(output_directory, exist_ok=True)

async def fetch(session, offset):
    """Request one page of features and return the response body as text.

    Args:
        session: An open ``aiohttp.ClientSession`` used to issue the GET.
        offset: Index of the first record of the page; sent to the service
            as the ``resultOffset`` query parameter.

    Returns:
        The raw response body (text) for the requested page.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            error status (4xx/5xx).
    """
    # Build the query on a per-call copy so the shared module-level ``params``
    # dict is never mutated as a side effect of fetching a page.
    page_params = {**params, 'resultOffset': str(offset)}
    url_final = url + urlencode(page_params)
    async with session.get(url_final) as response:
        # Fail loudly on HTTP errors rather than handing an error page to the
        # GeoJSON parser as if it were feature data.
        response.raise_for_status()
        return await response.text()

async def main():
    """Download the feature layer page by page and save it as one shapefile.

    Pages are requested sequentially through ``fetch``; each page is parsed
    with ``gpd.read_file`` and the successfully parsed pages are concatenated
    and written to ``<output_directory>/tornado_history.shp``.

    A page that fails to download or parse is reported and skipped
    (best-effort), and a summary of the skipped offsets is printed so a
    partial result is never saved silently. If no page could be loaded,
    nothing is written.
    """
    # Derive the page size from the query parameters so the offsets always
    # match what is actually requested from the service.
    page_size = int(params['resultRecordCount'])
    records_to_fetch = min(total_records, max_records)

    # Calculate the number of pages to fetch (ceiling division)
    num_pages = -(-records_to_fetch // page_size)

    # Generate the offsets based on the number of pages
    offsets = [page * page_size for page in range(num_pages)]

    geo_dataframes = []
    failed_offsets = []

    async with aiohttp.ClientSession() as session:
        for offset in offsets:
            try:
                data = await fetch(session, offset)
                page_frame = gpd.read_file(data)
            except Exception as e:
                # Deliberately broad: network errors and parser/driver errors
                # come from different libraries, and one bad page should not
                # abort the whole download.
                print(f'Error fetching data for offset {offset}: {e}')
                failed_offsets.append(offset)
            else:
                geo_dataframes.append(page_frame)
                print(f'Fetched data for offset {offset}')

    if failed_offsets:
        print(
            f'Warning: {len(failed_offsets)} page(s) failed and are missing '
            f'from the output (offsets: {failed_offsets})'
        )

    # pd.concat raises on an empty list, and there would be no CRS to use.
    if not geo_dataframes:
        print('No data was fetched; nothing to save')
        return

    # Take the CRS from a page that is known to have loaded, rather than from
    # a loop variable that may be unbound if pages failed.
    final_geo_dataframe = gpd.GeoDataFrame(
        pd.concat(geo_dataframes, ignore_index=True),
        crs=geo_dataframes[0].crs,
    )
    final_geo_dataframe.to_file(os.path.join(output_directory, 'tornado_history.shp'))
    print(f'Saved final GeoDataFrame with {len(final_geo_dataframe)} records')

if __name__ == '__main__':
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # elapsed time; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()
    asyncio.run(main())
    print(f'Total time: {time.perf_counter() - start_time:.2f} seconds')


Fetched data for offset 0
Fetched data for offset 2000
Fetched data for offset 4000
Fetched data for offset 6000
Fetched data for offset 8000
Fetched data for offset 10000
Fetched data for offset 12000
Fetched data for offset 14000


  return {


Fetched data for offset 16000
Fetched data for offset 18000
Fetched data for offset 20000
Fetched data for offset 22000
Fetched data for offset 24000
Fetched data for offset 26000
Fetched data for offset 28000
Fetched data for offset 30000
Fetched data for offset 32000
Fetched data for offset 34000
Fetched data for offset 36000
Fetched data for offset 38000
Fetched data for offset 40000
Fetched data for offset 42000
Fetched data for offset 44000
Fetched data for offset 46000
Fetched data for offset 48000
Fetched data for offset 50000
Fetched data for offset 52000
Fetched data for offset 54000
Fetched data for offset 56000
Fetched data for offset 58000
Fetched data for offset 60000
Fetched data for offset 62000
Fetched data for offset 64000
Fetched data for offset 66000
Fetched data for offset 68000


  final_geo_dataframe.to_file(f'{output_directory}/tornado_history.shp')


Saved final GeoDataFrame with 68701 records
Total time: 44.53 seconds
