In [23]:
pip install tqdm

Note: you may need to restart the kernel to use updated packages.




In [1]:
import pandas as pd
import requests
import re

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [3]:
# Define the endpoint URL for the Socrata Discovery API
url = "https://api.us.socrata.com/api/catalog/v1"

# Set the limit of records per request (100 is the typical max for Socrata)
limit = 100
offset = 0
all_datasets = []

# Loop to paginate through all available datasets
while True:
    # Define parameters with limit and offset
    params = {
        "only": "datasets",
        "limit": limit,
        "offset": offset
    }
    
    # Make the GET request
    response = requests.get(url, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the JSON response
        data = response.json()
        
        # Extract dataset information
        datasets = data.get("results", [])
        
        # If no more results, break the loop
        if not datasets:
            break
        
        # Append data to the main list
        for dataset in datasets:
            all_datasets.append({
                "name": dataset['resource']['name'],
                "link": dataset['permalink'],
                "description": dataset['resource'].get('description', 'No description available'),
                "domain": dataset['metadata'].get('domain'),  # Optional domain info for city/county identification
                "data_columns": dataset['resource']['columns_name'],
                "data_columns_types": dataset['resource']['columns_datatype'],
                "data_columns_description": dataset['resource']['columns_description']
            })
        
        # Update the offset to fetch the next batch
        offset += limit
    else:
        print(f"Failed to retrieve datasets: {response.status_code}")
        break

# Create a DataFrame with all datasets
df = pd.DataFrame(all_datasets)

# Display the first few rows
print("Total Datasets Retrieved:", len(df))

Failed to retrieve datasets: 400
Total Datasets Retrieved: 10000


In [4]:
state_codes_pattern = r'\b(AL|AK|AZ|AR|CA|CO|CT|DE|FL|GA|HI|ID|IL|IN|IA|KS|KY|LA|ME|MD|MA|MI|MN|MS|MO|MT|NE|NV|NH|NJ|NM|NY|NC|ND|OH|OK|OR|PA|RI|SC|SD|TN|TX|UT|VT|VA|WA|WV|WI|WY)\b'


In [5]:
def extract_state(domain):
    match = re.search(state_codes_pattern, domain, re.IGNORECASE)
    return match.group(0).upper() if match else None

In [6]:
df['potential_state_code'] = df['domain'].apply(extract_state)

In [7]:
def clean_text(text):
    if isinstance(text, str):
        # Remove non-printable characters
        return re.sub(r'[\x00-\x1F\x7F]', '', text)
    return text

In [8]:
df = df.applymap(clean_text)

  df = df.applymap(clean_text)


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   name                      10000 non-null  object
 1   link                      10000 non-null  object
 2   description               10000 non-null  object
 3   domain                    10000 non-null  object
 4   data_columns              10000 non-null  object
 5   data_columns_types        10000 non-null  object
 6   data_columns_description  10000 non-null  object
 7   potential_state_code      3893 non-null   object
dtypes: object(8)
memory usage: 625.1+ KB


In [10]:
df.head()

Unnamed: 0,name,link,description,domain,data_columns,data_columns_types,data_columns_description,potential_state_code
0,Dallas Police Active Calls,https://www.dallasopendata.com/d/9fxf-t2tr,<b><p>Due to technical issues the Active Calls...,www.dallasopendata.com,"[Unit Num, Nature of Call, Block, Time, Beat, ...","[Text, Text, Text, Text, Text, Text, Text, Tex...","[Responding officers element (unit) number, Pr...",
1,Lottery Cash 4 Life Winning Numbers: Beginning...,https://data.ny.gov/d/kwxv-fwze,Go to http://on.ny.gov/1xRIvPz on the New York...,data.ny.gov,"[Cash Ball, Draw Date, Winning Numbers]","[Text, Calendar date, Text]","[Cash ball, Draw date, Winning numbers]",NY
2,Provisional COVID-19 Deaths by Sex and Age,https://data.cdc.gov/d/9bhg-hcku,"Effective September 27, 2023, this dataset wil...",data.cdc.gov,"[End Date, Influenza Deaths, Total Deaths, Age...","[Calendar date, Number, Number, Text, Text, Ca...","[Last date of data period, Influenza Deaths (I...",
3,Howard County Police Department Call For Servi...,https://opendata.howardcountymd.gov/d/qccx-65fg,Calls for Service by computer aided dispatch (...,opendata.howardcountymd.gov,"[Date_Reported, Computer_Aided_Dispatch_Event_...","[Calendar date, Text, Text, Number, Text, Text]","[, , , , , ]",
4,Health Care Provider Credential Data,https://data.wa.gov/d/qxh8-f4bd,The Washington State Department of Health pres...,data.wa.gov,"[Status, MiddleName, BirthYear, ActionTaken, L...","[Text, Text, Text, Text, Text, Text, Text, Tex...","[, , , , , , , , , , , ]",WA


In [11]:
df['api_link'] = df['link'] + ".json"

In [12]:
df['api_link'].head(20)

0       https://www.dallasopendata.com/d/9fxf-t2tr.json
1                  https://data.ny.gov/d/kwxv-fwze.json
2                 https://data.cdc.gov/d/9bhg-hcku.json
3     https://opendata.howardcountymd.gov/d/qccx-65f...
4                  https://data.wa.gov/d/qxh8-f4bd.json
5              https://datahub.hhs.gov/d/rxn6-qnx8.json
6        https://data.cityofnewyork.us/d/8wbx-tsch.json
7        https://data.cityofnewyork.us/d/vx8i-nprf.json
8     https://opendata.howardcountymd.gov/d/kvz2-j5c...
9        https://data.cityofnewyork.us/d/ic3t-wcy2.json
10            https://www.datos.gov.co/d/gt2j-8ykr.json
11                https://data.cdc.gov/d/nr4s-juj3.json
12      https://data.cityofchicago.org/d/xzkq-xp2w.json
13    https://opendata.howardcountymd.gov/d/xvpn-2pn...
14    https://opendata.howardcountymd.gov/d/f362-6fu...
15    https://opendata.howardcountymd.gov/d/8fxg-nyr...
16       https://data.cityofnewyork.us/d/dpec-ucu7.json
17    https://opendata.howardcountymd.gov/d/6uza

In [15]:
df['api_link'] = df['api_link'].str.replace('/d/', '/resource/')

In [16]:
df['api_link']

0       https://www.dallasopendata.com/resource/9fxf-t...
1             https://data.ny.gov/resource/kwxv-fwze.json
2            https://data.cdc.gov/resource/9bhg-hcku.json
3       https://opendata.howardcountymd.gov/resource/q...
4             https://data.wa.gov/resource/qxh8-f4bd.json
                              ...                        
9995     https://data.edmonton.ca/resource/p2tt-8mv9.json
9996         https://data.cdc.gov/resource/8i5t-42wz.json
9997          https://data.ct.gov/resource/4vva-amjy.json
9998    https://opendata.maryland.gov/resource/94gw-yf...
9999      https://gnb.socrata.com/resource/t6hf-gbks.json
Name: api_link, Length: 10000, dtype: object

In [17]:
api_link_1 = df.loc[0, 'api_link']

In [30]:
def fetch_data(api_link, timeout=5):
    try:
        # Attempt to fetch data from the API link with a timeout
        response = requests.get(api_link, timeout=timeout)
        response.raise_for_status()  # Raise an error for unsuccessful status codes
        return response.json()  # Return JSON data from the response
    except requests.exceptions.Timeout:
        print(f"Request to {api_link} timed out after {timeout} seconds.")
        return "Timeout"  # Return None for timeout cases
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from {api_link}: {e}")
        return "ErrorFetchingData"  # Return None for other exceptions

In [24]:
from tqdm import tqdm

In [25]:
tqdm.pandas()

In [None]:
df['data'] = df['api_link'].progress_apply(fetch_data)

  0%|▎                                                                            | 35/10000 [00:21<5:02:04,  1.82s/it]

Request to https://data.cityofnewyork.us/resource/erm2-nwe9.json timed out after 5 seconds.


  1%|▋                                                                            | 87/10000 [01:05<6:08:58,  2.23s/it]

Request to https://datahub.austintexas.gov/resource/3syk-w9eu.json timed out after 5 seconds.


  2%|█▏                                                                          | 150/10000 [01:49<7:37:32,  2.79s/it]

Request to https://data.cityofchicago.org/resource/22u3-xenr.json timed out after 5 seconds.


  2%|█▏                                                                          | 155/10000 [01:55<6:01:39,  2.20s/it]

Request to https://data.cityofnewyork.us/resource/jz4z-kudi.json timed out after 5 seconds.


  2%|█▎                                                                          | 176/10000 [02:16<5:26:05,  1.99s/it]

Request to https://controllerdata.lacity.org/resource/pggv-e4fn.json timed out after 5 seconds.


  2%|█▌                                                                          | 206/10000 [02:40<5:03:45,  1.86s/it]

Request to https://data.cityofnewyork.us/resource/qgea-i56i.json timed out after 5 seconds.


  2%|█▋                                                                          | 218/10000 [02:50<5:04:59,  1.87s/it]

Request to https://data.cityofchicago.org/resource/wrvz-psew.json timed out after 5 seconds.


  3%|█▉                                                                          | 251/10000 [03:20<5:06:37,  1.89s/it]

Request to https://www.datos.gov.co/resource/mxk5-ce6w.json timed out after 5 seconds.


  3%|██                                                                          | 266/10000 [03:32<5:17:41,  1.96s/it]

Request to https://data.cityofnewyork.us/resource/8h9b-rp9u.json timed out after 5 seconds.


  3%|██▍                                                                           | 311/10000 [04:02<54:55,  2.94it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/a7v8-ei2f.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/a7v8-ei2f.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000254178DB250>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


  3%|██▌                                                                         | 334/10000 [04:25<5:37:08,  2.09s/it]

Request to https://data.cityofnewyork.us/resource/5uac-w243.json timed out after 5 seconds.


  5%|███▊                                                                        | 504/10000 [06:24<5:35:55,  2.12s/it]

Request to https://data.cityofnewyork.us/resource/t29m-gskq.json timed out after 5 seconds.


  5%|████                                                                        | 532/10000 [06:48<5:27:42,  2.08s/it]

Request to https://data.cityofnewyork.us/resource/jgtb-hmpg.json timed out after 5 seconds.


  6%|████▏                                                                       | 554/10000 [07:05<4:46:55,  1.82s/it]

Request to https://data.cityofnewyork.us/resource/867j-5pgi.json timed out after 5 seconds.


  6%|████▍                                                                       | 588/10000 [07:38<5:01:22,  1.92s/it]

Request to https://data.cityofchicago.org/resource/hec5-y4x5.json timed out after 5 seconds.


  6%|████▌                                                                       | 608/10000 [07:55<5:05:48,  1.95s/it]

Request to https://data.cityofchicago.org/resource/u6pd-qa9d.json timed out after 5 seconds.


  6%|████▋                                                                       | 609/10000 [08:00<7:34:45,  2.91s/it]

Request to https://data.cityofchicago.org/resource/sxs8-h27x.json timed out after 5 seconds.


  6%|████▋                                                                       | 618/10000 [08:14<6:36:13,  2.53s/it]

Request to https://data.cityofchicago.org/resource/v6vf-nfxy.json timed out after 5 seconds.


  6%|████▊                                                                       | 639/10000 [08:31<4:57:09,  1.90s/it]

Request to https://www.datos.gov.co/resource/qhpu-8ixx.json timed out after 5 seconds.


  8%|█████▉                                                                      | 782/10000 [10:14<5:10:24,  2.02s/it]

Request to https://data.ny.gov/resource/7vem-aaz7.json timed out after 5 seconds.


  9%|███████                                                                     | 922/10000 [11:49<5:03:13,  2.00s/it]

Request to https://www.datos.gov.co/resource/w9zh-vetq.json timed out after 5 seconds.


 11%|████████▎                                                                  | 1103/10000 [14:23<4:56:15,  2.00s/it]

Request to https://data.ny.gov/resource/qzve-kjga.json timed out after 5 seconds.


 11%|████████▍                                                                  | 1123/10000 [14:41<4:58:21,  2.02s/it]

Request to https://data.cityofchicago.org/resource/fg6s-gzvg.json timed out after 5 seconds.


 11%|████████▍                                                                  | 1126/10000 [14:48<6:08:42,  2.49s/it]

Request to https://data.lacity.org/resource/3f9m-afei.json timed out after 5 seconds.


 12%|█████████                                                                  | 1210/10000 [15:50<5:04:38,  2.08s/it]

Request to https://data.edmonton.ca/resource/cnsu-iagr.json timed out after 5 seconds.


 13%|█████████▍                                                                 | 1257/10000 [16:19<4:27:54,  1.84s/it]

Request to https://www.datos.gov.co/resource/gpzw-wmxd.json timed out after 5 seconds.


 13%|█████████▌                                                                 | 1281/10000 [16:36<4:26:36,  1.83s/it]

Request to https://data.cityofnewyork.us/resource/bty7-2jhb.json timed out after 5 seconds.


 14%|██████████▎                                                                | 1382/10000 [17:50<4:55:20,  2.06s/it]

Request to https://data.cityofnewyork.us/resource/uq7m-95z8.json timed out after 5 seconds.


 14%|██████████▌                                                                | 1416/10000 [18:14<4:42:08,  1.97s/it]

Request to https://data.cityofnewyork.us/resource/i4gi-tjb9.json timed out after 5 seconds.


 14%|██████████▊                                                                | 1435/10000 [18:28<1:31:48,  1.55it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/ynf5-u8nk.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/ynf5-u8nk.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025467246700>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 15%|██████████▉                                                                | 1456/10000 [18:44<2:03:53,  1.15it/s]

Error fetching data from https://performance.cityofrc.us/resource/gznk-kkf7.json: HTTPSConnectionPool(host='performance.cityofrc.us', port=443): Max retries exceeded with url: /resource/gznk-kkf7.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000254672460A0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 15%|███████████                                                                | 1469/10000 [18:54<4:25:23,  1.87s/it]

Request to https://data.cityofnewyork.us/resource/tm6d-hbzd.json timed out after 5 seconds.


 15%|███████████                                                                | 1474/10000 [19:04<5:29:10,  2.32s/it]

Request to https://data.montgomerycountymd.gov/resource/k9nj-z35d.json timed out after 5 seconds.


 15%|███████████▌                                                               | 1536/10000 [19:42<1:34:00,  1.50it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/d7q7-hb2x.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/d7q7-hb2x.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002541BD75EE0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 17%|████████████▌                                                              | 1668/10000 [21:05<1:11:38,  1.94it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/c8pf-ybds.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/c8pf-ybds.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025467246700>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 17%|████████████▋                                                              | 1692/10000 [21:21<1:29:31,  1.55it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/nf48-gwd6.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/nf48-gwd6.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025467246880>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 17%|████████████▋                                                              | 1698/10000 [21:28<4:06:15,  1.78s/it]

Request to https://data.cityofnewyork.us/resource/2nwg-uqyg.json timed out after 5 seconds.


 17%|████████████▊                                                              | 1706/10000 [21:32<1:25:24,  1.62it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/8tjc-3ibv.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/8tjc-3ibv.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025467246100>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 17%|█████████████                                                              | 1738/10000 [22:04<4:10:46,  1.82s/it]

Request to https://data.cityofnewyork.us/resource/td5q-ry6d.json timed out after 5 seconds.


 18%|█████████████▎                                                             | 1767/10000 [22:28<4:24:14,  1.93s/it]

Request to https://datahub.transportation.gov/resource/8uv2-y4is.json timed out after 5 seconds.


 18%|█████████████▊                                                             | 1842/10000 [23:28<4:23:26,  1.94s/it]

Request to https://data.cityofchicago.org/resource/ujwc-724r.json timed out after 5 seconds.


 18%|█████████████▉                                                             | 1850/10000 [23:35<2:03:31,  1.10it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/pkdv-cwks.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/pkdv-cwks.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000254672461F0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 19%|██████████████▎                                                            | 1914/10000 [24:12<1:38:30,  1.37it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/98ww-3sks.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/98ww-3sks.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025467246100>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 19%|██████████████▉                                                              | 1943/10000 [24:29<58:19,  2.30it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/sz6i-wjcm.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/sz6i-wjcm.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025467246700>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 20%|██████████████▉                                                            | 1991/10000 [25:00<1:13:29,  1.82it/s]

Error fetching data from https://performance.cityofrc.us/resource/cnpb-8s3c.json: HTTPSConnectionPool(host='performance.cityofrc.us', port=443): Max retries exceeded with url: /resource/cnpb-8s3c.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025467246220>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 20%|███████████████                                                            | 2000/10000 [25:08<4:01:21,  1.81s/it]

Request to https://data.cityofnewyork.us/resource/sv2w-rv3k.json timed out after 5 seconds.


 20%|███████████████                                                            | 2006/10000 [25:18<5:04:18,  2.28s/it]

Request to https://data.readingpa.gov/resource/vicb-nrz9.json timed out after 5 seconds.


 20%|███████████████▏                                                           | 2033/10000 [25:40<4:12:00,  1.90s/it]

Request to https://data.cityofchicago.org/resource/bc6b-sq4u.json timed out after 5 seconds.


 21%|████████████████                                                           | 2134/10000 [26:57<4:13:46,  1.94s/it]

Request to https://data.vermont.gov/resource/vf3r-u4kv.json timed out after 5 seconds.


 22%|████████████████▎                                                          | 2183/10000 [27:40<4:28:18,  2.06s/it]

Request to https://cthru.data.socrata.com/resource/pegc-naaa.json timed out after 5 seconds.


 22%|████████████████▌                                                          | 2215/10000 [28:07<4:46:48,  2.21s/it]

Request to https://data.cityofnewyork.us/resource/92iy-9c3n.json timed out after 5 seconds.


 22%|████████████████▋                                                          | 2233/10000 [28:29<4:37:46,  2.15s/it]

Request to https://data.cityofnewyork.us/resource/nyis-y4yr.json timed out after 5 seconds.


 23%|████████████████▉                                                          | 2260/10000 [28:58<5:06:18,  2.37s/it]

Request to https://data.norfolk.gov/resource/ere7-kake.json timed out after 5 seconds.


 23%|█████████████████▎                                                         | 2308/10000 [29:51<5:08:12,  2.40s/it]

Request to https://www.datos.gov.co/resource/ur2p-h4yf.json timed out after 5 seconds.


 24%|█████████████████▉                                                         | 2389/10000 [30:52<4:19:11,  2.04s/it]

Request to https://data.cityofnewyork.us/resource/a5td-mswe.json timed out after 5 seconds.


 24%|██████████████████                                                         | 2407/10000 [31:13<4:15:24,  2.02s/it]

Request to https://www.datos.gov.co/resource/2x55-9wxm.json timed out after 5 seconds.


 24%|██████████████████                                                         | 2412/10000 [31:23<5:56:02,  2.82s/it]

Request to https://www.data.act.gov.au/resource/sepa-djsm.json timed out after 5 seconds.


 24%|██████████████████▎                                                        | 2435/10000 [31:42<4:10:43,  1.99s/it]

Request to https://datahub.transportation.gov/resource/icqf-xf4w.json timed out after 5 seconds.


 25%|██████████████████▉                                                        | 2527/10000 [32:46<3:53:13,  1.87s/it]

Request to https://data.nola.gov/resource/qarb-kkbj.json timed out after 5 seconds.


 26%|███████████████████▍                                                       | 2585/10000 [33:30<1:36:46,  1.28it/s]

Error fetching data from https://performance.cityofrc.us/resource/vd8t-gew9.json: HTTPSConnectionPool(host='performance.cityofrc.us', port=443): Max retries exceeded with url: /resource/vd8t-gew9.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025467246550>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 26%|███████████████████▍                                                       | 2587/10000 [33:35<3:19:04,  1.61s/it]

Request to https://www.datos.gov.co/resource/57sv-p2fu.json timed out after 5 seconds.


 27%|████████████████████▏                                                      | 2692/10000 [34:53<4:05:01,  2.01s/it]

Request to https://data.montgomerycountymd.gov/resource/vpf9-6irq.json timed out after 5 seconds.


 27%|████████████████████▌                                                      | 2742/10000 [35:32<3:45:05,  1.86s/it]

Request to https://data.cityofchicago.org/resource/ggws-77ih.json timed out after 5 seconds.


 28%|████████████████████▊                                                      | 2768/10000 [35:52<4:29:40,  2.24s/it]

Request to https://data.pa.gov/resource/dc5b-gebx.json timed out after 5 seconds.


 28%|████████████████████▊                                                      | 2778/10000 [36:01<3:53:22,  1.94s/it]

Request to https://data.cityofchicago.org/resource/n26f-ihde.json timed out after 5 seconds.


 28%|████████████████████▊                                                      | 2782/10000 [36:08<4:37:08,  2.30s/it]

Request to https://data.cityofnewyork.us/resource/n6c5-95xh.json timed out after 5 seconds.


 29%|██████████████████████▏                                                      | 2882/10000 [37:19<52:57,  2.24it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/jdvc-fysk.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/jdvc-fysk.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025459C54D60>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 31%|███████████████████████▏                                                   | 3090/10000 [39:27<3:54:47,  2.04s/it]

Request to https://data.texas.gov/resource/yrkr-maw5.json timed out after 5 seconds.


 32%|███████████████████████▋                                                   | 3159/10000 [40:08<1:37:51,  1.17it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/i3xt-vddf.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/i3xt-vddf.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002541646CD60>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 32%|████████████████████████▏                                                  | 3218/10000 [40:50<3:34:23,  1.90s/it]

Request to https://www.datos.gov.co/resource/cwhv-7fnp.json timed out after 5 seconds.


 32%|████████████████████████▎                                                  | 3245/10000 [41:10<1:31:27,  1.23it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/5fdt-n5ne.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/5fdt-n5ne.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025467246220>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 34%|█████████████████████████▌                                                 | 3416/10000 [42:56<3:24:14,  1.86s/it]

Request to https://datahub.austintexas.gov/resource/xp28-5kft.json timed out after 5 seconds.


 34%|█████████████████████████▊                                                 | 3439/10000 [43:11<1:17:52,  1.40it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/e882-m97r.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/e882-m97r.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002541646C6D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 35%|██████████████████████████▋                                                  | 3465/10000 [43:25<49:18,  2.21it/s]

Error fetching data from https://data.miamigov.com/resource/6q9s-asrs.json: HTTPSConnectionPool(host='data.miamigov.com', port=443): Max retries exceeded with url: /resource/6q9s-asrs.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002541646C6D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 35%|██████████████████████████                                                 | 3482/10000 [43:37<3:23:34,  1.87s/it]

Request to https://data.ny.gov/resource/aee3-5gf5.json timed out after 5 seconds.
Error fetching data from https://priv-data.ojp.usdoj.gov/resource/gcuy-rt5g.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/gcuy-rt5g.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000254178DBAC0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 35%|██████████████████████████▏                                                | 3487/10000 [43:45<3:45:39,  2.08s/it]

Request to https://data.edmonton.ca/resource/tq23-qn4m.json timed out after 5 seconds.


 35%|███████████████████████████▏                                                 | 3533/10000 [44:20<51:39,  2.09it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/7syt-ki9g.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/7syt-ki9g.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025459C54CD0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 36%|██████████████████████████▋                                                | 3554/10000 [44:32<1:05:49,  1.63it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/imsf-b5s7.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/imsf-b5s7.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025459C545E0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 36%|███████████████████████████▌                                                 | 3572/10000 [44:40<41:54,  2.56it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/ak2j-ub9q.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/ak2j-ub9q.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025459C54DF0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 36%|████████████████████████████                                                 | 3638/10000 [45:13<53:01,  2.00it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/esbe-p7rh.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/esbe-p7rh.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025459C54E50>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 38%|████████████████████████████▌                                              | 3813/10000 [47:04<3:11:12,  1.85s/it]

Request to https://data.ny.gov/resource/ku8b-uzf5.json timed out after 5 seconds.


 40%|█████████████████████████████▉                                             | 3991/10000 [49:00<3:14:32,  1.94s/it]

Request to https://data.cityofnewyork.us/resource/c9sj-fmsg.json timed out after 5 seconds.


 41%|██████████████████████████████▌                                            | 4074/10000 [49:55<3:17:55,  2.00s/it]

Request to https://data.montgomerycountymd.gov/resource/pv7j-pdxw.json timed out after 5 seconds.


 41%|██████████████████████████████▌                                            | 4075/10000 [49:55<2:39:34,  1.62s/it]

Error fetching data from https://data.miamigov.com/resource/kyut-b7du.json: HTTPSConnectionPool(host='data.miamigov.com', port=443): Max retries exceeded with url: /resource/kyut-b7du.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025459C54DC0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 41%|██████████████████████████████▉                                            | 4133/10000 [50:36<3:09:50,  1.94s/it]

Request to https://health.data.ny.gov/resource/jizq-disf.json timed out after 5 seconds.


 42%|████████████████████████████████▋                                            | 4243/10000 [51:44<43:33,  2.20it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/a3x8-ymyv.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/a3x8-ymyv.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025459C54C10>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 43%|█████████████████████████████████                                            | 4287/10000 [52:08<54:24,  1.75it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/kwm9-bqsn.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/kwm9-bqsn.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025467246130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 44%|████████████████████████████████▊                                          | 4378/10000 [53:06<3:19:41,  2.13s/it]

Request to https://data.norfolk.gov/resource/wqxq-hhe6.json timed out after 5 seconds.


 45%|█████████████████████████████████▊                                         | 4504/10000 [54:22<3:17:16,  2.15s/it]

Request to https://bythenumbers.sco.ca.gov/resource/ju3w-4gxp.json timed out after 5 seconds.


 46%|███████████████████████████████████▌                                         | 4625/10000 [55:18<35:06,  2.55it/s]

Error fetching data from https://performance.cityofrc.us/resource/6ad2-q4p2.json: HTTPSConnectionPool(host='performance.cityofrc.us', port=443): Max retries exceeded with url: /resource/6ad2-q4p2.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000254672461F0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 47%|███████████████████████████████████                                        | 4670/10000 [55:51<2:56:16,  1.98s/it]

Request to https://data.cityofchicago.org/resource/cygx-ui4j.json timed out after 5 seconds.


 48%|███████████████████████████████████▋                                       | 4761/10000 [56:54<2:40:11,  1.83s/it]

Request to https://data.norfolk.gov/resource/nbyu-xjez.json timed out after 5 seconds.


 48%|███████████████████████████████████▋                                       | 4766/10000 [56:56<1:02:49,  1.39it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/r4j4-fdwx.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/r4j4-fdwx.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000254672468B0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 48%|███████████████████████████████████▊                                       | 4783/10000 [57:11<2:46:34,  1.92s/it]

Request to https://datacatalog.cookcountyil.gov/resource/7pny-nedm.json timed out after 5 seconds.


 50%|█████████████████████████████████████▎                                     | 4970/10000 [59:15<2:34:22,  1.84s/it]

Request to https://data.ct.gov/resource/um73-fxm4.json timed out after 5 seconds.


 51%|█████████████████████████████████████▉                                     | 5066/10000 [1:00:07<38:12,  2.15it/s]

Error fetching data from https://performance.cityofrc.us/resource/csg5-5s49.json: HTTPSConnectionPool(host='performance.cityofrc.us', port=443): Max retries exceeded with url: /resource/csg5-5s49.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000254672468B0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 51%|██████████████████████████████████████▍                                    | 5117/10000 [1:01:00<39:04,  2.08it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/qiz2-ifwz.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/qiz2-ifwz.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025467246550>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 52%|██████████████████████████████████████▊                                    | 5175/10000 [1:01:30<42:18,  1.90it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/gkck-euys.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/gkck-euys.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000254672460A0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 53%|███████████████████████████████████████▌                                   | 5274/10000 [1:02:25<31:41,  2.49it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/ya4e-n9zp.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/ya4e-n9zp.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000254672466D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 53%|██████████████████████████████████████▊                                  | 5310/10000 [1:02:55<2:23:17,  1.83s/it]

Request to https://data.cityofnewyork.us/resource/quxm-hmyr.json timed out after 5 seconds.


 53%|██████████████████████████████████████▉                                  | 5339/10000 [1:03:14<2:26:20,  1.88s/it]

Request to https://data.ny.gov/resource/bjcb-yee3.json timed out after 5 seconds.


 55%|████████████████████████████████████████▏                                | 5501/10000 [1:04:56<2:33:49,  2.05s/it]

Request to https://data.cdc.gov/resource/e28h-tx85.json timed out after 5 seconds.


 57%|█████████████████████████████████████████▉                               | 5738/10000 [1:08:13<2:29:55,  2.11s/it]

Request to https://datahub.usaid.gov/resource/a34s-3yxd.json timed out after 5 seconds.


 58%|███████████████████████████████████████████▋                               | 5826/10000 [1:09:13<33:29,  2.08it/s]

Error fetching data from https://priv-data.ojp.usdoj.gov/resource/vigs-nsnz.json: HTTPSConnectionPool(host='priv-data.ojp.usdoj.gov', port=443): Max retries exceeded with url: /resource/vigs-nsnz.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025459C54DC0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 59%|██████████████████████████████████████████▊                              | 5864/10000 [1:09:41<2:34:54,  2.25s/it]

Request to https://data.cityofnewyork.us/resource/qmh3-uvgq.json timed out after 5 seconds.


 59%|████████████████████████████████████████████▏                              | 5886/10000 [1:09:51<33:37,  2.04it/s]

Error fetching data from https://stat.montgomerycountymd.gov/resource/4wau-rn5x.json: [Errno Expecting value] 
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head id="ctl00_Head1"><title>
	CountyStat Home Page - Office of Management and Budget - Montgomery County, Maryland
</title><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta http-equiv="X-UA-Compatible" content="IE=Edge" />    
	<script async src="https://www.googletagmanager.com/gtag/js?id=G-K9STEFSE4V"></script>
	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css" integrity="sha512-z3gLpd7yknf1YoNbCzqRKc4qyor8gaKU1qmn+CShxbuBusANI9QpRohGBreCFkKxLhei6S9CQXFEbbKuqLg0DA==" crossorigin="anonymous" referrerpolicy="no-referrer" /><link rel="stylesheet" href="//maxcdn.bootstrapcdn.com/font-awesome/4.4.0/css/font-awesome.min.css" />
    <script 

 61%|████████████████████████████████████████████▏                            | 6057/10000 [1:11:37<2:03:30,  1.88s/it]

Request to https://data.brla.gov/resource/sfeg-d9ip.json timed out after 5 seconds.


 61%|████████████████████████████████████████████▎                            | 6073/10000 [1:11:50<1:58:48,  1.82s/it]

Request to https://data.wa.gov/resource/fvrz-yz45.json timed out after 5 seconds.


 61%|█████████████████████████████████████████████▋                             | 6087/10000 [1:12:00<35:35,  1.83it/s]

Error fetching data from https://data.miamigov.com/resource/7ey5-m434.json: HTTPSConnectionPool(host='data.miamigov.com', port=443): Max retries exceeded with url: /resource/7ey5-m434.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002541646C700>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 62%|█████████████████████████████████████████████▎                           | 6211/10000 [1:14:42<2:15:47,  2.15s/it]

Request to https://stat.stpete.org/resource/qdms-3kn3.json timed out after 5 seconds.


 64%|██████████████████████████████████████████████▉                          | 6434/10000 [1:17:18<2:06:22,  2.13s/it]

Request to https://data.cdc.gov/resource/vgc8-iyc4.json timed out after 5 seconds.


 65%|███████████████████████████████████████████████▌                         | 6508/10000 [1:18:15<1:53:09,  1.94s/it]

Request to https://data.cityofnewyork.us/resource/gsgx-6efw.json timed out after 5 seconds.


 67%|████████████████████████████████████████████████▉                        | 6705/10000 [1:23:21<1:45:26,  1.92s/it]

Request to https://www.datos.gov.co/resource/hds9-4524.json timed out after 5 seconds.


 68%|█████████████████████████████████████████████████▋                       | 6806/10000 [1:24:18<1:40:58,  1.90s/it]

Request to https://data.wa.gov/resource/bzff-4fmt.json timed out after 5 seconds.


 70%|██████████████████████████████████████████████████▊                      | 6955/10000 [1:25:46<1:35:20,  1.88s/it]

Request to https://data.ct.gov/resource/qem9-rt8k.json timed out after 5 seconds.


 71%|███████████████████████████████████████████████████▋                     | 7076/10000 [1:30:52<1:34:40,  1.94s/it]

Request to https://mydata.iadb.org/resource/itsw-5hnr.json timed out after 5 seconds.


 73%|█████████████████████████████████████████████████████▎                   | 7297/10000 [1:33:39<1:27:18,  1.94s/it]

Request to https://data.cityofnewyork.us/resource/4fwc-j3vn.json timed out after 5 seconds.


 73%|█████████████████████████████████████████████████████▍                   | 7316/10000 [1:34:00<1:29:43,  2.01s/it]

Request to https://data.cityofnewyork.us/resource/nbun-wekj.json timed out after 5 seconds.


 74%|█████████████████████████████████████████████████████▉                   | 7381/10000 [1:34:41<1:41:58,  2.34s/it]

Request to https://data.edmonton.ca/resource/ypje-j649.json timed out after 5 seconds.


 74%|█████████████████████████████████████████████████████▉                   | 7382/10000 [1:34:47<2:18:50,  3.18s/it]

Request to https://data.cambridgema.gov/resource/88at-7ucq.json timed out after 5 seconds.


 74%|█████████████████████████████████████████████████████▉                   | 7394/10000 [1:35:00<1:29:10,  2.05s/it]

Request to https://data.wa.gov/resource/irc2-87d5.json timed out after 5 seconds.


 75%|████████████████████████████████████████████████████████▌                  | 7537/10000 [1:36:35<46:07,  1.12s/it]

Error fetching data from https://performance.cityofrc.us/resource/3zhc-t25i.json: HTTPSConnectionPool(host='performance.cityofrc.us', port=443): Max retries exceeded with url: /resource/3zhc-t25i.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000025459C545B0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))


 76%|███████████████████████████████████████████████████████▌                 | 7605/10000 [1:37:39<1:23:00,  2.08s/it]

Request to https://data.ny.gov/resource/qym9-xzj6.json timed out after 5 seconds.


 77%|████████████████████████████████████████████████████████▍                | 7728/10000 [1:39:04<1:20:29,  2.13s/it]

Request to https://mydata.iadb.org/resource/sjty-9qzs.json timed out after 5 seconds.


 78%|████████████████████████████████████████████████████████▉                | 7796/10000 [1:40:16<2:21:59,  3.87s/it]

Request to https://www.data.act.gov.au/resource/jxpp-4iiz.json timed out after 5 seconds.


 79%|█████████████████████████████████████████████████████████▋               | 7894/10000 [1:41:32<1:08:37,  1.95s/it]

Request to https://data.wa.gov/resource/ydb3-3dnq.json timed out after 5 seconds.


 79%|███████████████████████████████████████████████████████████▍               | 7918/10000 [1:41:48<22:31,  1.54it/s]

Error fetching data from https://stat.montgomerycountymd.gov/resource/2tcf-unsy.json: [Errno Expecting value] 
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head id="ctl00_Head1"><title>
	CountyStat Home Page - Office of Management and Budget - Montgomery County, Maryland
</title><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta http-equiv="X-UA-Compatible" content="IE=Edge" />    
	<script async src="https://www.googletagmanager.com/gtag/js?id=G-K9STEFSE4V"></script>
	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css" integrity="sha512-z3gLpd7yknf1YoNbCzqRKc4qyor8gaKU1qmn+CShxbuBusANI9QpRohGBreCFkKxLhei6S9CQXFEbbKuqLg0DA==" crossorigin="anonymous" referrerpolicy="no-referrer" /><link rel="stylesheet" href="//maxcdn.bootstrapcdn.com/font-awesome/4.4.0/css/font-awesome.min.css" />
    <script 

 82%|███████████████████████████████████████████████████████████▊             | 8197/10000 [1:50:00<1:00:15,  2.01s/it]

Request to https://www.data.act.gov.au/resource/92fy-xvmy.json timed out after 5 seconds.


 83%|██████████████████████████████████████████████████████████████▍            | 8322/10000 [1:51:26<10:51,  2.58it/s]

Error fetching data from https://stat.montgomerycountymd.gov/resource/fdyk-etfp.json: [Errno Expecting value] 
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head id="ctl00_Head1"><title>
	CountyStat Home Page - Office of Management and Budget - Montgomery County, Maryland
</title><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta http-equiv="X-UA-Compatible" content="IE=Edge" />    
	<script async src="https://www.googletagmanager.com/gtag/js?id=G-K9STEFSE4V"></script>
	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css" integrity="sha512-z3gLpd7yknf1YoNbCzqRKc4qyor8gaKU1qmn+CShxbuBusANI9QpRohGBreCFkKxLhei6S9CQXFEbbKuqLg0DA==" crossorigin="anonymous" referrerpolicy="no-referrer" /><link rel="stylesheet" href="//maxcdn.bootstrapcdn.com/font-awesome/4.4.0/css/font-awesome.min.css" />
    <script 

 84%|███████████████████████████████████████████████████████████████▎           | 8439/10000 [1:52:47<27:40,  1.06s/it]