In [13]:
import pandas as pd
import numpy as np
import concurrent.futures
import requests
import cloudscraper
import time
from requests.exceptions import RequestException


In [25]:
# Fixed seed so the random sample (and everything computed from it) is
# reproducible on Restart Kernel -> Run All.
SAMPLE_SEED = 42
SAMPLE_FRAC = 0.001  # keep 0.1 % of the rows for a quick trial run

# Load the raw data, discard any existing 'gravity' column, take a small
# reproducible sample, and re-add 'gravity' filled with the placeholder
# string 'none' (meaning "not fetched yet").
df = (
    pd.read_csv('raw_dataset.csv')
    .drop(columns=['gravity'])
    .sample(frac=SAMPLE_FRAC, random_state=SAMPLE_SEED)
    .assign(gravity='none')
)

In [26]:
# Sanity check: (rows, columns) of the sampled frame.
df.shape

(18, 28)

In [27]:
def get_gravity(url, scraper=None, max_retries=1, timeout=30):
    """Fetch the ``predictedGravity`` value from a JSON endpoint.

    Parameters
    ----------
    url : str
        Address to request; the response body is expected to be a JSON object.
    scraper : object, optional
        Anything with a ``get(url, timeout=...)`` method that returns a
        response exposing ``status_code`` and ``json()``.  When omitted, a new
        ``cloudscraper`` session is created for this call.
    max_retries : int, optional
        Maximum number of attempts.  Only transport-level failures
        (``RequestException``, which includes timeouts) trigger another
        attempt; any HTTP answer ends the loop.
    timeout : float, optional
        Seconds to wait for the server before giving up on an attempt, so a
        stalled connection cannot block a worker thread indefinitely.

    Returns
    -------
    object
        The value stored under ``'predictedGravity'``, or the string
        ``'none'`` when the request fails, the status is not 200, the body is
        not valid JSON, the JSON is not an object, or the key is missing.
    """
    if scraper is None:
        scraper = cloudscraper.create_scraper()

    for _ in range(max_retries):
        try:
            response = scraper.get(url, timeout=timeout)
        except RequestException:
            # Transport failure: try again if attempts remain.
            continue

        if response.status_code != 200:
            return 'none'

        try:
            data = response.json()  # Parse the JSON response
        except ValueError:
            # Body was not valid JSON (JSON decode errors subclass ValueError).
            return 'none'

        # Guard against a JSON array/scalar, which has no .get().
        if not isinstance(data, dict):
            return 'none'
        return data.get('predictedGravity', 'none')

    # Every attempt failed (or max_retries < 1): always return the placeholder,
    # never an implicit None.
    return 'none'

# Function to apply multithreading for concurrent requests
def fetch_gravity_concurrently(urls, fetch=None, max_workers=None):
    """Fetch one value per URL on a thread pool, preserving input order.

    Futures finish in arbitrary order, so each result is written to the slot
    matching its URL's position in ``urls``.  This makes the returned list
    safe to assign positionally (e.g. back into a DataFrame column).

    Parameters
    ----------
    urls : iterable of str
        Addresses to query.
    fetch : callable, optional
        One-argument function mapping a URL to its result.  Defaults to
        ``get_gravity``.
    max_workers : int, optional
        Passed through to ``ThreadPoolExecutor``; ``None`` keeps the
        executor's own default.

    Returns
    -------
    list
        One entry per input URL, in the same order as ``urls``.  Entries whose
        fetch raised an exception are the string ``'none'``.
    """
    if fetch is None:
        fetch = get_gravity

    urls = list(urls)
    results = ['none'] * len(urls)  # placeholder stays if a fetch raises
    completed = 0

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_index = {
            executor.submit(fetch, url): index for index, url in enumerate(urls)
        }
        for future in concurrent.futures.as_completed(future_to_index):
            completed += 1
            try:
                result = future.result()
            except Exception:
                # Best effort: leave the 'none' placeholder for this URL.
                continue
            results[future_to_index[future]] = result
            # Progress line: number of finished requests and the latest value.
            print(completed, result)
    return results
# Main execution
# NOTE: in a notebook kernel __name__ is "__main__", so this guard passes and
# the block runs every time the cell is executed.

if __name__ == "__main__":
    # Select the rows whose gravity is still the placeholder string 'none'.
    mask = df['gravity'] == 'none'
    urls = df.loc[mask, 'url'].tolist()  # Convert the URL series to a list
    gravity_results = fetch_gravity_concurrently(urls)
    # Positional write-back: the i-th result is stored on the i-th masked row,
    # so this is only correct when gravity_results is in the same order as urls.
    df.loc[mask, 'gravity'] = gravity_results

1 978885.87
2 979167.895
3 979168.79
4 978188.324
5 978873.604
6 979288.456
7 979173.866
8 978757.857
9 980304.139
10 978012.076
11 980180.818
12 979567.493
13 978513.536
14 980505.199
15 978693.089
16 978078.555
17 977983.422
18 978486.967


In [28]:
# Display the frame to inspect the 'gravity' column after the fetch step.
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,...,depthError,magError,magNst,status,locationSource,magSource,distance,url,force,gravity
1624,1624,18146,2011-09-20 02:40:22,-4.532,-105.207,10.0,5.7,mwc,154.0,109.7,...,,,,reviewed,us,gcmt,394480,https://geodesy.noaa.gov/api/gravd/gp?lat=-4.5...,1.881471e+20,978885.87
15488,15488,310423,2022-07-12 21:47:56,-22.4641,-114.0216,10.0,5.6,mww,120.0,42.0,...,1.487,0.048,41.0,reviewed,us,us,357478,https://geodesy.noaa.gov/api/gravd/gp?lat=-22....,2.291125e+20,979167.895
14312,14312,286551,2021-08-25 00:28:32,-27.4895,-69.2363,96.77,5.1,mww,,34.0,...,3.5,0.061,26.0,reviewed,us,us,390232,https://geodesy.noaa.gov/api/gravd/gp?lat=-27....,1.922657e+20,979168.79
11385,11385,219295,2019-06-21 08:40:38,-30.8528,-177.5218,10.0,5.1,mb,,81.0,...,1.9,0.106,29.0,reviewed,us,us,402399,https://geodesy.noaa.gov/api/gravd/gp?lat=-30....,1.808147e+20,978188.324
17133,17133,345840,2023-06-02 04:31:31,-23.7819,171.8772,10.0,5.1,mww,50.0,103.0,...,1.512,0.08,15.0,reviewed,us,us,378613,https://geodesy.noaa.gov/api/gravd/gp?lat=-23....,2.042474e+20,978873.604
10533,10533,202012,2018-10-16 02:10:13,44.2022,82.5805,25.12,5.2,mww,,18.0,...,3.8,0.127,6.0,reviewed,us,us,402338,https://geodesy.noaa.gov/api/gravd/gp?lat=44.2...,1.808695e+20,979288.456
13984,13984,281131,2021-06-30 15:07:54,-28.3361,-113.1267,10.0,5.3,mww,,46.0,...,1.8,0.098,10.0,reviewed,us,us,391437,https://geodesy.noaa.gov/api/gravd/gp?lat=-28....,1.910838e+20,979173.866
4159,4159,57811,2013-09-24 14:08:32,27.4939,65.6874,20.97,5.2,mb,,23.0,...,2.7,0.05,,reviewed,us,us,398027,https://geodesy.noaa.gov/api/gravd/gp?lat=27.4...,1.848087e+20,978757.857
15092,15092,302410,2022-03-22 18:10:50,23.4597,121.5099,11.79,5.1,mb,,96.0,...,4.2,0.074,66.0,reviewed,us,us,370203,https://geodesy.noaa.gov/api/gravd/gp?lat=23.4...,2.136327e+20,980304.139
7998,7998,139297,2016-10-17 06:14:58,-6.0033,148.8871,42.0,6.8,mww,,11.0,...,1.8,,,reviewed,us,us,357934,https://geodesy.noaa.gov/api/gravd/gp?lat=-6.0...,2.285292e+20,978012.076
