In [1]:
# Import the required libraries

In [1]:
import numpy as np
import pandas as pd
import os
import glob
import concurrent.futures
import requests
import cloudscraper
import time
from requests.exceptions import RequestException

### Earthquake dataset from USGS

In [2]:
# Data collection begins with downloading the dataset from the USGS. The USGS provides science for a changing world, which reflects and responds to society’s continuously evolving needs. As the science arm of the Department of the Interior, the USGS brings an array of earth, water, biological, and mapping data and expertise to bear in support of decision-making on environmental, resource, and public safety issues.
# url = 'https://earthquake.usgs.gov/earthquakes/search/#%7B%22currentfeatureid%22%3Anull%2C%22mapposition%22%3A%5B%5B-86.46756%2C-537.1875%5D%2C%5B86.44583%2C160.3125%5D%5D%2C%22autoUpdate%22%3A%5B%22autoUpdate%22%5D%2C%22feed%22%3A%22undefined_undefined%22%2C%22listFormat%22%3A%22default%22%2C%22restrictListToMap%22%3A%5B%5D%2C%22sort%22%3A%22newest%22%2C%22basemap%22%3A%22grayscale%22%2C%22overlays%22%3A%5B%22plates%22%5D%2C%22distanceUnit%22%3A%22km%22%2C%22timezone%22%3A%22local%22%2C%22viewModes%22%3A%5B%22settings%22%2C%22map%22%5D%2C%22event%22%3Anull%2C%22search%22%3Anull%7D'
# From this source I downloaded the data from 2011 up to the present date (January 2024).
# The website allows fewer than 20,000 entries per download, and there are more than 20,000 events in a year,
# so several CSV files have to be imported and concatenated.

In [3]:
# Function to read CSV files using pandas. where path is the location of the file in the local storage 

def read_csv(path):
    """Load a single CSV file into a pandas DataFrame.

    Args:
        path: Location of the CSV file (anything ``pd.read_csv`` accepts).

    Returns:
        The parsed DataFrame, read with pandas' default options.
    """
    frame = pd.read_csv(path)
    return frame



In [4]:
# Collect every CSV file in the download folder. glob returns paths in an
# arbitrary, OS-dependent order, so the list is sorted to keep runs reproducible.

csv_files = sorted(glob.glob('./dataset/first_data/*.csv'))
csv_files
# These are the CSV files that get concatenated into the working dataframe.

[]

In [6]:
# Read every CSV file and stack the resulting frames into one dataframe with a fresh 0..n-1 index

df_concat = pd.concat(map(read_csv, csv_files), ignore_index=True)

In [7]:
# Work on a copy so that the concatenated original (df_concat) stays untouched

df = df_concat.copy()
df.columns

Index(['Unnamed: 0', 'time', 'latitude', 'longitude', 'depth', 'mag',
       'magType', 'nst', 'gap', 'dmin', 'rms', 'net', 'id', 'updated', 'place',
       'type', 'horizontalError', 'depthError', 'magError', 'magNst', 'status',
       'locationSource', 'magSource', 'date', 'distance'],
      dtype='object')

In [8]:
# Sort the rows by the 'time' column, because concatenating the files leaves the entries
# out of chronological order; the index is renumbered 0..n-1 afterwards

df = df.sort_values('time', ignore_index=True)

In [9]:
# Keep only the first 19 characters of each timestamp string (the length of the
# 'YYYY-MM-DD HH:MM:SS' layout parsed later); anything after the seconds is discarded
df['time'] = df['time'].str[:19]

In [10]:
# Swap every literal 'T' in the timestamp string for a space
df['time'] = df['time'].str.replace('T', ' ', regex=False)

In [11]:
df.shape

(363898, 25)

In [12]:
# Parse the cleaned 'time' strings into datetime64 values; the Earth–Moon distance
# lookup further down reads the year/month/day/hour/minute/second of each timestamp
df['time'] = pd.to_datetime(df['time'], format = '%Y-%m-%d %H:%M:%S')

In [13]:
df['time'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 363898 entries, 0 to 363897
Series name: time
Non-Null Count   Dtype         
--------------   -----         
363898 non-null  datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 2.8 MB


### filtering the dataset based on requirement for the research 

In [14]:
# The catalogue records several types of seismic events; this study only uses
# earthquakes, so every row of another type is removed
df = df.loc[df['type'] == 'earthquake']
df.shape


(360471, 25)

In [15]:
# Order the filtered rows chronologically (the existing index labels are kept)
df = df.sort_values(by='time')

In [16]:
# Keep only events with magnitude strictly greater than 5; rows with a missing
# magnitude are dropped as well, because NaN > 5 evaluates to False
df = df[df['mag'] > 5]

In [17]:
# Remove the columns that are not needed for the training and testing dataset.
drop_list = ['Unnamed: 0', 'distance']
df = df.drop(columns=drop_list)

### Computing the Earth–Moon distance (Skyfield ephemeris)

In [18]:
from skyfield.api import Loader, Topos
from datetime import datetime

# Building the loader, ephemeris and timescale is expensive and their content
# never changes between calls, so they are created on first use and then reused.
_EARTH_MOON_CACHE = {}


def get_distance_earth_moon(date_time):
    """Return the Earth–Moon distance in kilometres at the given moment.

    Args:
        date_time: Object exposing ``year``, ``month``, ``day``, ``hour``,
            ``minute`` and ``second`` attributes (e.g. ``datetime`` or a
            pandas ``Timestamp``). The fields are passed to ``ts.utc``, so
            they are interpreted as UTC.

    Returns:
        Distance of the Moon relative to the Earth, in km, as reported by
        Skyfield for the ``de421.bsp`` ephemeris.
    """
    if not _EARTH_MOON_CACHE:
        # Load the ephemeris data (stored under ~/skyfield-data)
        load = Loader('~/skyfield-data')
        planets = load('de421.bsp')
        _EARTH_MOON_CACHE['timescale'] = load.timescale()
        # Position of the Moon relative to Earth as a reusable vector function
        _EARTH_MOON_CACHE['earth_to_moon'] = planets['moon'] - planets['earth']

    ts = _EARTH_MOON_CACHE['timescale']
    t = ts.utc(date_time.year, date_time.month, date_time.day,
               date_time.hour, date_time.minute, date_time.second)
    astrometric = _EARTH_MOON_CACHE['earth_to_moon'].at(t)

    # Get the distance between Earth and Moon
    return astrometric.distance().km



# date_time = datetime(2024, 5, 27, 12, 0, 0)  # Date and time you want to calculate the distance for

# Evaluate the Earth–Moon distance once per row, using that row's event timestamp
df['distance'] = df['time'].apply(get_distance_earth_moon)

In [19]:
# Store the distance as whole kilometres (the fractional part is truncated).
# An explicit 64-bit dtype is used because plain `int` maps to a 32-bit integer
# on some platforms, where squaring the distance in metres would overflow.
df['distance'] = df['distance'].astype('int64')

In [22]:
df['distance']

(18059,)

### webscraping gravity 

In [25]:
def get_url(lat, lon):
    """Build the geodesy.noaa.gov gravity API URL for one coordinate pair.

    Args:
        lat: Latitude, inserted verbatim as the ``lat`` query parameter.
        lon: Longitude, inserted verbatim as the ``lon`` query parameter.

    Returns:
        The request URL as a string. The ``eht`` query parameter is always
        ``100.0`` (presumably the ellipsoid height — confirm against the API docs).
    """
    return f'https://geodesy.noaa.gov/api/gravd/gp?lat={lat}&lon={lon}&eht=100.0'
get_url(df['latitude'].iloc[0], df['longitude'].iloc[0])

'https://geodesy.noaa.gov/api/gravd/gp?lat=-26.803&lon=-63.136&eht=100.0'

In [26]:
# Build one request URL per row from its latitude/longitude pair. A plain
# comprehension calls get_url exactly once per row and also works when the
# frame is empty.
df['url'] = [get_url(lat, lon) for lat, lon in zip(df['latitude'], df['longitude'])]

'https://geodesy.noaa.gov/api/gravd/gp?lat=-26.803&lon=-63.136&eht=100.0'

In [31]:


# Fetch gravity data from many URLs concurrently using multithreading. get_gravity retrieves the
# gravity value for a single URL, and fetch_gravity_concurrently runs those requests in a thread pool.

def get_gravity(url, scraper=None, max_retries=3, retry_delay=180, timeout=30):
    """Fetch the predicted gravity value for one API URL, retrying on failure.

    Args:
        url: Request URL (as produced by ``get_url``).
        scraper: Optional object with a requests-style ``get(url, timeout=...)``
            method. When omitted, a new cloudscraper session is created.
        max_retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to wait after a non-200 response before the next
            attempt (no wait after the final attempt).
        timeout: Per-request timeout in seconds, so a stalled connection cannot
            block a worker forever.

    Returns:
        The ``predictedGravity`` field of the JSON response, or the string
        ``'none'`` when the field is missing or every attempt failed.
    """
    if scraper is None:
        scraper = cloudscraper.create_scraper()

    for attempt in range(max_retries):
        try:
            response = scraper.get(url, timeout=timeout)
            if response.status_code == 200:
                data = response.json()  # Parse the JSON response
                return data.get('predictedGravity', 'none')

            print(f"Request failed with status code: {response.status_code}")
            # Back off and retry; the original returned here, so a non-200
            # response was never actually retried.
            if attempt < max_retries - 1:
                time.sleep(retry_delay)
        except (RequestException, ValueError) as e:
            # ValueError covers a 200 response whose body is not valid JSON.
            print(f"Attempt {attempt + 1} for URL {url} failed: {e}")

    # Every attempt failed.
    return 'none'
            
# Function to apply multithreading for concurrent requests
def fetch_gravity_concurrently(urls, fetch=None, max_workers=None):
    """Fetch gravity values for many URLs in a thread pool, preserving input order.

    Args:
        urls: Iterable of request URLs.
        fetch: Callable taking one URL and returning its gravity value.
            Defaults to ``get_gravity``.
        max_workers: Thread-pool size; ``None`` uses the executor's default.

    Returns:
        A list with one entry per input URL, in the SAME order as ``urls``, so
        it can be assigned straight to a dataframe column. An entry is the
        string ``'none'`` when its request raised an exception.
    """
    if fetch is None:
        fetch = get_gravity

    urls = list(urls)
    # Pre-filled with the failure sentinel; each slot is overwritten on success.
    results = ['none'] * len(urls)
    completed = 0

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_index = {
            executor.submit(fetch, url): index for index, url in enumerate(urls)
        }
        # as_completed yields futures in completion order, not submission
        # order, so each result is written to the slot of the URL it belongs to.
        for future in concurrent.futures.as_completed(future_to_index):
            index = future_to_index[future]
            completed += 1
            try:
                result = future.result()
                results[index] = result
                print(completed, result)
            except Exception as exc:
                print(f'{urls[index]} generated an exception: {exc}')
    return results

# Main execution
# In a notebook kernel __name__ is "__main__", so this guard always runs.
if __name__ == "__main__":
    urls = df['url'].tolist()
    # The returned list is assigned positionally, so it must be in the same order as `urls`.
    # NOTE(review): verify that fetch_gravity_concurrently preserves input order.
    df['gravity'] = fetch_gravity_concurrently(urls)

1 981862.266
2 979339.751
3 980603.909
4 979062.489
5 979405.04
6 981677.599
7 979064.936
8 978078.853
9 980085.219
10 978162.952
11 979172.645
12 980245.793
13 978851.349
14 978462.133
15 978043.595
16 981243.317
17 978094.21
18 978595.9
19 978479.673
20 980095.9
21 978610.845
22 978017.946
23 978762.68
24 981523.589
25 979037.905
26 978622.03
27 979827.172
28 978575.018
29 978562.696
30 979021.346
31 977948.697
32 978568.14
33 979290.925
34 980272.237
35 978459.382
36 979719.195
37 981318.328
38 979799.109
39 978453.848
40 978745.619
41 978523.168
42 979794.981
43 978533.626
44 978648.455
45 978366.604
46 978172.346
47 978009.528
48 979008.154
49 978194.384
50 978459.17
51 978514.483
52 978553.212
53 978521.884
54 978668.117
55 979850.134
56 981664.438
57 978536.671
58 978477.955
59 978543.353
60 978522.989
61 978446.296
62 978512.679
63 978397.595
64 978449.63
65 979110.276
66 978785.632
67 978522.573
68 978543.214
69 978176.072
70 978483.097
71 978566.901
72 978130.515
73 978449.96

559 979779.197
560 979739.878
561 980174.978
562 979818.675
563 980141.277
564 980018.414
565 979807.305
566 980048.731
567 979956.699
568 980161.795
569 979708.027
570 978108.356
571 979728.272
572 979808.791
573 979839.64
574 979903.227
575 979722.838
576 979678.296
577 979745.968
578 979888.218
579 980019.185
580 980164.442
581 979995.731
582 978900.399
583 980077.813
584 979773.71
585 980194.113
586 978903.742
587 979755.976
588 979673.577
589 979834.807
590 979874.401
591 980187.206
592 979964.706
593 980151.711
594 978008.26
595 979829.5
596 980152.557
597 980178.552
598 980058.556
599 980028.362
600 978907.752
601 979725.113
602 979966.25
603 980011.724
604 979707.32
605 979705.979
606 979801.196
607 979992.807
608 979679.85
609 979803.357
610 980074.226
611 979742.788
612 979751.948
613 979702.8
614 979836.435
615 979671.99
616 979805.966
617 980061.51
618 980117.228
619 979960.751
620 979855.979
621 979700.924
622 980178.066
623 980044.446
624 980003.351
625 979883.862
626 979

1104 978325.934
1105 980494.18
1106 978123.96
1107 977984.88
1108 978567.159
1109 981011.43
1110 980237.696
1111 978034.182
1112 978427.311
1113 978601.229
1114 978068.948
1115 981672.633
1116 978380.793
1117 978389.101
1118 978386.611
1119 980534.25
1120 978309.732
1121 981567.39
1122 978424.044
1123 979364.101
1124 978023.457
1125 978075.275
1126 979914.58
1127 980117.346
1128 979049.219
1129 980489.367
1130 977883.229
1131 978605.809
1132 981730.162
1133 978053.919
1134 979196.881
1135 979837.401
1136 980193.034
1137 980494.485
1138 981397.919
1139 981983.269
1140 978065.324
1141 978176.358
1142 978255.437
1143 979578.39
1144 982197.922
1145 978165.742
1146 978384.71
1147 981799.32
1148 978173.873
1149 978838.59
1150 979885.582
1151 977906.834
1152 982170.525
1153 980700.196
1154 978175.697
1155 978059.498
1156 981713.356
1157 980307.11
1158 978446.468
1159 978701.808
1160 978015.212
1161 981018.902
1162 980038.902
1163 978060.316
1164 978290.53
1165 981556.953
1166 978636.118
1167 

1622 978012.076
1623 980207.619
1624 980850.469
1625 980003.889
1626 978370.564
1627 978098.659
1628 978392.167
1629 978414.202
1630 980060.886
1631 980247.929
1632 977917.827
1633 978132.316
1634 978758.32
1635 979969.986
1636 978228.839
1637 978138.107
1638 978483.21
1639 978514.053
1640 979833.19
1641 978511.594
1642 978114.885
1643 978247.366
1644 980001.57
1645 978521.839
1646 978522.038
1647 978808.574
1648 978670.486
1649 982178.557
1650 977904.387
1651 978718.928
1652 980932.082
1653 981226.238
1654 979552.817
1655 978701.917
1656 978500.248
1657 978865.578
1658 978117.654
1659 978245.112
1660 980003.644
1661 980082.868
1662 979991.946
1663 978057.993
1664 981052.514
1665 978228.889
1666 979956.14
1667 979095.354
1668 980027.43
1669 978110.952
1670 981747.755
1671 981750.971
1672 981780.727
1673 981061.327
1674 978014.187
1675 977907.768
1676 978286.797
1677 978315.0
1678 981055.202
1679 979623.072
1680 978452.015
1681 978840.62
1682 978955.053
1683 977974.613
1684 978417.745
1

2139 978947.343
2140 978235.784
2141 978406.761
2142 978477.115
2143 980911.818
2144 978621.401
2145 978263.0
2146 977897.001
2147 980130.053
2148 978007.933
2149 978283.988
2150 978272.617
2151 978317.195
2152 978241.253
2153 978686.569
2154 978203.085
2155 978203.484
2156 978156.474
2157 982922.058
2158 978193.662
2159 978182.798
2160 979261.729
2161 978239.76
2162 980276.099
2163 978215.104
2164 981485.933
2165 978021.455
2166 979923.56
2167 978004.538
2168 978063.194
2169 977989.187
2170 981703.064
2171 980417.504
2172 979513.369
2173 978015.792
2174 981605.975
2175 978156.661
2176 978749.779
2177 978538.094
2178 979932.729
2179 978103.853
2180 979595.453
2181 979912.602
2182 980185.287
2183 981018.832
2184 978206.206
2185 979806.925
2186 978609.651
2187 979813.485
2188 978068.512
2189 979750.089
2190 978322.675
2191 980214.914
2192 978157.499
2193 977826.138
2194 978644.244
2195 978331.287
2196 979476.077
2197 978101.691
2198 980010.216
2199 980332.216
2200 980460.321
2201 978343.

2656 977994.967
2657 979269.156
2658 979618.182
2659 981764.338
2660 977847.195
2661 981516.967
2662 978538.698
2663 979410.66
2664 980651.512
2665 980632.532
2666 980095.851
2667 978307.445
2668 977957.51
2669 977972.217
2670 978162.38
2671 980493.53
2672 978459.917
2673 977991.014
2674 979745.39
2675 978894.354
2676 978007.429
2677 978376.529
2678 980491.366
2679 979091.12
2680 979102.712
2681 980515.023
2682 978479.716
2683 978318.101
2684 981692.393
2685 978839.859
2686 980684.023
2687 978594.145
2688 978955.103
2689 978108.245
2690 978017.824
2691 980511.898
2692 979536.47
2693 980920.275
2694 978599.438
2695 981551.991
2696 978569.089
2697 980663.377
2698 980180.368
2699 979905.203
2700 978714.352
2701 978099.478
2702 980158.931
2703 980103.132
2704 981674.946
2705 978093.062
2706 978079.006
2707 978699.566
2708 981111.306
2709 978712.981
2710 981153.538
2711 981021.554
2712 978873.044
2713 977950.933
2714 978099.309
2715 978890.965
2716 977991.025
2717 978434.864
2718 980027.601

3171 981019.804
3172 978347.973
3173 978896.552
3174 978732.13
3175 978150.727
3176 978708.525
3177 978615.877
3178 981459.221
3179 979623.982
3180 978063.811
3181 981420.272
3182 978020.288
3183 979832.24
3184 979827.689
3185 981761.205
3186 979833.635
3187 978803.975
3188 981842.925
3189 978365.662
3190 978129.802
3191 978347.798
3192 979835.6
3193 979842.171
3194 979797.485
3195 980041.731
3196 979870.231
3197 978267.242
3198 978135.995
3199 978480.725
3200 978157.507
3201 978358.3
3202 978142.357
3203 977906.641
3204 979807.729
3205 978137.503
3206 979008.61
3207 979824.341
3208 978314.388
3209 978455.082
3210 979487.481
3211 977904.489
3212 979999.289
3213 977829.646
3214 978097.185
3215 978463.115
3216 981408.988
3217 979803.044
3218 978333.653
3219 978405.923
3220 978259.41
3221 979363.329
3222 978644.706
3223 983019.985
3224 977839.899
3225 978261.545
3226 978192.501
3227 978025.771
3228 981162.061
3229 979799.087
3230 978029.824
3231 978318.583
3232 980017.714
3233 978207.654


3689 977876.996
3690 978023.604
3691 978288.247
3692 978216.985
3693 978193.42
3694 979154.695
3695 979133.703
3696 980032.739
3697 978881.674
3698 978097.734
3699 978004.417
3700 979697.299
3701 978228.696
3702 980166.504
3703 979114.765
3704 978930.975
3705 978940.81
3706 978221.807
3707 979188.251
3708 978939.982
3709 979229.71
3710 978087.78
3711 980890.286
3712 981403.523
3713 981027.057
3714 981040.676
3715 979633.682
3716 978134.884
3717 978431.086
3718 980960.615
3719 979173.514
3720 981006.27
3721 978520.789
3722 979180.386
3723 979185.759
3724 981125.911
3725 981019.989
3726 978257.451
3727 979452.503
3728 978096.442
3729 979579.169
3730 979380.287
3731 978513.122
3732 979092.327
3733 981337.892
3734 981168.75
3735 979818.763
3736 978692.944
3737 978677.437
3738 979257.389
3739 978562.933
3740 979067.62
3741 979539.925
3742 978440.87
3743 979137.31
3744 979486.731
3745 977999.287
3746 978048.992
3747 978052.903
3748 979909.076
3749 978367.988
3750 982285.262
3751 978644.483
3

In [32]:
# Quick check on the first ten URLs only; the returned list is not stored anywhere
url = df['url'].iloc[0:10]
fetch_gravity_concurrently(url)


In [None]:
df

### calculating force between earth and moon

In [None]:
# Mean gravity (in mGal) of the areas where earthquakes occur most often.
# NOTE(review): presumably the mean of the fetched gravity values — confirm how it was derived; it is not used in the cells visible here.
# Background: the gravity anomaly at a location on the Earth's surface is the difference between the observed value of gravity and the value predicted by a theoretical model.
gravity = 979691.0243503251

# G is the universal gravitational constant, 6.67 x 10^-11 N.m^2/kg^2, written here as 6.67e-11.
G = 6.67e-11
# Mass of the Moon in kg
moon_mass =  7.35e22

# Mass of the Earth in kg
earth_mass = 5.9722e24



In [None]:
# Gravitational force between the Earth and the Moon at the time of each earthquake:
#     F = G * earth_mass * moon_mass / r**2, with r in metres.
# 'distance' holds integer kilometres, so it is converted to float before squaring;
# otherwise the squared metre value can overflow a 32-bit integer column.
force = (G * earth_mass * moon_mass) / (df['distance'].astype('float64') * 1000) ** 2

# Assign the calculated force to the 'force' column using .loc
df.loc[:, 'force'] = force

In [None]:
# df.to_csv('final_dataset.csv')