# **Data Retrieval**

In [1]:
#| label: cell-dataretrival
# do any necessary imports
# Necessary imports
import json
from datetime import datetime, timedelta, timezone
import requests
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import xmltodict
import schedule
import time

## **Getting data from API**

In [2]:
#| label: cell-dataretrival1
#get data of Earthquakes from API
def fetch_data(url="https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson", timeout=30):
    """Download the earthquake feed and return the parsed JSON response.

    Parameters
    ----------
    url : str, optional
        Endpoint to query. Defaults to the USGS event query in GeoJSON format.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with any status code other than 200.
    """
    response = requests.get(url, timeout=timeout)

    # Fail loudly with a meaningful error: previously a non-200 response fell
    # through to `return data` with `data` unbound (UnboundLocalError).
    if response.status_code != 200:
        raise requests.HTTPError(
            f"Failed to fetch data from API. Status code: {response.status_code}",
            response=response,
        )

    # Convert JSON response to a dictionary
    return response.json()


In [3]:
#| label: cell-dataretrival2
# Fetch the feed once up front; the parsed response is reused by the cells below.
data = fetch_data()

In [4]:
#| label: cell-dataretrival3
# Peek at the first feature to see the structure of a single event record.
data["features"][0]

{'type': 'Feature',
 'properties': {'mag': 0.98,
  'place': '4 km WNW of The Geysers, CA',
  'time': 1714586936200,
  'updated': 1714587033403,
  'tz': None,
  'url': 'https://earthquake.usgs.gov/earthquakes/eventpage/nc74043861',
  'detail': 'https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=nc74043861&format=geojson',
  'felt': None,
  'cdi': None,
  'mmi': None,
  'alert': None,
  'status': 'automatic',
  'tsunami': 0,
  'sig': 15,
  'net': 'nc',
  'code': '74043861',
  'ids': ',nc74043861,',
  'sources': ',nc,',
  'types': ',nearby-cities,origin,phase-data,',
  'nst': 7,
  'dmin': 0.006471,
  'rms': 0.03,
  'gap': 92,
  'magType': 'md',
  'type': 'earthquake',
  'title': 'M 1.0 - 4 km WNW of The Geysers, CA'},
 'geometry': {'type': 'Point',
  'coordinates': [-122.7973328, 38.7929993, 0.09]},
 'id': 'nc74043861'}

In [5]:
#| label: cell-dataretrival4
# Extract desired information
# Pull the summary fields out of the response's top-level 'metadata' object.
generated, url, title, count = [
    data['metadata'][field]
    for field in ('generated', 'url', 'title', 'count')
]

# Show each field on its own line, prefixed with its label.
print(
    f"Generated: {generated}",
    f"URL: {url}",
    f"Title: {title}",
    f"Count: {count}",
    sep="\n",
)

Generated: 1714587391000
URL: https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson
Title: USGS Earthquakes
Count: 10550


## **Automate data retrieval**

In [6]:
#| label: cell-dataretrival5
# Schedule the fetch to run at 23:58 every day.
# NOTE: no timezone is passed to `at`, so "23:58" is interpreted in the
# machine's local time; this is 11:58 PM UTC only if the host clock runs on UTC.
DAILY_FETCH_TAG = "daily-earthquake-fetch"

# Remove any job left over from a previous run of this cell, so re-running the
# notebook does not register the same fetch several times.
schedule.clear(DAILY_FETCH_TAG)
schedule.every().day.at("23:58").do(fetch_data).tag(DAILY_FETCH_TAG)

# run_pending() only executes jobs that are already due when it is called, so
# this single call is a no-op unless the cell happens to run at the scheduled
# time. It has to be invoked periodically for the job to fire.
# NOTE(review): the value returned by the scheduled fetch_data call is not
# assigned to `data`, so a scheduled run does not refresh the notebook state.
schedule.run_pending()

# **Raw data to Pandas DataFrame**

In [7]:
#| label: cell-datatopandas
# Flatten the list of GeoJSON features into a table; nested keys become
# dot-separated column names (e.g. 'properties.mag', 'geometry.coordinates').
df = pd.json_normalize(data['features'])
df

Unnamed: 0,type,id,properties.mag,properties.place,properties.time,properties.updated,properties.tz,properties.url,properties.detail,properties.felt,...,properties.types,properties.nst,properties.dmin,properties.rms,properties.gap,properties.magType,properties.type,properties.title,geometry.type,geometry.coordinates
0,Feature,nc74043861,0.98,"4 km WNW of The Geysers, CA",1714586936200,1714587033403,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",nearby-cities,origin,phase-data,",7.0,0.006471,0.03,92.0,md,earthquake,"M 1.0 - 4 km WNW of The Geysers, CA",Point,"[-122.7973328, 38.7929993, 0.09]"
1,Feature,ci40735128,1.26,"11 km S of Bodfish, CA",1714586911070,1714587280078,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",nearby-cities,origin,phase-data,scitech-link,",28.0,0.177400,0.13,53.0,ml,earthquake,"M 1.3 - 11 km S of Bodfish, CA",Point,"[-118.5136667, 35.488, 2.9]"
2,Feature,ak0245m2b85x,1.40,"8 km ESE of Point Possession, Alaska",1714586705812,1714586833908,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",origin,phase-data,",,,0.58,,ml,earthquake,"M 1.4 - 8 km ESE of Point Possession, Alaska",Point,"[-150.5416, 60.8988, 32]"
3,Feature,us6000mvhq,3.00,"7 km SSW of Mansfield, Texas",1714586603736,1714587304796,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,8.0,...,",dyfi,origin,phase-data,",28.0,0.566000,0.51,65.0,mb_lg,earthquake,"M 3.0 - 7 km SSW of Mansfield, Texas",Point,"[-97.1622, 32.5016, 5]"
4,Feature,ak0245m2a8y3,1.10,"16 km ESE of Clam Gulch, Alaska",1714586449391,1714586571738,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",origin,phase-data,",,,0.52,,ml,earthquake,"M 1.1 - 16 km ESE of Clam Gulch, Alaska",Point,"[-151.1163, 60.1808, 63]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10545,Feature,us7000mat5,4.20,"83 km SSE of Pondaguitan, Philippines",1711995638934,1712938793040,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",origin,phase-data,",26.0,1.663000,0.25,130.0,mb,earthquake,"M 4.2 - 83 km SSE of Pondaguitan, Philippines",Point,"[126.5299, 5.6918, 55.452]"
10546,Feature,ak02448g9w4t,1.70,"26 km WNW of Tyonek, Alaska",1711995581884,1714182017226,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",origin,phase-data,",,,0.48,,ml,earthquake,"M 1.7 - 26 km WNW of Tyonek, Alaska",Point,"[-151.5928, 61.1476, 78.2]"
10547,Feature,nc74027466,0.75,"7 km WNW of The Geysers, CA",1711995448620,1711995542195,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",nearby-cities,origin,phase-data,",8.0,0.001303,0.01,92.0,md,earthquake,"M 0.8 - 7 km WNW of The Geysers, CA",Point,"[-122.8290024, 38.8101654, 0.92]"
10548,Feature,ok2024glqz,1.09,"3 km SSW of Mill Creek, Oklahoma",1711995411770,1712062443207,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",origin,phase-data,",32.0,0.256450,0.37,139.0,ml,quarry blast,"M 1.1 Quarry Blast - 3 km SSW of Mill Creek, O...",Point,"[-96.84466667, 34.379, 0]"


# **Data enrichment**

In [8]:
#| label: cell-dataenrichment
# Work on a copy so the enrichment steps below leave the raw frame `df` untouched.
df_new = df.copy()
df_new.head()

Unnamed: 0,type,id,properties.mag,properties.place,properties.time,properties.updated,properties.tz,properties.url,properties.detail,properties.felt,...,properties.types,properties.nst,properties.dmin,properties.rms,properties.gap,properties.magType,properties.type,properties.title,geometry.type,geometry.coordinates
0,Feature,nc74043861,0.98,"4 km WNW of The Geysers, CA",1714586936200,1714587033403,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",nearby-cities,origin,phase-data,",7.0,0.006471,0.03,92.0,md,earthquake,"M 1.0 - 4 km WNW of The Geysers, CA",Point,"[-122.7973328, 38.7929993, 0.09]"
1,Feature,ci40735128,1.26,"11 km S of Bodfish, CA",1714586911070,1714587280078,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",nearby-cities,origin,phase-data,scitech-link,",28.0,0.1774,0.13,53.0,ml,earthquake,"M 1.3 - 11 km S of Bodfish, CA",Point,"[-118.5136667, 35.488, 2.9]"
2,Feature,ak0245m2b85x,1.4,"8 km ESE of Point Possession, Alaska",1714586705812,1714586833908,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",origin,phase-data,",,,0.58,,ml,earthquake,"M 1.4 - 8 km ESE of Point Possession, Alaska",Point,"[-150.5416, 60.8988, 32]"
3,Feature,us6000mvhq,3.0,"7 km SSW of Mansfield, Texas",1714586603736,1714587304796,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,8.0,...,",dyfi,origin,phase-data,",28.0,0.566,0.51,65.0,mb_lg,earthquake,"M 3.0 - 7 km SSW of Mansfield, Texas",Point,"[-97.1622, 32.5016, 5]"
4,Feature,ak0245m2a8y3,1.1,"16 km ESE of Clam Gulch, Alaska",1714586449391,1714586571738,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",origin,phase-data,",,,0.52,,ml,earthquake,"M 1.1 - 16 km ESE of Clam Gulch, Alaska",Point,"[-151.1163, 60.1808, 63]"


In [9]:
#| label: cell-dataenrichment1
# Convert the event time from integer epoch milliseconds to pandas datetimes.
df_new['properties.time'] = pd.to_datetime(df_new['properties.time'], unit='ms')

In [10]:
#| label: cell-dataenrichment2
# Convert the last-updated time from integer epoch milliseconds to pandas datetimes.
df_new['properties.updated'] = pd.to_datetime(df_new['properties.updated'], unit='ms')

In [11]:
#| label: cell-dataenrichment3
# Confirm that both time columns now display as timestamps.
df_new.head()

Unnamed: 0,type,id,properties.mag,properties.place,properties.time,properties.updated,properties.tz,properties.url,properties.detail,properties.felt,...,properties.types,properties.nst,properties.dmin,properties.rms,properties.gap,properties.magType,properties.type,properties.title,geometry.type,geometry.coordinates
0,Feature,nc74043861,0.98,"4 km WNW of The Geysers, CA",2024-05-01 18:08:56.200,2024-05-01 18:10:33.403,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",nearby-cities,origin,phase-data,",7.0,0.006471,0.03,92.0,md,earthquake,"M 1.0 - 4 km WNW of The Geysers, CA",Point,"[-122.7973328, 38.7929993, 0.09]"
1,Feature,ci40735128,1.26,"11 km S of Bodfish, CA",2024-05-01 18:08:31.070,2024-05-01 18:14:40.078,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",nearby-cities,origin,phase-data,scitech-link,",28.0,0.1774,0.13,53.0,ml,earthquake,"M 1.3 - 11 km S of Bodfish, CA",Point,"[-118.5136667, 35.488, 2.9]"
2,Feature,ak0245m2b85x,1.4,"8 km ESE of Point Possession, Alaska",2024-05-01 18:05:05.812,2024-05-01 18:07:13.908,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",origin,phase-data,",,,0.58,,ml,earthquake,"M 1.4 - 8 km ESE of Point Possession, Alaska",Point,"[-150.5416, 60.8988, 32]"
3,Feature,us6000mvhq,3.0,"7 km SSW of Mansfield, Texas",2024-05-01 18:03:23.736,2024-05-01 18:15:04.796,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,8.0,...,",dyfi,origin,phase-data,",28.0,0.566,0.51,65.0,mb_lg,earthquake,"M 3.0 - 7 km SSW of Mansfield, Texas",Point,"[-97.1622, 32.5016, 5]"
4,Feature,ak0245m2a8y3,1.1,"16 km ESE of Clam Gulch, Alaska",2024-05-01 18:00:49.391,2024-05-01 18:02:51.738,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,...,",origin,phase-data,",,,0.52,,ml,earthquake,"M 1.1 - 16 km ESE of Clam Gulch, Alaska",Point,"[-151.1163, 60.1808, 63]"


### Extract longitude, latitude, and date from the DataFrame

In [12]:
#| label: cell-dataenrichment4
# Split 'geometry.coordinates' into separate columns, one per list element.
# The helper frame reuses df_new's index: column assignment aligns on index
# labels, so a default 0..n-1 index would misplace values if df_new were ever
# filtered or re-indexed before this cell.
# NOTE(review): the USGS GeoJSON format describes the third coordinate as
# depth in km rather than altitude; the column name 'altitude' is kept so
# downstream code keeps working - confirm and rename if appropriate.
df_new[['longitude', 'latitude', 'altitude']] = pd.DataFrame(
    df_new['geometry.coordinates'].tolist(),
    index=df_new.index,
)

# Extract the calendar date and the time of day from 'properties.time'.
# The datetime conversion is done once and shared by both columns.
event_times = pd.to_datetime(df_new['properties.time'])
df_new['date'] = event_times.dt.date
df_new['time'] = event_times.dt.time


In [13]:
#| label: cell-dataenrichment5
# Build a list of [longitude, latitude, date] rows from df_new.
data_list = df_new[['longitude', 'latitude', 'date']].values.tolist()

In [14]:
#| label: cell-dataenrichment6
# Spot-check one entry of the list (index 10).
data_list[10]

[-122.8193359, 38.8154984, datetime.date(2024, 5, 1)]