In [1]:
import pandas as pd
import os

In [2]:
data_url = 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.csv'

In [3]:
df = pd.read_csv(data_url)
print("Original dataset shape:", df.shape)

Original dataset shape: (9605, 22)


In [4]:
display(df.head())

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,2025-03-26T04:09:38.404Z,59.8446,-152.2901,72.3,1.4,ml,,,,0.39,...,2025-03-26T04:11:25.482Z,"26 km WNW of Anchor Point, Alaska",earthquake,,1.2,,,automatic,ak,ak
1,2025-03-26T04:08:31.640Z,38.844166,-122.824165,1.79,0.74,md,11.0,124.0,0.01045,0.01,...,2025-03-26T04:10:09.922Z,"9 km WNW of Cobb, CA",earthquake,0.29,0.86,0.07,15.0,automatic,nc,nc
2,2025-03-26T04:06:10.400Z,38.797668,-122.782333,4.0,0.28,md,11.0,87.0,0.01444,0.03,...,2025-03-26T04:07:45.910Z,"3 km NNW of The Geysers, CA",earthquake,0.5,1.42,0.05,11.0,automatic,nc,nc
3,2025-03-26T04:05:53.320Z,38.838833,-122.808334,2.0,0.66,md,8.0,85.0,0.01406,0.01,...,2025-03-26T04:07:30.919Z,"8 km WNW of Cobb, CA",earthquake,0.51,1.54,0.31,8.0,automatic,nc,nc
4,2025-03-26T03:48:49.152Z,-10.0118,160.5868,10.0,5.0,mb,51.0,74.0,0.85,0.8,...,2025-03-26T04:16:49.040Z,"94 km SE of Honiara, Solomon Islands",earthquake,4.5,1.857,0.057,98.0,reviewed,us,us


In [5]:
if 'time' in df.columns:
    df['time'] = pd.to_datetime(df['time'])
    print("Converted 'time' column to datetime.")

Converted 'time' column to datetime.


In [6]:
before_drop = df.shape[0]
df.dropna(inplace=True)
after_drop = df.shape[0]
print(f"Removed {before_drop - after_drop} rows with missing values.")
print("Dataset shape after dropping missing values:", df.shape)

Removed 2220 rows with missing values.
Dataset shape after dropping missing values: (7385, 22)


In [7]:
df.rename(columns={'mag': 'magnitude'}, inplace=True)

In [8]:
schema = {
    "id": str,
    "time": str,
    "latitude": float,
    "longitude": float,
    "depth": float,
    "magnitude": float,
    "place": str,
    "type": str,
    "status": str,
    "tsunami": float,
    "sig": float,
    "net": str,
    "code": str,
    "nst": float,
    "dmin": float,
    "rms": float,
    "gap": float,
    "magType": str,
    "alert": str,
    "horizontalError": float,
    "depthError": float,
    "magError": float,
    "magNst": float,
    "updated": str,
    "locationSource": str,
    "magSource": str
}


In [9]:
for col, dtype in schema.items():
    if col not in df.columns:
        # For numeric types, set default to 0; for strings, set default to empty string.
        default_value = 0 if dtype in [float, int] else ""
        df[col] = default_value
        print(f"Added missing column '{col}' with default value {default_value}.")

Added missing column 'tsunami' with default value 0.
Added missing column 'sig' with default value 0.
Added missing column 'code' with default value .
Added missing column 'alert' with default value .


In [10]:
ordered_cols = list(schema.keys())
df = df[ordered_cols]
print("Columns reordered based on the provided schema.")

Columns reordered based on the provided schema.


In [11]:
output_dir = "public/earthquakes"
os.makedirs(output_dir, exist_ok=True)
print(f"Directory '{output_dir}' is ready.")

Directory 'public/earthquakes' is ready.


In [12]:
processed_csv_path = os.path.join(output_dir, "processed_all_month.csv")
df.to_csv(processed_csv_path, index=False)
print(f"Processed data saved to {processed_csv_path}")

Processed data saved to public/earthquakes/processed_all_month.csv


In [13]:
data_json = df.to_json(orient='records')
print("Preview of JSON output (first 500 characters):")
print(data_json[:500])

Preview of JSON output (first 500 characters):
[{"id":"nc75155422","time":1742962111640,"latitude":38.844165802,"longitude":-122.8241653442,"depth":1.7899999619,"magnitude":0.74,"place":"9 km WNW of Cobb, CA","type":"earthquake","status":"automatic","tsunami":0,"sig":0,"net":"nc","code":"","nst":11.0,"dmin":0.01045,"rms":0.01,"gap":124.0,"magType":"md","alert":"","horizontalError":0.29,"depthError":0.860000014,"magError":0.07,"magNst":15.0,"updated":"2025-03-26T04:10:09.922Z","locationSource":"nc","magSource":"nc"},{"id":"nc75155417","time":
