In [110]:
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
import pandas as pd
from shapely.geometry import Point, Polygon

In [111]:
# Location of the raw 2023 global earthquake catalogue (CSV), relative to the
# notebook's working directory.
path = 'earthquakes_2023_global.csv'

# Read the whole catalogue into a DataFrame and preview the first two rows.
df = pd.read_csv(filepath_or_buffer=path)
df.head(n=2)

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,2023-01-01T00:49:25.294Z,52.0999,178.5218,82.77,3.1,ml,14.0,139.0,0.87,0.18,...,2023-03-11T22:51:52.040Z,"Rat Islands, Aleutian Islands, Alaska",earthquake,8.46,21.213,0.097,14.0,reviewed,us,us
1,2023-01-01T01:41:43.755Z,7.1397,126.738,79.194,4.5,mb,32.0,104.0,1.152,0.47,...,2023-03-11T22:51:45.040Z,"23 km ESE of Manay, Philippines",earthquake,5.51,7.445,0.083,43.0,reviewed,us,us


- 1. Convert the coordinate columns from object to float
- 2. Create a geometry list from the coordinates
- 3. Check whether any coordinate is missing (if so, drop the row)
- 4. Create a GeoDataFrame and save it as GeoJSON

In [112]:
# Steps 1 and 3: verify that the coordinate columns are already floats and
# contain no missing values. When both hold, no dtype conversion and no row
# dropping are needed before building the geometries.
coord_cols = ["latitude", "longitude"]

# Fail loudly instead of relying on reading the df.info() printout by eye.
assert all(pd.api.types.is_float_dtype(df[col]) for col in coord_cols), \
    "coordinate columns must be floats; convert them first (step 1)"
assert df[coord_cols].notna().all().all(), \
    "coordinate columns contain missing values; drop those rows first (step 3)"

# Full per-column overview (dtype and non-null count) for the reader.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26642 entries, 0 to 26641
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   time             26642 non-null  object 
 1   latitude         26642 non-null  float64
 2   longitude        26642 non-null  float64
 3   depth            26642 non-null  float64
 4   mag              26642 non-null  float64
 5   magType          26642 non-null  object 
 6   nst              25227 non-null  float64
 7   gap              25225 non-null  float64
 8   dmin             24776 non-null  float64
 9   rms              26642 non-null  float64
 10  net              26642 non-null  object 
 11  id               26642 non-null  object 
 12  updated          26642 non-null  object 
 13  place            25034 non-null  object 
 14  type             26642 non-null  object 
 15  horizontalError  25093 non-null  float64
 16  depthError       26642 non-null  float64
 17  magError    

In [113]:
# Build one shapely Point per row, in (x, y) = (longitude, latitude) order.
geometry_list = list(map(Point, df["longitude"], df["latitude"]))


In [117]:
# Attach the point geometries to the tabular data; the coordinates are plain
# longitude/latitude degrees, hence EPSG:4326.
gdf = gpd.GeoDataFrame(data=df, geometry=geometry_list, crs='epsg:4326')

# Preview the first five records transposed so every column stays visible.
gdf.head().transpose()

Unnamed: 0,0,1,2,3,4
time,2023-01-01T00:49:25.294Z,2023-01-01T01:41:43.755Z,2023-01-01T03:29:31.070Z,2023-01-01T04:09:32.814Z,2023-01-01T04:29:13.793Z
latitude,52.0999,7.1397,19.1631,-4.7803,53.3965
longitude,178.5218,126.738,-66.5251,102.7675,-166.9417
depth,82.77,79.194,24.0,63.787,10.0
mag,3.1,4.5,3.93,4.3,3.0
magType,ml,mb,md,mb,ml
nst,14.0,32.0,23.0,17.0,19.0
gap,139.0,104.0,246.0,187.0,190.0
dmin,0.87,1.152,0.8479,0.457,0.4
rms,0.18,0.47,0.22,0.51,0.31


In [120]:
# Persist the GeoDataFrame as GeoJSON in the working directory.
gdf.to_file(filename="earthquakes_processed.geojson", driver="GeoJSON")