In [1]:
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# Historical catalogue: parse event times, keep events from 1994-01-01 onwards,
# then order and index the frame by event time.
df = pd.read_csv('earthquakes.csv')
df["time"] = pd.to_datetime(df["time"])
df = (
    df[df["time"] >= "1994-01-01"]
    .sort_values("time")
    .set_index("time")
)

# Region is the second ", "-separated piece of `place`; rows without one fall
# back to the full `place` text. Two abbreviations are expanded to full names.
df["region"] = (
    df["place"]
    .str.split(", ", expand=True)[1]
    .fillna(df["place"])
    .replace({"CA": "California", "B.C.": "Baja California"})
)

# Keep the labels of the `top_k` regions with the most events.
top_k = 25
regions = df["region"].value_counts()
top_k_regions = regions.index[:top_k]
print(top_k_regions)

Index(['California', 'Alaska', 'Nevada', 'Hawaii', 'Washington', 'Utah',
       'Montana', 'Puerto Rico', 'Indonesia', 'Chile', 'Baja California',
       'Oklahoma', 'Japan', 'Greece', 'Papua New Guinea', 'Philippines',
       'Mexico', 'Italy', 'Russia', 'Idaho', 'Aleutian Islands', 'Tonga',
       'Oregon', 'Wyoming', 'Turkey'],
      dtype='object', name='region')


In [3]:
# Fetch recent events from the USGS FDSN event service as CSV. The query asks
# for earthquakes only and at most 20000 rows (`limit=20000`).
live_data = pd.read_csv(
    "https://earthquake.usgs.gov/fdsnws/event/1/query?format=csv&eventtype=earthquake&limit=20000"
)
live_data["time"] = pd.to_datetime(live_data["time"])
live_data = live_data.sort_values("time").set_index("time")

# Region is the second ", "-separated piece of `place`; rows without one fall
# back to the full `place` text. `.str.get(1)` returns NaN for those rows, so
# this also works when no row in the response contains a comma (the
# `expand=True` form raises KeyError in that case because column 1 is missing).
live_data["region"] = (
    live_data["place"]
    .str.split(", ")
    .str.get(1)
    .fillna(live_data["place"])
    .replace({"CA": "California", "B.C.": "Baja California"})
)

# Keep only the regions selected from the historical data (`top_k_regions`).
live_data = live_data.loc[live_data["region"].isin(top_k_regions)]

live_data = live_data[["depth", "mag", "region", "latitude", "longitude"]]

# Daily mean per region. Resampling emits a row for every calendar day between
# a region's first and last event, so days without events come out as NaN.
# "D" is the canonical daily frequency alias.
live_data = live_data.groupby("region").resample("D").mean().reset_index()
live_data.head()

Unnamed: 0,region,time,depth,mag,latitude,longitude
0,Alaska,2024-05-09 00:00:00+00:00,37.609574,1.341277,59.784236,-147.013719
1,Alaska,2024-05-10 00:00:00+00:00,24.873725,1.051765,60.616102,-153.372152
2,Alaska,2024-05-11 00:00:00+00:00,12.9661,0.9238,57.840715,-150.80531
3,Alaska,2024-05-12 00:00:00+00:00,24.525171,1.023429,58.691787,-156.738375
4,Alaska,2024-05-13 00:00:00+00:00,21.300727,1.37438,57.063104,-157.645071


In [4]:
# Summarise column dtypes and non-null counts (`info()` prints the report).
# The numeric columns can hold fewer non-null values than there are rows,
# because daily resampling also emits rows for days with no events.
live_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 698 entries, 0 to 697
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype              
---  ------     --------------  -----              
 0   region     698 non-null    object             
 1   time       698 non-null    datetime64[ns, UTC]
 2   depth      546 non-null    float64            
 3   mag        546 non-null    float64            
 4   latitude   546 non-null    float64            
 5   longitude  546 non-null    float64            
dtypes: datetime64[ns, UTC](1), float64(4), object(1)
memory usage: 32.8+ KB


In [5]:
# Drop the UTC marker from `time` (the clock values stay the same) so the
# column is tz-naive, matching the tz-naive date range built in `reindex`.
live_data["time"] = live_data["time"].dt.tz_localize(None)

In [6]:
# Preview the frame: `time` should now display without the +00:00 offset.
live_data.head()

Unnamed: 0,region,time,depth,mag,latitude,longitude
0,Alaska,2024-05-09,37.609574,1.341277,59.784236,-147.013719
1,Alaska,2024-05-10,24.873725,1.051765,60.616102,-153.372152
2,Alaska,2024-05-11,12.9661,0.9238,57.840715,-150.80531
3,Alaska,2024-05-12,24.525171,1.023429,58.691787,-156.738375
4,Alaska,2024-05-13,21.300727,1.37438,57.063104,-157.645071


In [7]:
def reindex(group, end_date=None, horizon_days=3):
    """Stretch one group's rows onto a gap-free daily calendar and forward-fill.

    Parameters
    ----------
    group : pandas.DataFrame or pandas.Series
        Rows for a single group, indexed by timestamp. In this notebook that
        is one region's daily means, indexed by the tz-naive ``time`` index.
    end_date : date-like, optional
        Last day of the calendar; it is normalized to midnight. It should have
        the same timezone-awareness as ``group.index``. When omitted, the
        calendar ends ``horizon_days`` days after today's local date, which is
        the original behaviour of this function.
    horizon_days : int, default 3
        Days past today to extend the calendar. Used only when ``end_date``
        is not given.

    Returns
    -------
    pandas.DataFrame or pandas.Series
        ``group`` reindexed to every day from its earliest index value through
        the end date, with missing values forward-filled from the last
        earlier row. Rows dated after the end date are not kept. An empty
        ``group`` is returned as an unchanged copy.
    """
    if len(group) == 0:
        # min() of an empty index is NaT, which date_range rejects as a start.
        return group.copy()

    start_date = group.index.min()
    if end_date is None:
        # Wall-clock default: today's local date plus the look-ahead horizon.
        end_date = pd.Timestamp((datetime.now() + timedelta(days=horizon_days)).date())
    else:
        end_date = pd.Timestamp(end_date).normalize()

    # "D" is the canonical daily frequency alias.
    date_range = pd.date_range(start=start_date, end=end_date, freq="D")
    return group.reindex(date_range).ffill()

In [8]:
# Move `time` into the index: `reindex` reads `group.index.min()` and reindexes
# onto a daily date range, so each group has to be indexed by time.
# Not idempotent: `time` stops being a column here, so running this cell a
# second time without re-running the cells above raises KeyError.
live_data = live_data.set_index('time')
live_data.head()

Unnamed: 0_level_0,region,depth,mag,latitude,longitude
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-05-09,Alaska,37.609574,1.341277,59.784236,-147.013719
2024-05-10,Alaska,24.873725,1.051765,60.616102,-153.372152
2024-05-11,Alaska,12.9661,0.9238,57.840715,-150.80531
2024-05-12,Alaska,24.525171,1.023429,58.691787,-156.738375
2024-05-13,Alaska,21.300727,1.37438,57.063104,-157.645071


In [9]:

# Per region: extend the daily rows to a continuous calendar and forward-fill
# (see `reindex`). `region` is listed in the column selection so it is part of
# each group's frame and gets forward-filled with the other columns. The
# group-key index level that `apply` adds is then dropped, leaving the dates
# as the index.
live_data = (
    live_data
    .groupby('region')[['region', 'mag', 'depth', 'latitude', 'longitude']]
    .apply(reindex, include_groups=False)
    .reset_index(level=0, drop=True)
)
live_data.head()

Unnamed: 0,region,mag,depth,latitude,longitude
2024-05-09,Alaska,1.341277,37.609574,59.784236,-147.013719
2024-05-10,Alaska,1.051765,24.873725,60.616102,-153.372152
2024-05-11,Alaska,0.9238,12.9661,57.840715,-150.80531
2024-05-12,Alaska,1.023429,24.525171,58.691787,-156.738375
2024-05-13,Alaska,1.37438,21.300727,57.063104,-157.645071


In [11]:
# Spot-check one region's last rows: days past the final observation should
# repeat that observation's values (forward-fill from `reindex`).
live_data[live_data["region"].eq('Alaska')].tail()

Unnamed: 0,region,mag,depth,latitude,longitude
2024-06-07,Alaska,0.92802,24.610198,58.912757,-155.713745
2024-06-08,Alaska,1.694118,20.017647,62.4167,-150.600843
2024-06-09,Alaska,1.694118,20.017647,62.4167,-150.600843
2024-06-10,Alaska,1.694118,20.017647,62.4167,-150.600843
2024-06-11,Alaska,1.694118,20.017647,62.4167,-150.600843


In [10]:
# Count missing values per column; all zeros means the forward-fill left no gaps.
live_data.isna().sum()

region       0
mag          0
depth        0
latitude     0
longitude    0
dtype: int64