In [60]:
import numpy as np
import pandas as pd
import catboost as cb
from datetime import datetime, timedelta

In [61]:
# Build an empty regressor and restore the saved forecaster from the
# 'forecasting_model' file; the value returned by load_model is what ends up
# bound to `model`.
model = cb.CatBoostRegressor().load_model('forecasting_model')

In [62]:
# Download the most recent earthquake events from the USGS event service as CSV
# (needs network access; the query caps the result at 20000 rows).
df = pd.read_csv(
    'https://earthquake.usgs.gov/fdsnws/event/1/query?format=csv&limit=20000&eventtype=earthquake'
)

# Parse the event timestamps and strip their timezone so the index is tz-naive.
df["time"] = pd.to_datetime(df["time"]).dt.tz_localize(None)

# Put the events in chronological order and index them by event time.
df = df.sort_values("time").set_index("time")
df.head()

Unnamed: 0_level_0,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-06-11 06:15:24.120,38.489,-112.900333,3.05,0.91,md,11.0,98.0,0.008125,0.09,uu,...,2024-06-13T20:28:19.060Z,"14 km NE of Milford, Utah",earthquake,0.3,0.52,0.527835,3.0,reviewed,uu,uu
2024-06-11 06:17:43.805,34.95,25.1425,10.0,4.4,mb,55.0,98.0,0.396,1.09,us,...,2024-06-28T12:12:51.462Z,"6 km S of Pýrgos, Greece",earthquake,5.79,1.869,0.103,27.0,reviewed,us,us
2024-06-11 06:19:01.110,51.9215,178.493833,6.32,0.33,ml,4.0,299.0,0.02895,0.07,av,...,2024-06-11T22:54:38.090Z,"Rat Islands, Aleutian Islands, Alaska",earthquake,1.1,0.79,0.21368,4.0,reviewed,av,av
2024-06-11 06:27:08.787,39.0263,72.0695,10.0,4.1,mb,31.0,136.0,0.497,0.78,us,...,2024-06-29T05:10:00.040Z,"52 km ESE of Karakenja, Tajikistan",earthquake,4.8,1.901,0.185,8.0,reviewed,us,us
2024-06-11 06:36:28.820,38.495,-112.891,3.01,0.91,md,17.0,57.0,0.007406,0.17,uu,...,2024-06-11T20:02:52.230Z,"15 km NE of Milford, Utah",earthquake,0.36,0.54,0.440466,6.0,reviewed,uu,uu


In [63]:
# Derive a coarse region label from the free-text `place` column: take the
# second comma-separated piece, falling back to the whole place string when
# there is no comma.
# NOTE(review): for three-part places the middle piece is used (e.g.
# "Rat Islands, Aleutian Islands, Alaska" -> "Aleutian Islands") -- confirm
# this matches how the model's training data was labelled.
place_parts = df["place"].str.split(", ", expand=True)
region = place_parts[1].fillna(df["place"])

# Expand the abbreviations that appear in the feed to full region names.
region = region.replace({"CA": "California", "B.C.": "Baja California"})
df["region"] = region

# Only magnitude, depth and region are used from here on.
df = df[["mag", "depth", "region"]]
df.head()

Unnamed: 0_level_0,mag,depth,region
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-06-11 06:15:24.120,0.91,3.05,Utah
2024-06-11 06:17:43.805,4.4,10.0,Greece
2024-06-11 06:19:01.110,0.33,6.32,Aleutian Islands
2024-06-11 06:27:08.787,4.1,10.0,Tajikistan
2024-06-11 06:36:28.820,0.91,3.01,Utah


In [64]:
# Rank regions by number of recorded events and keep only the busiest ones.
top_k = 25
regions = df["region"].value_counts()
top_k_regions = regions.index[:top_k]
df = df[df["region"].isin(top_k_regions)]

# Collapse the individual events into one row per region per calendar day,
# holding the mean magnitude and mean depth for that day.
df = (
    df.groupby("region")
    .resample("d")
    .mean()
    .reset_index()
)
df.head()

Unnamed: 0,region,time,mag,depth
0,Alaska,2024-06-11,1.094516,29.925161
1,Alaska,2024-06-12,0.762857,26.260781
2,Alaska,2024-06-13,1.062843,27.405176
3,Alaska,2024-06-14,1.1884,31.640227
4,Alaska,2024-06-15,1.14725,26.496237


In [65]:
def reindex(group, delta):
    """Pad one region's daily frame so it runs through ``delta`` days past today.

    The frame is reindexed onto a gap-free daily range that starts at the
    group's earliest date and ends at the current UTC date plus ``delta`` days.
    Dates that were missing from ``group`` appear as rows of NaN, except for
    the ``region`` column, which is forward-filled so the padded rows stay
    attributed to the same region.

    "Today" is taken in UTC rather than in the machine's local timezone. The
    index this function receives is built from feed timestamps whose timezone
    was stripped (assumed to be UTC, as the ``Z``-suffixed values in the feed
    suggest -- confirm), so a local-time clock could place the final row a day
    early or late around midnight.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for a single region, indexed by date, with a ``region`` column.
    delta : int or float
        Number of days past the current UTC time at which the range ends
        (the end is truncated to a whole date).

    Returns
    -------
    pandas.DataFrame
        A new frame on the padded daily index. An empty ``group`` is returned
        unchanged because it has no start date to build a range from.
    """
    # An empty frame has a NaT minimum, which date_range rejects.
    if group.empty:
        return group

    start_date = group.index.min()

    # Current wall-clock time in UTC, made tz-naive to match the group index.
    now_utc = pd.Timestamp.now(tz="UTC").tz_localize(None)
    end_date = (now_utc + timedelta(days=delta)).normalize()

    date_range = pd.date_range(start=start_date, end=end_date, freq="D")
    group = group.reindex(date_range)

    # Padded rows are all-NaN; carry the region label forward into them.
    group["region"] = group["region"].ffill()
    return group

In [66]:
# Index by date so each region's rows can be reindexed on their dates.
df = df.set_index("time")

# Pad every region's daily series out to one day past the current date
# (delta=1), which creates the empty row that will hold the forecast.
per_region = df.groupby("region")[["region", "mag", "depth"]]
padded = per_region.apply(
    lambda region_frame: reindex(region_frame, 1),
    include_groups=False,
)

# groupby.apply adds an outer `region` index level; drop it so the dates are
# the only index again.
df = padded.reset_index(level=0, drop=True)
df.head()

Unnamed: 0,region,mag,depth
2024-06-11,Alaska,1.094516,29.925161
2024-06-12,Alaska,0.762857,26.260781
2024-06-13,Alaska,1.062843,27.405176
2024-06-14,Alaska,1.1884,31.640227
2024-06-15,Alaska,1.14725,26.496237


In [67]:
# Inspect the last rows for Alaska to confirm the padded (all-NaN) forecast row exists.
df.loc[df["region"] == "Alaska"].tail()

Unnamed: 0,region,mag,depth
2024-07-08,Alaska,0.956744,23.275558
2024-07-09,Alaska,1.051915,25.364255
2024-07-10,Alaska,1.102,18.2407
2024-07-11,Alaska,1.808333,35.116667
2024-07-12,Alaska,,


In [68]:
# Sanity check: confirm the frame is indexed by a DatetimeIndex, since the
# calendar features computed next read .day / .dayofweek / .dayofyear from it.
type(df.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [69]:
# Calendar features read straight off the date index.
for calendar_attr in ("day", "dayofweek", "dayofyear"):
    df[calendar_attr] = getattr(df.index, calendar_attr)

# Lag features: the value of each target 1..7 days earlier, shifted within
# each region so one region's history never leaks into another's.
# Column order (all mag lags, then all depth lags) is kept as-is.
start_lag = 1
end_lag = 7
for target in ("mag", "depth"):
    for i in range(start_lag, end_lag + 1):
        df[f"{target}_lag_{i}"] = df.groupby("region")[target].shift(i)


def _ewma(series):
    """Exponentially weighted mean (span 7, recursive form) of one region's series."""
    return series.ewm(span=7, adjust=False).mean()


# Smoothed trend per region for each target.
# NOTE(review): unlike the lag columns, the EWMA is not shifted, so on days
# with an observed value it includes that same day's value -- confirm this
# matches how the model was trained.
for target in ("mag", "depth"):
    df[f"{target}_ewma"] = df.groupby("region")[target].transform(_ewma)
df.head()

Unnamed: 0,region,mag,depth,day,dayofweek,dayofyear,mag_lag_1,mag_lag_2,mag_lag_3,mag_lag_4,...,mag_lag_7,depth_lag_1,depth_lag_2,depth_lag_3,depth_lag_4,depth_lag_5,depth_lag_6,depth_lag_7,mag_ewma,depth_ewma
2024-06-11,Alaska,1.094516,29.925161,11,1,163,,,,,...,,,,,,,,,1.094516,29.925161
2024-06-12,Alaska,0.762857,26.260781,12,2,164,1.094516,,,,...,,29.925161,,,,,,,1.011601,29.009066
2024-06-13,Alaska,1.062843,27.405176,13,3,165,0.762857,1.094516,,,...,,26.260781,29.925161,,,,,,1.024412,28.608094
2024-06-14,Alaska,1.1884,31.640227,14,4,166,1.062843,0.762857,1.094516,,...,,27.405176,26.260781,29.925161,,,,,1.065409,29.366127
2024-06-15,Alaska,1.14725,26.496237,15,5,167,1.1884,1.062843,0.762857,1.094516,...,,31.640227,27.405176,26.260781,29.925161,,,,1.085869,28.648655


In [77]:
# Narrow the frame to Alaska, then replace every remaining NaN with the
# sentinel -10.0. This covers the leading lag columns and also the `mag` /
# `depth` cells of the padded forecast row.
alaska_rows = df.loc[df["region"] == "Alaska"]
df = alaska_rows.fillna(-10.0)

In [78]:
# Inspect the final Alaska rows, including the forecast row with its sentinel-filled targets.
df.loc[df["region"] == "Alaska"].tail()

Unnamed: 0,region,mag,depth,day,dayofweek,dayofyear,mag_lag_1,mag_lag_2,mag_lag_3,mag_lag_4,...,mag_lag_7,depth_lag_1,depth_lag_2,depth_lag_3,depth_lag_4,depth_lag_5,depth_lag_6,depth_lag_7,mag_ewma,depth_ewma
2024-07-08,Alaska,0.956744,23.275558,8,0,190,0.746762,0.685417,1.392245,0.960769,...,1.210857,18.323714,26.125125,26.167347,25.135123,22.16232,25.679153,27.306443,0.936022,23.87589
2024-07-09,Alaska,1.051915,25.364255,9,1,191,0.956744,0.746762,0.685417,1.392245,...,1.064444,23.275558,18.323714,26.125125,26.167347,25.135123,22.16232,25.679153,0.964995,24.247981
2024-07-10,Alaska,1.102,18.2407,10,2,192,1.051915,0.956744,0.746762,0.685417,...,0.654948,25.364255,23.275558,18.323714,26.125125,26.167347,25.135123,22.16232,0.999246,22.746161
2024-07-11,Alaska,1.808333,35.116667,11,3,193,1.102,1.051915,0.956744,0.746762,...,0.960769,18.2407,25.364255,23.275558,18.323714,26.125125,26.167347,25.135123,1.201518,25.838787
2024-07-12,Alaska,-10.0,-10.0,12,4,194,1.808333,1.102,1.051915,0.956744,...,1.392245,35.116667,18.2407,25.364255,23.275558,18.323714,26.125125,26.167347,1.201518,25.838787


In [70]:
# Small 2x3 int32 array holding 1..6 row by row, used to try out indexing.
x = np.arange(1, 7, dtype=np.int32).reshape(2, 3)
x

array([[1, 2, 3],
       [4, 5, 6]], dtype=int32)

In [74]:
# Take every row of the column at index 0 (the first column) of `x`.
# NOTE(review): the name `column_2` suggests a different column than index 0 --
# confirm which column was intended.
column_2 = x[:, 0]
column_2

array([1, 4], dtype=int32)