In [1]:
import pandas as pd 

In [2]:
import numpy as np

In [3]:
# Read the raw event table from disk (presumably a seismic catalogue, judging
# by its Latitude/Longitude/Depth/Magnitude columns — confirm the provenance).
df = pd.read_csv(filepath_or_buffer='italy.csv')

In [4]:
# Show the freshly loaded frame; the rendered output below is elided with
# '...' rows rather than listing every record.
df

Unnamed: 0,Time,Latitude,Longitude,Depth/Km,Magnitude
0,2016-08-24 03:36:32.000,42.6983,13.2335,8.1,6.0
1,2016-08-24 03:37:26.580,42.7123,13.2533,9.0,4.5
2,2016-08-24 03:40:46.590,42.7647,13.1723,9.7,3.8
3,2016-08-24 03:41:38.900,42.7803,13.1683,9.7,3.9
4,2016-08-24 03:42:07.170,42.7798,13.1575,9.7,3.6
...,...,...,...,...,...
8081,2016-11-30 18:39:27.600,42.8903,13.0197,10.7,2.0
8082,2016-11-30 18:43:14.850,42.9443,13.2003,8.6,2.6
8083,2016-11-30 20:18:27.550,43.0092,13.1288,8.0,2.2
8084,2016-11-30 20:45:11.780,43.0197,13.1017,9.3,2.7


In [5]:
# Replace the verbose CSV headers with short snake-style names that are
# easier to type in the modelling cells; 'Time' is left unchanged here.
df = df.rename(
    mapper={
        'Latitude': 'lat',
        'Longitude': 'long',
        'Depth/Km': 'depth',
        'Magnitude': 'mag',
    },
    axis='columns',
)
df

Unnamed: 0,Time,lat,long,depth,mag
0,2016-08-24 03:36:32.000,42.6983,13.2335,8.1,6.0
1,2016-08-24 03:37:26.580,42.7123,13.2533,9.0,4.5
2,2016-08-24 03:40:46.590,42.7647,13.1723,9.7,3.8
3,2016-08-24 03:41:38.900,42.7803,13.1683,9.7,3.9
4,2016-08-24 03:42:07.170,42.7798,13.1575,9.7,3.6
...,...,...,...,...,...
8081,2016-11-30 18:39:27.600,42.8903,13.0197,10.7,2.0
8082,2016-11-30 18:43:14.850,42.9443,13.2003,8.6,2.6
8083,2016-11-30 20:18:27.550,43.0092,13.1288,8.0,2.2
8084,2016-11-30 20:45:11.780,43.0197,13.1017,9.3,2.7


In [6]:
# Parse the 'Time' strings as timestamps, keep only their calendar date in a
# new 'date' column, and select the final column order. 'Time' is not part of
# the selection, so it is dropped from the resulting frame.
df = (
    df
    .assign(date=pd.to_datetime(df['Time']).dt.date)
    [['date', 'lat', 'long', 'depth', 'mag']]
)
df

Unnamed: 0,date,lat,long,depth,mag
0,2016-08-24,42.6983,13.2335,8.1,6.0
1,2016-08-24,42.7123,13.2533,9.0,4.5
2,2016-08-24,42.7647,13.1723,9.7,3.8
3,2016-08-24,42.7803,13.1683,9.7,3.9
4,2016-08-24,42.7798,13.1575,9.7,3.6
...,...,...,...,...,...
8081,2016-11-30,42.8903,13.0197,10.7,2.0
8082,2016-11-30,42.9443,13.2003,8.6,2.6
8083,2016-11-30,43.0092,13.1288,8.0,2.2
8084,2016-11-30,43.0197,13.1017,9.3,2.7


In [7]:
# Regressors (location and depth) and the response (magnitude) for the
# linear model fitted in the following cell.
X, Y = df[['lat', 'long', 'depth']], df['mag']

In [9]:
import statsmodels.api as sm

# OLS in statsmodels has no implicit intercept, so prepend a 'const' column
# to the regressors before fitting.
X = sm.add_constant(X)

# Ordinary least squares of magnitude on [const, lat, long, depth].
reg_res = sm.OLS(endog=Y, exog=X).fit()

# Last expression of the cell: renders the full regression report.
reg_res.summary()

0,1,2,3
Dep. Variable:,mag,R-squared:,0.01
Model:,OLS,Adj. R-squared:,0.01
Method:,Least Squares,F-statistic:,27.78
Date:,"Sun, 26 Nov 2023",Prob (F-statistic):,7.31e-18
Time:,17:16:53,Log-Likelihood:,-4538.9
No. Observations:,8086,AIC:,9086.0
Df Residuals:,8082,BIC:,9114.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.4683,2.564,2.912,0.004,2.441,12.495
lat,-0.0883,0.046,-1.923,0.055,-0.178,0.002
long,-0.0827,0.068,-1.215,0.224,-0.216,0.051
depth,-0.0200,0.002,-8.998,0.000,-0.024,-0.016

0,1,2,3
Omnibus:,3019.418,Durbin-Watson:,1.342
Prob(Omnibus):,0.0,Jarque-Bera (JB):,13244.306
Skew:,1.796,Prob(JB):,0.0
Kurtosis:,8.139,Cond. No.,25000.0


In [10]:
# In-sample fit: these predictions are made on the same rows the model was
# estimated from, so the error figures below are training errors.
predicted_values = reg_res.predict(X)
residuals = Y - predicted_values

# MAE: average magnitude of the residuals.
mae = residuals.abs().mean()

# MAPE: average of |residual / actual|, expressed as a percentage.
mape = (residuals / Y).abs().mean() * 100

# RMSE: square root of the mean squared residual.
rmse = np.sqrt((residuals ** 2).mean())

# Report all three metrics, one per line.
print(
    f"Mean Absolute Error (MAE): {mae:.4f}",
    f"Mean Absolute Percentage Error (MAPE): {mape:.4f}%",
    f"Root Mean Squared Error (RMSE): {rmse:.4f}",
    sep="\n",
)

Mean Absolute Error (MAE): 0.3177
Mean Absolute Percentage Error (MAPE): 12.7058%
Root Mean Squared Error (RMSE): 0.4242
