### Predicting the timestamp of the event occured using locations(lat and long) and depth of earthquake

In [1]:
#importing lib
import pandas as pd
import numpy as np
import datetime
import math
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
import sklearn.metrics as metrics

In [2]:
#loading the dataset
df = pd.read_csv('co-ordinates.csv')
df.head()

Unnamed: 0,latitude,longitude,depth,mag,id,hour,date
0,34.189835,-117.588,6.0,1.7,ci12317071,0,1973-01-01
1,-9.214,150.634,41.0,5.3,usp0000001,3,1973-01-01
2,48.308666,-122.11733,13.68,2.2,uw10840118,4,1973-01-01
3,-15.012,-173.958,33.0,5.0,usp0000002,5,1973-01-01
4,19.443666,-155.36084,7.302,1.85,hv19723167,7,1973-01-01


In [3]:
df.isnull()

Unnamed: 0,latitude,longitude,depth,mag,id,hour,date
0,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...
3855714,False,False,False,False,False,False,False
3855715,False,False,False,False,False,False,False
3855716,False,False,False,False,False,False,False
3855717,False,False,False,False,False,False,False


In [4]:
df.info(memory_usage = "deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3855719 entries, 0 to 3855718
Data columns (total 7 columns):
 #   Column     Dtype  
---  ------     -----  
 0   latitude   float64
 1   longitude  float64
 2   depth      float64
 3   mag        float64
 4   id         object 
 5   hour       int64  
 6   date       object 
dtypes: float64(4), int64(1), object(2)
memory usage: 640.1 MB


In [5]:
#dropping the duplicate values
df.drop_duplicates(subset = ['date'], keep = 'first', inplace = True)

In [6]:
# Converting the date to timestamp
df['date'] = pd.to_datetime(df['date'], format= '%Y-%m-%d')

In [7]:
timestamp = df.date.values.astype(float)
timeStamp = pd.Series(timestamp)

In [8]:
#adding the timestamp values to Dataframe
df['Timestamp'] = timeStamp.values

In [9]:
df = df[['latitude','longitude','depth','mag','Timestamp']]
df

Unnamed: 0,latitude,longitude,depth,mag,Timestamp
0,34.189835,-117.588000,6.00,1.70,9.469440e+16
11,-9.854000,117.427000,66.00,5.50,9.478080e+16
28,34.815834,-116.359500,6.00,2.98,9.486720e+16
48,41.305000,-29.272000,33.00,4.50,9.495360e+16
62,0.683000,-80.018000,36.00,4.70,9.504000e+16
...,...,...,...,...,...
3854141,47.579000,-122.545334,5.09,0.81,1.640563e+18
3854413,17.948800,-66.926500,10.00,2.22,1.640650e+18
3854741,58.451200,-143.092800,7.30,2.10,1.640736e+18
3855063,63.085500,-151.405500,2.90,2.30,1.640822e+18


In [10]:
df.info(memory_usage = "deep")

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17897 entries, 0 to 3855366
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   latitude   17897 non-null  float64
 1   longitude  17897 non-null  float64
 2   depth      17897 non-null  float64
 3   mag        17897 non-null  float64
 4   Timestamp  17897 non-null  float64
dtypes: float64(5)
memory usage: 838.9 KB


### Splitting the data and Standardizing features

In [11]:
x = df[['latitude', 'longitude','depth']]
y = df[['Timestamp']]

In [12]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=42)
x_train

Unnamed: 0,latitude,longitude,depth
2461126,37.559666,-122.513664,8.039
2911681,18.170000,-66.818000,25.000
323248,19.658500,-155.966660,9.538
1718261,19.368166,-155.498170,8.483
29662,43.965000,147.009000,33.000
...,...,...,...
1529995,35.207500,-120.866330,2.369
1737228,44.565000,-116.016000,5.000
432926,23.820000,142.893000,33.000
27889,36.912000,-121.473170,4.954


In [13]:
sc = StandardScaler()

x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)
y_train = sc.fit_transform(y_train)
y_test = sc.fit_transform(y_test)

In [14]:
x_train

array([[ 0.16880357, -0.35505611, -0.27575003],
       [-0.78014453,  0.3132047 ,  0.0061121 ],
       [-0.70729597, -0.75643971, -0.25083927],
       ...,
       [-0.50362833,  2.82940851,  0.1390581 ],
       [ 0.1371062 , -0.34257181, -0.32701733],
       [ 1.41162076, -0.67110372, -0.38109311]])

## RFG : Random Forest Regression

In [15]:
rfg = RandomForestRegressor()

In [16]:
rfg.fit(x_train,y_train)

  rfg.fit(x_train,y_train)


RandomForestRegressor()

### RFG Predictions

In [17]:
#predicting the values using loc and depth
rfg_predict = rfg.predict(x_test)

final_rfg_predicted_values = pd.DataFrame(rfg_predict,columns = ['Timestamp'])
final_rfg_predicted_values

Unnamed: 0,Timestamp
0,1.218328
1,-1.022462
2,-0.726683
3,-0.206004
4,-0.758082
...,...
3575,-0.501446
3576,0.259929
3577,-0.536297
3578,0.198334


### RFG Evaluation Metrics

In [18]:
#mean_squared_error
rfg_mse = mean_squared_error(y_test, rfg_predict)

#root_mean_squared_error
rfg_rmse = math.sqrt(rfg_mse)

#r_squared of the model
rfg_r2 = r2_score(y_test, rfg_predict)

print(f'MSE : {rfg_mse}, RMSE : {rfg_rmse}, R_Squared : {rfg_r2}')

MSE : 0.8463044190633853, RMSE : 0.9199480523721898, R_Squared : 0.15369558093661495


In [19]:
#score of the model
model_score = rfg.score(x_test, rfg_predict)
print(f'Score of the model : {model_score}')

Score of the model : 1.0


## SVR : Support Vector Regression

In [20]:
svr_reg = SVR(kernel='rbf')

In [21]:
svr_reg.fit(x_train,y_train)

  y = column_or_1d(y, warn=True)


SVR()

### SVR Predictions

In [22]:
svr_reg_pred = svr_reg.predict(x_test)

final_svr_predicted_values = pd.DataFrame(svr_reg_pred,columns = ['Timestamp'])
final_svr_predicted_values

Unnamed: 0,Timestamp
0,0.806587
1,-0.485679
2,-0.282179
3,0.137419
4,-1.091897
...,...
3575,-0.209858
3576,0.558045
3577,-0.219920
3578,-0.188199


### SVR Evaluation Metrics

In [23]:
#mean_squared_error
svr_mse = mean_squared_error(y_test, svr_reg_pred)

#root_mean_squared_error
svr_rmse = math.sqrt(svr_mse)

#r_squared of the model
svr_r2 = r2_score(y_test, svr_reg_pred)

print(f'MSE : {svr_mse}, RMSE : {svr_rmse}, R_Squared : {svr_r2}')

MSE : 0.849554434860911, RMSE : 0.9217127724301704, R_Squared : 0.15044556513908924


## ANN with GridSearchCV

In [24]:
def nn(neurons, activation, optimizer, loss):
    model = Sequential()
    model.add(Dense(neurons, activation=activation, input_shape=(3,)))
    model.add(Dense(neurons, activation=activation))
    model.add(Dense(2, activation='softmax'))
    
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    
    return model

In [25]:
model = KerasClassifier(build_fn=nn, verbose=0)

  model = KerasClassifier(build_fn=nn, verbose=0)


In [26]:
neurons = [16]
batch_size = [10]
epochs = [10]
activation = ['sigmoid', 'relu']
optimizer = ['SGD', 'Adam']
loss = ['mean_squared_error']

In [27]:
prm = dict(neurons=neurons, batch_size=batch_size, epochs=epochs, activation=activation, optimizer=optimizer, loss=loss)

In [28]:
gsc = GridSearchCV(estimator=model, param_grid=prm, n_jobs=-1)

In [29]:
result = gsc.fit(x_train, y_train)

Metal device set to: Apple M1
Metal device set to: Apple M1
Metal device set to: Apple M1
Metal device set to: Apple M1
Metal device set to: Apple M1
Metal device set to: Apple M1


2022-12-20 16:36:16.530584: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-12-20 16:36:16.531160: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2022-12-20 16:36:16.606291: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-12-20 16:36:16.606520: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL,

Metal device set to: Apple M1
Metal device set to: Apple M1


2022-12-20 16:36:16.745651: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-12-20 16:36:16.745764: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2022-12-20 16:36:16.773153: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-12-20 16:36:16.773794: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL,

2022-12-20 16:43:25.590931: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-12-20 16:43:25.591118: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2022-12-20 16:43:25.763137: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Metal device set to: Apple M1


2022-12-20 16:43:25.950364: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [30]:
print("Best: %f using %s" % (result.best_score_,result.best_params_))

Best: 0.000140 using {'activation': 'sigmoid', 'batch_size': 10, 'epochs': 10, 'loss': 'mean_squared_error', 'neurons': 16, 'optimizer': 'Adam'}


In [42]:
mean_test_score = result.cv_results_['mean_test_score']
std_test_score = result.cv_results_['std_test_score']
params = result.cv_results_['params']

In [43]:
model = Sequential()

model.add(Dense(16, activation='relu', input_shape=(3,)))
model.add(Dense(16, activation='relu'))
model.add(Dense(2, activation='softmax'))

In [44]:
model.compile(optimizer='SGD', loss='mean_squared_error')

In [45]:
model.fit(x_train, y_train, batch_size=10, epochs=20,verbose =1, validation_data=(x_test,y_test))

Epoch 1/20
  23/1432 [..............................] - ETA: 6s - loss: 1.2821 

2022-12-20 16:53:43.473488: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2022-12-20 16:53:49.616770: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2ea7c1790>

### ANN Evaluation Metrics

In [78]:
ann_mse = model.evaluate(x_test, y_test)
ann_rmse = math.sqrt(ann_mse)



In [48]:
print(f"Mean Squared Error = {ann_mse}, Root Mean Squared Error = {ann_rmse}")

Mean Squared Error = 1.2500306367874146, Root Mean Squared Error = 1.118047689853798


## Comparison of the models

#### Models without Hyper-Paramter tuning

In [85]:
compar_data1 = { 'RFG' : [rfg_mse,rfg_rmse,rfg_r2],
                'SVR' : [svr_mse,svr_rmse,svr_r2],
              }

In [87]:
model_compar1 = pd.DataFrame(compar_data1,index = ['MSE','RMSE','R SQUARED'])
model_compar1

Unnamed: 0,RFG,SVR
MSE,0.846304,0.849554
RMSE,0.919948,0.921713
R SQUARED,0.153696,0.150446


RFG performed better and faster than SVR

#### All models

In [81]:
compar_data2 = { 'RFG' : [rfg_mse,rfg_rmse],
                'SVR' : [svr_mse,svr_rmse],
                'ANN' : [ann_mse,ann_rmse]
              }

In [82]:
model_compar2 = pd.DataFrame(compar_data2,index = ['MSE','RMSE'])
model_compar2

Unnamed: 0,RFG,SVR,ANN
MSE,0.846304,0.849554,1.250031
RMSE,0.919948,0.921713,1.118048


Among 3 models, RFG performed slightly better