In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix , recall_score , precision_score , f1_score ,classification_report,plot_confusion_matrix


In [2]:
# Load the two CSV files into DataFrames: `database.csv` becomes the training
# frame and `earthquakeTest.csv.txt` the test frame. Both paths are bare file
# names, so they are resolved against the current working directory.
df_train = pd.read_csv(filepath_or_buffer=r"database.csv")
df_test = pd.read_csv(filepath_or_buffer=r"earthquakeTest.csv.txt")


In [3]:
# Show the first five rows of the raw training frame.
df_train.head(n=5)

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,01/02/1965,13:44:18,19.246,145.616,Earthquake,131.6,,,6.0,MW,...,,,,,,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic
1,01/04/1965,11:29:49,1.863,127.352,Earthquake,80.0,,,5.8,MW,...,,,,,,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic
2,01/05/1965,18:05:58,-20.579,-173.972,Earthquake,20.0,,,6.2,MW,...,,,,,,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic
3,01/08/1965,18:49:43,-59.076,-23.557,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic
4,01/09/1965,13:32:50,11.938,126.427,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic


In [4]:
# Show the first five rows of the raw test frame.
df_test.head(n=5)

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,2017-01-01T00:04:56.020Z,32.98,-115.545833,11.5,2.68,ml,41.0,77.0,0.06553,0.26,...,2017-02-08T21:33:00.874Z,"2km W of Brawley, CA",earthquake,0.24,0.46,0.196,64.0,reviewed,ci,ci
1,2017-01-01T00:13:25.380Z,2.8327,127.5786,78.93,5.0,mb,,101.0,2.058,0.75,...,2017-03-27T23:53:16.040Z,"131km NNW of Tobelo, Indonesia",earthquake,6.8,7.1,0.065,75.0,reviewed,us,us
2,2017-01-01T00:22:02.820Z,32.973,-115.5505,9.4,2.65,ml,42.0,75.0,0.07023,0.24,...,2017-02-08T21:36:24.950Z,"2km WSW of Brawley, CA",earthquake,0.23,0.61,0.198,76.0,reviewed,ci,ci
3,2017-01-01T00:23:53.890Z,-5.9497,153.8988,10.0,4.1,mb,,185.0,2.457,0.32,...,2017-03-27T23:53:16.040Z,"180km WNW of Panguna, Papua New Guinea",earthquake,7.5,1.9,0.184,8.0,reviewed,us,us
4,2017-01-01T00:45:57.980Z,-2.9302,139.4328,49.25,4.1,mb,,132.0,7.174,0.9,...,2017-03-27T23:53:16.040Z,"132km WSW of Abepura, Indonesia",earthquake,13.5,8.4,0.166,10.0,reviewed,us,us


## Clean Datasets

In [5]:
# Training frame: remove the columns listed below; every other column is kept.
train_columns_to_drop = [
    'Time',
    'Depth Error',
    'Depth Seismic Stations',
    'Magnitude Error',
    'Magnitude Seismic Stations',
    'Azimuthal Gap',
    'Horizontal Distance',
    'Horizontal Error',
    'Root Mean Square',
    'Source',
    'Location Source',
    'Magnitude Source',
    'Status',
]
df_train_loaded = df_train.drop(columns=train_columns_to_drop)

# Test frame: keep only these five columns, in this order.
test_columns_to_keep = ['time', 'latitude', 'longitude', 'mag', 'depth']
df_test_loaded = df_test[test_columns_to_keep]

# Preview the trimmed training frame.
df_train_loaded

Unnamed: 0,Date,Latitude,Longitude,Type,Depth,Magnitude,Magnitude Type,ID
0,01/02/1965,19.2460,145.6160,Earthquake,131.60,6.0,MW,ISCGEM860706
1,01/04/1965,1.8630,127.3520,Earthquake,80.00,5.8,MW,ISCGEM860737
2,01/05/1965,-20.5790,-173.9720,Earthquake,20.00,6.2,MW,ISCGEM860762
3,01/08/1965,-59.0760,-23.5570,Earthquake,15.00,5.8,MW,ISCGEM860856
4,01/09/1965,11.9380,126.4270,Earthquake,15.00,5.8,MW,ISCGEM860890
...,...,...,...,...,...,...,...,...
23407,12/28/2016,38.3917,-118.8941,Earthquake,12.30,5.6,ML,NN00570710
23408,12/28/2016,38.3777,-118.8957,Earthquake,8.80,5.5,ML,NN00570744
23409,12/28/2016,36.9179,140.4262,Earthquake,10.00,5.9,MWW,US10007NAF
23410,12/29/2016,-9.0283,118.6639,Earthquake,79.00,6.3,MWW,US10007NL0


In [6]:
# Show the first five rows of the trimmed test frame.
df_test_loaded.head(n=5)

Unnamed: 0,time,latitude,longitude,mag,depth
0,2017-01-01T00:04:56.020Z,32.98,-115.545833,2.68,11.5
1,2017-01-01T00:13:25.380Z,2.8327,127.5786,5.0,78.93
2,2017-01-01T00:22:02.820Z,32.973,-115.5505,2.65,9.4
3,2017-01-01T00:23:53.890Z,-5.9497,153.8988,4.1,10.0
4,2017-01-01T00:45:57.980Z,-2.9302,139.4328,4.1,49.25


In [7]:
# Align column names across the two frames: the training frame's
# 'Magnitude Type' gets an underscore instead of a space, and the test frame's
# lower-case names are mapped onto the capitalised names the training frame uses.
train_renames = {'Magnitude Type': 'Magnitude_Type'}
test_renames = {
    'time': 'Date',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'mag': "Magnitude",
    'depth': 'Depth',
}

df_train_loaded = df_train_loaded.rename(columns=train_renames)
df_test_loaded = df_test_loaded.rename(columns=test_renames)

In [8]:
# Preview the training frame after renaming (first five rows).
df_train_loaded.head(n=5)

Unnamed: 0,Date,Latitude,Longitude,Type,Depth,Magnitude,Magnitude_Type,ID
0,01/02/1965,19.246,145.616,Earthquake,131.6,6.0,MW,ISCGEM860706
1,01/04/1965,1.863,127.352,Earthquake,80.0,5.8,MW,ISCGEM860737
2,01/05/1965,-20.579,-173.972,Earthquake,20.0,6.2,MW,ISCGEM860762
3,01/08/1965,-59.076,-23.557,Earthquake,15.0,5.8,MW,ISCGEM860856
4,01/09/1965,11.938,126.427,Earthquake,15.0,5.8,MW,ISCGEM860890


In [9]:
# Preview the test frame after renaming (first five rows).
df_test_loaded.head(n=5)

Unnamed: 0,Date,Latitude,Longitude,Magnitude,Depth
0,2017-01-01T00:04:56.020Z,32.98,-115.545833,2.68,11.5
1,2017-01-01T00:13:25.380Z,2.8327,127.5786,5.0,78.93
2,2017-01-01T00:22:02.820Z,32.973,-115.5505,2.65,9.4
3,2017-01-01T00:23:53.890Z,-5.9497,153.8988,4.1,10.0
4,2017-01-01T00:45:57.980Z,-2.9302,139.4328,4.1,49.25


In [10]:
# Define the training and testing datasets: both frames are restricted to the
# same four columns, in the same order.
model_columns = ['Latitude', 'Longitude', 'Magnitude', 'Depth']
df_training = df_train_loaded[model_columns]
df_testing = df_test_loaded[model_columns]

# Preview the training subset.
df_training

Unnamed: 0,Latitude,Longitude,Magnitude,Depth
0,19.2460,145.6160,6.0,131.60
1,1.8630,127.3520,5.8,80.00
2,-20.5790,-173.9720,6.2,20.00
3,-59.0760,-23.5570,5.8,15.00
4,11.9380,126.4270,5.8,15.00
...,...,...,...,...
23407,38.3917,-118.8941,5.6,12.30
23408,38.3777,-118.8957,5.5,8.80
23409,36.9179,140.4262,5.9,10.00
23410,-9.0283,118.6639,6.3,79.00


In [11]:
# Remove every training row that has a missing value in any of its columns.
df_training = df_training.dropna(axis=0, how='any')
df_training

Unnamed: 0,Latitude,Longitude,Magnitude,Depth
0,19.2460,145.6160,6.0,131.60
1,1.8630,127.3520,5.8,80.00
2,-20.5790,-173.9720,6.2,20.00
3,-59.0760,-23.5570,5.8,15.00
4,11.9380,126.4270,5.8,15.00
...,...,...,...,...
23407,38.3917,-118.8941,5.6,12.30
23408,38.3777,-118.8957,5.5,8.80
23409,36.9179,140.4262,5.9,10.00
23410,-9.0283,118.6639,6.3,79.00


In [12]:
# Remove every test row that has a missing value in any of its columns.
# `dropna()` returns a new frame instead of modifying the caller, so the
# result must be assigned back; without the assignment `df_testing` keeps its
# null rows and everything derived from it later still contains NaNs.
df_testing = df_testing.dropna()
df_testing

Unnamed: 0,Latitude,Longitude,Magnitude,Depth
0,32.980000,-115.545833,2.68,11.500
1,2.832700,127.578600,5.00,78.930
2,32.973000,-115.550500,2.65,9.400
3,-5.949700,153.898800,4.10,10.000
4,-2.930200,139.432800,4.10,49.250
...,...,...,...,...
19995,-21.459800,168.774000,4.30,10.000
19996,35.239500,-97.745300,2.60,6.364
19997,42.139833,-121.692667,2.58,6.880
19998,67.461600,-158.713600,2.80,6.500


In [13]:
# Model inputs are the two coordinate columns; targets are magnitude and depth.
feature_columns = ['Latitude', 'Longitude']
target_columns = ['Magnitude', 'Depth']

# Features and targets taken from the training frame.
X = df_training[feature_columns]
y = df_training[target_columns]

# The same feature/target selection applied to the separate test frame.
x_new = df_testing[feature_columns]
y_new = df_testing[target_columns]

# Hold out 30% of the training frame for evaluation; the fixed `random_state`
# makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [29]:
# Build the baseline model: a random forest regressor with default
# hyperparameters, seeded so that repeated fits give the same forest.
model_reg = RandomForestRegressor(random_state=50)
model_reg.fit(X_train, y_train)

# `score` predicts on X_test internally, so no separate `predict` call is
# needed here. For a regressor it returns the coefficient of determination
# (R^2), not a classification accuracy; with two target columns the value is
# averaged over the targets. It is scaled by 100 to read as a percentage.
score = model_reg.score(X_test, y_test) * 100

In [30]:
# Display the baseline model's score on the held-out split (already scaled by 100).
score

35.57926568668559

In [22]:
# Try to improve the model by automating hyperparameter tuning: a grid search
# over the candidate forest sizes (`n_estimators`) listed below.
parameters = {'n_estimators': [10, 20, 50, 100, 200, 500]}

grid_obj = GridSearchCV(estimator=model_reg, param_grid=parameters)

# `fit` returns the fitted search object, which is bound to `grid_fit`.
grid_fit = grid_obj.fit(X_train, y_train)

# Take the estimator selected by the search and predict on the held-out split.
best_fit = grid_fit.best_estimator_
results = best_fit.predict(X_test)

In [23]:
# Preview predicted values: one row per held-out sample. The columns follow
# the order of the targets the model was fitted on, i.e. Magnitude then Depth.
print(results)

[[  5.7774 126.2732]
 [  5.5182  12.5178]
 [  5.69    68.0792]
 ...
 [  5.7378  53.5324]
 [  5.5936  99.5556]
 [  6.0434  35.1892]]


In [24]:
# Score the tuned model on the same held-out split, scaled to a percentage.
# Note: this rebinds `score`, replacing the value computed for the baseline model.
score = 100 * best_fit.score(X_test, y_test)
score

35.90210710298802