In [113]:
import numpy as np
import pandas as pd
from sklearn.cross_validation import train_test_split

In [42]:
df = pd.read_csv('dataset\database.csv')

In [115]:
df.shape[0]

23412

In [126]:
train, test = train_test_split(np.arange(0, df.shape[0],1), train_size = 0.8, random_state = 66)
mask = np.zeros(df.shape[0])
mask[train] = 1
mask = mask == 1

array([ True,  True,  True, ..., False,  True,  True], dtype=bool)

In [130]:
df.head()

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,01/02/1965,13:44:18,19.246,145.616,Earthquake,131.6,,,6.0,MW,...,,,,,,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic
1,01/04/1965,11:29:49,1.863,127.352,Earthquake,80.0,,,5.8,MW,...,,,,,,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic
2,01/05/1965,18:05:58,-20.579,-173.972,Earthquake,20.0,,,6.2,MW,...,,,,,,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic
3,01/08/1965,18:49:43,-59.076,-23.557,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic
4,01/09/1965,13:32:50,11.938,126.427,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic


In [131]:
df.describe()

Unnamed: 0,Latitude,Longitude,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Error,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square
count,23412.0,23412.0,23412.0,4461.0,7097.0,23412.0,327.0,2564.0,7299.0,1604.0,1156.0,17352.0
mean,1.679033,39.639961,70.767911,4.993115,275.364098,5.882531,0.07182,48.944618,44.163532,3.99266,7.662759,1.022784
std,30.113183,125.511959,122.651898,4.875184,162.141631,0.423066,0.051466,62.943106,32.141486,5.377262,10.430396,0.188545
min,-77.08,-179.997,-1.1,0.0,0.0,5.5,0.0,0.0,0.0,0.004505,0.085,0.0
25%,-18.653,-76.34975,14.5225,1.8,146.0,5.6,0.046,10.0,24.1,0.96875,5.3,0.9
50%,-3.5685,103.982,33.0,3.5,255.0,5.7,0.059,28.0,36.0,2.3195,6.7,1.0
75%,26.19075,145.02625,54.0,6.3,384.0,6.0,0.0755,66.0,54.0,4.7245,8.1,1.13
max,86.005,179.998,700.0,91.295,934.0,9.1,0.41,821.0,360.0,37.874,99.0,3.44


In [132]:
features = df[['Latitude', 'Longitude', 'Depth']]
target = df['Magnitude']

In [156]:
features_std =  (features - np.mean(features))/np.std(features)
target_std = (target - np.min(target))/(np.max(target)-np.min(target))

In [157]:
features_std.train = features_std[mask]
target_std.train = target_std[mask]
features_std.test = features_std[~mask]
target_std.test = target_std[~mask]

In [158]:
len(target_std.test)

4683

In [159]:
print('Std for features is: ', np.std(features_std))
print('Std for target is: ', np.std(target_std))

Std for features is:  Latitude     1.0
Longitude    1.0
Depth        1.0
dtype: float64
Std for target is:  0.1175157233301876


### Build Neural Network Model (Predicting Magnitude)

In [161]:
n_records, n_features = features_std.train.shape
n_hidden = 2

In [178]:
# Define Sigmoid function
def sigmoid(x):
    return 1/(1+np.exp(-x))

def sigPrime(x):
    return x*(1-x)

# Initializing the weights
weight_1 = np.random.normal(0,1/n_features**.5,size=(n_features, n_hidden))
weight_2 = np.random.normal(0,1/n_features**.5,size=(n_hidden))
# Initialize hyper parameter
learnrate = 0.5
epochs = 1000

# Loop Neural Networks

for i in range(1000):
    
    del_layer_2 = np.zeros(weight_2.shape)
    del_layer_1 = np.zeros(weight_1.shape)
    
    for x, y in zip(features_std.train.values, target_std.train.values):

        # Get weighted values

        layer_1 = sigmoid(np.dot(x, weight_1))
        layer_2 = sigmoid(np.dot(layer_1, weight_2))
        #print(layer_2)

        # Get error and error term for layer 2

        error_2 = y - layer_2        
        error_term_2 = error_2 * sigPrime(layer_2)

        # Get error and error term for layer 1

        error_1 = np.dot(error_term_2, weight_2)
        error_term_1 = error_1 * sigPrime(layer_1)

        # update the steps
        del_layer_2 += error_term_2 * layer_1
        del_layer_1 += error_term_1 * x[:,None]
    
    if (i % 100) == 0:
        print('Error Rate at %d is: %0.3f' % (i,error_2**2))
        transform_1 = sigmoid(np.dot(features_std.train, weight_1))
        ypred = sigmoid(np.dot(transform_1, weight_2))
        MSE = mean_squared_error(target_std.train, ypred)
        print('MSE for train: ', MSE)
    # update the weights
    weight_1 += learnrate * del_layer_1 / n_records
    weight_2 += learnrate * del_layer_2 / n_records

    if (i % 200) == 0:
        transform_1 = sigmoid(np.dot(features_std.test, weight_1))
        ypred = sigmoid(np.dot(transform_1, weight_2))
        MSE = mean_squared_error(target_std.test, ypred)
        print('*****************************************')
        print('MSE for test: ', MSE)
        print('*****************************************')

Error Rate at 0 is: 0.341
MSE for train:  0.24490439012
*****************************************
MSE for test:  0.237777542502
*****************************************
Error Rate at 100 is: 0.079
MSE for train:  0.0345673175269
Error Rate at 200 is: 0.042
MSE for train:  0.0195577192502
*****************************************
MSE for test:  0.0191502752918
*****************************************
Error Rate at 300 is: 0.029
MSE for train:  0.0161469325517
Error Rate at 400 is: 0.023
MSE for train:  0.0149499522578
*****************************************
MSE for test:  0.0146101576293
*****************************************
Error Rate at 500 is: 0.020
MSE for train:  0.014431468577
Error Rate at 600 is: 0.018
MSE for train:  0.0141770760792
*****************************************
MSE for test:  0.0138552206463
*****************************************
Error Rate at 700 is: 0.016
MSE for train:  0.0140414602641
Error Rate at 800 is: 0.015
MSE for train:  0.0139646730334
******