<a href="https://colab.research.google.com/github/Vageesh-Jayaraman/Earthquake-Prediction/blob/main/Earthquake_Prediction_using_ANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **DATA PREPROCESSING**

# Importing Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Importing and filtering dataset

In [3]:
dataset=pd.read_csv("Earthquake.csv")

In [4]:
dataset[:5]

Unnamed: 0,latitude,longitude,depth,magType,nst,gap,rms,magNst,mag
0,27.319,91.51,10.0,mb,205.0,37.4,0.89,118.0,5.5
1,35.017,73.005,63.8,mb,40.0,95.8,0.94,7.0,4.0
2,24.357,94.807,124.8,mwb,206.0,17.3,0.77,,5.6
3,30.686,83.769,10.0,mb,50.0,40.6,1.08,9.0,4.4
4,14.001,92.862,42.6,mb,117.0,68.1,0.82,56.0,5.0


# Separating it into dependent and independent variables

In [5]:
X = dataset.iloc[:,:-1]

In [6]:
X.head()

Unnamed: 0,latitude,longitude,depth,magType,nst,gap,rms,magNst
0,27.319,91.51,10.0,mb,205.0,37.4,0.89,118.0
1,35.017,73.005,63.8,mb,40.0,95.8,0.94,7.0
2,24.357,94.807,124.8,mwb,206.0,17.3,0.77,
3,30.686,83.769,10.0,mb,50.0,40.6,1.08,9.0
4,14.001,92.862,42.6,mb,117.0,68.1,0.82,56.0


In [7]:
Y = dataset.iloc[:,-1]

In [8]:
Y.head()

0    5.5
1    4.0
2    5.6
3    4.4
4    5.0
Name: mag, dtype: float64

# Encoding independent variables

## Searching for unique values and replacing duplicates

In [9]:
unique_values = X['magType'].unique()
unique_values

array(['mb', 'mwb', 'mwc', 'ml', 'mw', 'ms', 'md', 'mww', 'mwr', 'Mb'],
      dtype=object)

In [10]:
X['magType'] = X['magType'].replace('Mb', 'mb')
X['magType'] = X['magType'].replace('mww', 'mw')

In [11]:
unique_values = X['magType'].unique()
unique_values

array(['mb', 'mwb', 'mwc', 'ml', 'mw', 'ms', 'md', 'mwr'], dtype=object)

## OneHotEncoding

In [12]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[3])],remainder='passthrough')
X= ct.fit_transform(X)

In [13]:
X = pd.DataFrame(X)
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.319,91.51,10.0,205.0,37.4,0.89,118.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.017,73.005,63.8,40.0,95.8,0.94,7.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,24.357,94.807,124.8,206.0,17.3,0.77,
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.686,83.769,10.0,50.0,40.6,1.08,9.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.001,92.862,42.6,117.0,68.1,0.82,56.0


# Dealing with null values

## Checking columns with null values

In [14]:
X.isna().sum()

0        0
1        0
2        0
3        0
4        0
5        0
6        0
7        0
8        0
9        0
10       0
11    2857
12    1720
13       7
14    1432
dtype: int64

## Replacing null values with mean

In [15]:
from sklearn.impute import SimpleImputer
columns_to_impute = X.columns[11:]
imputer = SimpleImputer(strategy='mean')
X[columns_to_impute] = imputer.fit_transform(X[columns_to_impute])


In [16]:
X = pd.DataFrame(X)
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.319,91.51,10.0,205.0,37.4,0.89,118.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.017,73.005,63.8,40.0,95.8,0.94,7.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,24.357,94.807,124.8,206.0,17.3,0.77,14.62973
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.686,83.769,10.0,50.0,40.6,1.08,9.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.001,92.862,42.6,117.0,68.1,0.82,56.0


# Splitting into training and test set

In [17]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,Y,test_size=0.25,random_state=42)

# Feature Scaling

In [18]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train.iloc[:, 8:] = sc.fit_transform(X_train.iloc[:, 8:])
X_test.iloc[:, 8:] = sc.transform(X_test.iloc[:, 8:])

# **ARTIFICIAL NEURAL NETWORK**

In [19]:
import tensorflow as tf
tf.__version__

'2.13.0'

## Initialising ANN

In [20]:
ann = tf.keras.models.Sequential()

## Adding the first hidden layer

In [21]:
ann.add(tf.keras.layers.Dense(units = 32, activation ="relu"))

## Adding the second hidden layer

In [22]:
ann.add(tf.keras.layers.Dense(units = 32, activation ="relu"))

## Adding the output layer

In [23]:
ann.add(tf.keras.layers.Dense(units = 1))

## Compiling ANN

In [24]:
ann.compile(optimizer = 'adam',loss='mean_squared_error')

## Training the ANN model on the Training set

In [25]:
ann.fit(X_train,y_train,batch_size=32,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7e8029312440>

## Predicting the results of the Test set

In [26]:
y_pred=ann.predict(X_test)



In [27]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

mse = mean_squared_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error (MSE): {mse:}')
print(f'Root Mean Squared Error (RMSE): {rmse:}')
print(f'Mean Absolute Error (MAE): {mae:}')
print(f'R-squared (R²): {r2:}')


Mean Squared Error (MSE): 0.0711930964495914
Root Mean Squared Error (RMSE): 0.26682034489444656
Mean Absolute Error (MAE): 0.20335348696286987
R-squared (R²): 0.6994610152253953
