<a href="https://colab.research.google.com/github/Vageesh-Jayaraman/Earthquake-Prediction/blob/main/Earthquake_Prediction_using_Random_Forest_Regressor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Importing and previewing the dataset

In [None]:
# Read Earthquake.csv (path is relative to the working directory) into a DataFrame.
dataset = pd.read_csv("Earthquake.csv")

In [None]:
# Preview the first five rows of the raw data.
dataset.head()

Unnamed: 0,latitude,longitude,depth,magType,nst,gap,rms,magNst,mag
0,27.319,91.51,10.0,mb,205.0,37.4,0.89,118.0,5.5
1,35.017,73.005,63.8,mb,40.0,95.8,0.94,7.0,4.0
2,24.357,94.807,124.8,mwb,206.0,17.3,0.77,,5.6
3,30.686,83.769,10.0,mb,50.0,40.6,1.08,9.0,4.4
4,14.001,92.862,42.6,mb,117.0,68.1,0.82,56.0,5.0


# Separating the dataset into independent (X) and dependent (Y) variables

In [None]:
# Feature matrix: every column of `dataset` except the last one.
# .copy() gives X its own data, so the `magType` clean-up that assigns into
# X later in the notebook does not write into a slice of `dataset`
# (which would raise SettingWithCopyWarning).
X = dataset.iloc[:, :-1].copy()

In [None]:
# Preview the first five rows of the feature matrix.
X.head(n=5)

Unnamed: 0,latitude,longitude,depth,magType,nst,gap,rms,magNst
0,27.319,91.51,10.0,mb,205.0,37.4,0.89,118.0
1,35.017,73.005,63.8,mb,40.0,95.8,0.94,7.0
2,24.357,94.807,124.8,mwb,206.0,17.3,0.77,
3,30.686,83.769,10.0,mb,50.0,40.6,1.08,9.0
4,14.001,92.862,42.6,mb,117.0,68.1,0.82,56.0


In [None]:
# Target vector: the last column of `dataset`, selected by position.
Y = dataset.iloc[:, -1]

In [None]:
# Preview the first five target values.
Y.head(n=5)

0    5.5
1    4.0
2    5.6
3    4.4
4    5.0
Name: mag, dtype: float64

# Encoding independent variables

## Searching for unique values and merging duplicate labels

In [None]:
# Distinct `magType` labels, listed in order of first appearance.
unique_values = pd.unique(X['magType'])
unique_values

array(['mb', 'mwb', 'mwc', 'ml', 'mw', 'ms', 'md', 'mww', 'mwr', 'Mb'],
      dtype=object)

In [None]:
# Merge labels in a single pass: 'Mb' becomes 'mb' and 'mww' becomes 'mw'.
X['magType'] = X['magType'].replace({'Mb': 'mb', 'mww': 'mw'})

In [None]:
# Re-list the distinct `magType` labels to confirm the merge took effect.
unique_values = pd.unique(X['magType'])
unique_values

array(['mb', 'mwb', 'mwc', 'ml', 'mw', 'ms', 'md', 'mwr'], dtype=object)

## OneHotEncoding

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the categorical `magType` column and pass every other column
# through unchanged.
# - The column is selected by name instead of by position, so the step fails
#   loudly (rather than silently encoding the wrong column) if the column
#   order changes or the cell is re-run on already-encoded data.
# - sparse_threshold=0 forces a dense array regardless of how many categories
#   the encoder produces, which is what the following pd.DataFrame(X) expects.
# The encoded indicator columns come first in the output, followed by the
# passthrough columns.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), ['magType'])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = ct.fit_transform(X)

In [None]:
# Wrap the encoded array in a DataFrame; no column names are supplied, so the
# columns are labelled with positional integers.
X = pd.DataFrame(data=X)
X.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.319,91.51,10.0,205.0,37.4,0.89,118.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.017,73.005,63.8,40.0,95.8,0.94,7.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,24.357,94.807,124.8,206.0,17.3,0.77,
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.686,83.769,10.0,50.0,40.6,1.08,9.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.001,92.862,42.6,117.0,68.1,0.82,56.0


# Dealing with null values

## Checking columns with null values

In [None]:
# Count the missing values in each column.
X.isnull().sum()

0        0
1        0
2        0
3        0
4        0
5        0
6        0
7        0
8        0
9        0
10       0
11    2857
12    1720
13       7
14    1432
dtype: int64

## Replacing null values with the column mean

In [None]:
from sklearn.impute import SimpleImputer

# Columns from position 11 onwards are the ones imputed here; the null-count
# output above shows these are the columns that contain missing values.
columns_to_impute = X.columns[11:]

# Fill each missing entry with the mean of its column.
# NOTE(review): the imputer is fitted on the full feature matrix before the
# train/test split, so test rows contribute to the means — confirm whether
# this leakage is acceptable.
imputer = SimpleImputer(strategy='mean')
imputed_values = imputer.fit_transform(X[columns_to_impute])
X[columns_to_impute] = imputed_values


In [None]:
# Re-wrap X as a DataFrame and preview the first five rows after imputation.
X = pd.DataFrame(data=X)
X.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.319,91.51,10.0,205.0,37.4,0.89,118.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.017,73.005,63.8,40.0,95.8,0.94,7.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,24.357,94.807,124.8,206.0,17.3,0.77,14.62973
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.686,83.769,10.0,50.0,40.6,1.08,9.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.001,92.862,42.6,117.0,68.1,0.82,56.0


# Splitting into training and test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as the test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=42,
)

# **----------------------------------Data Preprocessing is over----------------------------------**




# Training the model, predicting the test set, and evaluating the predictions

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 10 trees with a fixed seed, fitted on the training split.
rad = RandomForestRegressor(
    n_estimators=10,
    random_state=1,
)
rad.fit(X_train, y_train)

In [None]:
# Predict the target for the held-out test rows with the fitted forest.
y_pred = rad.predict(X_test)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Compute all regression metrics on the held-out test set first...
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE
mae = mean_absolute_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

# ...then report them in the same order as before.
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R²): {r_squared}")


Mean Squared Error (MSE): 0.07391648979591837
Root Mean Squared Error (RMSE): 0.27187587203707203
Mean Absolute Error (MAE): 0.20330068027210887
R-squared (R²): 0.6879643124232275
