### Importing Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [10]:
# Load the India earthquake extract into a DataFrame.
# NOTE(review): '/content/...' is an absolute path (looks like a Colab upload
# location — confirm); the notebook will not run elsewhere without editing it.
df_India = pd.read_csv(filepath_or_buffer='/content/eq_India.csv')

# Peek at four randomly chosen rows (unseeded, so the rows differ between runs).
df_India.sample(n=4)

Unnamed: 0,index,time,latitude,longitude,depth,mag,place
251,675,2021-06-22T16:44:36.534Z,28.0137,92.2097,65.76,4.6,"56 km NNE of Tawang, India"
582,1719,2018-12-04T09:44:47.480Z,24.5152,94.8152,10.0,4.5,"76 km E of W?ngjing, India"
123,333,2022-05-09T22:06:30.273Z,9.7019,93.498,10.0,4.5,"232 km SSE of Port Blair, India"
562,1657,2019-02-17T02:58:32.530Z,22.4139,92.8472,10.0,4.8,"16 km WSW of Saiha, India"


In [11]:
# (number of rows, number of columns) of the loaded frame.
df_India.shape

(1337, 7)

### About the dataset

- In this project, we work with earthquake data obtained from the United States Geological Survey (USGS) earthquake search (https://earthquake.usgs.gov/earthquakes/search/). The USGS catalogue contains information about earthquakes that have occurred around the world, including their magnitude, location, and time of occurrence; this notebook uses an extract of that data for India (`eq_India.csv`).

- The data includes various properties such as the magnitude of the earthquake on the Richter scale, the latitude and longitude of the earthquake's epicenter, the depth of the earthquake, and the date and time of the earthquake's occurrence. The data is updated regularly to provide an accurate representation of current earthquake activity.

- This dataset provides valuable information for the scientific community to better understand earthquakes and their causes, as well as for emergency management agencies to respond to and prepare for the impacts of earthquakes. By analyzing this data, we can gain insights into patterns in earthquake activity, the relationship between earthquakes and tectonic plate movements, and the impact of earthquakes on populated areas.

## Extracting Location and Country from `place`

In [12]:
# Extract location and country information from 'place'
# (e.g. "56 km NNE of Tawang, India" -> location "56 km NNE of Tawang", Country "India").
#
# - The text is split once, at the LAST comma, so the country is whatever follows
#   the final comma even if the location part itself contains commas.
# - Both parts are stripped; a plain split(',') leaves a leading space (" India").
# - Rows without a comma get NaN as Country.
# - The guard makes the cell safe to re-run: once 'place' has been consumed the
#   cell is a no-op instead of raising KeyError.
if 'place' in df_India.columns:
    place_parts = df_India['place'].str.rsplit(',', n=1)
    df_India = df_India.assign(
        location=place_parts.str.get(0).str.strip(),
        Country=place_parts.str.get(1).str.strip(),
    ).drop(columns='place')

In [13]:
# Spot-check five random rows to confirm the new 'location' / 'Country' columns.
df_India.sample(5)

Unnamed: 0,index,time,latitude,longitude,depth,mag,location,Country
524,1452,2019-06-23T02:35:10.239Z,9.9478,93.881,10.0,4.3,226 km SSE of Port Blair,India
1131,3965,2013-10-17T00:33:14.310Z,14.0582,93.0065,31.88,4.2,262 km N of Bamboo Flat,India
532,1476,2019-05-24T21:23:34.749Z,13.8733,92.9509,10.0,5.0,241 km N of Bamboo Flat,India
434,1157,2020-05-22T19:43:08.261Z,11.3192,93.5383,103.13,4.3,94 km ESE of Port Blair,India
84,250,2022-07-04T17:31:21.363Z,10.4304,94.2504,10.0,4.9,213 km SE of Port Blair,India


In [14]:
# Two subsets of stronger events: magnitude strictly above 5, and strictly above 6.
eq_greater_than_5 = df_India.loc[df_India['mag'].gt(5)]
eq_greater_than_6 = df_India.loc[df_India['mag'].gt(6)]

In [15]:
# Summary statistics (count, mean, std, min, quartiles, max) of the magnitude column.
df_India.mag.describe()

# Reading the summary: the smallest magnitude in the data is 3.2 and the largest is 6.7.

Unnamed: 0,mag
count,1337.0
mean,4.492072
std,0.363631
min,3.2
25%,4.2
50%,4.5
75%,4.7
max,6.7


In [16]:
# List the column labels currently present in the frame.
df_India.columns

Index(['index', 'time', 'latitude', 'longitude', 'depth', 'mag', 'location',
       'Country'],
      dtype='object')

In [17]:
# Keep only what the map needs: coordinates, depth, magnitude and the place label.
df_location = df_India.loc[
    :,
    ['latitude', 'longitude', 'depth', 'mag', 'location'],
]
df_location

Unnamed: 0,latitude,longitude,depth,mag,location
0,12.1356,93.5221,104.548,4.9,99 km ENE of Port Blair
1,24.8127,93.0266,10.000,5.1,2 km NE of Lakhipur
2,11.5055,95.1941,10.000,4.4,267 km E of Port Blair
3,23.5913,92.4156,10.000,4.1,34 km SW of Sairang
4,11.2892,91.8207,20.942,4.1,107 km WSW of Bamboo Flat
...,...,...,...,...,...
1332,25.2030,95.1530,105.300,4.6,83 km SE of Phek
1333,33.6240,74.8920,22.800,4.2,11 km SSE of Shup?yan
1334,35.7760,77.3460,62.900,4.6,106 km NNE of Thang
1335,10.6320,91.7370,10.000,4.6,158 km SW of Port Blair


In [18]:
import folium

- In this project, we have used `Folium`, a powerful Python library for creating interactive maps. Folium provides an easy-to-use interface for creating maps and visualizing data on top of them.

- One of the key features of Folium is its ability to visualize data on maps, making it a valuable tool for data analysis and presentation.
- In this project, we have used Folium to visualize the locations of all earthquakes in the dataset and, separately, of those with magnitudes greater than 5. By plotting the earthquakes on a map, we can easily see the geographical distribution of these events.

In [19]:
# Base map centred on the mean epicentre of all events.
# zoom_start=5 opens on a region-wide view; a street-level zoom such as 14 would
# show almost none of the markers, because the events are spread over many
# degrees of latitude and longitude.
# NOTE: `map` shadows the Python builtin of the same name; the name is kept
# because the following cells refer to it.
map = folium.Map(
    location=[df_location.latitude.mean(), df_location.longitude.mean()],
    zoom_start=5,
    control_scale=True,
)

In [20]:
# Drop one marker per earthquake; the marker's popup carries the magnitude.
for quake in df_location.itertuples(index=False):
    marker = folium.Marker([quake.latitude, quake.longitude], popup=quake.mag)
    marker.add_to(map)
map

#map.save(outfile='eq_India.html')

In [21]:
# Map restricted to events with magnitude > 5, centred on their mean epicentre.
# zoom_start=5 opens on a region-wide view; a street-level zoom such as 14 would
# leave almost every marker outside the initial viewport.
map = folium.Map(
    location=[eq_greater_than_5.latitude.mean(), eq_greater_than_5.longitude.mean()],
    zoom_start=5,
    control_scale=True,
)

# One marker per strong event; the popup carries the magnitude.
for index, location_info in eq_greater_than_5.iterrows():
    folium.Marker(
        [location_info["latitude"], location_info["longitude"]],
        popup=location_info["mag"],
    ).add_to(map)

map

#map.save(outfile='eq_India_more_than_5.html')

# Earthquake Prediction Model

In [25]:
# Prepare data for modeling
from sklearn.model_selection import train_test_split

# Features: epicentre coordinates and depth. Target: magnitude.
X = df_India.loc[:, ['latitude', 'longitude', 'depth']]
y = df_India.loc[:, 'mag']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [27]:
from sklearn.ensemble import RandomForestRegressor

# Baseline regressor: a 100-tree random forest, seeded for repeatable results.
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X=X_train, y=y_train)

In [28]:
from sklearn.metrics import mean_squared_error

# Score the baseline on the held-out rows with mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.14982089552238823


In [37]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [35]:
# Candidate regressors, keyed by the display name used when reporting results.
models = dict(
    [
        ("Random Forest", RandomForestRegressor(n_estimators=100, random_state=42)),
        ("Linear Regression", LinearRegression()),
        ("Decision Tree", DecisionTreeRegressor(random_state=42)),
    ]
)

In [38]:
# Train and evaluate models
# Each candidate is fitted on the training split and scored on the test split.
# The loop uses its own names (`estimator`, `test_predictions`) so that the
# notebook-level `model` and `y_pred` created by the baseline cells are not
# silently replaced by whichever candidate happens to be iterated last.
results = {}
for model_name, estimator in models.items():
    estimator.fit(X_train, y_train)
    test_predictions = estimator.predict(X_test)
    results[model_name] = {
        "MSE": mean_squared_error(y_test, test_predictions),
        "R2": r2_score(y_test, test_predictions),
        "MAE": mean_absolute_error(y_test, test_predictions),
    }

In [32]:
# Print one small report per model: name, the three metrics to 4 decimals,
# then a 20-character divider.
divider = "-" * 20
for model_name, metrics in results.items():
    report_lines = [
        f"Model: {model_name}",
        f"  MSE: {metrics['MSE']:.4f}",
        f"  R2: {metrics['R2']:.4f}",
        f"  MAE: {metrics['MAE']:.4f}",
        divider,
    ]
    print("\n".join(report_lines))


Model: Random Forest
  MSE: 0.1498
  R2: -0.1568
  MAE: 0.2867
--------------------
Model: Linear Regression
  MSE: 0.1251
  R2: 0.0339
  MAE: 0.2675
--------------------
Model: Decision Tree
  MSE: 0.2870
  R2: -1.2160
  MAE: 0.3948
--------------------


In [39]:
# Calculate distance to a specific location (e.g., a major fault line)
import geopy.distance

# Reference point as (latitude, longitude) in degrees.
# NOTE(review): this is a placeholder. 37.77 N / 122.42 W is in the western
# hemisphere, while the rows shown in this notebook lie roughly between
# 9-36 N and 74-96 E. Replace it with a location that is meaningful for
# this dataset before relying on the feature.
fault_line_location = (37.7749, -122.4194)  # Example coordinates

# Geodesic distance in kilometres from every epicentre to the reference point.
df_India['distance_to_fault'] = [
    geopy.distance.geodesic((lat, lon), fault_line_location).km
    for lat, lon in zip(df_India['latitude'], df_India['longitude'])
]

# Create interaction terms
# NOTE(review): neither new column is part of the feature matrices (X) built in
# the cells visible here — confirm whether they were meant to be used.
df_India['lat_lon_interaction'] = df_India['latitude'].mul(df_India['longitude'])

In [40]:
from sklearn.model_selection import GridSearchCV

# Search space: three forest sizes x three depth limits (None = unlimited depth).
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 5, 10],
)

# Exhaustive search over the grid with 5-fold cross-validation on the training split.
grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)

# Keep the estimator the search selected.
best_model = grid_search.best_estimator_

In [43]:
# Display the estimator chosen by the grid search (shows its hyper-parameters).
best_model

In [41]:
from sklearn.svm import SVR

# Support-vector regression with an RBF kernel.
# SVMs are not scale-invariant, and the three features (latitude, longitude,
# depth) live on different numeric ranges, so the inputs are standardised first.
# Doing it inside a pipeline means the scaler is fitted on the training split
# only and is re-applied automatically by svr_model.predict(...).
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

svr_model = make_pipeline(StandardScaler(), SVR(kernel='rbf'))
svr_model.fit(X_train, y_train)

In [44]:
import pandas as pd

# Classify magnitude into categories
def classify_magnitude(mag: float, *, high: float = 6.0, medium: float = 4.0) -> str:
    """Map a magnitude to a coarse category label.

    Parameters
    ----------
    mag : float
        Magnitude to classify.
    high : float, keyword-only, default 6.0
        Inclusive lower bound of the 'High' category.
    medium : float, keyword-only, default 4.0
        Inclusive lower bound of the 'Medium' category.

    Returns
    -------
    str
        'High' if ``mag >= high``, otherwise 'Medium' if ``mag >= medium``,
        otherwise 'Low'.

    Notes
    -----
    A NaN magnitude fails both comparisons and is therefore labelled 'Low'.
    """
    if mag >= high:
        return 'High'
    if mag >= medium:
        return 'Medium'
    return 'Low'

# Label every event with its magnitude band.
df_India['magnitude_category'] = df_India['mag'].map(classify_magnitude)

# Prepare data for classification: same three features, categorical target.
# NOTE: this rebinds X, y and the train/test splits that the regression cells
# above also use.
X = df_India.loc[:, ['latitude', 'longitude', 'depth']]
y = df_India.loc[:, 'magnitude_category']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Train a classification model (e.g., RandomForestClassifier)
from sklearn.ensemble import RandomForestClassifier

# 100-tree random-forest classifier, seeded for repeatability; fit() hands back
# the fitted estimator, so construction and training are chained.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predicted magnitude category for every held-out event.
y_pred = model.predict(X_test)

# Use classification metrics (accuracy, precision, recall, F1-score)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Accuracy plus support-weighted precision / recall / F1 over the categories.
# zero_division=0 states explicitly what scikit-learn otherwise does while
# emitting an undefined-metric warning: a category with no predicted (or no
# true) samples contributes 0 for that metric. The reported numbers are the same.
# NOTE(review): the magnitude summary earlier in the notebook (25th percentile
# 4.2, 75th percentile 4.7) means most events are 'Medium', so a high accuracy
# is expected even from a model that always predicts 'Medium' — compare against
# that baseline before drawing conclusions.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.9813
Precision: 0.9832
Recall: 0.9813
F1 Score: 0.9819


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
