In [None]:
pip install numpy pandas matplotlib scikit-learn tensorflow networkx sklearn spektral torch torch-geometric torch-scatter torch-sparse torch-cluster

In [None]:
#Dataset Used: https://www.kaggle.com/datasets/mohitkr05/global-significant-earthquake-database-from-2150bc
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import kneighbors_graph
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Load the dataset
# Raw string literal: the Windows path is full of backslashes (`\A`, `\K`, `\d`, ...)
# that an ordinary string treats as invalid escape sequences. The resulting path is
# character-for-character the same, without the invalid-escape warnings.
url = r"D:\APURAV\K. K. Wagh\Study\BE\Semester VII\Final Year Project Sem VII\dataset\Worldwide-Earthquake-database.csv"
df = pd.read_csv(url)

# Define target variable
def _label_row(row):
    """Return 1 if the row meets any significance criterion, otherwise 0.

    The criteria are tested in the order below and evaluation stops at the
    first one that holds.
    """
    if row['DEATHS'] > 0:
        return 1
    if row['DAMAGE_MILLIONS_DOLLARS'] > 0:
        return 1
    if row['EQ_MAG_MW'] >= 7.5:
        return 1
    if row['INTENSITY'] == 'X':
        return 1
    if row['FLAG_TSUNAMI'] == 1:
        return 1
    return 0


# One label per row, stored as a new column on the frame.
df['SIGNIFICANT'] = df.apply(_label_row, axis=1)


# Drop unnecessary columns: the record id, the MISSING count and the *_DESCRIPTION fields
unused_columns = [
    'I_D',
    'DEATHS_DESCRIPTION',
    'MISSING',
    'MISSING_DESCRIPTION',
    'INJURIES_DESCRIPTION',
    'DAMAGE_DESCRIPTION',
    'HOUSES_DESTROYED_DESCRIPTION',
    'HOUSES_DAMAGED_DESCRIPTION',
    'TOTAL_DEATHS_DESCRIPTION',
    'TOTAL_MISSING_DESCRIPTION',
    'TOTAL_INJURIES_DESCRIPTION',
    'TOTAL_DAMAGE_DESCRIPTION',
    'TOTAL_HOUSES_DESTROYED_DESCRIPTION',
    'TOTAL_HOUSES_DAMAGED_DESCRIPTION',
]
df = df.drop(columns=unused_columns)


# Visualize distribution of magnitude types
magnitude_columns = ['EQ_MAG_MS', 'EQ_MAG_MW', 'EQ_MAG_MB', 'EQ_MAG_ML', 'EQ_MAG_MFA', 'EQ_MAG_UNK']

# One shared axes; each magnitude scale is drawn as a semi-transparent histogram
# over its non-missing values so the overlapping distributions stay visible.
fig, ax = plt.subplots(figsize=(12, 6))
for col in magnitude_columns:
    ax.hist(df[col].dropna(), bins=20, alpha=0.5, label=col)

ax.set_xlabel('Magnitude')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Earthquake Magnitudes')
ax.legend()
plt.show()


# Prepare data for machine learning model
# SIGNIFICANT is computed directly from these five columns. Keeping them among the
# features would let every model read the answer off its inputs (target leakage),
# so they are removed from X together with the label itself.
# NOTE(review): other impact columns that may remain in the frame (injuries, houses
# destroyed, TOTAL_* figures) could still act as proxies for the label — confirm
# against the dataset schema.
label_source_columns = ['DEATHS', 'DAMAGE_MILLIONS_DOLLARS', 'EQ_MAG_MW', 'INTENSITY', 'FLAG_TSUNAMI']
X = df.drop(columns=['SIGNIFICANT'] + label_source_columns)
y = df['SIGNIFICANT']


# Identify numerical and categorical columns
numerical_columns = X.select_dtypes(include=['number']).columns
categorical_columns = X.select_dtypes(include=['object']).columns


# Numeric features: fill gaps with the column mean, then standardise to zero mean and
# unit variance. Tree models are indifferent to the scaling, but the neural network
# trained on this output needs inputs on comparable scales.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Create column transformer: scaled numeric block + one-hot encoded categorical block.
# handle_unknown='ignore' encodes categories unseen during fit as all zeros.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical_columns),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ])


# Chain the preprocessing step and a random forest into a single estimator.
# The step names are looked up again later through `pipeline.named_steps`.
rf_classifier = RandomForestClassifier(random_state=42)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', rf_classifier),
])


# Hold out 20% of the rows for testing (fixed seed so the split is repeatable)
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


# Fit preprocessing + random forest on the training rows only
pipeline.fit(X_train, y_train)


# Score the fitted pipeline on the held-out rows
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
rf_report = classification_report(y_test, y_pred)
print(f"Random Forest Accuracy: {accuracy}")
print(rf_report)


# Neural network on the same preprocessed features as the random forest
def _to_dense(features):
    """Return `features` as a dense float32 NumPy array.

    The column transformer returns either a SciPy sparse matrix or a plain
    ndarray depending on how sparse the encoded data is, so both are accepted
    instead of calling `.toarray()` unconditionally.
    """
    if hasattr(features, "toarray"):
        features = features.toarray()
    return np.asarray(features, dtype=np.float32)


# Reuse the preprocessor that was fitted inside the pipeline (no refit on test data).
feature_preprocessor = pipeline.named_steps['preprocessor']
X_train_preprocessed = _to_dense(feature_preprocessor.transform(X_train))
X_test_preprocessed = _to_dense(feature_preprocessor.transform(X_test))


# Seed TensorFlow's global generator so weight initialisation is repeatable.
tf.random.set_seed(42)

# Train a simple neural network.
# The input width is declared with an explicit Input layer rather than the
# `input_dim` keyword on the first Dense layer.
model = Sequential([
    tf.keras.Input(shape=(X_train_preprocessed.shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train_preprocessed, y_train, epochs=10, batch_size=64, validation_split=0.2)


# Evaluate the neural network; evaluate() returns [loss, accuracy] for the metrics above
accuracy_nn = model.evaluate(X_test_preprocessed, y_test, verbose=0)[1]
print(f"Neural Network Accuracy: {accuracy_nn}")


In [None]:
# Import necessary libraries
import torch
from torch_geometric.data import Data, DataLoader
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from sklearn.metrics import accuracy_score, classification_report


# Preprocess data for GNN
df['LATITUDE'] = pd.to_numeric(df['LATITUDE'], errors='coerce')
df['LONGITUDE'] = pd.to_numeric(df['LONGITUDE'], errors='coerce')

# errors='coerce' turns unparsable coordinates into NaN, and a single NaN feature
# would propagate through the graph convolutions into the loss. Missing values are
# replaced by the column mean so every row keeps its node (and its position).
coordinates = df[['LATITUDE', 'LONGITUDE']]
coordinates = coordinates.fillna(coordinates.mean())
geo_data = coordinates.to_numpy(dtype=np.float32)

# Node id lists by class; informational only — the graph below does not depend on labels.
all_nodes = df.index.tolist()
significant_nodes = df.index[df['SIGNIFICANT'] == 1].tolist()
non_significant_nodes = df.index[df['SIGNIFICANT'] == 0].tolist()

# Connect every event to its nearest neighbours in (latitude, longitude) space.
# The edges are built from the features only: wiring the graph from the SIGNIFICANT
# column would leak the labels being predicted into the model's structure.
# Distances are plain Euclidean distances in degrees, a rough stand-in for
# great-circle distance.
N_NEIGHBORS = 8
n_neighbors = max(1, min(N_NEIGHBORS, len(geo_data) - 1))
adjacency = kneighbors_graph(geo_data, n_neighbors=n_neighbors, mode='connectivity', include_self=False)
# Symmetrise so that each neighbour relation is present in both directions.
adjacency = adjacency.maximum(adjacency.T).tocoo()
edges = torch.tensor(np.vstack([adjacency.row, adjacency.col]), dtype=torch.long)

gnn_data = Data(
    x=torch.tensor(geo_data, dtype=torch.float),
    edge_index=edges,
    y=torch.tensor(df['SIGNIFICANT'].values, dtype=torch.float),
)



# Define GNN model
class GNNModel(torch.nn.Module):
    """Two-layer graph convolutional network producing one probability per node.

    Args:
        in_channels: Number of input features per node (default 2).
        hidden_channels: Width of the hidden graph-convolution layer (default 16).
    """

    def __init__(self, in_channels=2, hidden_channels=16):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, 1)

    def forward(self, data):
        """Run both convolutions on `data.x` / `data.edge_index`.

        Returns:
            Tensor with one column holding the sigmoid of the second layer's
            output, i.e. values in [0, 1].
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        # Probabilities (not logits) are returned because the output is fed to BCELoss.
        return torch.sigmoid(x)

# Seed PyTorch so the weight initialisation (and therefore the run) is repeatable
torch.manual_seed(42)

# Instantiate GNN model
gnn_model = GNNModel()

# Loss function and optimizer for GNN (the model outputs probabilities, hence BCELoss)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(gnn_model.parameters(), lr=0.01)

# The graph holds every row of df, including the rows held out in X_test. Only the
# training rows may contribute to the loss; otherwise the test labels are used for
# training. get_indexer maps the training index labels to row positions in df,
# which are also the node positions in gnn_data.
train_nodes = torch.as_tensor(df.index.get_indexer(X_train.index), dtype=torch.long)
gnn_targets = gnn_data.y.view(-1, 1)

# Train GNN model
gnn_model.train()
for epoch in range(10):  # adjust the number of epochs as needed
    optimizer.zero_grad()
    out = gnn_model(gnn_data)  # full-graph forward pass
    loss = criterion(out[train_nodes], gnn_targets[train_nodes])
    loss.backward()
    optimizer.step()

# Score every node with the trained GNN
gnn_model.eval()  # switch layers to inference behaviour
with torch.no_grad():  # no gradients needed for prediction
    gnn_output = gnn_model(gnn_data)

# Threshold the probabilities at 0.5 and hand the 0/1 labels over as a NumPy array
above_threshold = gnn_output > 0.5
y_pred_gnn = above_threshold.float().numpy()

# Combine predictions from all models
# y_pred holds random-forest predictions for the test rows only, while y_pred_gnn
# has one prediction per row of df. Map the train/test index labels to row positions
# so both models contribute predictions for exactly the same samples.
train_positions = df.index.get_indexer(X_train.index)
test_positions = df.index.get_indexer(X_test.index)
gnn_all_pred = np.asarray(y_pred_gnn).reshape(-1)

# Out-of-fold random-forest predictions for the training rows: each row is predicted
# by a clone fitted without it, so the meta-classifier does not learn from in-sample
# predictions. The already fitted `pipeline` itself is left untouched.
rf_train_pred = cross_val_predict(pipeline, X_train, y_train, cv=5)

combined_predictions = np.column_stack([rf_train_pred, gnn_all_pred[train_positions]])
combined_predictions_test = np.column_stack([np.asarray(y_pred).reshape(-1), gnn_all_pred[test_positions]])

# Implement a meta-classifier (e.g., logistic regression) for combining predictions.
# It is fitted on training rows only and scored on the held-out test rows; fitting
# and scoring on the same rows would report an inflated accuracy.
meta_classifier = LogisticRegression()
meta_classifier.fit(combined_predictions, y_train)

# Evaluate the meta-classifier
y_pred_meta = meta_classifier.predict(combined_predictions_test)

# Report combined results
accuracy_combined = accuracy_score(y_test, y_pred_meta)
print(f"Combined Model Accuracy: {accuracy_combined}")
print(classification_report(y_test, y_pred_meta))


In [None]:
pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.9.0+cpu.html

In [None]:
pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.9.0+cpu.html

In [None]:
pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-1.9.0+cpu.html

In [None]:
pip uninstall torch-scatter torch-sparse torch-cluster