In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks
#from stellargraph import StellarGraph
#from stellargraph.layer import GAT, GCN


# Load the STEAD dataset
# The path is written as a raw string so the Windows backslashes stay literal: sequences
# such as "\A", "\K" or "\d" are not valid escapes and trigger invalid-escape warnings
# in a normal string literal. The resulting path value is unchanged.
dataset_url = r"D:\APURAV\K. K. Wagh\Study\BE\Semester VII\Final Year Project Sem VII\dataset\STEAD\merge.csv"
stead_data = pd.read_csv(dataset_url)


# Data Preprocessing
selected_columns = ['network_code', 'receiver_code', 'receiver_latitude', 'receiver_longitude',
                    'receiver_elevation_m', 'source_latitude', 'source_longitude', 'source_depth_km',
                    'source_magnitude', 'trace_start_time', 'trace_category']
# Take an explicit copy: the column assignments below must modify `data` itself, not a
# possible view of `stead_data` (this is what pandas' SettingWithCopyWarning is about).
data = stead_data[selected_columns].copy()


# Convert trace_start_time to datetime format
data['trace_start_time'] = pd.to_datetime(data['trace_start_time'])


# Extract features from the timestamp
data['year'] = data['trace_start_time'].dt.year
data['month'] = data['trace_start_time'].dt.month
data['day'] = data['trace_start_time'].dt.day
data['hour'] = data['trace_start_time'].dt.hour
data['minute'] = data['trace_start_time'].dt.minute
data['second'] = data['trace_start_time'].dt.second


# Drop unnecessary columns
data = data.drop(['trace_start_time'], axis=1)


# Create a binary target variable indicating earthquake or non-earthquake
# A prefix match is used instead of strict equality so that sub-labelled categories are
# counted as earthquakes too; plain 'earthquake' still maps to 1 and missing values to 0.
# NOTE(review): STEAD is believed to label events as 'earthquake_local' (vs. 'noise'), in
# which case the strict comparison produced an all-zero target - confirm against merge.csv.
is_earthquake = data['trace_category'].astype(str).str.startswith('earthquake')
data['target'] = np.where(is_earthquake, 1, 0)


# Feature engineering
# Feature 1: Time of Day (morning, afternoon, evening, night)
# right=False yields the bins [0, 6), [6, 12), [12, 18), [18, 24), so every hour 0-23 gets
# a label. With the default right-closed bins, hour 0 fell outside (0, 6] and became NaN.
data['time_of_day'] = pd.cut(data['hour'], bins=[0, 6, 12, 18, 24], right=False,
                             labels=['night', 'morning', 'afternoon', 'evening'])

# Feature 2: Distance from the earthquake source
# Plain Euclidean distance between the receiver and source coordinate pairs, expressed in
# the units of the latitude/longitude columns (not converted to kilometres).
data['distance_from_source'] = np.sqrt((data['receiver_latitude'] - data['source_latitude'])**2 +
                                       (data['receiver_longitude'] - data['source_longitude'])**2)

# Feature 3: Magnitude-weighted distance
data['weighted_distance'] = data['distance_from_source'] * data['source_magnitude']

# Feature 4: Duration of the seismic signal
# NOTE: as computed here this is the number of seconds past the hour of the trace start
# time (minute * 60 + second), not a measured signal length.
data['signal_duration'] = data['minute'] * 60 + data['second']


# Drop the original columns used for feature engineering
data = data.drop(['hour', 'minute', 'second'], axis=1)



# Replace every categorical column with its integer category codes.
# pandas assigns code -1 to missing values.
categorical_columns = ['network_code', 'receiver_code', 'time_of_day']
for column in categorical_columns:
    data[column] = pd.Categorical(data[column]).codes

    
# Create a graph from the data
# FIXME: `StellarGraph` is not defined in this notebook - its import at the top of this
# cell is commented out - so this statement raises NameError on a fresh kernel.
# NOTE(review): the `from_pandas` constructor and the keyword arguments used below could not
# be confirmed from this notebook; verify them against the installed StellarGraph version
# before re-enabling the import.
# The call passes the preprocessed frame `data`, seven receiver/source columns as node
# features and the two engineered distance columns as edge features.
graph = StellarGraph.from_pandas(data, node_features=["receiver_latitude", "receiver_longitude",
                                                       "receiver_elevation_m", "source_latitude",
                                                       "source_longitude", "source_depth_km",
                                                       "source_magnitude"],
                                 edge_features=["distance_from_source", "weighted_distance"],
                                 node_type_default="receiver_code", edge_type_default="trace_category")

# Train-test split
# The label is the derived binary `target`; the features are every remaining column except
# the raw category text it was derived from.
y = data['target']
X = data.drop(columns=['trace_category', 'target'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


# Feature Scaling
# The scaler statistics come from the training rows only and are then reused unchanged
# for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Convert data to StellarGraph instances
# NOTE(review): `graph.node_features` is called here with the scaled feature matrices;
# confirm that this is how the StellarGraph API expects to receive training inputs.
G_train = graph.node_features(X_train_scaled)
G_test = graph.node_features(X_test_scaled)


# Build the GNN model
# Stack: one GCN layer (32 units, ReLU, dropout 0.5) -> Dense(16, ReLU) -> Dense(1, sigmoid).
model = models.Sequential()
# FIXME: `GCN` is not defined in this notebook - its import at the top of this cell is
# commented out - so this line raises NameError on a fresh kernel.
# NOTE(review): whether a StellarGraph GCN object can be added to a Keras Sequential model
# as a layer could not be confirmed here - verify.
model.add(GCN(layer_sizes=[32], activations=["relu"], generator=graph, dropout=0.5))
model.add(layers.Dense(units=16, activation="relu"))
# Single sigmoid unit: the output is a value in (0, 1), matched with binary cross-entropy below.
model.add(layers.Dense(units=1, activation="sigmoid"))


# Compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])


# Define callbacks (e.g., early stopping to prevent overfitting)
# Training stops once `val_loss` has not improved for 10 epochs, and the weights of the best
# epoch are restored.
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)


# Train the model
# Up to 50 epochs with batches of 64; 20% of the training data is held out for validation,
# which provides the `val_loss` monitored by the early-stopping callback.
model.fit(G_train, y_train, epochs=50, batch_size=64, validation_split=0.2, callbacks=[early_stopping])


# Evaluate the model on the test set
# `evaluate` returns the loss followed by the metrics given to `compile` (accuracy here).
test_loss, test_accuracy = model.evaluate(G_test, y_test)
print(f'Test Accuracy: {test_accuracy}')


# Make predictions for user input
def predict_earthquake_probability(user_input):
    """Return the model's predicted earthquake probability for one observation.

    The input is turned into a one-row frame, the engineered columns used during training
    are derived when they are absent, and the columns are put into the order the scaler was
    fitted on (the columns of the training frame ``X``) before scaling and prediction.

    Args:
        user_input: mapping of column name to a scalar value describing one observation.
            Categorical columns must already hold the integer codes used in training;
            raw strings are rejected because the string-to-code mapping is not kept.

    Returns:
        The first element of the first row of ``model.predict``'s output.

    Raises:
        ValueError: if a training column is missing and cannot be derived, or if a
            column holds non-numeric values.
    """
    # Process user input (similar to preprocessing steps above)
    features = pd.DataFrame(user_input, index=[0])
    provided = set(features.columns)

    # Feature engineering for user input: rebuild the derived training columns when the
    # caller supplied only the raw quantities they are computed from.
    coordinate_columns = {'receiver_latitude', 'receiver_longitude',
                          'source_latitude', 'source_longitude'}
    if 'distance_from_source' not in provided and coordinate_columns <= provided:
        features['distance_from_source'] = np.sqrt(
            (features['receiver_latitude'] - features['source_latitude'])**2 +
            (features['receiver_longitude'] - features['source_longitude'])**2)
    if ('weighted_distance' not in provided
            and 'distance_from_source' in features.columns
            and 'source_magnitude' in provided):
        features['weighted_distance'] = features['distance_from_source'] * features['source_magnitude']
    if 'signal_duration' not in provided and {'minute', 'second'} <= provided:
        features['signal_duration'] = features['minute'] * 60 + features['second']

    # The scaler was fitted on the columns of X, so the input must match them exactly
    # (same names, same order); extra helper columns such as minute/second are dropped.
    expected_columns = list(X.columns)
    missing = [name for name in expected_columns if name not in features.columns]
    if missing:
        raise ValueError(f'user_input is missing required feature(s): {missing}')
    features = features[expected_columns]

    non_numeric = [name for name in expected_columns
                   if not pd.api.types.is_numeric_dtype(features[name])]
    if non_numeric:
        raise ValueError(
            f'user_input has non-numeric values for {non_numeric}; '
            'pass the integer category codes used during training')

    # Scaling
    user_input_scaled = scaler.transform(features)

    # Convert to StellarGraph instances
    # NOTE(review): same unverified `graph.node_features` usage as in the training code.
    G_user_input = graph.node_features(user_input_scaled)

    # Make prediction
    probability = model.predict(G_user_input)

    return probability[0][0]


# Example usage
# One hypothetical observation, keyed by feature column name.
# NOTE(review): the categorical fields are given as raw strings here, whereas the training
# frame stores these columns as integer category codes, and the engineered distance and
# duration columns are not supplied - reconcile this with the training features.
user_location_input = dict(
    network_code='XYZ',
    receiver_code='ABC',
    receiver_latitude=37.7749,
    receiver_longitude=-122.4194,
    receiver_elevation_m=10.0,
    source_latitude=34.0522,
    source_longitude=-118.2437,
    source_depth_km=10.0,
    source_magnitude=5.0,
    year=2024,
    month=2,
    day=5,
    time_of_day='morning',
)

predicted_probability = predict_earthquake_probability(user_location_input)
print(f'Predicted Probability of Earthquake: {predicted_probability}')


## 2nd try: PyTorch feed-forward baseline on Min-Max-scaled source coordinates

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.nn.functional as F

# Load the dataset
# Raw string: keeps the Windows backslashes literal, so sequences such as "\A" or "\d" are
# not parsed as (invalid) escape sequences. The path value itself is unchanged.
data = pd.read_csv(r"D:\APURAV\K. K. Wagh\Study\BE\Semester VII\Final Year Project Sem VII\dataset\STEAD\merge.csv")

# Explore the dataset
print(data.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() appended a stray "None" line to the output.
data.info()
print(data.describe())

# Handle missing values: remove, in place, every row that has at least one missing value
data.dropna(axis=0, how='any', inplace=True)

# Handle outliers (if necessary)
# Perform data consistency checks and corrections

# Feature engineering
# Example:
# Parse the origin timestamps into pandas datetime values
origin_times = pd.to_datetime(data['source_origin_time'])
data['source_origin_time'] = origin_times

# Scaling, encoding, dimensionality reduction (if necessary)
# Example:
# Min-Max normalization of the two source coordinate columns
coordinate_columns = ['source_latitude', 'source_longitude']
scaler = MinMaxScaler()
scaled_data = scaler.fit(data[coordinate_columns]).transform(data[coordinate_columns])

# Wrap the scaled array in a DataFrame again, restoring the column names
scaled_data_df = pd.DataFrame(data=scaled_data, columns=coordinate_columns)

# Dimensionality reduction using PCA (Principal Component Analysis) or other methods if necessary

# Save preprocessed data (only the two scaled coordinate columns, without the index)
scaled_data_df.to_csv("preprocessed_data.csv", index=False)


In [None]:
# Implement GNN architecture using TensorFlow or PyTorch
# Example:

class GNN(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Despite the name, the forward pass uses no edge or adjacency information; the input
    tensor is simply passed through the two linear layers.

    Args:
        input_dim: size of the input feature dimension.
        hidden_dim: width of the hidden layer.
        output_dim: size of the output dimension.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Attribute names are kept so existing state dicts stay loadable.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the raw (un-activated) output of the second linear layer."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

# Instantiate GNN model
input_dim = 2  # Example: latitude and longitude
hidden_dim = 64
output_dim = 1  # Probability of earthquake occurrence
model = GNN(input_dim, hidden_dim, output_dim)

# Define loss function and optimizer
# The network ends in a plain linear layer, so its single output is an unbounded logit, not
# a probability. BCEWithLogitsLoss applies the sigmoid internally and trains that logit
# against 0/1 labels; MSE on the raw output would not yield a calibrated probability.
# Apply torch.sigmoid(model(x)) at inference time to obtain the probability itself.
# NOTE(review): assumes the training targets are float tensors with values in [0, 1].
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Visualize seismic data distributions: pairwise plots of the three source location columns
source_columns = ['source_latitude', 'source_longitude', 'source_depth_km']
sns.pairplot(data[source_columns])
plt.show()

# Display GNN architecture diagrams (if necessary)

# Use interactive plots for seismic waveforms and earthquake characteristics (if necessary)
# Example: Plot seismic waveforms over time using Plotly or Bokeh


## 3rd try: graph convolutional network with torch_geometric (`GCNConv`)

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from sklearn.preprocessing import StandardScaler

# Load the dataset
# Raw string: keeps the Windows backslashes literal, so sequences such as "\A" or "\d" are
# not parsed as (invalid) escape sequences. The path value itself is unchanged.
data = pd.read_csv(r"D:\APURAV\K. K. Wagh\Study\BE\Semester VII\Final Year Project Sem VII\dataset\STEAD\merge.csv")

# Drop unnecessary columns (in place): identifiers, pick metadata, uncertainty estimates
# and other fields that are not used as model inputs in this attempt.
unused_columns = [
    'network_code', 'receiver_code', 'receiver_type', 'receiver_elevation_m',
    'p_status', 'p_weight', 'p_travel_sec',
    's_status', 's_weight',
    'source_id', 'source_origin_uncertainty_sec', 'source_error_sec', 'source_gap_deg',
    'source_horizontal_uncertainty_km', 'source_depth_uncertainty_km',
    'source_magnitude_type', 'source_magnitude_author', 'source_mechanism_strike_dip_rake',
    'source_distance_deg',
    'trace_start_time', 'trace_category', 'trace_name',
]
data.drop(columns=unused_columns, inplace=True)

# Convert timestamp columns to datetime objects
data['source_origin_time'] = pd.to_datetime(data['source_origin_time'])

# Extract features from timestamp columns
data['hour'] = data['source_origin_time'].dt.hour
data['minute'] = data['source_origin_time'].dt.minute
data['second'] = data['source_origin_time'].dt.second

# The raw timestamp has served its purpose once the parts are extracted. Drop it (as the
# first attempt does with trace_start_time) so the feature frame carries no datetime
# column: such a column cannot be zero-filled meaningfully by the fillna below and cannot
# be converted into a float feature tensor.
data.drop(columns=['source_origin_time'], inplace=True)

# Feature scaling (standardize the three extracted time components)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data[['hour', 'minute', 'second']])
data[['hour', 'minute', 'second']] = scaled_features

# Handle missing values
# Runs after scaling, so a missing time component becomes 0 in standardized space.
data.fillna(0, inplace=True)

# Encode categorical variables if needed

# Final processed dataset (independent copy, so later edits to `data` do not affect it)
processed_data = data.copy()


In [None]:
class GraphConvolutionalNetwork(nn.Module):
    """Two stacked GCNConv layers, each followed by a ReLU.

    Args:
        input_dim: number of input features handed to the first convolution.
        hidden_dim: number of features between the two convolutions.
        output_dim: number of features produced by the second convolution.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        """Run both convolutions on ``data.x`` using ``data.edge_index``.

        ReLU is applied after the second convolution as well, so the returned
        values are never negative.
        """
        edge_index = data.edge_index
        hidden = F.relu(self.conv1(data.x, edge_index))
        return F.relu(self.conv2(hidden, edge_index))

# Initialize the GNN model
# Count only the numeric columns: datetime or string columns cannot be part of a float
# feature matrix, so counting every column would overstate the input width whenever the
# frame contains one. For an all-numeric frame the value is the same as the column count.
# NOTE(review): the node-feature tensor must be built from these same numeric columns.
input_dim = processed_data.select_dtypes(include='number').shape[1]
hidden_dim = 64
output_dim = 1  # Adjust based on your prediction task
model = GraphConvolutionalNetwork(input_dim, hidden_dim, output_dim)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
