In [11]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix



In [47]:
class EarthquakeLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super(EarthquakeLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # Define LSTM layer
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0
        )
        
        # Define output layer
        self.fc = nn.Linear(hidden_size, output_size)
        
        # Dropout layer
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        # x shape: (batch_size, seq_length, input_size)
        # mask shape: (batch_size, seq_length)
        
        batch_size = x.size(0)
        
        # Initialize hidden state and cell state
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(x.device)
        
        # Get sequence lengths from mask
        lengths = mask.sum(dim=1).cpu()
        
        # Pack padded sequence
        packed_x = nn.utils.rnn.pack_padded_sequence(
            x, lengths, batch_first=True, enforce_sorted=False
        )
        
        # LSTM forward pass
        packed_out, (hidden, _) = self.lstm(packed_x, (h0, c0))
        
        # Unpack sequence
        out, _ = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True)
        
        # Get the last valid output for each sequence using the mask
        last_outputs = []
        for i in range(batch_size):
            last_idx = lengths[i] - 1
            last_outputs.append(out[i, last_idx])
            
        # Stack the last outputs
        last_outputs = torch.stack(last_outputs)
        
        # Apply dropout
        last_outputs = self.dropout(last_outputs)
        
        # Final linear layer
        output = self.fc(last_outputs)
        
        return output


In [48]:
def load_and_preprocess(csv_path):
    """
    Load the earthquake dataset and preprocess it.

    Parameters:
    -----------
    csv_path : str
        Path to the earthquake dataset CSV file

    Returns:
    --------
    pandas.DataFrame
        Processed earthquake dataset
    """
    df = pd.read_csv(csv_path)

    # Drop the imbalanced magnitudes
    df.drop(df[df["mag"] < 4.0].index, inplace=True)

    # Sort the dfFrame by time
    df = df.sort_values('time').reset_index(drop=True)

    df["time"] = pd.to_datetime(df["time"].str[:10])

    # Extract date features
    df['year'] = df['time'].dt.year
    df['month'] = df['time'].dt.month
    df['day'] = df['time'].dt.day

    # Drop columns with too many missing values
    df.drop(columns=["dmin", "horizontalError", "depthError", "magError"], inplace=True)

    # Fill missing values
    for col in ["nst", "gap", "magNst", "rms"]:
        df[col] = df[col].fillna(df[col].median())


    # We can drop the id, updated, type, place, magNst, status, net, locationSource,
    # and magSource columns as they are not informative
    df.drop(columns=["id", "updated", "type", "place", "magNst",
                       "status", "locationSource", "magSource", "net",
                     "nst", "gap", "rms", "magType" ], inplace=True)

    # Create a target variable
    df['target'] = 0  # Initialize target column

    # Create the target variable
    for i in range(len(df) - 1):
        current_time = df.loc[i, 'time']
        # Filter events within the next two weeks and within 0.5 degrees proximity
        mask = (df['time'] > current_time) & (df['time'] <= current_time + pd.Timedelta(weeks=2)) & (
                    np.abs(df['latitude'] - df.loc[i, 'latitude']) <= 0.5) & (
                           np.abs(df['longitude'] - df.loc[i, 'longitude']) <= 0.5)

        # If there is at least one significant earthquake, set the target to 1
        if df[mask]['mag'].max() >= 5.0:
            df.loc[i, 'target'] = 1

    return df


In [49]:
def find_recent_earthquakes(data, mag_threshold = 4.5, proximity_threshold = 0.8, time_threshold = 2):
    """
    Find recent earthquakes within 0.8 degrees latitude/longitude and 2 weeks of the current earthquake.
    Add a new column `close_event` to the DataFrame with the total number of recent earthquakes.

    Parameters:
    -----------
    data : pandas.DataFrame
        Full earthquake dataset
    mag_threshold : float
        Minimum magnitude of the earthquake to be considered
    proximity_threshold : float
        Maximum distance in degrees latitude/longitude to be considered
    time_threshold : int
        Maximum number of weeks to be considered

    Returns:
    --------
    pandas.DataFrame
        Earthquake dataset with a new column `close_event` indicating if there was a recent earthquake nearby
    """
    time_threshold = pd.Timedelta(weeks=time_threshold)

    # Ensure the DataFrame is sorted by time for efficiency
    data = data.sort_values("time").reset_index(drop=True)

    # Initialize a new column
    data["close_event"] = 0

    # Iterate through each row
    for i, row in data.iterrows():
        # Define current earthquake properties
        current_time = row["time"]
        current_lat = row["latitude"]
        current_lon = row["longitude"]

        # Find earthquakes that meet the criteria
        recent_quakes = data[
            (data["time"] < current_time) &
            (data["time"] >= current_time - time_threshold) &
            (np.abs(data["latitude"] - current_lat) <= proximity_threshold) &
            (np.abs(data["longitude"] - current_lon) <= proximity_threshold) &
            (data["mag"] > mag_threshold)
            ]

        # Update the new column
        data.loc[i, "close_event"] = recent_quakes.shape[0]

    return data

In [50]:
df = load_and_preprocess("resources/90_25_turkey.csv")

print(df.describe())

                                time     latitude    longitude        depth  \
count                           4128  4128.000000  4128.000000  4128.000000   
mean   2010-10-01 20:57:12.558139392    38.171928    33.770976    19.450909   
min              1990-01-02 00:00:00    35.647300    25.510900     0.000000   
25%              2001-07-16 00:00:00    36.910000    27.784625    10.000000   
50%              2011-11-07 12:00:00    38.086900    31.825000    10.000000   
75%              2021-04-17 00:00:00    39.100000    38.676500    16.000000   
max              2025-01-21 00:00:00    42.494000    45.190000   176.100000   
std                              NaN     1.548517     6.212623    26.697618   

               mag         year        month          day       target  
count  4128.000000  4128.000000  4128.000000  4128.000000  4128.000000  
mean      4.383866  2010.291667     6.048207    15.408188     0.172481  
min       4.000000  1990.000000     1.000000     1.000000     0.00000

In [51]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4128 entries, 0 to 4127
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   time       4128 non-null   datetime64[ns]
 1   latitude   4128 non-null   float64       
 2   longitude  4128 non-null   float64       
 3   depth      4128 non-null   float64       
 4   mag        4128 non-null   float64       
 5   year       4128 non-null   int32         
 6   month      4128 non-null   int32         
 7   day        4128 non-null   int32         
 8   target     4128 non-null   int64         
dtypes: datetime64[ns](1), float64(4), int32(3), int64(1)
memory usage: 242.0 KB


In [52]:
df = find_recent_earthquakes(df)

print(df["close_event"].value_counts())

close_event
0     3094
1      325
2      164
3       90
4       67
      ... 
66       1
65       1
61       1
64       1
17       1
Name: count, Length: 61, dtype: int64


In [53]:
def add_largest_nearby_eq(data, mag_col='mag', lat_col='latitude', lon_col='longitude', time_col='time'):
    """
    Add a feature `largest_nearby_eq` indicating the magnitude of the largest earthquake
    within a 0.5-degree radius and 20-year history for each record in the DataFrame.

    Parameters:
    -----------
    data : pandas.DataFrame
        Earthquake dataset with columns for magnitude, latitude, longitude, and time.
    mag_col : str
        Column name for magnitude.
    lat_col : str
        Column name for latitude.
    lon_col : str
        Column name for longitude.
    time_col : str
        Column name for time.

    Returns:
    --------
    pandas.DataFrame
        DataFrame with an added column `largest_nearby_eq`.
    """
    # Convert time column to datetime if not already
    data[time_col] = pd.to_datetime(data[time_col])
    data = data.sort_values(time_col).reset_index(drop=True)

    # Initialize the new feature
    largest_eq = np.zeros(len(data))

    # Iterate over each earthquake
    for i, row in data.iterrows():
        current_time = row[time_col]
        current_lat, current_lon = row[lat_col], row[lon_col]

        # Filter earthquakes within the last 20 years but before the current event
        recent_data = data[
            (data[time_col] < current_time) &
            (data[time_col] >= current_time - pd.Timedelta(days=20 * 365))
        ]

        # Filter by proximity (latitude and longitude)
        nearby_quakes = recent_data[
            (np.abs(recent_data[lat_col] - current_lat) <= 0.5) &
            (np.abs(recent_data[lon_col] - current_lon) <= 0.5)
        ]

        # Find the largest earthquake magnitude
        largest_eq[i] = nearby_quakes[mag_col].max() if not nearby_quakes.empty else 0

    # Add the feature to the DataFrame
    data["largest_nearby_eq"] = largest_eq
    return data

In [54]:
df = add_largest_nearby_eq(df)

print(df["largest_nearby_eq"].value_counts())


largest_nearby_eq
6.0    325
5.5    265
5.6    244
5.2    210
4.8    194
7.1    179
5.1    167
4.7    163
6.2    154
4.9    140
4.6    140
6.4    138
5.7    135
0.0    133
5.9    125
4.5    122
6.1    112
6.7    105
5.4     98
4.1     93
5.8     92
5.0     91
4.4     90
5.3     88
6.3     78
6.6     72
4.2     66
7.5     62
7.0     56
4.3     53
7.8     45
7.2     36
4.0     21
6.5     20
7.6     16
Name: count, dtype: int64


In [55]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4128 entries, 0 to 4127
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   time               4128 non-null   datetime64[ns]
 1   latitude           4128 non-null   float64       
 2   longitude          4128 non-null   float64       
 3   depth              4128 non-null   float64       
 4   mag                4128 non-null   float64       
 5   year               4128 non-null   int32         
 6   month              4128 non-null   int32         
 7   day                4128 non-null   int32         
 8   target             4128 non-null   int64         
 9   close_event        4128 non-null   int64         
 10  largest_nearby_eq  4128 non-null   float64       
dtypes: datetime64[ns](1), float64(5), int32(3), int64(2)
memory usage: 306.5 KB


In [56]:
def prepare_time_based_sequences(data, window_days, features, train_split=0.8, 
                               min_events=5, handling_method='skip'):
    """
    Prepare sequences for LSTM model based on time windows with proper handling of initial sequences.
    
    Args:
        data: DataFrame containing earthquake data (must have 'time' column)
        window_days: Number of days to look back for each sequence
        features: List of feature columns to use
        train_split: Proportion of data to use for training (default: 0.8)
        min_events: Minimum number of events required in a window
        handling_method: How to handle sequences with insufficient data
                        'skip': Skip sequences with insufficient data
                        'pad': Pad sequences with zeros
                        'minimum_window': Use available data if it meets minimum requirements
    
    Returns:
        train_X, train_y, test_X, test_y, train_mask, test_mask, sequence_stats
    """
    # Sort data by time to ensure temporal order
    data = data.sort_values('time').reset_index(drop=True)
    
    # Initialize lists for sequences
    X = []
    y = []
    sequence_lengths = []
    dates = []
    skipped_sequences = 0
    
    # Calculate the start of the dataset
    dataset_start = data['time'].iloc[0]
    
    for i in range(len(data)):
        end_time = data['time'].iloc[i]
        start_time = end_time - pd.Timedelta(days=window_days)
        
        # Get all earthquakes within the time window
        mask = (data['time'] < end_time) & (data['time'] >= start_time)
        window_data = data[mask][features].values
        
        # Handle different cases based on the specified method
        if handling_method == 'skip':
            if len(window_data) >= min_events:
                X.append(window_data)
                y.append(data['target'].iloc[i])
                sequence_lengths.append(len(window_data))
                dates.append(end_time)
            else:
                skipped_sequences += 1
                
        elif handling_method == 'pad':
            if len(window_data) > 0:
                X.append(window_data)
                y.append(data['target'].iloc[i])
                sequence_lengths.append(len(window_data))
                dates.append(end_time)
                
        elif handling_method == 'minimum_window':
            # If we're near the start of the dataset, use available data
            if start_time < dataset_start:
                available_data = data[data['time'] < end_time][features].values
                if len(available_data) >= min_events:
                    X.append(available_data)
                    y.append(data['target'].iloc[i])
                    sequence_lengths.append(len(available_data))
                    dates.append(end_time)
                else:
                    skipped_sequences += 1
            else:
                if len(window_data) >= min_events:
                    X.append(window_data)
                    y.append(data['target'].iloc[i])
                    sequence_lengths.append(len(window_data))
                    dates.append(end_time)
                else:
                    skipped_sequences += 1
    
    # Convert dates to numpy datetime64
    dates = np.array(dates)
    
    # Find maximum sequence length for padding
    max_seq_length = max(sequence_lengths)
    
    # Pad sequences to same length
    X_padded = []
    for seq in X:
        if len(seq) < max_seq_length:
            padding = np.zeros((max_seq_length - len(seq), len(features)))
            seq_padded = np.vstack((padding, seq))  # Pad at the beginning
        else:
            seq_padded = seq[-max_seq_length:]  # Take the most recent events
        X_padded.append(seq_padded)
    
    # Convert to numpy arrays
    X = np.array(X_padded)
    y = np.array(y)
    
    # Calculate split index based on time
    split_time = dates[0] + (dates[-1] - dates[0]) * train_split
    split_idx = sum([1 for seq_time in dates if seq_time <= split_time])
    
    # Split into train and test sets
    train_X = X[:split_idx]
    test_X = X[split_idx:]
    train_y = y[:split_idx]
    test_y = y[split_idx:]
    
    # Convert to PyTorch tensors
    train_X = torch.FloatTensor(train_X)
    test_X = torch.FloatTensor(test_X)
    train_y = torch.LongTensor(train_y)
    test_y = torch.LongTensor(test_y)
    
    # Create mask for padded sequences
    train_mask = (train_X != 0).any(dim=2)
    test_mask = (test_X != 0).any(dim=2)
    
    # Compile sequence statistics
    sequence_stats = {
        'total_sequences': len(X),
        'skipped_sequences': skipped_sequences,
        'max_sequence_length': max_seq_length,
        'min_sequence_length': min(sequence_lengths),
        'avg_sequence_length': sum(sequence_lengths) / len(sequence_lengths),
        'train_sequences': len(train_X),
        'test_sequences': len(test_X)
    }
    
    return train_X, train_y, test_X, test_y, train_mask, test_mask, sequence_stats

# Example usage:
window_days = 30
min_events = 5

features = ['latitude', 'longitude', 'depth', 'mag', 'year', 'month', 'day', 'close_event', 'largest_nearby_eq']

# Try different handling methods
methods = ['skip', 'pad', 'minimum_window']
for method in methods:
    print(f"\nTrying method: {method}")
    train_X, train_y, test_X, test_y, train_mask, test_mask, stats = prepare_time_based_sequences(
        data=df,
        window_days=window_days,
        features=features,
        train_split=0.8,
        min_events=min_events,
        handling_method=method
    )
    
    print("Sequence Statistics:")
    for key, value in stats.items():
        print(f"{key}: {value}")


Trying method: skip
Sequence Statistics:
total_sequences: 3426
skipped_sequences: 702
max_sequence_length: 470
min_sequence_length: 5
avg_sequence_length: 42.5758902510216
train_sequences: 2073
test_sequences: 1353

Trying method: pad
Sequence Statistics:
total_sequences: 4110
skipped_sequences: 0
max_sequence_length: 470
min_sequence_length: 1
avg_sequence_length: 35.976885644768856
train_sequences: 2673
test_sequences: 1437

Trying method: minimum_window
Sequence Statistics:
total_sequences: 3426
skipped_sequences: 702
max_sequence_length: 470
min_sequence_length: 5
avg_sequence_length: 42.5758902510216
train_sequences: 2073
test_sequences: 1353


In [57]:
train_X, train_y, test_X, test_y, train_mask, test_mask, stats = prepare_time_based_sequences(
    data=df,
    window_days=30,
    features=['latitude', 'longitude', 'depth', 'mag', 'year', 'month', 'day', 'close_event', 'largest_nearby_eq'],
    train_split=0.8,
    min_events=5,
    handling_method='minimum_window'
)

In [58]:
# Scale the data
scaler = MinMaxScaler()
train_X_scaled = scaler.fit_transform(train_X.reshape(-1, train_X.shape[-1])).reshape(train_X.shape)
test_X_scaled = scaler.transform(test_X.reshape(-1, test_X.shape[-1])).reshape(test_X.shape)

In [59]:
from sklearn.metrics import roc_curve, auc


def train_evaluate_model(train_X, train_y, test_X, test_y, train_mask, test_mask, features, num_epochs=50):
    batch_size = 64

    # Create datasets with masks
    train_dataset = TensorDataset(train_X, train_y, train_mask)
    test_dataset = TensorDataset(test_X, test_y, test_mask)

    # Create data loaders (no shuffle to maintain temporal order)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Define device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Calculate class weights
    class_counts = np.bincount(train_y.numpy().astype(int))
    pos_weight = class_counts[0] / class_counts[1]
    pos_weight = torch.tensor([pos_weight], dtype=torch.float).to(device)

    # Define loss function
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    # Model parameters
    input_size = len(features)
    hidden_size = 64
    num_layers = 2
    output_size = 1  # Binary classification

    # Initialize model
    model = EarthquakeLSTM(input_size, hidden_size, num_layers, output_size)
    model.to(device)

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    best_val_loss = float('inf')
    patience = 5
    patience_counter = 0
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        epoch_loss = 0
        for sequences_batch, labels_batch, mask_batch in train_loader:
            sequences_batch = sequences_batch.to(device)
            labels_batch = labels_batch.to(device).float().unsqueeze(1)
            mask_batch = mask_batch.to(device)

            # Forward pass
            outputs = model(sequences_batch, mask_batch)
            loss = criterion(outputs, labels_batch)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        avg_train_loss = epoch_loss / len(train_loader)

        # Validation phase
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for sequences_batch, labels_batch, mask_batch in test_loader:
                sequences_batch = sequences_batch.to(device)
                labels_batch = labels_batch.to(device).float().unsqueeze(1)
                mask_batch = mask_batch.to(device)

                outputs = model(sequences_batch, mask_batch)
                loss = criterion(outputs, labels_batch)
                val_loss += loss.item()

        avg_val_loss = val_loss / len(test_loader)

        # Print progress
        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {avg_train_loss:.4f}, '
              f'Val Loss: {avg_val_loss:.4f}')

        # Early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            # Save best model
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered")
                break

    # Load best model for evaluation
    model.load_state_dict(torch.load('best_model.pth'))
    model.eval()

    # Evaluation
    with torch.no_grad():
        all_outputs = []
        all_labels = []
        for sequences_batch, labels_batch, mask_batch in test_loader:
            sequences_batch = sequences_batch.to(device)
            labels_batch = labels_batch.to(device)
            mask_batch = mask_batch.to(device)

            outputs = model(sequences_batch, mask_batch)
            outputs = torch.sigmoid(outputs)

            all_outputs.extend(outputs.cpu().numpy())
            all_labels.extend(labels_batch.cpu().numpy())

    # Convert to numpy arrays
    all_outputs = np.array(all_outputs).squeeze()
    all_labels = np.array(all_labels)

    # Binarize outputs
    threshold = 0.5
    predicted = (all_outputs >= threshold).astype(int)

    # Calculate metrics
    accuracy = accuracy_score(all_labels, predicted)
    precision = precision_score(all_labels, predicted, zero_division=0)
    recall = recall_score(all_labels, predicted, zero_division=0)
    f1 = f1_score(all_labels, predicted, zero_division=0)
    conf_matrix = confusion_matrix(all_labels, predicted)

    # Print metrics
    print("\nFinal Evaluation Metrics:")
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print('\nConfusion Matrix:')
    print(conf_matrix)

    # Calculate and print additional metrics
    tn, fp, fn, tp = conf_matrix.ravel()
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
    print(f'\nSpecificity: {specificity:.4f}')
    
    # ROC curve
    fpr, tpr, _ = roc_curve(all_labels, all_outputs)
    roc_auc = auc(fpr, tpr)
    print(f'ROC AUC: {roc_auc:.4f}')

    return model, {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'specificity': specificity,
        'roc_auc': roc_auc,
        'confusion_matrix': conf_matrix
    }

# Usage example:
train_X, train_y, test_X, test_y, train_mask, test_mask, stats = prepare_time_based_sequences(
    data=df,
    window_days=30,
    features=features,
    train_split=0.8,
    min_events=5,
    handling_method='minimum_window'
)

model, metrics = train_evaluate_model(
    train_X=train_X,
    train_y=train_y,
    test_X=test_X,
    test_y=test_y,
    train_mask=train_mask,
    test_mask=test_mask,
    features=features,
    num_epochs=50
)

Using device: cpu
Epoch [1/50], Train Loss: 1.2113, Val Loss: 1.6788
Epoch [2/50], Train Loss: 1.1909, Val Loss: 1.6599
Epoch [3/50], Train Loss: 1.1885, Val Loss: 1.6506
Epoch [4/50], Train Loss: 1.1882, Val Loss: 1.6439
Epoch [5/50], Train Loss: 1.1888, Val Loss: 1.6393
Epoch [6/50], Train Loss: 1.1873, Val Loss: 1.6362
Epoch [7/50], Train Loss: 1.1868, Val Loss: 1.6333
Epoch [8/50], Train Loss: 1.1875, Val Loss: 1.6312
Epoch [9/50], Train Loss: 1.1865, Val Loss: 1.6299
Epoch [10/50], Train Loss: 1.1868, Val Loss: 1.6291
Epoch [11/50], Train Loss: 1.1868, Val Loss: 1.6280
Epoch [12/50], Train Loss: 1.1870, Val Loss: 1.6272
Epoch [13/50], Train Loss: 1.1863, Val Loss: 1.6264
Epoch [14/50], Train Loss: 1.1865, Val Loss: 1.6255
Epoch [15/50], Train Loss: 1.1862, Val Loss: 1.6252
Epoch [16/50], Train Loss: 1.1862, Val Loss: 1.6255
Epoch [17/50], Train Loss: 1.1860, Val Loss: 1.6248
Epoch [18/50], Train Loss: 1.1856, Val Loss: 1.6240
Epoch [19/50], Train Loss: 1.1855, Val Loss: 1.6233
Epo

  model.load_state_dict(torch.load('best_model.pth'))



Final Evaluation Metrics:
Accuracy: 0.7332
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

Confusion Matrix:
[[992   0]
 [361   0]]

Specificity: 1.0000
ROC AUC: 0.7655
