# Temporal transformations

## Original code

In [None]:
import random
import networkx as nx
import pandas as pd
import numpy as np
import ipywidgets as widgets
import os
import matplotlib.pyplot as plt
import warnings
from tabulate import tabulate
from tqdm import trange
from IPython import get_ipython
from IPython.display import display
from time import monotonic
from pprint import pprint
from google.colab import drive

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [None]:
class CellTimer:
    """Report the wall-clock duration of each executed notebook cell.

    ``start`` and ``stop`` accept and ignore arbitrary arguments so they can
    be registered directly as event callbacks.
    """

    def __init__(self):
        # ``None`` means no start time has been recorded yet.
        self.start_time = None

    def start(self, *args, **kwargs):
        """Record the monotonic clock reading for the cell about to run."""
        self.start_time = monotonic()

    def stop(self, *args, **kwargs):
        """Print the seconds elapsed since ``start``; do nothing if never started."""
        if self.start_time is None:
            # `stop` also fires for the cell that defines `CellTimer`, where
            # `start` was never called; there is nothing to report then.
            return
        delta = round(monotonic() - self.start_time, 2)
        print(f"\n⏱️ Execution time: {delta}s")


# Hook the timer into IPython's cell events: `timer.start` is registered for
# "pre_run_cell" and `timer.stop` for "post_run_cell".
timer = CellTimer()
ipython = get_ipython()
ipython.events.register("pre_run_cell", timer.start)
ipython.events.register("post_run_cell", timer.stop)

In [None]:
# Load the transactions CSV, parsing the "Timestamp" column as datetimes.
df = pd.read_csv("../../HI-Small_Trans.csv", parse_dates=["Timestamp"])

Rename columns for clarity and standard formatting 

In [None]:
# Rename the raw CSV headers to snake_case in place. "Account" becomes the
# sender account (`from_account`) and "Account.1" the receiver account
# (`to_account`).
df.rename(
    columns={
        "Timestamp": "timestamp",
        "From Bank": "from_bank",
        "Account": "from_account",
        "To Bank": "to_bank",
        "Account.1": "to_account",
        "Amount Received": "received_amount",
        "Receiving Currency": "received_currency",
        "Amount Paid": "sent_amount",
        "Payment Currency": "sent_currency",
        "Payment Format": "payment_type",
        "Is Laundering": "is_laundering",
    },
    inplace=True,
)

Drop duplicates

In [None]:
# Remove rows that duplicate an earlier row in every column (in place).
df.drop_duplicates(inplace=True)

### Preprocessing function for whole dataset

In [None]:
# Seed NumPy's global random number generator.
np.random.seed(42)

def df_label_encoder(df, columns):
    """Label-encode the given columns of ``df`` in place and return ``df``.

    Each listed column is cast to ``str`` and replaced by the integer codes of
    a ``LabelEncoder`` fitted on that column alone.
    """
    for column in columns:
        encoder = preprocessing.LabelEncoder()
        as_text = df[column].astype(str)
        df[column] = encoder.fit_transform(as_text)
    return df

def preprocess(df):
    """
    Preprocess the entire dataframe: node mappings, label encoding, and the
    time processing that is independent of the data split.

    The caller's dataframe is not modified; all work happens on a re-indexed
    copy, which is returned.

    Args:
        df: Transactions with the renamed columns ``from_bank``,
            ``from_account``, ``to_bank``, ``to_account``, ``payment_type``,
            ``sent_currency``, ``received_currency`` and a datetime
            ``timestamp`` column.

    Returns:
        A new dataframe with a fresh 0..n-1 index, the added id/index/time
        feature columns, and ``from_account``/``to_account`` removed.
    """
    # Copy first so none of the column assignments below reach the input.
    df = df.reset_index(drop=True)

    ## Create unique account - ID mapping ##
    # Get unique account-bank combos (a couple of acct numbers found at multiple banks)
    df["from_account_id"] = df["from_bank"].astype(str) + "_" + df["from_account"].astype(str)
    df["to_account_id"] = df["to_bank"].astype(str) + "_" + df["to_account"].astype(str)

    # Unique ids in first-appearance order: every sender, then the receivers
    # that never appeared as a sender.
    all_nodes = pd.concat([df["from_account_id"], df["to_account_id"]]).drop_duplicates()

    # Map node identifiers to integer indices
    node_mapping = {node: idx for idx, node in enumerate(all_nodes)}
    df["from_account_idx"] = df["from_account_id"].map(node_mapping)
    df["to_account_idx"] = df["to_account_id"].map(node_mapping)

    ## Label encode categorical vars ##
    # Use label encoding and let model learn (instead of one-hot embeddings)
    df = df_label_encoder(
        df,
        ["payment_type", "sent_currency", "received_currency", "from_bank", "to_bank"],
    )

    ## Currency conversion ##
    # Intentionally skipped for now; only the temporal features are studied.

    ## Time transformations ##
    timestamps = df["timestamp"]
    df["time_of_day"] = timestamps.dt.time
    df["hour_of_day"] = timestamps.dt.hour
    df["day_of_week"] = timestamps.dt.weekday  # 0=Monday,...,6=Sunday
    df["seconds_since_midnight"] = (
        timestamps.dt.hour * 3600
        + timestamps.dt.minute * 60
        + timestamps.dt.second
    )

    # Unix time in seconds (float). Subtracting the epoch and dividing by one
    # second does not depend on the platform's default integer width or on the
    # datetime resolution, unlike `astype(int) / 10**9`.
    epoch = pd.Timestamp(0, tz=timestamps.dt.tz)
    df["timestamp_int"] = (timestamps - epoch) / pd.Timedelta(seconds=1)

    # Cyclical encoding, so the end of a week/day sits next to its start.
    day_angle = 2 * np.pi * df["day_of_week"] / 7
    df["day_sin"] = np.sin(day_angle)
    df["day_cos"] = np.cos(day_angle)
    time_angle = 2 * np.pi * df["seconds_since_midnight"] / 86400
    df["time_of_day_sin"] = np.sin(time_angle)
    df["time_of_day_cos"] = np.cos(time_angle)

    # Binary weekend indicator (weekday 5 or 6)
    df["is_weekend"] = df["day_of_week"].isin([5, 6]).astype(int)

    return df.drop(columns=["from_account", "to_account"])

### Preprocessing function for train-val-test separately

In [None]:
def preprocess_split(df, scaler_time, scaler_amount):
    """
    Add the split-level temporal features to one data split.

    Args:
        df: One split (train/val/test) containing ``from_account_idx``,
            ``to_account_idx``, a datetime ``timestamp`` and ``timestamp_int``.
        scaler_time: Already-fitted scaler; its ``transform`` is applied to
            ``timestamp_int``.
        scaler_amount: Currently unused (amount scaling is disabled); kept so
            existing calls keep working.

    Returns:
        A new dataframe sorted by sender account and time, with
        ``timestamp_scaled``, ``time_diff_from_acct`` and ``time_diff_to_acct``
        (seconds since the same account's previous transaction in this split,
        0 for the first one).
    """
    # Sort transactions by sender and time (returns a new frame)
    df = df.sort_values(by=["from_account_idx", "timestamp"])

    # Apply scaling
    df["timestamp_scaled"] = scaler_time.transform(df[["timestamp_int"]])

    # Rows are already chronological within each sender, so a grouped diff
    # gives the gap to that sender's previous transaction.
    df["time_diff_from_acct"] = (
        df.groupby("from_account_idx")["timestamp"].diff().dt.total_seconds().fillna(0)
    )

    # Receivers need their own chronological order: diffing in sender order
    # would pair unrelated rows and could yield negative gaps. Work on a
    # positionally indexed view and write the result back by position so the
    # original index labels do not matter.
    by_receiver = df.reset_index(drop=True).sort_values(by=["to_account_idx", "timestamp"])
    receiver_gaps = (
        by_receiver.groupby("to_account_idx")["timestamp"].diff().dt.total_seconds().fillna(0)
    )
    df["time_diff_to_acct"] = receiver_gaps.sort_index().to_numpy()

    return df

### Run preprocessing

In [None]:
# Preprocess entire df
df_original = df.copy()
df_transformed = preprocess(df_original)

# Split train and test data using random stratification
train_df_rs, test_df_rs = train_test_split(
    df_transformed,
    test_size=0.2,
    stratify=df_transformed["is_laundering"],
    random_state=42,
)

### Random stratified: scaling the data ###
# Fit the scalers on the training split only and reuse them for every split,
# so the test data does not influence the scaling parameters.
scaler1 = MinMaxScaler()    # timestamps
scaler2 = StandardScaler()  # transaction amounts
scaler1.fit(train_df_rs[["timestamp_int"]])
scaler2.fit(train_df_rs[["sent_amount", "received_amount"]])

### Preprocess each split with the fitted scalers ###
# `preprocess_split` is the split-level function taking (df, scaler_time,
# scaler_amount); no function named `normalize` exists in this notebook.
train_df_rs = preprocess_split(train_df_rs, scaler1, scaler2)
test_df_rs = preprocess_split(test_df_rs, scaler1, scaler2)

## Transformations for model pipeline

### Applied to whole dataset

Combine bank + account for unique IDs as there were a few duplicate account numbers at different banks in the dataset:

In [None]:
## Create unique account - ID mapping ##
# Get unique account-bank combos (a couple of acct numbers found at multiple banks)
# Each id is "<bank>_<account>" built from the string forms of both columns.
df['from_account_id'] = df['from_bank'].astype(str) + '_' + df['from_account'].astype(str)
df['to_account_id'] = df['to_bank'].astype(str) + '_' + df['to_account'].astype(str)
# The raw account columns are removed once the combined ids exist.
df.drop(columns=["from_account","to_account"], inplace=True)

Map new node ids (from_bank + from_account, to_bank + to_account) to integer indices:

In [None]:
# Get list of unique account ids
# Order of `all_nodes`: sender ids in first-appearance order, followed by
# receiver ids that never appeared as a sender.
df = df.reset_index(drop=True)
from_nodes = df["from_account_id"].drop_duplicates().reset_index(drop=True)
to_nodes = df["to_account_id"].drop_duplicates().reset_index(drop=True)
all_nodes = pd.concat([from_nodes, to_nodes]).drop_duplicates().reset_index(drop=True)

# Map node identifiers to integer indices
# The index of an id is its position in `all_nodes`.
node_mapping = {node: idx for idx, node in enumerate(all_nodes)}
df["from_account_idx"] = df["from_account_id"].map(node_mapping)
df["to_account_idx"] = df["to_account_id"].map(node_mapping)

Extract items from timestamp:

In [None]:
# Extract items from timestamp
df["hour_of_day"] = df["timestamp"].dt.hour
df["day_of_week"] = df["timestamp"].dt.weekday  # 0=Monday,...,6=Sunday
df["seconds_since_midnight"] = (
    df["timestamp"].dt.hour * 3600      # Convert hours to seconds
    + df["timestamp"].dt.minute * 60    # Convert minutes to seconds
    + df["timestamp"].dt.second         # Keep seconds
)
# Transform timestamp to Unix time in seconds (float). Subtracting the epoch
# and dividing by one second does not depend on the platform's default integer
# width or on the datetime resolution, unlike `astype(int) / 10**9`.
df["timestamp_int"] = (
    df["timestamp"] - pd.Timestamp(0, tz=df["timestamp"].dt.tz)
) / pd.Timedelta(seconds=1)

Cyclical encoding:

In [None]:
# Apply cyclical encoding
# Day of week is mapped onto a circle with period 7 and time of day onto a
# circle with period 86400 seconds; each position is stored as a sin/cos pair.
df["day_sin"] = np.sin(2 * np.pi * df["day_of_week"] / 7)
df["day_cos"] = np.cos(2 * np.pi * df["day_of_week"] / 7)
df["time_of_day_sin"] = np.sin(2 * np.pi * df["seconds_since_midnight"] / 86400)
df["time_of_day_cos"] = np.cos(2 * np.pi * df["seconds_since_midnight"] / 86400)

Create binary weekend indicator

In [None]:
# 1 when `day_of_week` is 5 or 6 (Saturday/Sunday with 0=Monday), else 0.
df["is_weekend"] = df["day_of_week"].isin([5, 6]).astype(int)

### Applied on train/val/test separately

In [None]:
def preprocess_split(df, scaler_time, scaler_amount):
    """
    Add the split-level temporal features to one data split.

    Args:
        df: One split (train/val/test) containing ``from_account_idx``,
            ``to_account_idx``, a datetime ``timestamp`` and ``timestamp_int``.
        scaler_time: Already-fitted scaler; its ``transform`` is applied to
            ``timestamp_int``.
        scaler_amount: Currently unused (no amount scaling is performed here);
            kept so existing calls keep working.

    Returns:
        A new dataframe sorted by sender account and time, with
        ``timestamp_scaled``, ``time_diff_from_acct`` and ``time_diff_to_acct``
        (seconds since the same account's previous transaction in this split,
        0 for the first one).
    """
    # Sort transactions by sender and time (returns a new frame)
    df = df.sort_values(by=["from_account_idx", "timestamp"])

    # Apply scaling
    df["timestamp_scaled"] = scaler_time.transform(df[["timestamp_int"]])

    # Rows are already chronological within each sender, so a grouped diff
    # gives the gap to that sender's previous transaction.
    df["time_diff_from_acct"] = (
        df.groupby("from_account_idx")["timestamp"].diff().dt.total_seconds().fillna(0)
    )

    # Receivers need their own chronological order: diffing in sender order
    # would pair unrelated rows and could yield negative gaps. Work on a
    # positionally indexed view and write the result back by position so the
    # original index labels do not matter.
    by_receiver = df.reset_index(drop=True).sort_values(by=["to_account_idx", "timestamp"])
    receiver_gaps = (
        by_receiver.groupby("to_account_idx")["timestamp"].diff().dt.total_seconds().fillna(0)
    )
    df["time_diff_to_acct"] = receiver_gaps.sort_index().to_numpy()

    return df

## In Abhitay's pipeline

In [None]:
class model_pipeline:

    def __init__(self, df_path, random_state_):
        """Load the dataset and initialise the pipeline state.

        Args:
            df_path: Path of the transactions CSV, read with ``pd.read_csv``.
            random_state_: Seed passed to the train/test split.
        """
        self.df_path = df_path
        self.df = pd.read_csv(self.df_path)
        self.random_state_ = random_state_

        # Track if preprocessing steps have been completed
        self.preprocessed = {
            "renamed": False,
            "duplicates_removed": False,
            "unique_ids_created": False,
            "currency_normalized": False,
            "time_features_extracted": False,
            "cyclical_encoded": False,
            "weekend_encoded": False,
            "date_converted": False,
            "features_encoded": False,
            "neighbor_context_computed": False,
            "normalized": False,
        }

        # Scalers read by `normalize_train_test`; they must exist before it
        # runs. Min-max for timestamps, standardisation for amounts.
        self.scaler_time = MinMaxScaler()
        self.scaler_amount = StandardScaler()

        # For ML pipeline
        self.X = None
        self.y = None

        self.X_train = None
        self.y_train = None

        self.X_test = None
        self.y_test = None

        self.X_val = None
        self.y_val = None

        self.y_pred = None
        self.y_proba = None

        self.model = None
    
    ### Data tidying ###
    
    def rename_columns(self):
        """Renames the columns of self.df to standardized names."""
        column_mapping = {
            "Timestamp": "timestamp",
            "From Bank": "from_bank",
            "Account": "from_account",
            "To Bank": "to_bank",
            "Account.1": "to_account",
            "Amount Received": "received_amount",
            "Receiving Currency": "received_currency",
            "Amount Paid": "sent_amount",
            "Payment Currency": "sent_currency",
            "Payment Format": "payment_type",
            "Is Laundering": "is_laundering",
        }
        
        # Ensure required columns exist
        missing_columns = [col for col in column_mapping.keys() if col not in self.df.columns]
        if missing_columns:
            raise KeyError(f"Missing expected columns in dataset: {missing_columns}")

        self.df.rename(columns=column_mapping, inplace=True)
        self.preprocessed["renamed"] = True
        
    def drop_duplicates(self):
        """Drop duplicated rows from ``self.df`` in place and record the step."""
        self.df.drop_duplicates(inplace=True)
        self.preprocessed["duplicates_removed"] = True
        
    def create_unique_ids(self):
        """Create unique account - ID mapping."""
        if not self.preprocessed["renamed"]:
            raise RuntimeError("Columns must be renamed (run rename()) before creating unique IDs.")

        # Get unique account-bank combos (a couple of acct numbers found at multiple banks)
        self.df['from_account_id'] = self.df['from_bank'].astype(str) + '_' + self.df['from_account'].astype(str)
        self.df['to_account_id'] = self.df['to_bank'].astype(str) + '_' + self.df['to_account'].astype(str)
        self.df.drop(columns=["from_account","to_account"], inplace=True)
        
        # Get list of unique account ids
        self.df = self.df.reset_index(drop=True)
        from_nodes = self.df["from_account_id"].drop_duplicates().reset_index(drop=True)
        to_nodes = self.df["to_account_id"].drop_duplicates().reset_index(drop=True)
        all_nodes = pd.concat([from_nodes, to_nodes]).drop_duplicates().reset_index(drop=True)

        # Map node identifiers to integer indices
        node_mapping = {node: idx for idx, node in enumerate(all_nodes)}
        df["from_account_idx"] = df["from_account_id"].map(node_mapping)
        df["to_account_idx"] = df["to_account_id"].map(node_mapping)
        
        self.preprocessed["unique_ids_created"] = True
        
    ### Summary statistics ###
       
    def df_summary(self):
        """Show the first rows of ``self.df`` followed by its column summary."""
        print('DATA HEAD')
        display(self.df.head())
        print('FEATURE TYPE')
        # `DataFrame.info()` prints its own report and returns None, so it is
        # called directly; passing its result to `display` would show "None".
        self.df.info()

    def y_statistics(self):
        print('Normalised Value Count: ')
        print(self.df['is_laundering'].value_counts(normalize=True))

    ### Feature Transformation ###

    def currency_normalization(self):
        """Add USD-converted copies of the sent and received amounts.

        Creates ``sent_amount_USD`` and ``received_amount_USD`` by multiplying
        each amount with the conversion factor of its currency and marks the
        ``currency_normalized`` step as done.

        Raises:
            KeyError: If the currency columns are missing.
        """
        if "sent_currency" not in self.df.columns or "received_currency" not in self.df.columns:
            raise KeyError("Currency columns missing. Need to run rename() so in proper format")

        # NOTE(review): `currency` is not imported in the visible notebook;
        # confirm the module providing `get_usd_conversion` is available.
        usd_conversion = currency.get_usd_conversion(self.df_path)

        def usd_factor(code):
            # Currencies without a known rate are left unconverted (factor 1).
            return usd_conversion.get(code, 1)

        # Column-wise lookup and multiply instead of a Python call per row.
        self.df['sent_amount_USD'] = self.df['sent_amount'] * self.df['sent_currency'].map(usd_factor)
        self.df['received_amount_USD'] = (
            self.df['received_amount'] * self.df['received_currency'].map(usd_factor)
        )

        # `normalize_train_test` refuses to run unless this flag is set.
        self.preprocessed["currency_normalized"] = True

    def extract_time_features(self):
        if "timestamp" not in self.df.columns:
            raise KeyError("Missing 'timestamp' column, run rename().")
        if not isinstance(self.df["timestamp"], datetime.datetime):
            self.df["timestamp"] = pd.to_datetime(self.df["timestamp"])
        
        # Extract items from timestamp
        self.df["hour_of_day"] = self.df["timestamp"].dt.hour
        self.df["day_of_week"] = self.df["timestamp"].dt.weekday # 0=Monday,...,6=Sunday
        self.df["seconds_since_midnight"] = (
            self.df["timestamp"].dt.hour * 3600 +  # Convert hours to seconds
            self.df["timestamp"].dt.minute * 60 +  # Convert minutes to seconds
            self.df["timestamp"].dt.second         # Keep seconds
        )
        
        self.preprocessed["time_features_extracted"] = True
        
    def cyclical_encoding(self):
        if not self.preprocessed["time_features_extracted"]:
            raise RuntimeError("Time features missing, run `extract_time_features` first.")

        self.df["day_sin"] = np.sin(2 * np.pi * self.df["day_of_week"] / 7)
        self.df["day_cos"] = np.cos(2 * np.pi * self.df["day_of_week"] / 7)
        self.df["time_of_day_sin"] = np.sin(2 * np.pi * self.df["seconds_since_midnight"] / 86400)
        self.df["time_of_day_cos"] = np.cos(2 * np.pi * self.df["seconds_since_midnight"] / 86400)
        
        self.preprocessed["cyclical_encoded"] = True
        
    def binary_weekend(self):
        if "day_of_week" not in self.df.columns:
            raise KeyError("Day-of-week feature missing. Run `extract_time_features` first.")
        self.df["is_weekend"] = self.df["day_of_week"].isin([5, 6]).astype(int)
        self.preprocessed["weekend_encoded"] = True
    
    def date_to_unix(self):
        """Converts timestamp to Unix time."""
        if "timestamp" not in self.df.columns:
            raise KeyError("Missing 'timestamp' column.")
        self.df["timestamp"] = self.df["timestamp"].astype(int) / 10**9
        self.preprocessed["date_converted"] = True
        
    def label_encoding(self, features_to_encode=None):
        """Replace categorical columns of ``self.df`` with integer label codes.

        Args:
            features_to_encode: Column names to encode. Defaults to the
                categorical transaction columns (payment type, both
                currencies, both banks).

        Raises:
            RuntimeError: If the columns have not been renamed yet.
            KeyError: If a requested column is missing; no column is encoded
                in that case.
        """
        if not self.preprocessed["renamed"]:
            raise RuntimeError("Run rename() before encoding categorical features.")

        if features_to_encode is None:
            features_to_encode = [
                "payment_type",
                "sent_currency",
                "received_currency",
                "from_bank",
                "to_bank",
            ]

        # Validate everything up front so a bad name cannot leave the frame
        # half-encoded.
        for col in features_to_encode:
            if col not in self.df.columns:
                raise KeyError(f"Column '{col}' not found in the dataset.")

        # The file imports `sklearn.preprocessing` as a module, so the encoder
        # is reached through it; a fresh encoder is fitted per column.
        for col in features_to_encode:
            self.df[col] = preprocessing.LabelEncoder().fit_transform(self.df[col])

        self.preprocessed["features_encoded"] = True

    def neighbor_context(self):
        """Add degree-centrality and PageRank of each sender account.

        Builds a directed graph with one edge per (sender, receiver) pair,
        weighted by ``sent_amount_USD`` (a later transaction between the same
        pair overwrites the weight), then maps the node scores onto
        ``from_account_idx``.

        Raises:
            RuntimeError: If the unique account ids have not been created.
            KeyError: If a column needed for the edges is missing.
        """
        if not self.preprocessed["unique_ids_created"]:
            raise RuntimeError("Unique account IDs must be created before computing network features.")

        edge_columns = ['from_account_idx', 'to_account_idx', 'sent_amount_USD']
        missing = [col for col in edge_columns if col not in self.df.columns]
        if missing:
            raise KeyError(f"Columns required for network features are missing: {missing}")

        G = nx.DiGraph()
        # Feed (u, v, weight) tuples in bulk; avoids building a Series per row
        # as `iterrows` does.
        G.add_weighted_edges_from(
            self.df[edge_columns].itertuples(index=False, name=None)
        )

        self.df['degree_centrality'] = self.df['from_account_idx'].map(nx.degree_centrality(G))
        self.df['pagerank'] = self.df['from_account_idx'].map(nx.pagerank(G))

        self.preprocessed["neighbor_context_computed"] = True
        
    ## Applied to train and test separately
    def normalize_train_test(self):
        """Normalizes timestamp and transaction amounts separately for train and test sets."""
        if not self.preprocessed["date_converted"]:
            raise RuntimeError("Timestamps must be converted before normalization.")
        if not self.preprocessed["currency_normalized"]:
            raise RuntimeError("Convert currency to USD first")
        if self.X_train is None:
            raise RuntimeError("Split data into train and test first")
        
        # Fit scalers only on training data
        self.scaler_time.fit(self.X_train[["timestamp"]])
        self.scaler_amount.fit(self.X_train[["sent_amount_USD", "received_amount_USD"]])
        
        # Apply scaling separately on train and test data
        self.X_train["timestamp_scaled"] = self.scaler_time.transform(self.X_train[["timestamp"]])
        self.X_test["timestamp_scaled"] = self.scaler_time.transform(self.X_test[["timestamp"]])
        
        self.X_train[["sent_amount_USD_scaled", "received_amount_USD_scaled"]] = self.scaler_amount.transform(
            self.X_train[["sent_amount_USD", "received_amount_USD"]]
        )
        self.X_test[["sent_amount_USD_scaled", "received_amount_USD_scaled"]] = self.scaler_amount.transform(
            self.X_test[["sent_amount_USD", "received_amount_USD"]]
        )
        
        # Compute time differences separately for train and test
        self.X_train["time_diff_from_acct"] = self.X_train.groupby("from_account_idx")["timestamp"].diff().dt.total_seconds().fillna(0)
        self.X_train["time_diff_to_acct"] = self.X_train.groupby("to_account_idx")["timestamp"].diff().dt.total_seconds().fillna(0)
        
        self.X_test["time_diff_from_acct"] = self.X_test.groupby("from_account_idx")["timestamp"].diff().dt.total_seconds().fillna(0)
        self.X_test["time_diff_to_acct"] = self.X_test.groupby("to_account_idx")["timestamp"].diff().dt.total_seconds().fillna(0)

        self.preprocessed["normalized"] = True
        
    ### Preprocessing Pipeline ###
    
    def run_preprocessing(self):
        """Runs all preprocessing steps in the correct order."""
        print("Running preprocessing pipeline...")
        
        try:
            self.rename_columns()
            self.drop_duplicates()
            self.create_unique_ids()
            self.currency_normalization()
            self.extract_time_features()
            self.cyclical_encoding()
            self.binary_weekend()
            self.date_to_unix()
            self.label_encoding()
            self.neighbor_context()
            print("Preprocessing completed successfully!")
        except Exception as e:
            print(f"Error in preprocessing: {e}")

    def generate_tensor(self,edge_features):
        """Build the train and test graph ``Data`` objects from the splits.

        For each split, ``x`` is a float tensor of the ``edge_features``
        columns, ``y`` a long tensor of the labels and ``edge_index`` the
        transposed (sender, receiver) index columns. Sets
        ``train_node_features``, ``train_data``, ``test_node_features`` and
        ``test_data``.
        """
        # NOTE(review): `x` has one row per row of the split, while
        # `edge_index` holds account indices; confirm that these two index
        # spaces are meant to line up.
        def build(features, targets):
            x = torch.tensor(features[edge_features].values, dtype=torch.float)
            labels = torch.tensor(targets.values, dtype=torch.long)
            endpoints = features[['from_account_idx', 'to_account_idx']].values.T
            edge_index = torch.tensor(endpoints, dtype=torch.long)
            return x, edge_index, labels

        self.train_node_features, edge_index, labels = build(self.X_train, self.y_train)
        self.train_data = Data(x=self.train_node_features, edge_index=edge_index, y=labels)

        self.test_node_features, edge_index, labels = build(self.X_test, self.y_test)
        self.test_data = Data(x=self.test_node_features, edge_index=edge_index, y=labels)
    
    def split_x_y(self, X_cols, y_col):
        self.X = self.df[X_cols]
        self.y = self.df[y_col]

    def split_train_test(self, test_size_):
        """Split ``self.X``/``self.y`` into train and test parts.

        The split is random, stratified on ``self.y`` and seeded with
        ``self.random_state_``; ``test_size_`` is passed through as
        ``test_size``.
        """
        # Random Split For now
        parts = train_test_split(
            self.X,
            self.y,
            test_size=test_size_,
            random_state=self.random_state_,
            stratify=self.y,
        )
        self.X_train, self.X_test, self.y_train, self.y_test = parts
    
    def random_forest_classifier(self, param):
        """Create a ``RandomForestClassifier(**param)`` and fit it on the training split."""
        forest = RandomForestClassifier(**param)
        self.model = forest
        forest.fit(self.X_train, self.y_train)

    def xgboost_classifier(self,param):
        """Wrap both splits in ``xgb.DMatrix`` and train a booster with ``param``.

        Stores the matrices as ``self.dtrain``/``self.dtest`` and the trained
        booster as ``self.model``.
        """
        train_matrix = xgb.DMatrix(self.X_train, label=self.y_train)
        self.dtrain = train_matrix
        self.dtest = xgb.DMatrix(self.X_test, label=self.y_test)
        self.model = xgb.train(param, train_matrix)

    def training_gnn_model(self, learning_rate, epoch_,gnn_model):
        """Train the model class named ``gnn_model`` on ``self.train_data``.

        Args:
            learning_rate: Learning rate of the Adam optimizer.
            epoch_: Number of full-batch training iterations.
            gnn_model: Name of a model class looked up in ``globals()``; it is
                built with ``input_dim`` equal to the number of training
                feature columns, ``hidden_dim=16`` and ``output_dim=2``.
        """
        model_cls = globals()[gnn_model]
        feature_count = self.train_node_features.shape[1]
        self.model = model_cls(input_dim=feature_count, hidden_dim=16, output_dim=2)

        # Define optimizer and loss function
        optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
        criterion = nn.CrossEntropyLoss()

        # Use the GPU when one is available, otherwise the CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self.model.to(device)
        self.train_data = self.train_data.to(device)

        # Training loop: one forward/backward pass over the whole training
        # graph per epoch.
        for epoch in range(epoch_):
            self.model.train()
            optimizer.zero_grad()
            out = self.model(self.train_data.x, self.train_data.edge_index)
            loss = criterion(out, self.train_data.y)
            loss.backward()
            optimizer.step()

            # Report the loss every tenth epoch.
            if epoch % 10 == 0:
                print(f"Epoch {epoch} - Loss: {loss.item():.4f}")

    def predict_model_gnn(self):
        self.model.eval()
        with torch.no_grad():
            out_probs = self.model(self.test_data.x, self.test_data.edge_index)
            self.y_proba = out_probs.cpu().numpy()
            self.y_pred = out_probs.argmax(dim=1).cpu().numpy()

    def predict_model(self, xgboost_flag = 'null'):
        if xgboost_flag == 'null':
            self.y_pred = self.model.predict(self.X_test)
            self.y_proba = self.model.predict_proba(self.X_test)
        else:
            self.y_proba = self.model.predict(self.dtest)
            self.y_pred = (self.y_proba > 0.5).astype(int)

    def result_metrics(self):
        """Print evaluation metrics and, when scores exist, plot diagnostics.

        Always prints the classification report, balanced accuracy and MCC for
        ``self.y_pred`` against ``self.y_test``. When ``self.y_proba`` is set
        it additionally prints log loss, ROC AUC and PR AUC and draws the
        confusion matrix, ROC curve and precision-recall curve.
        """
        print(classification_report(self.y_test, self.y_pred, digits=4))

        cm = confusion_matrix(self.y_test, self.y_pred)
        accuracy = balanced_accuracy_score(self.y_test, self.y_pred)
        mcc = matthews_corrcoef(self.y_test, self.y_pred)
        logloss = log_loss(self.y_test, self.y_proba) if self.y_proba is not None else None

        print(f"Balanced Accuracy: {accuracy:.4f}")
        print(f"Matthews Correlation Coefficient (MCC): {mcc:.4f}")
        # Compare with None explicitly: a log loss of exactly 0.0 is falsy and
        # would otherwise not be printed.
        if logloss is not None:
            print(f"Log Loss: {logloss:.4f}")

        if self.y_proba is None:
            return

        # A 1-D array already holds the positive-class scores; for a 2-D array
        # take column 1.
        if len(self.y_proba.shape) == 1:
            positive_scores = self.y_proba
        else:
            positive_scores = self.y_proba[:, 1]

        fpr, tpr, _ = roc_curve(self.y_test, positive_scores)
        roc_auc = roc_auc_score(self.y_test, positive_scores)
        precision, recall, _ = precision_recall_curve(self.y_test, positive_scores)
        pr_auc = auc(recall, precision)

        print(f"AUC-ROC Score: {roc_auc:.4f}")
        print(f"Precision-Recall AUC: {pr_auc:.4f}")

        fig, axes = plt.subplots(1, 3, figsize=(18, 5))

        # Panel 1: confusion matrix
        class_labels = ["Licit", "Illicit"]
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=axes[0])
        axes[0].set_title("Confusion Matrix")
        axes[0].set_xlabel("Predicted Label")
        axes[0].set_ylabel("True Label")
        axes[0].set_xticklabels(class_labels)
        axes[0].set_yticklabels(class_labels)

        # Panel 2: ROC curve with the chance diagonal as baseline
        axes[1].plot(fpr, tpr, label=f'ROC AUC = {roc_auc:.4f}')
        axes[1].plot([0, 1], [0, 1], linestyle="--", color="gray")  # Baseline
        axes[1].set_title("ROC Curve")
        axes[1].set_xlabel("False Positive Rate")
        axes[1].set_ylabel("True Positive Rate")
        axes[1].legend()

        # Panel 3: precision-recall curve
        axes[2].plot(recall, precision, label=f'PR AUC = {pr_auc:.4f}')
        axes[2].set_title("Precision-Recall Curve")
        axes[2].set_xlabel("Recall")
        axes[2].set_ylabel("Precision")
        axes[2].legend()

        plt.tight_layout()
        plt.show()


IndentationError: expected an indented block (1676916179.py, line 191)