In [1]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import tensorflow as tf
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


# 1. Load and Prepare the Data  #
# Colab-specific Google Drive location of the dataset; adjust when running elsewhere.
DATA_PATH = '/content/drive/MyDrive/MTU/ai-project/creditcard.csv'
data = pd.read_csv(DATA_PATH)
# Order transactions chronologically. A stable sort is requested explicitly so
# that rows sharing the same 'Time' value keep their original file order: the
# sliding windows built later depend on row order, and the default sort
# algorithm gives no such guarantee for ties.
data = data.sort_values('Time', kind='mergesort')

# Extract features and labels ('Time' is only used for ordering, 'Class' is the target)
features = data.drop(columns=['Class', 'Time']).values
labels = data['Class'].values

# Scale features to zero mean / unit variance.
# NOTE(review): the scaler is fitted on the full dataset before any train /
# validation split happens, so validation statistics influence the scaling.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Global variable for the number of features (needed for model input shape)
n_features = features_scaled.shape[1]

# --------------------------------------#
# 2. Function to Create Sequences       #
# --------------------------------------#
def create_sequences(data, labels, seq_length):
    """Build overlapping (stride 1) windows of consecutive rows and one label per window.

    Args:
        data: Array-like whose first axis indexes consecutive samples,
            e.g. shape ``(n_samples, n_features)``.
        labels: 1-D array-like of numeric labels, one per row of ``data``.
        seq_length: Number of consecutive rows per window; must be >= 1.

    Returns:
        Tuple ``(X, y)``:
            * ``X`` has shape ``(n_samples - seq_length + 1, seq_length, ...)``
              where ``...`` are the trailing dimensions of ``data``. It is an
              independent, writable copy (not a view onto ``data``).
            * ``y`` is an integer array with one entry per window: 1 when the
              labels inside the window sum to more than 0, otherwise 0.
        When ``data`` has fewer than ``seq_length`` rows, both arrays are empty
        but ``X`` still carries the ``(0, seq_length, ...)`` shape.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1 or if ``data`` and
            ``labels`` differ in length.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    data = np.asarray(data)
    labels = np.asarray(labels)
    if len(data) != len(labels):
        raise ValueError(
            f"data and labels must have the same length, got {len(data)} and {len(labels)}"
        )

    # Not enough rows for even a single window: return correctly shaped empties
    # so downstream reshape / model code does not trip over a bare (0,) array.
    if len(data) < seq_length:
        empty_X = np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
        return empty_X, np.empty((0,), dtype=int)

    # sliding_window_view appends the window axis last, i.e. (n_windows, ..., seq_length);
    # move it directly behind the window index to obtain (n_windows, seq_length, ...).
    windows = np.lib.stride_tricks.sliding_window_view(data, seq_length, axis=0)
    X = np.ascontiguousarray(np.moveaxis(windows, -1, 1))

    # Label a sequence as fraudulent if any transaction in it is fraud
    label_windows = np.lib.stride_tricks.sliding_window_view(labels, seq_length, axis=0)
    y = (label_windows.sum(axis=-1) > 0).astype(int)
    return X, y


# 3. Custom HyperModel that Tunes Sequence Length     #
#    and Other Hyperparameters                        #
class LSTMHyperModel(kt.HyperModel):
    """Two-layer LSTM binary classifier whose input window length is itself a hyperparameter.

    Because the window length changes the shape of the training data, the
    data preparation (windowing, splitting, SMOTE oversampling) lives in
    ``fit`` rather than outside the tuner. Both methods read the module-level
    ``features_scaled``, ``labels`` and ``n_features`` variables.

    NOTE(review): ``create_sequences`` produces windows with stride 1, and
    ``fit`` splits those windows randomly, so a validation window can share
    most of its rows with training windows. Validation scores are therefore
    likely optimistic; a chronological split would avoid this.
    """

    def build(self, hp):
        """Create and compile the model for one trial.

        Tuned hyperparameters: ``sequence_length`` (5, 10 or 20),
        ``units_lstm1`` (32-128), ``dropout1`` (0.1-0.5), ``units_lstm2``
        (16-64), ``dropout2`` (0.1-0.5) and ``learning_rate`` (1e-4 to 1e-2,
        log-sampled).

        Args:
            hp: The ``HyperParameters`` container supplied by the tuner.

        Returns:
            A compiled ``Sequential`` model with a single sigmoid output.
        """
        # Tune the sequence length: choose from 5, 10, or 20
        seq_length = hp.Choice('sequence_length', [5, 10, 20])

        model = Sequential()
        # Declare the input shape with an explicit Input object instead of
        # passing input_shape= to the first layer, which newer Keras versions
        # warn about for Sequential models.
        model.add(tf.keras.Input(shape=(seq_length, n_features)))

        # First LSTM layer (returns the full sequence so a second LSTM can follow)
        units1 = hp.Int('units_lstm1', min_value=32, max_value=128, step=32)
        model.add(LSTM(units1, return_sequences=True))
        dropout1 = hp.Float('dropout1', min_value=0.1, max_value=0.5, step=0.1)
        model.add(Dropout(dropout1))

        # Second LSTM layer (returns only the last state)
        units2 = hp.Int('units_lstm2', min_value=16, max_value=64, step=16)
        model.add(LSTM(units2))
        dropout2 = hp.Float('dropout2', min_value=0.1, max_value=0.5, step=0.1)
        model.add(Dropout(dropout2))

        # Dense output layer
        model.add(Dense(1, activation='sigmoid'))

        # Tune learning rate
        learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
            loss='binary_crossentropy',
            metrics=[
                'accuracy',
                tf.keras.metrics.Precision(name='precision'),
                tf.keras.metrics.Recall(name='recall'),
                tf.keras.metrics.AUC(name='auc')
            ]
        )
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Prepare data for the trial's sequence length and train the model.

        Args:
            hp: The trial's ``HyperParameters``; only ``sequence_length`` is read here.
            model: The model returned by ``build`` for this trial.
            *args: Ignored. Training data is generated inside this method, so
                positional data passed to ``tuner.search`` is not used.
            **kwargs: Extra keyword arguments forwarded to ``model.fit`` (the
                tuner supplies e.g. ``callbacks``). ``epochs`` and
                ``batch_size`` default to 10 and 64 but may be overridden via
                ``tuner.search(epochs=..., batch_size=...)``. Any
                ``validation_data`` passed in is replaced, because the
                validation windows must match this trial's sequence length.

        Returns:
            The ``History`` object returned by ``model.fit``.
        """
        # Retrieve the chosen sequence length
        seq_length = hp.get('sequence_length')
        # Generate sequences using the tunable sequence length
        X_seq, y_seq = create_sequences(features_scaled, labels, seq_length)
        # Split the data into training and validation sets
        X_train, X_val, y_train, y_val = train_test_split(
            X_seq, y_seq, test_size=0.2, random_state=42, stratify=y_seq
        )
        # Apply SMOTE on the training set only. SMOTE expects 2-D input, so each
        # window is flattened to one row and restored to 3-D afterwards.
        n_samples, seq_len, n_features_local = X_train.shape
        X_train_flat = X_train.reshape(n_samples, seq_len * n_features_local)
        smote = SMOTE(random_state=42)
        X_train_res, y_train_res = smote.fit_resample(X_train_flat, y_train)
        X_train_res = X_train_res.reshape(-1, seq_length, n_features_local)

        # Merge defaults with caller-supplied options instead of passing both
        # explicitly: giving e.g. epochs twice would raise a TypeError.
        fit_kwargs = {'epochs': 10, 'batch_size': 64, **kwargs}
        fit_kwargs['validation_data'] = (X_val, y_val)

        return model.fit(X_train_res, y_train_res, **fit_kwargs)

In [4]:
# 4. Set Up and Run the Tuner  #
hypermodel = LSTMHyperModel()

# NOTE(review): no overwrite flag is passed, so results already stored under
# directory/project_name may be reloaded instead of searching again - confirm
# against the KerasTuner documentation before relying on a rerun.
tuner = kt.RandomSearch(
    hypermodel,
    objective='val_auc',
    max_trials=10,         # Increase for a more thorough search
    executions_per_trial=1,
    directory='hyperparam_tuning',
    project_name='credit_card_fraud_lstm'
)

# Since our HyperModel.fit() method handles data generation,
# we don't need to pass x and y to tuner.search().
tuner.search()

# -----------------------------#
# 5. Evaluate the Best Model   #
# -----------------------------#
# Get the best hyperparameters and model
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_seq_length = best_hp.get('sequence_length')

# For final evaluation, generate a test set using the best sequence length.
# NOTE(review): this split uses the same data, test_size, random_state and
# stratification as LSTMHyperModel.fit, so for the winning sequence length the
# "test" windows are the validation windows that drove model selection. The
# metrics below are therefore not an independent estimate.
X_seq_all, y_seq_all = create_sequences(features_scaled, labels, best_seq_length)
X_train_final, X_test_final, y_train_final, y_test_final = train_test_split(
    X_seq_all, y_seq_all, test_size=0.2, random_state=42, stratify=y_seq_all
)
# No SMOTE resampling is done here: the model evaluated below is restored from
# the tuner's checkpoint and is not retrained, so an oversampled training set
# would be computed (at considerable time and memory cost) and never used.

# Retrieve the best model and evaluate on the test set
best_model = tuner.get_best_models(num_models=1)[0]
# return_dict=True yields the metrics keyed by the names given at compile time
# in LSTMHyperModel.build, so the lookup does not depend on metric order.
test_metrics = best_model.evaluate(X_test_final, y_test_final, verbose=0, return_dict=True)
loss = test_metrics['loss']
accuracy = test_metrics['accuracy']
precision = test_metrics['precision']
recall = test_metrics['recall']
auc_metric = test_metrics['auc']
print("Best Model Evaluation on Test Set:")
print(f"Loss: {loss:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"AUC: {auc_metric:.4f}")


Trial 10 Complete [00h 12m 38s]
val_auc: 0.9920414686203003

Best val_auc So Far: 0.9998045563697815
Total elapsed time: 02h 07m 12s


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


Best Model Evaluation on Test Set:
Loss: 0.0073
Accuracy: 0.9978
Precision: 0.9364
Recall: 0.9947
AUC: 0.9998


In [5]:
# -----------------------------------------#
# 6. Print All Trials and Best Parameters  #
# -----------------------------------------#

# One line per trial recorded by the tuner's oracle: id, objective score and
# the hyperparameter values that were tried.
print("\nAll Trial Results:")
for trial in tuner.oracle.trials.values():
    summary_line = (
        f"Trial ID: {trial.trial_id}, "
        f"Score: {trial.score}, "
        f"Hyperparameters: {trial.hyperparameters.values}"
    )
    print(summary_line)

# Ask the tuner for its single best configuration and show its values.
top_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)
best_hp = top_hyperparameters[0]
print("\nBest Hyperparameters:")
print(best_hp.values)


All Trial Results:
Trial ID: 00, Score: 0.998674750328064, Hyperparameters: {'sequence_length': 10, 'units_lstm1': 96, 'dropout1': 0.30000000000000004, 'units_lstm2': 16, 'dropout2': 0.2, 'learning_rate': 0.00012337256785879854}
Trial ID: 01, Score: 0.982997477054596, Hyperparameters: {'sequence_length': 5, 'units_lstm1': 128, 'dropout1': 0.1, 'units_lstm2': 32, 'dropout2': 0.30000000000000004, 'learning_rate': 0.00010406284012562844}
Trial ID: 02, Score: 0.9994115829467773, Hyperparameters: {'sequence_length': 10, 'units_lstm1': 64, 'dropout1': 0.30000000000000004, 'units_lstm2': 32, 'dropout2': 0.30000000000000004, 'learning_rate': 0.0006201914963139159}
Trial ID: 03, Score: 0.9933164119720459, Hyperparameters: {'sequence_length': 5, 'units_lstm1': 96, 'dropout1': 0.1, 'units_lstm2': 64, 'dropout2': 0.5, 'learning_rate': 0.0003987720809096887}
Trial ID: 04, Score: 0.9992073774337769, Hyperparameters: {'sequence_length': 20, 'units_lstm1': 96, 'dropout1': 0.2, 'units_lstm2': 64, 'dro

In [12]:
# Persist the tuned model to Google Drive in the native .keras format.
best_model_path = '/content/drive/MyDrive/MTU/ai-project/best_lstm_model.keras'
best_model.save(best_model_path)

In [13]:
from tensorflow.keras.models import load_model

# Restore the model from the .keras file stored on Google Drive
saved_model_path = '/content/drive/MyDrive/MTU/ai-project/best_lstm_model.keras'
loaded_model = load_model(saved_model_path)

# Print the layer-by-layer overview to verify the restored architecture
loaded_model.summary()


  saveable.load_own_variables(weights_store.get(inner_path))


In [2]:
import tensorflow as tf
from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia
from tensorflow_privacy.privacy.membership_inference_attack import attack_input_data


NotFoundError: dlopen(/opt/miniconda3/envs/ds/lib/python3.9/site-packages/tensorflow-plugins/libmetal_plugin.dylib, 0x0006): symbol not found in flat namespace '__ZN10tensorflow8internal10LogMessage16VmoduleActivatedEPKci'

In [1]:
import tensorflow_privacy

ModuleNotFoundError: No module named 'tensorflow_privacy'