In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

import tensorflow as tf
import keras_tuner as kt
from keras import layers
from keras_tuner import BayesianOptimization

from sklearn.utils import class_weight
from sklearn.metrics import classification_report
import scikitplot as skplt
import matplotlib.pyplot as plt

from modules.tuner_classes import CustomHyperModel, HyperparameterLogger
from utils.image_manipulation import resize_and_pad, swap_labels
from utils.log_to_csv import get_all_trials

In [None]:
# Input geometry and batch size consumed by the dataset-loading cell.
IMG_SIZE = (300, 300)
BATCH_SIZE = 64

# Record which GPUs TensorFlow can see; the list is printed as-is.
device = tf.config.list_physical_devices('GPU')
print(f"Using device: {device}")

# Random augmentation pipeline: flip on both axes, then rotation, zoom and
# contrast jitter, each configured with a factor of 0.2.
# NOTE(review): no other cell visible here references `data_augmentation` or
# `preprocess_input` — confirm whether CustomHyperModel uses them or builds
# its own equivalents.
augmentation_steps = [
    layers.RandomFlip("horizontal_and_vertical"),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.2),
    layers.RandomContrast(0.2),
]
data_augmentation = tf.keras.Sequential(augmentation_steps)

# Short alias for the EfficientNetV2 input preprocessing function.
preprocess_input = tf.keras.applications.efficientnet_v2.preprocess_input

In [None]:
train_dir = 'data/train'
test_dir = 'data/test'

# Fraction of the training directory held out for validation, and the seed
# that both loader calls must share so their file-level splits are disjoint.
VALIDATION_SPLIT = 0.2
SPLIT_SEED = 50

# Split at load time instead of calling take()/skip() on a shuffled dataset.
# With the default shuffle=True the loader re-orders samples on every pass,
# so a take()/skip() boundary drawn afterwards does not separate a fixed set
# of images: samples drift between the "train" and "validation" halves from
# one epoch to the next. validation_split/subset partitions the file list
# once, before any shuffling happens.
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    validation_split=VALIDATION_SPLIT,
    subset="training",
    seed=SPLIT_SEED,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)
val_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    validation_split=VALIDATION_SPLIT,
    subset="validation",
    seed=SPLIT_SEED,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

# The test set keeps its on-disk order (shuffle=False) so that predictions
# and labels collected in separate passes line up row for row.
test_dataset = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# Number of training batches; kept under its original name for any cell that
# still reads it.
train_size = len(train_dataset)

In [5]:
# Run every split through the same two project helpers, in the same order:
# resize_and_pad first, then swap_labels.
# The dataset names are rebound to the mapped versions, so executing this
# cell a second time would stack both maps on top of already-mapped data;
# re-run the loading cell first if this one needs to be repeated.
train_dataset, val_dataset, test_dataset = (
    split.map(resize_and_pad).map(swap_labels)
    for split in (train_dataset, val_dataset, test_dataset)
)

In [None]:
def _collect_labels(dataset):
    """Return every label in `dataset` as one flat NumPy array.

    Iterates the dataset once and concatenates the label component of each
    (features, labels) batch in iteration order.
    """
    return np.concatenate([batch_labels.numpy() for _, batch_labels in dataset])


train_labels = _collect_labels(train_dataset)
val_labels = _collect_labels(val_dataset)
test_labels = _collect_labels(test_dataset)
# Combined view kept for inspection only; it is deliberately NOT used to fit
# the class weights below.
all_labels = np.concatenate((train_labels, val_labels, test_labels))

# Fit the balancing weights on the training labels alone. Including the
# validation/test labels would let held-out label statistics influence the
# training loss.
train_classes = np.unique(train_labels)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=train_classes,
    y=train_labels,
)

# Key the mapping by the actual class label rather than by array position, so
# it stays correct even if the labels are not exactly 0..n-1. Plain int/float
# values keep the printed dict readable.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(train_classes, class_weights)
}
print("Class weights:", class_weight_dict)

In [None]:
# Hypermodel and optimisation target for the search.
# NOTE(review): "val_f1_score" must match a metric name reported during
# validation — presumably configured inside CustomHyperModel; confirm.
search_space = CustomHyperModel()
search_objective = kt.Objective("val_f1_score", direction="max")

# Bayesian search: 10 trials, one execution each, results written under
# logs/fraud_model. overwrite=True is passed so the tuner does not reuse
# results already stored for this project.
tuner = BayesianOptimization(
    hypermodel=search_space,
    objective=search_objective,
    max_trials=10,
    executions_per_trial=1,
    directory='logs',
    project_name='fraud_model',
    overwrite=True,
)

# Project callback attached to every trial's training run.
hyperparameter_logger = HyperparameterLogger()
search_callbacks = [hyperparameter_logger]

# Each trial trains for 10 epochs with the class weights computed earlier.
tuner.search(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
    class_weight=class_weight_dict,
    callbacks=search_callbacks,
)

In [None]:
# Best model found by the tuner search.
best_model = tuner.get_best_models(num_models=1)[0]

# evaluate() returns the loss followed by the compiled metrics; index 1 is the
# first metric.
# NOTE(review): this is reported as accuracy on the assumption that accuracy
# is the first metric CustomHyperModel compiles — confirm against its compile().
test_accuracy = best_model.evaluate(test_dataset)[1]
print(f"Test accuracy: {test_accuracy:.4f}")

# Per-class scores, one row per test image. Column 1 is thresholded to obtain
# the hard prediction for class 1.
test_predictions = best_model.predict(test_dataset)
DECISION_THRESHOLD = 0.5
# Stored under its own name: the original code wrote the predictions into
# `test_labels`, silently replacing the ground-truth array of that name built
# in the class-weight cell.
predicted_labels = (test_predictions[:, 1] > DECISION_THRESHOLD).astype(int)

# Ground truth gathered in a second pass; test_dataset was loaded with
# shuffle=False, so its order matches the prediction rows.
true_labels = np.concatenate([y.numpy() for _, y in test_dataset])
print(classification_report(true_labels, predicted_labels))

# Confusion matrix uses the hard predictions; ROC and precision-recall curves
# use the raw per-class scores.
skplt.metrics.plot_confusion_matrix(true_labels, predicted_labels)
skplt.metrics.plot_roc(true_labels, test_predictions)
skplt.metrics.plot_precision_recall(true_labels, test_predictions)
plt.show()

In [None]:
# Print the architecture summary of the best model returned by the tuner.
best_model.summary()

In [None]:
# Tuner project directory: the `directory` / `project_name` pair given to the
# tuner ('logs' + 'fraud_model').
logs_dir = 'logs/fraud_model'

# get_all_trials is a project helper; whatever it returns is handed straight
# to the DataFrame constructor.
trials_data = get_all_trials(logs_dir)
df = pd.DataFrame(data=trials_data)

# Persist the trial table next to the tuner logs, without the index column.
df.to_csv(path_or_buf='logs/keras_tuner_results.csv', index=False)