In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score
import numpy as np
import pickle
import os
from tensorflow.keras.mixed_precision import Policy, set_global_policy
from joblib import Parallel, delayed

# GPU configuration: enable on-demand memory growth and, only when a GPU is
# actually present, switch Keras to mixed precision.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print("GPU memory growth enabled.")
    except (RuntimeError, ValueError) as e:
        # set_memory_growth is documented to raise RuntimeError once the
        # runtime is initialised and ValueError for an invalid device.
        print(f"Error configuring GPU: {e}")

    # float16 compute only pays off on GPU hardware; on a CPU-only runtime it
    # slows training down, so the default float32 policy is kept there.
    set_global_policy(Policy('mixed_float16'))
else:
    print("No GPU detected, running on CPU.")

# Locations of the pickled feature sets and of the best-model checkpoint.
# Every path is relative to the current working directory.
_PROJECT_DIR = 'drive/MyDrive/SP_cup'
_FEATURES_DIR = f'{_PROJECT_DIR}/features'
_CHECKPOINT_DIR = f'{_PROJECT_DIR}/checkpoints'

FAKE_TRAIN_FEATURES_PATH = f'{_FEATURES_DIR}/standardized_fake_train.pkl'
REAL_TRAIN_FEATURES_PATH = f'{_FEATURES_DIR}/standardized_real_train.pkl'
FAKE_VALID_FEATURES_PATH = f'{_FEATURES_DIR}/spatial_valid_fake.pkl'
REAL_VALID_FEATURES_PATH = f'{_FEATURES_DIR}/spatial_valid_real.pkl'
CHECKPOINT_PATH = f"{_CHECKPOINT_DIR}/model_optimized.keras"

# Make sure the checkpoint directory exists before anything writes into it.
os.makedirs(_CHECKPOINT_DIR, exist_ok=True)

# Load a pickled feature collection from disk
def load_features(file_path):
    """Return the object stored in the pickle file at ``file_path``.

    NOTE: unpickling can execute arbitrary code, so this must only be pointed
    at feature files from a trusted source.
    """
    with open(file_path, mode='rb') as handle:
        loaded = pickle.load(handle)
    return loaded

# Feature validation and extraction
def validate_and_extract(features):
    """Collect every usable feature vector from ``features`` into one array.

    Each entry of ``features`` may be either a dict carrying a ``'features'``
    key or a list of such dicts. Anything else (and any dict without that key)
    is skipped silently.

    The extraction is a plain dictionary lookup per entry, so it runs
    sequentially: a thread pool cannot speed up this GIL-bound work and only
    adds dispatch overhead. This also matches the later definition of the same
    function in this file.

    Args:
        features: iterable of dicts and/or lists of dicts.

    Returns:
        ``np.ndarray`` of dtype float32 holding the extracted vectors in input
        order.
    """
    def extract_feature(entry):
        if isinstance(entry, list):
            return [
                sub_entry['features']
                for sub_entry in entry
                if isinstance(sub_entry, dict) and 'features' in sub_entry
            ]
        if isinstance(entry, dict) and 'features' in entry:
            return [entry['features']]
        return []

    valid_features = [
        vector for entry in features for vector in extract_feature(entry)
    ]
    return np.array(valid_features, dtype=np.float32)

# Augment features for robustness
def augment_features(X, y, augment_factor=2):
    """Build ``augment_factor`` perturbed copies of ``X`` with matching labels.

    Each copy is ``(X + noise) * scale``, where ``noise`` is drawn element-wise
    from N(0, 0.01) and ``scale`` element-wise from U(0.9, 1.1). The unperturbed
    ``X`` itself is not part of the result.

    Args:
        X: 2-D array of feature vectors.
        y: 1-D array of labels aligned with the rows of ``X``.
        augment_factor: number of perturbed copies to generate (>= 1).

    Returns:
        Tuple ``(X_aug, y_aug)``: the copies stacked row-wise and ``y``
        repeated once per copy.

    Raises:
        ValueError: if ``augment_factor`` is smaller than 1.
    """
    if augment_factor < 1:
        raise ValueError("augment_factor must be at least 1")

    X = np.asarray(X)
    # NumPy draws float64 samples; without casting back, float32 features would
    # be silently promoted and the augmented set would take twice the memory.
    out_dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else np.float64

    augmented_X, augmented_y = [], []
    for _ in range(augment_factor):
        noise = np.random.normal(0, 0.01, X.shape)
        scale = np.random.uniform(0.9, 1.1, X.shape)
        perturbed = (X + noise) * scale
        augmented_X.append(perturbed.astype(out_dtype, copy=False))
        augmented_y.append(y)
    return np.vstack(augmented_X), np.hstack(augmented_y)

# Build the optimized model
def build_model(input_shape):
    """Create and compile the binary classifier.

    The network is three L2-regularised ReLU Dense layers (128, 64, 32 units),
    each followed by batch normalisation and dropout, ending in a single
    sigmoid unit. It is compiled with Adam (learning rate 1e-4), binary
    cross-entropy loss and an accuracy metric.

    Args:
        input_shape: shape tuple of one input sample, e.g. ``(n_features,)``.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        Input(shape=input_shape),
        Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.4),
        Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.3),
        Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.3),
        # Keep the output layer in float32: when the global policy is
        # mixed_float16, a float16 sigmoid output is numerically fragile for
        # the loss. With the default policy this dtype is a no-op.
        Dense(1, activation='sigmoid', dtype='float32')
    ])
    model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Train and evaluate the model
def train_and_evaluate():
    """Train the classifier on the training features and report hold-out metrics.

    Steps:
      1. Load the fake/real training feature pickles and extract the vectors
         (fake is labelled 1, real is labelled 0).
      2. Make a stratified 80/20 split.
      3. Fit a ``StandardScaler`` on the training part only, apply it to both
         parts and save it to disk so evaluation can reuse the same
         normalisation.
      4. Augment the training part, build the model and train it with early
         stopping, learning-rate reduction and best-model checkpointing.
      5. Reload the best checkpoint and print a classification report,
         accuracy and ROC-AUC for the hold-out part.

    Returns:
        None. All results are printed.
    """
    # Only Parallel/delayed are imported from joblib at file level.
    import joblib

    # Same location as the SCALER_PATH that the evaluation code in this file
    # loads the scaler from.
    scaler_path = 'drive/MyDrive/SP_cup/scaler.pkl'

    print("Loading training features...")

    # Load features
    fake_features = load_features(FAKE_TRAIN_FEATURES_PATH)
    real_features = load_features(REAL_TRAIN_FEATURES_PATH)

    # Validate and extract feature vectors
    X_fake = validate_and_extract(fake_features)
    X_real = validate_and_extract(real_features)

    # Create labels
    y_fake = np.ones(len(X_fake))
    y_real = np.zeros(len(X_real))

    # Combine data and labels
    X_combined = np.vstack((X_fake, X_real))
    y_combined = np.hstack((y_fake, y_real))

    # Split BEFORE scaling so that validation statistics cannot leak into the
    # normalisation the model is trained with.
    X_train, X_val, y_train, y_val = train_test_split(
        X_combined, y_combined, test_size=0.2, random_state=42, stratify=y_combined
    )

    # Normalize features using training statistics only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Persist the fitted scaler; without it, later evaluation has to invent its
    # own normalisation and the model sees differently scaled inputs.
    os.makedirs(os.path.dirname(scaler_path), exist_ok=True)
    joblib.dump(scaler, scaler_path)
    print(f"Scaler saved at {scaler_path}.")

    # Augment training data
    print("Augmenting training data...")
    X_train, y_train = augment_features(X_train, y_train, augment_factor=2)

    # Debugging shapes
    print("Shape of X_train:", X_train.shape)
    print("Shape of X_val:", X_val.shape)
    print("Shape of y_train:", y_train.shape)
    print("Shape of y_val:", y_val.shape)

    # Build the model
    input_shape = (X_train.shape[1],)
    model = build_model(input_shape)

    # Callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, mode='min'),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1),
        ModelCheckpoint(CHECKPOINT_PATH, save_best_only=True, monitor='val_loss', mode='min')
    ]

    # Train the model
    print("Starting training...")
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=20,
        batch_size=64,
        callbacks=callbacks,
        verbose=1,
        class_weight={0: 1.0, 1: 3.0}  # Adjust class weights as needed
    )

    print("Model training complete!")

    # After training, load the best model
    model = load_model(CHECKPOINT_PATH)

    # Evaluate on validation set
    print("Evaluating the model...")
    val_probabilities = model.predict(X_val, batch_size=64, verbose=1)
    # predict() returns shape (N, 1). Flatten it, otherwise comparing against
    # the (N,) label vector broadcasts to an (N, N) matrix and the "accuracy"
    # is meaningless. Cast up in case the model emitted float16.
    val_probabilities = np.asarray(val_probabilities, dtype=np.float32).ravel()
    val_predictions = (val_probabilities > 0.5).astype(int)  # Threshold at 0.5 for binary classification

    accuracy = np.mean(val_predictions == y_val)
    # ROC-AUC is a ranking metric: score it on probabilities, not hard labels.
    auc = roc_auc_score(y_val, val_probabilities)

    # Classification report
    print("Classification Report:")
    print(classification_report(y_val, val_predictions))

    print(f"Accuracy: {accuracy:.4f}")
    print(f"AUC-ROC: {auc:.4f}")

# Run training and evaluation as soon as this cell executes; progress and
# metrics are reported through print() calls inside train_and_evaluate().
train_and_evaluate()


No GPU detected, running on CPU.
Loading training features...
Augmenting training data...
Shape of X_train: (202886, 1280)
Shape of X_val: (25361, 1280)
Shape of y_train: (202886,)
Shape of y_val: (25361,)
Starting training...
Epoch 1/20
[1m3171/3171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 13ms/step - accuracy: 0.8700 - loss: 4.0647 - val_accuracy: 0.9998 - val_loss: 1.3094 - learning_rate: 1.0000e-04
Epoch 2/20
[1m3171/3171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 13ms/step - accuracy: 0.9993 - loss: 0.8840 - val_accuracy: 0.9999 - val_loss: 0.1352 - learning_rate: 1.0000e-04
Epoch 3/20
[1m3171/3171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 12ms/step - accuracy: 0.9995 - loss: 0.1070 - val_accuracy: 1.0000 - val_loss: 0.0487 - learning_rate: 1.0000e-04
Epoch 4/20
[1m3171/3171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 13ms/step - accuracy: 0.9995 - loss: 0.0460 - val_accuracy: 0.9999 - val_loss: 0.0335 - learning_rate: 1.0000e-

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the feature/checkpoint paths used in this notebook are relative
# ('drive/MyDrive/...'), so they presumably rely on the working directory being
# /content and on this cell having been run before any cell that reads or
# writes those paths — confirm the intended execution order.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load validation features
def load_validation_data(scaler=None):
    """Load, label and normalise the validation feature sets.

    Fake samples are labelled 1 and real samples 0.

    Args:
        scaler: optional, already fitted scaler (e.g. the ``StandardScaler``
            used for training). When given, it is applied as-is so validation
            inputs are normalised exactly like the training inputs. When
            omitted, a new ``StandardScaler`` is fitted on the validation data
            itself. That is the historical behaviour, but it normalises with
            statistics the model was never trained on.

    Returns:
        Tuple ``(X, y)`` of the normalised feature matrix and the label vector.
    """
    fake_features = load_features(FAKE_VALID_FEATURES_PATH)
    real_features = load_features(REAL_VALID_FEATURES_PATH)

    # Extract feature vectors
    X_fake = validate_and_extract(fake_features)
    X_real = validate_and_extract(real_features)

    # Create labels
    y_fake = np.ones(len(X_fake))
    y_real = np.zeros(len(X_real))

    # Combine data and labels
    X_combined = np.vstack((X_fake, X_real))
    y_combined = np.hstack((y_fake, y_real))

    if scaler is None:
        # Fallback: fit and apply a new scaler on the validation data
        print("Fitting a new scaler...")
        scaler = StandardScaler()
        X_combined = scaler.fit_transform(X_combined)
    else:
        # Reuse the caller's fitted scaler without refitting it
        X_combined = scaler.transform(X_combined)

    return X_combined, y_combined


In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, roc_auc_score
import joblib  # For saving and loading the scaler
import os  # For checking file existence

# Where the validation features, the trained model and the fitted scaler live.
# Every path is relative to the current working directory.
_SP_CUP_DIR = 'drive/MyDrive/SP_cup'
FAKE_VALID_FEATURES_PATH = f'{_SP_CUP_DIR}/features/spatial_valid_fake.pkl'
REAL_VALID_FEATURES_PATH = f'{_SP_CUP_DIR}/features/spatial_valid_real.pkl'
CHECKPOINT_PATH = f"{_SP_CUP_DIR}/checkpoints/model_optimized.keras"
SCALER_PATH = f'{_SP_CUP_DIR}/scaler.pkl'  # Scaler is loaded from / saved to this file

# Read a pickled feature collection
def load_features(file_path):
    """Unpickle and return whatever object is stored at ``file_path``.

    NOTE: unpickling can execute arbitrary code; use this only on feature
    files from a trusted source.
    """
    with open(file_path, mode='rb') as source:
        payload = pickle.load(source)
    return payload

# Feature validation and extraction
def validate_and_extract(features):
    """Gather all feature vectors found in ``features`` as a float32 array.

    An entry is either a dict with a ``'features'`` key or a list of such
    dicts. Entries (or list members) of any other form are ignored. Vectors
    are returned in the order they are encountered.
    """
    def _vectors_of(item):
        # Treat a bare record as a one-element group so that both accepted
        # layouts go through the same filter below.
        if isinstance(item, list):
            candidates = item
        elif isinstance(item, dict):
            candidates = [item]
        else:
            candidates = []
        return [
            record['features']
            for record in candidates
            if isinstance(record, dict) and 'features' in record
        ]

    collected = []
    for item in features:
        collected.extend(_vectors_of(item))
    return np.array(collected, dtype=np.float32)

# Load validation features
def load_validation_data():
    """Load, label and normalise the validation feature sets.

    Fake samples are labelled 1 and real samples 0. Normalisation uses the
    scaler stored at ``SCALER_PATH`` when that file exists.

    If the file is missing, a temporary ``StandardScaler`` is fitted on the
    validation data so evaluation can still run, and a warning is printed.
    That temporary scaler is deliberately NOT written to ``SCALER_PATH``:
    saving it would make later runs load validation-derived statistics as if
    they were the scaler used for training.

    Returns:
        Tuple ``(X, y)`` of the normalised feature matrix and the label vector.
    """
    fake_features = load_features(FAKE_VALID_FEATURES_PATH)
    real_features = load_features(REAL_VALID_FEATURES_PATH)

    # Extract feature vectors
    X_fake = validate_and_extract(fake_features)
    X_real = validate_and_extract(real_features)

    # Create labels
    y_fake = np.ones(len(X_fake))
    y_real = np.zeros(len(X_real))

    # Combine data and labels
    X_combined = np.vstack((X_fake, X_real))
    y_combined = np.hstack((y_fake, y_real))

    # Check if the scaler file exists
    if os.path.exists(SCALER_PATH):
        print(f"Loading existing scaler from {SCALER_PATH}...")
        scaler = joblib.load(SCALER_PATH)
    else:
        print(
            f"WARNING: scaler not found at {SCALER_PATH}. Fitting a temporary "
            "scaler on the validation data; inputs will not be normalised the "
            "way they were during training, so the metrics may be unreliable."
        )
        scaler = StandardScaler()
        scaler.fit(X_combined)

    # Normalize features
    X_combined = scaler.transform(X_combined)

    return X_combined, y_combined

# Test the model
def test_model():
    """Evaluate the checkpointed model on the validation features.

    Loads the validation data and the model stored at ``CHECKPOINT_PATH``,
    then prints a classification report, the accuracy at a 0.5 threshold and
    the ROC-AUC computed from the predicted probabilities.

    Returns:
        None. All results are printed.
    """
    print("Loading validation data...")
    X_val, y_val = load_validation_data()

    # Load the best model
    model = load_model(CHECKPOINT_PATH)

    # Evaluate on the validation set
    print("Evaluating the model...")
    val_probabilities = model.predict(X_val, batch_size=64, verbose=1)
    # predict() returns shape (N, 1). Flatten it, otherwise comparing against
    # the (N,) label vector broadcasts to an (N, N) matrix and the "accuracy"
    # is meaningless. Cast up in case the model emitted float16.
    val_probabilities = np.asarray(val_probabilities, dtype=np.float32).ravel()
    val_predictions = (val_probabilities > 0.5).astype(int)  # Threshold at 0.5 for binary classification

    accuracy = np.mean(val_predictions == y_val)
    # ROC-AUC is a ranking metric: score it on probabilities, not hard labels.
    auc = roc_auc_score(y_val, val_probabilities)

    # Classification report
    print("Classification Report:")
    print(classification_report(y_val, val_predictions))

    print(f"Accuracy: {accuracy:.4f}")
    print(f"AUC-ROC: {auc:.4f}")

# Run testing as soon as this cell executes; test_model() prints the
# classification report, accuracy and AUC-ROC.
test_model()


Loading validation data...
Scaler not found at drive/MyDrive/SP_cup/scaler.pkl. Creating and saving a new scaler...
Scaler saved at drive/MyDrive/SP_cup/scaler.pkl.


  saveable.load_own_variables(weights_store.get(inner_path))


Evaluating the model...
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Classification Report:
              precision    recall  f1-score   support

         0.0       0.31      0.00      0.01      1548
         1.0       0.50      0.99      0.66      1524

    accuracy                           0.49      3072
   macro avg       0.40      0.50      0.33      3072
weighted avg       0.40      0.49      0.33      3072

Accuracy: 0.4961
AUC-ROC: 0.4980
