In [None]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from joblib import dump  # For saving preprocessing objects
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras

# Detect GPUs and configure how TensorFlow allocates their memory
physical_devices = tf.config.list_physical_devices('GPU')
if not physical_devices:
    print("No GPU found. Running on CPU.")
else:
    try:
        # Memory growth makes TensorFlow claim GPU memory on demand
        # instead of reserving all of it at start-up.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        print(f"Error configuring GPU: {err}")
    else:
        # Reached only when every device was configured without error.
        print(f"Found {len(physical_devices)} GPU(s). CUDA acceleration enabled.")

# Read the training data from disk
df = pd.read_csv("combined_trainset.csv")

# The first column is used as the target; every remaining column is a feature
y = df.iloc[:, 0].values
X = df.iloc[:, 1:].values

# Map the raw target labels to consecutive integer class ids
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# 5-fold cross-validation, shuffled with a fixed seed so the splits repeat
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Best-fold tracking: the winning model, its accuracy, and its fitted scaler
best_accuracy, best_model, scaler = 0, None, None

# One list of per-fold scores for each metric
fold_metrics = {
    metric_name: []
    for metric_name in ('accuracies', 'precisions', 'recalls', 'f1_scores')
}

def _build_classifier(n_features, n_classes):
    """Return a compiled dense softmax classifier for integer-encoded labels.

    Args:
        n_features: Number of input columns per sample.
        n_classes: Width of the final softmax layer (one unit per class).

    Returns:
        A compiled ``keras.Sequential`` model using the Adam optimizer and
        sparse categorical cross-entropy loss, reporting accuracy.
    """
    model = keras.Sequential([
        keras.layers.Dense(64, activation='relu', input_shape=(n_features,)),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(n_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model


# The number of classes is the same for every fold, so compute it once.
num_classes = len(np.unique(y))

for fold, (train_ids, val_ids) in enumerate(kfold.split(X)):
    print(f"\nProcessing Fold {fold + 1}")

    # Data splitting
    X_train, X_val = X[train_ids], X[val_ids]
    y_train, y_val = y[train_ids], y[val_ids]

    # Fit the scaler on the training split only, then apply it to the
    # validation split. The data stays in NumPy form: Keras accepts arrays
    # directly, and the scikit-learn metrics below receive plain arrays
    # instead of relying on implicit tensor-to-array conversion.
    fold_scaler = StandardScaler()
    X_train = fold_scaler.fit_transform(X_train).astype(np.float32)
    X_val = fold_scaler.transform(X_val).astype(np.float32)

    # A fresh model per fold so no weights carry over between folds
    model = _build_classifier(X.shape[1], num_classes)

    # No explicit tf.device('/GPU:0') scope here: TensorFlow already prefers
    # a GPU when one is present, and pinning to a GPU that does not exist
    # can fail on CPU-only machines.
    history = model.fit(X_train, y_train,
                        epochs=50,
                        batch_size=32,
                        validation_data=(X_val, y_val),
                        verbose=1)

    # Predicted class = index of the highest softmax probability
    y_pred = np.argmax(model.predict(X_val, verbose=0), axis=1)

    # Calculate metrics (weighted averages account for class imbalance)
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred, average='weighted')
    recall = recall_score(y_val, y_pred, average='weighted')
    f1 = f1_score(y_val, y_pred, average='weighted')

    # Store metrics
    fold_metrics['accuracies'].append(accuracy)
    fold_metrics['precisions'].append(precision)
    fold_metrics['recalls'].append(recall)
    fold_metrics['f1_scores'].append(f1)

    # Keep the best fold's model together with the scaler fitted on that
    # fold's training data. The `best_model is None` check guarantees that a
    # model is retained even if every fold scores an accuracy of 0.
    if best_model is None or accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model
        scaler = fold_scaler

    print(f"Fold {fold + 1} - Accuracy: {accuracy:.4f}")

# Save final artifacts
if best_model is None:
    # Fail with a clear message instead of an AttributeError on None.save(...)
    raise RuntimeError("No model was selected during cross-validation; nothing to save.")

# All three artifacts are written into this directory; create it up front so
# the save calls do not fail on a missing path.
output_dir = 'Model_file'
os.makedirs(output_dir, exist_ok=True)

best_model.save(f'{output_dir}/best_model.h5')
dump(scaler, f'{output_dir}/scaler.joblib')
dump(label_encoder, f'{output_dir}/label_encoder.joblib')

# Report the mean of each metric across all folds
print("\nCross-Validation Results:")
for label, key in (
    ("Accuracy", 'accuracies'),
    ("Precision", 'precisions'),
    ("Recall", 'recalls'),
    ("F1-Score", 'f1_scores'),
):
    print(f"Average {label}: {np.mean(fold_metrics[key]):.4f}")


