In [None]:
import os

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from joblib import dump  # For saving preprocessing objects

# Detect GPUs and, when any are present, switch them to on-demand memory
# allocation instead of letting TensorFlow reserve all GPU memory up front.
physical_devices = tf.config.list_physical_devices('GPU')
if not physical_devices:
    print("No GPU found. Running on CPU.")
else:
    try:
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Report the configuration failure and carry on with the script.
        print(f"Error configuring GPU: {err}")
    else:
        print(f"Found {len(physical_devices)} GPU(s). CUDA acceleration enabled.")

# Read the training table from disk.
df = pd.read_csv("combined_trainset.csv")

# Column 0 is the target; every remaining column is a feature.
features, labels = df.iloc[:, 1:], df.iloc[:, 0]
X = features.values

# Map the raw target values to integer class ids; the fitted encoder is kept
# so it can be saved alongside the model later.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels.values)

# 5-fold cross-validation with a fixed seed so the shuffled splits are
# reproducible between runs.
kfold = KFold(shuffle=True, n_splits=5, random_state=42)

# Best-so-far trackers, updated inside the cross-validation loop.
best_accuracy = 0
best_model = scaler = None

# One list per metric; each fold appends a single value.
fold_metrics = {
    metric_name: []
    for metric_name in ('accuracies', 'precisions', 'recalls', 'f1_scores')
}

# Train and evaluate one fresh network per fold, keeping the model (and its
# matching scaler) that reaches the highest validation accuracy.

# Loop-invariant model dimensions, computed once instead of once per fold.
n_features = X.shape[1]
n_classes = len(np.unique(y))

for fold, (train_ids, val_ids) in enumerate(kfold.split(X)):
    print(f"\nProcessing Fold {fold + 1}")

    # Data splitting
    X_train, X_val = X[train_ids], X[val_ids]
    y_train, y_val = y[train_ids], y[val_ids]

    # Feature scaling: statistics are fitted on the training split only and
    # then applied to the validation split, so no validation data leaks in.
    fold_scaler = StandardScaler()
    X_train = fold_scaler.fit_transform(X_train).astype(np.float32)
    X_val = fold_scaler.transform(X_val).astype(np.float32)

    # Keep everything as NumPy arrays: Keras accepts them directly and the
    # scikit-learn metrics below expect array-likes, not tf.Tensor objects.
    y_train = np.asarray(y_train, dtype=np.int32)
    y_val = np.asarray(y_val, dtype=np.int32)

    # Model creation. An explicit Input layer replaces the `input_shape`
    # argument on the first Dense layer, which newer Keras versions warn about.
    model = keras.Sequential([
        keras.Input(shape=(n_features,)),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(n_classes, activation='softmax'),
    ])

    # Sparse loss because the targets are integer class ids, not one-hot rows.
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Model training. No explicit device scope: TensorFlow uses a GPU
    # automatically when one is visible and falls back to the CPU otherwise,
    # so the script does not hard-code '/GPU:0' on CPU-only machines.
    history = model.fit(X_train, y_train,
                        epochs=50,
                        batch_size=32,
                        validation_data=(X_val, y_val),
                        verbose=1)

    # Evaluation: pick the most probable class for every validation sample.
    y_pred = np.argmax(model.predict(X_val, verbose=0), axis=1)

    # Calculate metrics (weighted averages account for class support).
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred, average='weighted')
    recall = recall_score(y_val, y_pred, average='weighted')
    f1 = f1_score(y_val, y_pred, average='weighted')

    # Store metrics
    fold_metrics['accuracies'].append(accuracy)
    fold_metrics['precisions'].append(precision)
    fold_metrics['recalls'].append(recall)
    fold_metrics['f1_scores'].append(f1)

    # Save best model and scaler; the two must stay paired because the model
    # was trained on features standardised by this fold's scaler.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model
        scaler = fold_scaler

    print(f"Fold {fold + 1} - Accuracy: {accuracy:.4f}")

# Report the cross-validation summary first so the metrics are still shown
# even if writing the artifacts below fails.
print("\nCross-Validation Results:")
print(f"Average Accuracy: {np.mean(fold_metrics['accuracies']):.4f}")
print(f"Average Precision: {np.mean(fold_metrics['precisions']):.4f}")
print(f"Average Recall: {np.mean(fold_metrics['recalls']):.4f}")
print(f"Average F1-Score: {np.mean(fold_metrics['f1_scores']):.4f}")

# Save final artifacts
if best_model is None:
    # No fold beat the initial best accuracy, so there is nothing to persist;
    # fail with a clear message instead of an AttributeError on None.
    raise RuntimeError("No model was selected during cross-validation; nothing to save.")

# Create the output directory up front: saving fails if it does not exist.
os.makedirs('Model_file', exist_ok=True)
best_model.save('Model_file/best_model.h5')
# The scaler and label encoder are stored next to the model so inference can
# apply the same preprocessing and map predictions back to original labels.
dump(scaler, 'Model_file/scaler.joblib')
dump(label_encoder, 'Model_file/label_encoder.joblib')




No GPU found. Running on CPU.

Processing Fold 1


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.7207 - loss: 0.7792 - val_accuracy: 0.7521 - val_loss: 0.5911
Epoch 2/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7590 - loss: 0.5796 - val_accuracy: 0.7543 - val_loss: 0.5814
Epoch 3/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7629 - loss: 0.5689 - val_accuracy: 0.7593 - val_loss: 0.5632
Epoch 4/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7635 - loss: 0.5558 - val_accuracy: 0.7617 - val_loss: 0.5609
Epoch 5/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7658 - loss: 0.5519 - val_accuracy: 0.7470 - val_loss: 0.5697
Epoch 6/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7749 - loss: 0.5339 - val_accuracy: 0.7581 - val_loss: 0.5479
Epoch 7/50
[1m519/519[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7329 - loss: 0.7604 - val_accuracy: 0.7398 - val_loss: 0.6268
Epoch 2/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7634 - loss: 0.5743 - val_accuracy: 0.7552 - val_loss: 0.6011
Epoch 3/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7634 - loss: 0.5674 - val_accuracy: 0.7499 - val_loss: 0.5935
Epoch 4/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7639 - loss: 0.5497 - val_accuracy: 0.7550 - val_loss: 0.5895
Epoch 5/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7631 - loss: 0.5487 - val_accuracy: 0.7507 - val_loss: 0.5800
Epoch 6/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7825 - loss: 0.5204 - val_accuracy: 0.7483 - val_loss: 0.5887
Epoch 7/50
[1m519/519[0m [32m━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7109 - loss: 0.7559 - val_accuracy: 0.7514 - val_loss: 0.6026
Epoch 2/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7561 - loss: 0.5768 - val_accuracy: 0.7605 - val_loss: 0.5866
Epoch 3/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7673 - loss: 0.5609 - val_accuracy: 0.7601 - val_loss: 0.5743
Epoch 4/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7720 - loss: 0.5400 - val_accuracy: 0.7632 - val_loss: 0.5689
Epoch 5/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7695 - loss: 0.5420 - val_accuracy: 0.7536 - val_loss: 0.5646
Epoch 6/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7713 - loss: 0.5272 - val_accuracy: 0.7637 - val_loss: 0.5669
Epoch 7/50
[1m519/519[0m [32m━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7082 - loss: 0.7904 - val_accuracy: 0.7656 - val_loss: 0.5674
Epoch 2/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7503 - loss: 0.5878 - val_accuracy: 0.7714 - val_loss: 0.5562
Epoch 3/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7642 - loss: 0.5658 - val_accuracy: 0.7685 - val_loss: 0.5519
Epoch 4/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7562 - loss: 0.5680 - val_accuracy: 0.7781 - val_loss: 0.5395
Epoch 5/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7725 - loss: 0.5429 - val_accuracy: 0.7781 - val_loss: 0.5337
Epoch 6/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7674 - loss: 0.5411 - val_accuracy: 0.7750 - val_loss: 0.5322
Epoch 7/50
[1m519/519[0m [32m━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.7097 - loss: 0.8020 - val_accuracy: 0.7548 - val_loss: 0.5746
Epoch 2/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7495 - loss: 0.5906 - val_accuracy: 0.7716 - val_loss: 0.5699
Epoch 3/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7595 - loss: 0.5658 - val_accuracy: 0.7704 - val_loss: 0.5433
Epoch 4/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7603 - loss: 0.5467 - val_accuracy: 0.7738 - val_loss: 0.5388
Epoch 5/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7654 - loss: 0.5462 - val_accuracy: 0.7808 - val_loss: 0.5289
Epoch 6/50
[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7630 - loss: 0.5505 - val_accuracy: 0.7784 - val_loss: 0.5367
Epoch 7/50
[1m519/519[0m [32m━━━━━━━

KeyboardInterrupt: 