In [None]:
import pandas as pd
import numpy as np
import os
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import optuna
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import pickle

# Load the labelled training table. The path is assembled with os.path.join so
# the notebook also runs on hosts whose path separator is not "\".
df = pd.read_csv(os.path.join("Training", "c3_main.csv"))

# The first four columns are set aside as metadata; they are not model inputs.
meta_data = df.iloc[:, :4]

# Feature matrix: every column after the metadata block except the last one.
X = df.iloc[:, 4:-1].values

# Target label (last column).
y = df.iloc[:, -1].values

# Map class labels to consecutive integer ids (needed for the sparse
# categorical cross-entropy loss used by the model).
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Print original class distribution.
unique, counts = np.unique(y_encoded, return_counts=True)
print("Class distribution before ROS:", dict(zip(unique, counts)))

# Split BEFORE scaling and oversampling. Doing either on the full dataset leaks
# information into the held-out rows: RandomOverSampler duplicates samples, so
# copies of the same row would land on both sides of the split, and the scaler
# would be fitted on test-set statistics. Both inflate the reported accuracy.
# `stratify` keeps every class represented in both splits (it requires at
# least two samples per class).
X_train_raw, X_test_raw, y_train_raw, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Balance classes on the training split only; the test split keeps the
# original class distribution so its metrics stay representative.
ros = RandomOverSampler(random_state=42)
X_res, y_res = ros.fit_resample(X_train_scaled, y_train_raw)

# Print new (training-split) class distribution.
unique_res, counts_res = np.unique(y_res, return_counts=True)
print("Class distribution after ROS:", dict(zip(unique_res, counts_res)))

# Names consumed by the model-building and training code further down.
X_train, y_train = X_res, y_res

# Model creation
def create_model(trial):
    """Build and compile a dense softmax classifier from an Optuna trial.

    Hyperparameters requested from ``trial``:
        layers        -- int in [2, 4]; number of Dense+Dropout pairs placed
                         before the output layer.
        units         -- int in [128, 512] in steps of 128; width of every
                         Dense layer before the output layer.
        dropout_rate  -- float in [0.3, 0.6]; rate of every Dropout layer.
        learning_rate -- float in [1e-5, 1e-2], sampled on a log scale.

    Reads the module-level ``X_train`` (for the input width) and ``y_train``
    (for the number of distinct classes).

    Returns:
        A compiled ``Sequential`` model using Adam, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    model = Sequential()

    # Trial params
    layers = trial.suggest_int('layers', 2, 4)
    units = trial.suggest_int('units', 128, 512, step=128)
    dropout_rate = trial.suggest_float('dropout_rate', 0.3, 0.6)
    # suggest_loguniform is deprecated in Optuna; suggest_float(log=True) is
    # the supported spelling of the same log-scale search space.
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)

    # Input layer
    model.add(Dense(units=units, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dropout(dropout_rate))

    # Hidden layers: the input layer above already counts as one of `layers`.
    for _ in range(layers - 1):
        model.add(Dense(units=units, activation='relu'))
        model.add(Dropout(dropout_rate))

    # Output layer (multi-class): one softmax unit per distinct training label.
    num_classes = len(np.unique(y_train))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Optuna objective
def objective(trial):
    """Optuna objective: train one candidate model and score it.

    Builds a model with ``create_model(trial)``, trains it for 20 epochs on the
    module-level ``X_train``/``y_train`` with a ReduceLROnPlateau schedule,
    saves the accuracy/loss curves to ``trial_plots_c3/trial_<number>.png``,
    prints the final metrics and returns the last-epoch validation accuracy.

    NOTE(review): ``X_test``/``y_test`` serve both as the validation data during
    fitting and as the "test" set in the final evaluate call, so the reported
    test accuracy is not independent of the hyperparameter search.
    """
    net = create_model(trial)

    # Halve the learning rate when validation loss stalls for 3 epochs.
    lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

    fit_result = net.fit(
        X_train, y_train,
        epochs=20,
        batch_size=32,
        validation_data=(X_test, y_test),
        callbacks=[lr_schedule],
        verbose=1,
    )
    logs = fit_result.history

    # (history key, line format, legend label) for each curve, in draw order.
    curves = (
        ('accuracy', 'b-', 'Training Accuracy'),
        ('val_accuracy', 'b--', 'Validation Accuracy'),
        ('loss', 'r-', 'Training Loss'),
        ('val_loss', 'r--', 'Validation Loss'),
    )

    fig, ax = plt.subplots(figsize=(10, 6), dpi=300)
    epoch_axis = range(1, len(logs['accuracy']) + 1)
    for key, line_fmt, legend_label in curves:
        ax.plot(epoch_axis, logs[key], line_fmt, label=legend_label)

    ax.set_xlabel('Epochs', fontsize=14)
    ax.set_ylabel('Metric Value', fontsize=14)
    ax.legend()
    ax.grid(True)

    # One PNG per trial, written into a folder that is created on demand.
    out_dir = "trial_plots_c3"
    os.makedirs(out_dir, exist_ok=True)
    fig.savefig(os.path.join(out_dir, f'trial_{trial.number}.png'))
    plt.close(fig)  # release the figure so repeated trials do not pile up memory

    # Last-epoch metrics plus a separate evaluation pass on the held-out arrays.
    train_acc_last = logs['accuracy'][-1]
    val_acc_last = logs['val_accuracy'][-1]
    _, test_acc = net.evaluate(X_test, y_test, verbose=0)

    print(f"Final Training Accuracy: {train_acc_last:.4f}")
    print(f"Final Validation Accuracy: {val_acc_last:.4f}")
    print(f"Final Test Accuracy: {test_acc:.4f}")

    # The study maximizes this value.
    return val_acc_last

# Run Optuna study: 15 trials, maximizing the value returned by `objective`.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=15)

# Best result
best_trial = study.best_trial
print(f"\nBest Trial Number: {best_trial.number}")
print(f"Best Validation Accuracy: {best_trial.value:.4f}")
print("Best Hyperparameters:", best_trial.params)

# Retrain final model using the best trial's hyperparameters.
best_params = best_trial.params

# Rebuild the network through create_model instead of repeating its layer
# stack by hand, so the final architecture cannot drift from the one that was
# searched. FixedTrial replays the stored parameter values for each suggest_*
# call made inside create_model.
final_model = create_model(optuna.trial.FixedTrial(best_params))

# Use the same learning-rate schedule as the search trials; the tuned initial
# learning rate was selected under this schedule.
final_reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

final_model.fit(X_train, y_train, epochs=20, batch_size=32,
                validation_data=(X_test, y_test),
                callbacks=[final_reduce_lr], verbose=1)

# Save model, label encoder, and scaler. The inference cells need all three:
# the scaler to transform new features and the encoder to map predicted class
# ids back to the original labels.
final_model.save("C3_model_name.h5")

with open("C3_label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)
with open("C3_scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

print("✅ Final model, label encoder, and scaler saved.")


In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
import pickle
from sklearn.preprocessing import StandardScaler

# Load saved model, label encoder, and scaler.
model = load_model("C3_model_name.h5")

# Read the pickled artifacts back ("rb" + pickle.load). Opening them with "wb"
# and dumping would truncate the saved files and, on a fresh kernel, fail with
# NameError because label_encoder / scaler do not exist yet.
# pickle.load can execute arbitrary code: only load files written by the
# training cell of this notebook.
with open("C3_label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)
with open("C3_scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

# Load new data (without target labels).
# NOTE(review): absolute, machine-specific path; consider a configurable data dir.
new_data = pd.read_csv(r"C:\Users\abir1\OneDrive\Desktop\c3.csv")
metadata = new_data.iloc[:, :4]
# Every column after the four metadata columns is treated as a feature; the
# column count must match what the scaler and model were fitted on.
X_new = new_data.iloc[:, 4:].values

# Normalize using the saved scaler
X_new_scaled = scaler.transform(X_new)

# Predict: take the highest-probability class per row and map the class id
# back to its original label.
pred_probs = model.predict(X_new_scaled)
pred_classes = np.argmax(pred_probs, axis=1)
pred_labels = label_encoder.inverse_transform(pred_classes)

# Combine predictions with metadata. No confidence column is written by this
# cell, despite the output filename.
output_df = metadata.copy()
output_df['Predicted_Label'] = pred_labels

# Save predictions
output_df.to_csv("Predicted_Labels_with_Confidence_C3.csv", index=False)

print("✅ Predictions saved to Predicted_Labels_with_Confidence_C3.csv")


In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
import pickle
from sklearn.preprocessing import StandardScaler

# --- Restore the trained artifacts: network, label encoder, feature scaler ---
# NOTE(review): all paths in this cell are absolute and machine-specific.
model = load_model(r"C:\Users\abir1\C3_model_name.h5")

with open(r"C:\Users\abir1\C3_label_encoder.pkl", "rb") as encoder_file:
    label_encoder = pickle.load(encoder_file)

with open(r"C:\Users\abir1\C3_scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)

# --- Read the unlabelled table: 4 metadata columns, then the features ---
new_data = pd.read_csv(r"C:\Users\abir1\OneDrive\Desktop\test_NP\c3.csv")
metadata = new_data.iloc[:, :4]
X_new = new_data.iloc[:, 4:].values

# --- Scale with the saved scaler and run the network ---
X_new_scaled = scaler.transform(X_new)
pred_probs = model.predict(X_new_scaled)

# --- Per-row winner: its class id, its probability, and its original label ---
pred_classes = pred_probs.argmax(axis=1)
confidences = pred_probs.max(axis=1)
pred_labels = label_encoder.inverse_transform(pred_classes)

# --- Mean of the per-row confidences across the whole table ---
overall_confidence = confidences.mean()

# --- Attach predictions and confidences to a copy of the metadata ---
output_df = metadata.assign(Predicted_Label=pred_labels, Confidence=confidences)

# --- Persist the result ---
output_df.to_csv("Predicted_Labels_with_Confidence_C3.csv", index=False)

# --- Report ---
print("✅ Predictions saved to Predicted_Labels_with_Confidence_C3.csv")
print(f"Overall Confidence: {overall_confidence:.4f}")


In [None]:
import numpy as np
# Spread of the per-row confidence scores produced by the previous cell
# (NumPy's default standard deviation, i.e. ddof=0).
confidence_std = np.asarray(confidences).std()

# Report the spread to four decimal places.
print(f"Standard Deviation of Confidence: {confidence_std:.4f}")
