In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, LSTM, Bidirectional


In [None]:
# Define the paths
# Root folder holding one sub-folder of images per machine condition.
# Defaults to the original workstation location; set the CWT_DATA_DIR
# environment variable to run the notebook elsewhere without editing this cell.
CWT_DATA_DIR = os.environ.get(
    'CWT_DATA_DIR',
    r'E:\1 Paper Work\Cutting Tool Paper\Dataset\cutting tool data\new_data\CWT',
)

# Class label -> folder that contains the images of that class.
data_paths = {
    'Bearing Fault': os.path.join(CWT_DATA_DIR, 'Bearing Fault Data'),
    'Gear Fault': os.path.join(CWT_DATA_DIR, 'Gear Fault Data'),
    'Tool Fault': os.path.join(CWT_DATA_DIR, 'Tool Fault Data'),
    'Normal': os.path.join(CWT_DATA_DIR, 'Normal Data'),
}


In [None]:
# Function to load images and labels
def load_images_and_labels(data_paths, target_size=(224, 224),
                           extensions=('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')):
    """Load every image found in the given class folders.

    Args:
        data_paths: mapping of class label -> directory containing that
            class's image files.
        target_size: (height, width) every image is resized to on load.
        extensions: file-name suffixes (case-insensitive) treated as images;
            anything else in the folder is skipped.

    Returns:
        (images, labels): two NumPy arrays of equal length. ``images`` stacks
        the per-file arrays returned by ``img_to_array``; ``labels`` holds the
        class label of each image, in the same order.
    """
    allowed_suffixes = tuple(ext.lower() for ext in extensions)
    images = []
    labels = []
    for label, path in data_paths.items():
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and therefore the seeded train/val split) repeatable.
        for file in sorted(os.listdir(path)):
            img_path = os.path.join(path, file)
            # Skip sub-folders and stray non-image files (e.g. Thumbs.db),
            # which load_img cannot open.
            if not os.path.isfile(img_path) or not file.lower().endswith(allowed_suffixes):
                continue
            img = tf.keras.preprocessing.image.load_img(img_path, target_size=target_size)
            images.append(tf.keras.preprocessing.image.img_to_array(img))
            labels.append(label)
    return np.array(images), np.array(labels)

# Load every class folder, then report how many images and labels came back
# (the two counts should match).
images, labels = load_images_and_labels(data_paths)
for loaded in (images, labels):
    print(len(loaded))


In [None]:
from sklearn.preprocessing import LabelEncoder


# Fit the encoder on the string labels, then map every label to its integer id
label_encoder = LabelEncoder().fit(labels)
labels_encoded = label_encoder.transform(labels)

# Expand the integer ids into one-hot rows for the softmax classifier
labels_one_hot = tf.keras.utils.to_categorical(labels_encoded)


In [None]:
# Create an ImageDataGenerator for data augmentation
# NOTE(review): `datagen` is not used by any cell visible in this notebook;
# the features below are extracted from the un-augmented images.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Split data into training and validation sets.
# Stratify on the integer class ids so both splits keep the same class
# proportions; a plain random split can leave a class under-represented in
# the 20% validation set.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels_one_hot,
    test_size=0.2,
    random_state=42,
    stratify=labels_encoded,
)


In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten

# Load the VGG16 model without the top layer
vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the layers of VGG16
for layer in vgg_base.layers:
    layer.trainable = False

# Wrap the frozen base in a feature extractor: image -> VGG16 conv features
# -> flattened vector. No trainable layers are added here.
input_layer = Input(shape=(224, 224, 3))
x = vgg_base(input_layer)
x = Flatten()(x)
cnn_feature_model = Model(inputs=input_layer, outputs=x)

# Extract features for the training and validation sets.
# The ImageNet weights expect inputs passed through VGG16's preprocess_input
# (RGB -> BGR and per-channel mean subtraction); feeding raw 0-255 RGB pixels
# gives features the pretrained filters were not trained for.
# preprocess_input may overwrite its argument, so it is given a copy to keep
# X_train / X_val untouched and this cell safe to re-run.
X_train_features = cnn_feature_model.predict(preprocess_input(X_train.copy()))
X_val_features = cnn_feature_model.predict(preprocess_input(X_val.copy()))


In [None]:
# Print the layer-by-layer summary of the frozen VGG16 base
vgg_base.summary()

In [None]:
# Print the summary of the feature extractor (VGG16 base followed by Flatten)
cnn_feature_model.summary()

In [None]:
# Reshape the features to fit LSTM input requirements: insert a length-1
# middle axis so each sample becomes a one-step sequence, matching the
# Input(shape=(1, n_features)) declared for the LSTM model.
# Using -1 for the last axis keeps this cell idempotent: re-running it on the
# already 3-D arrays returns the same shape instead of failing.
X_train_features = X_train_features.reshape(X_train_features.shape[0], 1, -1)
X_val_features = X_val_features.reshape(X_val_features.shape[0], 1, -1)


In [None]:
# Display the array shapes of the training and validation image splits
X_train.shape, X_val.shape

In [None]:
from tensorflow.keras.layers import LSTM

# One-step sequence input whose width equals the flattened CNN feature size
n_cnn_features = X_train_features.shape[2]
lstm_input = Input(shape=(1, n_cnn_features))
bilstm_output = Bidirectional(LSTM(128))(lstm_input)
lstm_feature_model = Model(inputs=lstm_input, outputs=bilstm_output)

# Run both splits through the bidirectional LSTM to get the final features.
# NOTE(review): lstm_feature_model is never compiled or fit in the visible
# cells, so these outputs come from randomly initialised weights and change
# between kernel restarts -- confirm this is intended.
X_train_lstm_features = lstm_feature_model.predict(X_train_features)
X_val_lstm_features = lstm_feature_model.predict(X_val_features)


In [None]:
# Print the summary of the bidirectional-LSTM feature model
lstm_feature_model.summary()

In [None]:
from deap import base, creator, tools, algorithms
import random

# Define evaluation function
def evaluate(individual):
    """GA fitness: accuracy of a small classifier trained on the chosen features.

    Reads the notebook globals ``X_train_lstm_features``,
    ``X_val_lstm_features``, ``y_train`` and ``y_val``.

    Args:
        individual: sequence of 0/1 flags, one per feature column; a 1 means
            the column at that position is used.

    Returns:
        A one-element tuple ``(accuracy,)`` with the accuracy measured on the
        validation split, or ``(0,)`` when no feature is selected.
    """
    selected_features = [idx for idx, flag in enumerate(individual) if flag == 1]
    if not selected_features:
        return 0,

    # A new Keras model is built for every individual; clearing the session
    # first stops Keras' global state from growing over the whole GA run.
    tf.keras.backend.clear_session()

    X_train_selected = X_train_lstm_features[:, selected_features]
    X_val_selected = X_val_lstm_features[:, selected_features]

    model = tf.keras.Sequential([
        Dense(64, activation='relu', input_dim=len(selected_features)),
        # Output width follows the one-hot label width rather than a fixed 4.
        Dense(y_train.shape[1], activation='softmax')
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train_selected, y_train, epochs=10, batch_size=8, verbose=0)

    # NOTE(review): fitness is measured on the same validation split that the
    # final model is later scored on, so the reported validation accuracy is
    # optimistically biased by the feature selection.
    _, accuracy = model.evaluate(X_val_selected, y_val, verbose=0)
    return accuracy,

# Define GA components
# creator.create registers the new classes on the deap.creator module. Guard
# the calls so re-running this cell does not re-create (and overwrite) them.
if not hasattr(creator, "FitnessMax"):
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))  # single objective, maximised
if not hasattr(creator, "Individual"):
    creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# Each gene is a random 0/1 flag; an individual has one gene per LSTM feature column.
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=X_train_lstm_features.shape[1])
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", evaluate)

# GA execution
# Seed Python's RNG so the initial population, crossover and mutation are
# repeatable (the Keras training inside `evaluate` is still stochastic).
random.seed(42)
population = toolbox.population(n=50)

# eaSimple has no elitism, so the best individual of an earlier generation
# can be lost; the hall of fame keeps the best one seen over the whole run.
hall_of_fame = tools.HallOfFame(1)
population, logbook = algorithms.eaSimple(
    population, toolbox, cxpb=0.5, mutpb=0.2, ngen=3,
    halloffame=hall_of_fame, verbose=True,
)

# Get the best individual
best_ind = hall_of_fame[0]
selected_features = [idx for idx, flag in enumerate(best_ind) if flag == 1]


In [None]:
# Display the feature column indices chosen by the GA
selected_features

In [None]:
# Keep only the GA-chosen columns of the LSTM features
X_train_selected = X_train_lstm_features[:, selected_features]
X_val_selected = X_val_lstm_features[:, selected_features]

# Final classifier: one hidden ReLU layer and a 4-way softmax, added layer by layer
final_model = tf.keras.Sequential()
final_model.add(Dense(64, activation='relu', input_dim=len(selected_features)))
final_model.add(Dense(4, activation='softmax'))

final_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split, tracking the validation split after each epoch
history = final_model.fit(
    X_train_selected,
    y_train,
    validation_data=(X_val_selected, y_val),
    epochs=5,
    batch_size=8,
)


In [None]:
# Score the trained classifier on the validation split and report the
# accuracy as a percentage
val_metrics = final_model.evaluate(X_val_selected, y_val)
val_loss, val_accuracy = val_metrics
print(f'Validation Accuracy: {val_accuracy*100:.2f}%')


In [None]:
import matplotlib.pyplot as plt

# Per-epoch training accuracy recorded by Keras during the final fit
acc = history.history['accuracy']

# Draw the curve on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(acc, label='Training Accuracy')
ax.set_title('Training Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()


In [None]:
# Plot training vs. validation accuracy per epoch.
# The figure is only displayed here; nothing is written to disk.
plt.plot(history.epoch, history.history['accuracy'], label='accuracy')
plt.plot(history.epoch, history.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0, 1.1])
plt.legend(loc='lower right')
# Render explicitly, like the other plotting cells, so the cell output is the
# figure rather than the Legend object's repr.
plt.show()

In [None]:
# Training and validation loss per epoch on one set of axes
fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'val')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()



In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Class probabilities for the training split, reduced to class ids;
# the one-hot targets are reduced the same way
y_train_pred = final_model.predict(X_train_selected)
y_train_pred_classes = y_train_pred.argmax(axis=1)
y_train_true_classes = y_train.argmax(axis=1)

# Confusion matrix (rows: true class, columns: predicted class), labelled
# with the encoder's class names
cm = confusion_matrix(y_train_true_classes, y_train_pred_classes)
disp = ConfusionMatrixDisplay(cm, display_labels=label_encoder.classes_)
disp.plot(cmap='Blues')
#plt.title('Confusion Matrix - Training Data')
plt.show()


In [None]:
# Plot confusion matrix for the validation split
y_val_pred = final_model.predict(X_val_selected)
y_val_pred_classes = np.argmax(y_val_pred, axis=1)
y_val_true_classes = np.argmax(y_val, axis=1)

# Class id i corresponds to label_encoder.classes_[i]. LabelEncoder sorts its
# classes, which is NOT the insertion order of data_paths ('Normal' sorts
# before 'Tool Fault'), so the tick labels must come from the encoder.
class_names = label_encoder.classes_

# Passing `labels` pins the matrix to one row/column per class even when a
# class is absent from this split, keeping it aligned with class_names.
cm = confusion_matrix(
    y_val_true_classes,
    y_val_pred_classes,
    labels=np.arange(len(class_names)),
)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
fig, ax = plt.subplots(figsize=(6,6))
disp.plot(ax=ax, cmap='Blues')
plt.title('Confusion Matrix')
plt.show()

In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

# One column per class in the one-hot training targets
n_classes = y_train.shape[1]

# One-vs-rest indicator matrix of the true training classes
y_train_binarized = label_binarize(y_train_true_classes, classes=[0, 1, 2, 3])

# Per-class ROC curve (false/true positive rates) and area under it
fpr, tpr, roc_auc = {}, {}, {}
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_train_binarized[:, i], y_train_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Draw every class curve plus the chance diagonal
colors = ['aqua', 'darkorange', 'cornflowerblue', 'green']
fig, ax = plt.subplots()
for i, color in zip(range(n_classes), colors):
    curve_label = 'ROC curve of class {0} (area = {1:0.2f})'.format(i, roc_auc[i])
    ax.plot(fpr[i], tpr[i], color=color, lw=2, label=curve_label)
ax.plot([0, 1], [0, 1], 'k--', lw=2)
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic - Training Data')
ax.legend(loc="lower right")
plt.show()


In [None]:
from sklearn.decomposition import PCA

# Project the selected training features onto their first two principal components
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_selected)

# Build a regular grid (step 0.1) covering the projected points with a margin of 1
pc1, pc2 = X_train_pca[:, 0], X_train_pca[:, 1]
x_min, x_max = pc1.min() - 1, pc1.max() + 1
y_min, y_max = pc2.min() - 1, pc2.max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))

# Map each grid point back to feature space, classify it, and fold the
# predicted class ids back into the grid's shape
grid_points = np.column_stack([xx.ravel(), yy.ravel()])
Z = final_model.predict(pca.inverse_transform(grid_points))
Z = Z.argmax(axis=1).reshape(xx.shape)

# Filled class regions with the training points drawn on top
plt.contourf(xx, yy, Z, alpha=0.8)
plt.scatter(pc1, pc2, c=y_train_true_classes, edgecolor='k', s=20)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Decision Boundary')
plt.show()


In [None]:
from sklearn.manifold import TSNE

# Embed the selected training features in 2-D with t-SNE (fixed seed)
tsne = TSNE(n_components=2, random_state=42)
X_train_tsne = tsne.fit_transform(X_train_selected)

# One scatter series per class, coloured and named consistently
fig, ax = plt.subplots()
for class_idx, class_color, class_name in zip(range(n_classes), colors, label_encoder.classes_):
    in_class = y_train_true_classes == class_idx
    ax.scatter(
        X_train_tsne[in_class, 0],
        X_train_tsne[in_class, 1],
        color=class_color,
        lw=2,
        label=class_name,
    )
ax.set_title('t-SNE')
ax.legend(loc='best')
plt.show()


In [None]:
from collections import Counter
# Number of loaded samples per class label
class_counts = Counter(labels)
print(class_counts)


In [None]:
from sklearn.metrics import classification_report
# Per-class precision / recall / F1 for the training-split predictions
train_report = classification_report(
    y_true=y_train_true_classes,
    y_pred=y_train_pred_classes,
    target_names=label_encoder.classes_,
)
print(train_report)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Plot the feature indices actually chosen by the GA (`selected_features`
# from the GA cell). The pasted placeholder list that used to live here
# overwrote the real GA result and did not reflect the current run.

# Create a DataFrame for visualization purposes
feature_importance_df = pd.DataFrame({
    'Feature': selected_features,
    # The GA only says selected / not selected, so every chosen feature gets
    # the same score of 1; this is not a ranking.
    'Importance': [1] * len(selected_features)
})

# Sort the features by index
feature_importance_df = feature_importance_df.sort_values(by="Feature")

# Plotting
plt.figure(figsize=(12, 8))
plt.barh(feature_importance_df['Feature'], feature_importance_df['Importance'], align='center')
plt.xlabel('Importance Score (Selected by GA)')
plt.title('Important Features Selected by Genetic Algorithm')
plt.gca().invert_yaxis()  # Put the lowest feature index at the top
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Histogram of the feature indices actually chosen by the GA
# (`selected_features` from the GA cell). The pasted placeholder list that
# used to live here overwrote the real GA result.
plt.figure(figsize=(10, 6))
plt.hist(selected_features, bins=30, edgecolor='black')
plt.xlabel('Feature Index')
plt.ylabel('Frequency')
plt.title('Distribution of Selected Features by Genetic Algorithm')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Kernel of the hidden Dense layer of the trained classifier: one row per
# selected input feature, one column per hidden unit. This replaces the
# np.random.rand placeholder, which plotted random numbers as "importance".
dense_weights = final_model.layers[-2].get_weights()[0]

# One score per input feature: mean absolute weight across the hidden units.
# This is a weight-magnitude heuristic, not a calibrated importance measure.
feature_importance = np.abs(dense_weights).mean(axis=1)

# Sort features by importance for a cleaner plot
sorted_indices = np.argsort(feature_importance)[::-1]
sorted_importance = feature_importance[sorted_indices]

# Plotting
plt.figure(figsize=(12, 8))
plt.bar(range(len(sorted_importance)), sorted_importance, align='center')
plt.xlabel('Feature Index (sorted by importance)')
plt.ylabel('Importance Score')
# The count in the title follows the actual number of features, not a fixed 103.
plt.title(f'Feature Importance of {len(sorted_importance)} Selected Features (based on Dense Layer Weights)')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Styled histogram of the feature indices actually chosen by the GA
# (`selected_features` from the GA cell). The pasted placeholder list that
# used to live here overwrote the real GA result.
plt.figure(figsize=(8, 6))
plt.hist(selected_features, bins=30, edgecolor='black', color='steelblue', alpha=0.7)
plt.xlabel('Feature Index', fontsize=14, weight='bold')
plt.ylabel('Frequency', fontsize=14, weight='bold')
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Kernel of the hidden Dense layer of the trained classifier: one row per
# selected input feature, one column per hidden unit. This replaces the
# np.random.rand placeholder, which plotted random numbers as "importance".
dense_weights = final_model.layers[-2].get_weights()[0]

# One score per input feature: mean absolute weight across the hidden units
# (a weight-magnitude heuristic, not a calibrated importance measure).
feature_importance = np.abs(dense_weights).mean(axis=1)

# Sort features by importance
sorted_indices = np.argsort(feature_importance)[::-1]
sorted_importance = feature_importance[sorted_indices]

# Create an enhanced bar plot, coloured along the viridis map by rank
plt.figure(figsize=(12, 8))
bar_colors = plt.cm.viridis(np.linspace(0, 1, len(sorted_importance)))
bars = plt.bar(range(len(sorted_importance)), sorted_importance, color=bar_colors, edgecolor='black', alpha=0.8)
plt.xlabel('Feature Index (sorted by importance)', fontsize=14, weight='bold')
plt.ylabel('Importance Score', fontsize=14, weight='bold')
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

plt.tight_layout()
plt.show()
