In [4]:
# Modifying the provided 'edited-model.py' to incorporate suggested improvements

# Modified script content with additional tuning, early stopping, and class distribution checks.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import joblib

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorboard import notebook

In [5]:
# Load the data
df = pd.read_csv('C:/Users/honey/Documents/placment work/Electronical-AI-Device-Recogniser/khanya/data managment/datasets/acs-f2-dataset.csv')

In [6]:
# Data Preprocessing
df = df.drop('time', axis=1)

# Convert categorical labels to numeric
le = LabelEncoder()
df['equipment'] = le.fit_transform(df['equipment'])

# Split features and labels
X = df.drop('equipment', axis=1)
y = df['equipment']

# Analyze class distribution before SMOTE
print("Class distribution before SMOTE:")
print(y.value_counts())

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Handle class imbalance using SMOTE
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X_scaled, y)

Class distribution before SMOTE:
equipment
3     10776
2     10772
12    10763
13    10762
14    10751
1     10747
11    10742
0     10737
4     10735
6     10734
5     10733
10    10727
8     10716
9     10711
7     10702
Name: count, dtype: int64


In [7]:
# Analyze class distribution after SMOTE
print("Class distribution after SMOTE:")
print(pd.Series(y_resampled).value_counts())

# One-hot encode the target labels
num_classes = len(np.unique(y))
y_resampled_onehot = to_categorical(y_resampled, num_classes=num_classes)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled_onehot, test_size=0.2, random_state=42)

Class distribution after SMOTE:
equipment
0     10776
1     10776
2     10776
3     10776
4     10776
5     10776
6     10776
7     10776
8     10776
9     10776
10    10776
11    10776
12    10776
13    10776
14    10776
Name: count, dtype: int64


In [None]:
from tensorflow.keras.layers import Input, Concatenate, Multiply
from tensorflow.keras import backend as K

# Define the model architecture with weighted input for 'freq' column and dropout
input_layer = Input(shape=(X_train.shape[1],))
freq_weight = K.constant([[2.0]])  # Convert the weight to a tensor with the same shape as freq_input

# Split the input into 'freq' and other features
freq_input = input_layer[:, 0:1]
other_features = input_layer[:, 1:]

# Apply the weight to the 'freq' column
weighted_freq = Multiply()([freq_input, freq_weight])

# Concatenate the weighted 'freq' column back with the other features
weighted_input = Concatenate()([weighted_freq, other_features])

# Define the rest of the model
x = Dense(256, activation='relu')(weighted_input)
x = Dropout(0.3)(x)  # Increased dropout for stronger regularization
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
output_layer = Dense(num_classes, activation='softmax')(x)

model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

# Compile the model with a lower learning rate for better generalization
model.compile(optimizer=Adam(learning_rate=0.0005), 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(X_train, y_train, 
                    validation_data=(X_test, y_test),
                    epochs=100, 
                    batch_size=32, 
                    callbacks=[early_stopping])


Epoch 1/100
[1m4041/4041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.3261 - loss: 1.9980 - val_accuracy: 0.5064 - val_loss: 1.4393
Epoch 2/100
[1m4041/4041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - accuracy: 0.4730 - loss: 1.4695 - val_accuracy: 0.5696 - val_loss: 1.2398
Epoch 3/100
[1m4041/4041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - accuracy: 0.5259 - loss: 1.3145 - val_accuracy: 0.6014 - val_loss: 1.1137
Epoch 4/100
[1m4041/4041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - accuracy: 0.5632 - loss: 1.2117 - val_accuracy: 0.6458 - val_loss: 1.0370
Epoch 5/100
[1m4041/4041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.5943 - loss: 1.1327 - val_accuracy: 0.6645 - val_loss: 0.9734
Epoch 6/100
[1m4041/4041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.6138 - loss: 1.0827 - val_accuracy: 0.6787 - val_loss: 0.9304
Epoch 7

In [3]:
# Define the class names
le = LabelEncoder()  # Ensure le is defined
le.fit(df['equipment'])  # Fit the LabelEncoder with the 'equipment' column
class_names = le.classes_

# Plot the confusion matrix with labels
plt.figure(figsize=(20, 10))

plt.subplot(1, 2, 1)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')

plt.subplot(1, 2, 2)
sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Normalized Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')

plt.tight_layout()
plt.show()


NameError: name 'LabelEncoder' is not defined

In [22]:
# Save the trained model and the scaler for future use
model.save('new-model1.0.3.keras')
joblib.dump(scaler, 'new-scaler1.0.3.pkl')

['new-scaler1.0.2.pkl']

In [23]:
  # Return the new path so the user can download it