<a href="https://colab.research.google.com/github/cdtlaura/nlp2/blob/main/Multiclass_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Load and Explore the Data

In [None]:
import pandas as pd

# Read the dry-bean measurements from the Excel workbook into a DataFrame
DATA_PATH = '/content/Dry_Bean_Dataset.xlsx'
SHEET_NAME = 'Dry_Beans_Dataset'
df = pd.read_excel(DATA_PATH, sheet_name=SHEET_NAME)

# Preview the first five rows
print(df.head())

# List the available column names
print(df.columns)

    Area  Perimeter  MajorAxisLength  MinorAxisLength  AspectRation  \
0  28395    610.291       208.178117       173.888747      1.197191   
1  28734    638.018       200.524796       182.734419      1.097356   
2  29380    624.110       212.826130       175.931143      1.209713   
3  30008    645.884       210.557999       182.516516      1.153638   
4  30140    620.134       201.847882       190.279279      1.060798   

   Eccentricity  ConvexArea  EquivDiameter    Extent  Solidity  roundness  \
0      0.549812       28715     190.141097  0.763923  0.988856   0.958027   
1      0.411785       29172     191.272750  0.783968  0.984986   0.887034   
2      0.562727       29690     193.410904  0.778113  0.989559   0.947849   
3      0.498616       30724     195.467062  0.782681  0.976696   0.903936   
4      0.333680       30417     195.896503  0.773098  0.990893   0.984877   

   Compactness  ShapeFactor1  ShapeFactor2  ShapeFactor3  ShapeFactor4  Class  
0     0.913358      0.007332  

Preprocess the Data

In [None]:
from sklearn.preprocessing import LabelEncoder

# Discard any rows that contain missing values
df = df.dropna()

# Input features: every column except the 'Class' target
X = df.drop('Class', axis=1)

# Encode the string class names as integer ids
# (0-6 for the seven bean classes in this dataset)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['Class'].values)

# Number of input features, used to size the network's input layer
n_features = len(X.columns)

Split the Data into Train and Test Sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)


Define the Model for Multiclass Classification

In [None]:
from tensorflow.keras import Input, Sequential
from tensorflow.keras.layers import Dense

# One softmax unit per encoded class, derived from the fitted encoder instead
# of a hard-coded 7 so the output layer always matches the labels in `y`.
n_classes = len(label_encoder.classes_)

# Define the model: a small fully connected network.
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first Dense layer makes Keras emit a UserWarning.
model = Sequential([
    Input(shape=(n_features,)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(n_classes, activation='softmax'),
])

# Compile the model.
# sparse_categorical_crossentropy matches the integer-encoded labels in `y`
# (no one-hot encoding is needed).
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Normalize the Data

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the per-feature scaling statistics from the training split only,
# then apply the same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Use Early Stopping
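Early stopping helps prevent overfitting by halting training once the model's performance on the validation set stops improving. Here, training ends after 10 consecutive epochs without a lower validation loss, and the best weights seen are restored: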

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop once val_loss has gone 10 epochs without improving and roll the model
# back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Train for at most 300 epochs, reserving 20% of the training data for validation
model.fit(
    X_train,
    y_train,
    epochs=300,
    batch_size=16,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1,
)


Epoch 1/300
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9413 - loss: 0.1396 - val_accuracy: 0.9287 - val_loss: 0.2356
Epoch 2/300
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9464 - loss: 0.1462 - val_accuracy: 0.9304 - val_loss: 0.2303
Epoch 3/300
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9426 - loss: 0.1521 - val_accuracy: 0.9282 - val_loss: 0.2237
Epoch 4/300
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9393 - loss: 0.1552 - val_accuracy: 0.9298 - val_loss: 0.2230
Epoch 5/300
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9413 - loss: 0.1513 - val_accuracy: 0.9282 - val_loss: 0.2329
Epoch 6/300
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9419 - loss: 0.1512 - val_accuracy: 0.9211 - val_loss: 0.2369
Epoch 7/300
[1m456/45

<keras.src.callbacks.history.History at 0x78430f1ce230>

In [None]:
# Score the trained model on the held-out test split (progress output silenced)
loss, acc = model.evaluate(
    x=X_test,
    y=y_test,
    verbose=0,
)
print('Test Accuracy: %.3f' % acc)

Test Accuracy: 0.929


In [None]:
# Example prediction
import numpy as np

# Raw (unscaled) measurements for a single bean, in the same column order as X.
# These values match the first row shown by df.head().
sample = np.array([[28395, 610.291, 208.178117, 173.888747041636, 1.19719142411602, 0.549812187138347, 28715, 190.141097274511, 0.763922518159806, 0.988855998607, 0.958027126250128, 0.913357754795763, 0.00733150613518321, 0.00314728916733569, 0.834222388245556, 0.998723889013168]])

# The model was trained on standardized features, so the sample must go
# through the already-fitted scaler first; predicting on raw values feeds the
# network inputs far outside the range it was trained on.
# The DataFrame wrapper supplies the column names the scaler was fitted with.
sample_scaled = scaler.transform(pd.DataFrame(sample, columns=X.columns))
prediction = model.predict(sample_scaled)

# Convert the prediction probabilities into a class label
predicted_class = np.argmax(prediction, axis=1)
print('Predicted class:', predicted_class)

# Map the integer id back to the original class name for readability
print('Predicted label:', label_encoder.inverse_transform(predicted_class))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
Predicted class: [1]


In [None]:
# Show which integer id the encoder assigned to each class name
encoded_classes = enumerate(label_encoder.classes_)
for idx, label in encoded_classes:
    print(f"{label}: {idx}")


BARBUNYA: 0
BOMBAY: 1
CALI: 2
DERMASON: 3
HOROZ: 4
SEKER: 5
SIRA: 6
