In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, hamming_loss
import tensorflow as tf
from tqdm import tqdm

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the image
# folder stored on Drive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Folder on the mounted Drive that holds the image files named in the CSV.
images = '/content/drive/My Drive/images'

# One row per image: a `filename` column followed by the 0/1 trait columns.
df = pd.read_csv('/content/dataset.csv')

# Preview the first five rows, then the column labels.
print(df.head(5), df.columns, sep='\n')

  filename  Balanced  Crossbar  High Crossbar  Left Do Crossbar  \
0    0.png                   1              0                 0   
1    1.png                   1              0                 0   
2    2.png                   1              0                 0   
3    3.png                   0              0                 0   
4    4.png                   1              1                 0   

   Short Length Crossbar  Down Sloping  Right Do Crossbar  Upsloping  \
0                      1             0                  0          0   
1                      0             0                  1          1   
2                      0             0                  0          1   
3                      0             0                  1          0   
4                      0             0                  0          0   

   Lengthy Crossbar  Flat Crossbar  Low Crossbar  Medium Length Crossbar  
0                 0              1             0                       0  
1             

In [None]:
# Load every image listed in the CSV as a grayscale array scaled to [0, 1].
#
# Only `path` and `color_mode` are passed to load_img. The positional
# `tf.float32` that used to follow the path is not a dtype option: on older
# Keras it landed in the deprecated `grayscale` slot, and on newer Keras it
# collides with the `color_mode` keyword.
X_dataset = []
for i in tqdm(range(df.shape[0])):
    img_path = os.path.join(images, df['filename'].iloc[i])
    img = tf.keras.utils.load_img(img_path, color_mode='grayscale')
    img = tf.keras.utils.img_to_array(img)  # array of shape (height, width, 1)
    img = img / 255
    X_dataset.append(img)

# Stack the per-image arrays; this yields one regular 4-D array only when all
# images share the same size.
X = np.array(X_dataset)

100%|██████████| 1996/1996 [18:53<00:00,  1.76it/s]


In [None]:
# Label matrix: every column except `filename`, one row per image.
Y = df.drop(columns=['filename']).to_numpy()

In [None]:
# Sanity check: confirm the stacked images are held in a NumPy array.
type(X)

numpy.ndarray

In [None]:
# Flatten each image into a single feature row for the scikit-learn models.
# Both dimensions are derived from X rather than hard-coded (previously 1996
# samples and 784 pixels), so the cell keeps working when the number of images
# or the image size changes.
print(X.shape)
new_size = int(np.prod(X.shape[1:]))  # values per image, e.g. 28 * 28 * 1 = 784
new_X = X.reshape(X.shape[0], new_size)
print(new_X.shape)

(1996, 28, 28, 1)
(1996, 784)


In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    new_X,
    Y,
    test_size=0.2,
    random_state=42,
)
x_train.shape

(1596, 784)

In [None]:
# Check the training label matrix: (number of training samples, number of label columns).
y_train.shape

(1596, 11)

In [None]:
# Train the Random Forest Classifier on the multi-label targets
clf = RandomForestClassifier(n_estimators=700, random_state=42)
clf.fit(x_train, y_train)

# Predict labels for the test data
y_pred = clf.predict(x_test)

# With 2-D multi-label targets this is subset accuracy: a sample only counts
# as correct when every one of its labels matches.
accuracy = accuracy_score(y_test, y_pred)

# Fraction of individual labels that are wrong. The result is stored under its
# own name so the imported `hamming_loss` function is not shadowed; printing
# `hamming_loss` itself would show the function object on a fresh kernel.
hamming = hamming_loss(y_test, y_pred)

print("Accuracy:", accuracy)
print("Hamming Loss:", hamming)


Accuracy: 0.125
Hamming Loss: 0.19272727272727272


# SVM

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import f1_score

In [None]:
# Collapse each multi-label row to a single class id: the index of its first
# maximal entry, so a row with several active labels keeps only the first one.
new_y_train, new_y_test = (np.argmax(labels, axis=1) for labels in (y_train, y_test))

In [None]:
# Fit a linear-kernel SVM (regularization C=1, probability estimates enabled)
# on the single-label targets.
svm_classifier = SVC(C=1, kernel='linear', probability=True)
svm_classifier.fit(x_train, new_y_train)

# Classify the held-out samples and score them with a support-weighted F1.
y_pred = svm_classifier.predict(x_test)
f1score = f1_score(y_true=new_y_test, y_pred=y_pred, average='weighted')
print("F1 score: ", f1score)


F1 score:  0.45939027914249025


In [None]:
# Share of test samples whose predicted class equals the true class.
accuracy = accuracy_score(y_true=new_y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.4725


## Switching to the RBF Kernel

In [None]:
# SVM with the RBF kernel; every other SVC setting is left at its default.
svm = SVC(kernel='rbf')

In [None]:
# Train on the single-label targets, then classify the held-out samples
# (fit returns the estimator itself, so the two steps can be chained).
y_pred = svm.fit(x_train, new_y_train).predict(x_test)

In [None]:
# Accuracy of the predictions currently held in `y_pred` on the test split.
accuracy = accuracy_score(y_true=new_y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.575


## Switching to the Sigmoid Kernel

In [None]:
# SVM with the sigmoid kernel; every other SVC setting is left at its default.
svm = SVC(kernel='sigmoid')

In [None]:
# Train on the single-label targets, then classify the held-out samples
# (fit returns the estimator itself, so the two steps can be chained).
y_pred = svm.fit(x_train, new_y_train).predict(x_test)

In [None]:
# Accuracy of the predictions currently held in `y_pred` on the test split.
accuracy = accuracy_score(y_true=new_y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.5125


### Hyperparameter Tuning for the SVM

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Candidate settings: every combination of C, kernel and gamma is evaluated.
param_grid = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf', 'sigmoid'],
    gamma=['scale', 'auto'],
)

# 5-fold cross-validated search over the grid, fitted on the training split.
svm = SVC()
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=5)
grid_search.fit(x_train, new_y_train)



In [None]:
# Print the optimal hyperparameters
print('Optimal hyperparameters:', grid_search.best_params_)

# GridSearchCV refits the best configuration on the whole training split by
# default (refit=True), so reuse that fitted model instead of training a
# second, identical SVC from best_params_.
svm = grid_search.best_estimator_
y_pred = svm.predict(x_test)

# Micro-averaged F1 of the tuned model on the test split.
f1score = f1_score(new_y_test, y_pred, average='micro')
print('F1 score:', f1score)

Optimal hyperparameters: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
F1 score: 0.575


### Using Bagging with SVM

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC

# Initialize the base SVM classifier
base_classifier = SVC(kernel='rbf', C=1, gamma='scale')

# Initialize the bagging classifier with the base classifier.
# The base model is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# later removed, whereas the first positional slot works on old and new
# releases alike.
# `random_state` fixes the bootstrap samples so the reported scores are
# reproducible from run to run.
bagging_classifier = BaggingClassifier(base_classifier, n_estimators=15, random_state=42)

# Train the bagging classifier on the training data
bagging_classifier.fit(x_train, new_y_train)

# Make predictions on the test data
y_pred = bagging_classifier.predict(x_test)




In [None]:
# Micro-averaged F1 of the predictions currently held in `y_pred`.
f1score = f1_score(y_true=new_y_test, y_pred=y_pred, average='micro')
print('F1 score:', f1score)

F1 score: 0.5725


In [None]:
# Accuracy on the test split, reported as a percentage.
accuracy = accuracy_score(y_true=new_y_test, y_pred=y_pred)
print("Accuracy%:", accuracy * 100)

Accuracy%: 57.49999999999999


## Using Boosting

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report

# create a base SVM classifier with RBF kernel
base_clf = SVC(kernel='rbf', C=1, gamma='scale', probability=True)

# create an AdaBoost classifier with up to 50 boosted SVM classifiers.
# The base model is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# later removed, whereas the first positional slot works on old and new
# releases alike.
# `random_state` makes the boosting run reproducible.
clf = AdaBoostClassifier(base_clf, n_estimators=50, random_state=42)

# train the classifier on the training data
clf.fit(x_train, new_y_train)

# evaluate the performance on the test data
y_pred = clf.predict(x_test)
print(classification_report(new_y_test, y_pred))




              precision    recall  f1-score   support

           0       0.35      1.00      0.52       140
           1       0.00      0.00      0.00        84
           2       0.00      0.00      0.00        58
           3       0.00      0.00      0.00        21
           4       0.00      0.00      0.00        25
           5       0.00      0.00      0.00        38
           6       0.00      0.00      0.00         8
           7       0.00      0.00      0.00         2
           8       0.00      0.00      0.00        24

    accuracy                           0.35       400
   macro avg       0.04      0.11      0.06       400
weighted avg       0.12      0.35      0.18       400



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Search space: neighbourhood size, vote weighting, and the Minkowski power p.
param_grid = dict(
    n_neighbors=[3, 5, 7, 9],
    weights=['uniform', 'distance'],
    p=[1, 2],
)

# 5-fold grid search over a default KNN model, ranked by macro-averaged F1.
clf = KNeighborsClassifier()
grid = GridSearchCV(estimator=clf, param_grid=param_grid, scoring='f1_macro', cv=5)
grid.fit(x_train, new_y_train)

# Report the winning combination.
print("Best hyperparameters:", grid.best_params_)

# Score the test split; predict() on the search object uses the best model found.
y_pred = grid.predict(x_test)
print(classification_report(new_y_test, y_pred))




Best hyperparameters: {'n_neighbors': 7, 'p': 2, 'weights': 'distance'}
              precision    recall  f1-score   support

           0       0.53      0.69      0.60       140
           1       0.59      0.61      0.60        84
           2       0.62      0.55      0.58        58
           3       0.25      0.24      0.24        21
           4       0.00      0.00      0.00        25
           5       0.65      0.68      0.67        38
           6       0.00      0.00      0.00         8
           7       0.00      0.00      0.00         2
           8       0.00      0.00      0.00        24

    accuracy                           0.53       400
   macro avg       0.29      0.31      0.30       400
weighted avg       0.47      0.53      0.50       400



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## **Creating an SVM for Each Trait**

In [None]:
import csv

# One list per trait column, filled in CSV column order. A single loop
# replaces eleven hand-written lists and append statements.
trait_columns = [[] for _ in range(11)]

# newline='' is how the csv module expects files handed to csv.reader to be opened.
with open('/content/dataset.csv', 'r', newline='') as file:
    reader = csv.reader(file)
    # skip the first row (header)
    next(reader)

    # iterate through each row and extract the values from columns 2 to 12
    # (row[0] is the filename). Indexing, rather than zipping, keeps the
    # IndexError on rows that are too short instead of silently truncating.
    for row in reader:
        for column_index, trait_values in enumerate(trait_columns, start=1):
            trait_values.append(float(row[column_index]))

# Keep the individual names that the later cells refer to.
(y_t1, y_t2, y_t3, y_t4, y_t5, y_t6,
 y_t7, y_t8, y_t9, y_t10, y_t11) = trait_columns


In [None]:
# Inspect the values collected for the third trait column.
y_t3

In [None]:
# Train one RBF-kernel SVM per trait. A single loop replaces eleven copy-pasted
# stanzas. Every trait uses a 90/10 split with the same seed, so all
# classifiers are trained and tested on the same rows.
#
# NOTE: X_train, X_test, y_train and y_test are rebound on every iteration
# (after the loop they hold the split for trait 11). This also replaces the
# y_train / y_test produced by the earlier multi-label split.
trait_targets = [y_t1, y_t2, y_t3, y_t4, y_t5, y_t6, y_t7, y_t8, y_t9, y_t10, y_t11]
trait_classifiers = []
trait_accuracies = []

for trait_number, trait_target in enumerate(trait_targets, start=1):
    X_train, X_test, y_train, y_test = train_test_split(
        new_X, trait_target, test_size=.10, random_state=8
    )
    trait_clf = SVC(kernel='rbf')
    trait_clf.fit(X_train, y_train)

    # accuracy_score takes (y_true, y_pred); the value is the same either way
    # round, but the documented order is used here.
    trait_accuracy = accuracy_score(y_test, trait_clf.predict(X_test))
    print(f"Classifier {trait_number} accuracy: ", trait_accuracy)

    trait_classifiers.append(trait_clf)
    trait_accuracies.append(trait_accuracy)

# Keep the individual names that the model-saving cell refers to.
(clf1, clf2, clf3, clf4, clf5, clf6,
 clf7, clf8, clf9, clf10, clf11) = trait_classifiers

# Mean of the eleven per-trait accuracies.
print("Average accuracy:", sum(trait_accuracies) / len(trait_accuracies))

Classifier 1 accuracy:  0.795
Classifier 2 accuracy:  0.92
Classifier 3 accuracy:  0.78
Classifier 4 accuracy:  0.865
Classifier 5 accuracy:  0.835
Classifier 6 accuracy:  0.87
Classifier 7 accuracy:  0.805
Classifier 8 accuracy:  0.84
Classifier 9 accuracy:  0.655
Classifier 10 accuracy:  0.92
Classifier 11 accuracy:  0.78


Average accuracy: 0.8231818181818182

In [None]:
# Load the image using the provided filename.
# Only `path` and `color_mode` are passed: the positional `tf.float32` that
# used to follow the path is not a dtype option. On older Keras it landed in
# the deprecated `grayscale` slot, and on newer Keras it collides with the
# `color_mode` keyword.
img = tf.keras.utils.load_img('/content/10910.png', color_mode='grayscale')

# Convert the loaded image to a numpy array
img = tf.keras.utils.img_to_array(img)

# Normalize the pixel values of the image between 0 and 1
img = img/255.0

# Reshape the image to a 2D array with dimensions (1, height * width).
# 784 = 28 * 28, so the reshape fails loudly if the image has any other size.
img = np.reshape(img, (1, 784))




In [None]:
# Make sure joblib is installed; the next cell imports it to save the classifiers.
!pip install joblib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import joblib

# Saving all the classifiers, one file per trait:
# classifier_1.joblib ... classifier_11.joblib.
# The models are listed explicitly instead of being looked up with eval() on a
# constructed variable name, so a missing classifier shows up as a plain
# NameError on this line and no string is ever evaluated as code.
trained_classifiers = [clf1, clf2, clf3, clf4, clf5, clf6,
                       clf7, clf8, clf9, clf10, clf11]
for i, trained_clf in enumerate(trained_classifiers, start=1):
    filename = f"classifier_{i}.joblib"
    joblib.dump(trained_clf, filename)

In [None]:
# Trait name for each classifier, keyed by zero-based position
# (classifier_1.joblib corresponds to key 0, and so on).
label_map = dict(enumerate([
    'Balanced Crossbar',
    'High Crossbar',
    'Left Do Crossbar',
    'Short Length Crossbar',
    'Down Sloping',
    'Right Do Crossbar',
    'Upsloping',
    'Lengthy Crossbar',
    'Flat Crossbar',
    'Low Crossbar',
    'Medium Length Crossbar',
]))

# Reload each saved classifier in turn and keep its prediction for the single
# flattened image held in `img`.
predictions = []
for number in range(1, 12):
    clf = joblib.load(f'classifier_{number}.joblib')
    predictions.append(clf.predict(img)[0])

# Report every trait next to the value its classifier predicted.
for position, pred in enumerate(predictions):
    print(f'{label_map[position]}: [{pred}]')


Balanced Crossbar: [0.0]
High Crossbar: [0.0]
Left Do Crossbar: [1.0]
Short Length Crossbar: [0.0]
Down Sloping: [0.0]
Right Do Crossbar: [0.0]
Upsloping: [0.0]
Lengthy Crossbar: [0.0]
Flat Crossbar: [1.0]
Low Crossbar: [1.0]
Medium Length Crossbar: [0.0]
