In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Reshape, Conv1D, MaxPooling1D, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import plotly.express as px

In [4]:
df = pd.read_csv('/kaggle/input/caltech/EdgeHistogram.csv', header=None, skiprows=1, names=['Dimensions'])
df['Index'] = df['Dimensions'].str.split(';').str[0].astype(int)
df['Dimensions'] = df['Dimensions'].str.split(';', n=1).str[1]
df.set_index('Index', inplace=True)
df

Unnamed: 0_level_0,Dimensions
Index,Unnamed: 1_level_1
1,1;1;1;2;2;2;4;4;2;2;2;2;3;3;3;2;3;3;3;2;5;0;1;...
2,2;2;1;0;3;0;4;2;2;4;0;5;0;0;4;2;3;0;0;3;5;1;2;...
3,5;1;6;2;6;3;2;7;1;6;4;1;7;0;6;3;3;3;1;6;4;2;5;...
4,0;0;0;0;0;0;0;1;0;1;0;0;2;0;1;0;0;1;0;1;0;0;1;...
5,1;6;4;2;2;0;6;6;5;4;1;7;1;5;3;5;1;4;4;6;3;0;3;...
...,...
9140,5;3;1;4;1;2;5;4;4;3;2;5;4;5;3;3;4;5;3;3;6;1;4;...
9141,0;0;5;1;2;1;0;7;3;6;3;1;3;7;4;0;0;0;2;2;2;1;7;...
9142,3;3;2;1;2;0;5;4;3;3;1;4;2;3;4;3;4;1;3;2;4;2;2;...
9143,0;0;0;0;0;0;0;1;0;0;1;3;5;1;4;0;1;4;1;2;1;1;2;...


In [5]:
images = pd.read_csv('/kaggle/input/caltech/Images.csv', skiprows=1, header=None, names=['Class'])
images['Index'] = images['Class'].str.split(';').str[0].astype(int)
images['Class'] = images['Class'].str.split(';', n=1).str[1]
images.set_index('Index', inplace=True)
images

Unnamed: 0_level_0,Class
Index,Unnamed: 1_level_1
1,binocular
2,chair
3,tick
4,minaret
5,Faces
...,...
9140,cougar_face
9141,accordion
9142,Faces
9143,grand_piano


In [6]:
# Assuming 'Index' is the common index in both DataFrames
Data = pd.merge(df, images, how='left', left_index=True, right_index=True)

# Split the 'Dimensions' column based on ';'
dimensions_split = Data['Dimensions'].str.split(';', expand=True)

# Add the split columns to the DataFrame
Data = pd.concat([dimensions_split,Data], axis=1)

# Drop the original 'Dimensions' column if needed
Data.drop(['Dimensions'], axis=1, inplace=True)

# Display the updated DataFrame
Data


Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,71,72,73,74,75,76,77,78,79,Class
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,1,1,2,2,2,4,4,2,2,...,2,2,3,3,1,1,1,1,2,binocular
2,2,2,1,0,3,0,4,2,2,4,...,4,3,3,5,2,2,1,0,4,chair
3,5,1,6,2,6,3,2,7,1,6,...,4,7,0,6,3,2,7,3,6,tick
4,0,0,0,0,0,0,0,1,0,1,...,0,1,0,1,0,0,1,0,1,minaret
5,1,6,4,2,2,0,6,6,5,4,...,2,6,3,3,6,1,2,5,6,Faces
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9140,5,3,1,4,1,2,5,4,4,3,...,4,4,6,5,5,0,6,5,6,cougar_face
9141,0,0,5,1,2,1,0,7,3,6,...,1,7,3,7,1,0,4,1,3,accordion
9142,3,3,2,1,2,0,5,4,3,3,...,3,6,1,2,2,3,4,4,4,Faces
9143,0,0,0,0,0,0,0,1,0,0,...,0,2,1,1,0,0,0,0,0,grand_piano


In [7]:
# Create a LabelEncoder instance
label_encoder = LabelEncoder()

# Fit and transform the 'Class' column to numerical values
Data['Class'] = label_encoder.fit_transform(Data['Class'])

Data = Data.apply(pd.to_numeric, errors='coerce').astype('Int32')

# Display the mapping of original class names to numerical values
class_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

In [8]:
Data['Class'].unique(),Data['Class'].dtypes

(<IntegerArray>
 [11, 22, 91, 65,  1,  4, 77,  2, 94, 34,
  ...
  89, 32, 62, 31, 53, 83, 29, 17, 27, 44]
 Length: 101, dtype: Int32,
 Int32Dtype())

In [9]:
X=Data.iloc[:,:-1]
y=Data.iloc[:,-1]

# Assuming X contains features and y contains labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Display the shape of the resulting sets
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: (6858, 80)
Shape of X_test: (2286, 80)
Shape of y_train: (6858,)
Shape of y_test: (2286,)


# Support Vector Machine

In [10]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Create an SVM Classifier
svm_classifier = SVC(random_state=42)

# Train the classifier on the training data
svm_classifier.fit(X_train, y_train)

# Predict on the test data
y_pred_svm = svm_classifier.predict(X_test)

# Evaluate the model
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print("Accuracy:", accuracy_svm)


Accuracy: 0.5489938757655293


In [11]:
# Display classification report
print("Classification Report:")
print(classification_report(y_test, y_pred_svm))

# Create the confusion matrix
conf_matrix_svm = confusion_matrix(y_test, y_pred_svm)

# Display the confusion matrix with true class names on both index and columns
conf_df_svm = pd.DataFrame(conf_matrix_svm, index=sorted(class_mapping.keys(), key=lambda x: x.lower()), columns=sorted(class_mapping.keys(), key=lambda x: x.lower()))
conf_df_svm.to_csv('group062_result1.csv', sep=',', index_label='Class')


Classification Report:
              precision    recall  f1-score   support

         0.0       0.16      0.71      0.26       123
         1.0       0.89      0.95      0.92       228
         2.0       0.69      0.84      0.75        55
         3.0       0.89      0.98      0.93       213
         4.0       0.86      0.86      0.86        14
         5.0       0.83      0.98      0.89       201
         6.0       0.67      0.29      0.40         7
         7.0       0.00      0.00      0.00         9
         8.0       0.00      0.00      0.00        13
         9.0       0.00      0.00      0.00        15
        10.0       0.00      0.00      0.00        15
        11.0       1.00      0.10      0.18        10
        12.0       0.52      0.52      0.52        25
        13.0       0.35      0.53      0.42        17
        14.0       0.50      0.10      0.17        10
        15.0       0.72      0.62      0.67        21
        16.0       0.42      0.24      0.30        21
    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
conf_df_svm

Unnamed: 0,accordion,airplanes,anchor,ant,BACKGROUND_Google,barrel,bass,beaver,binocular,bonsai,...,tick,trilobite,umbrella,watch,water_lilly,wheelchair,wild_cat,windsor_chair,wrench,yin_yang
accordion,87,4,3,2,1,4,0,0,0,0,...,0,0,0,3,0,0,0,0,0,0
airplanes,4,217,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
anchor,5,0,46,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ant,3,0,0,209,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
BACKGROUND_Google,1,0,0,0,12,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wheelchair,14,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
wild_cat,2,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
windsor_chair,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,9,0,0
wrench,6,0,0,0,0,4,0,0,0,0,...,0,0,0,0,0,0,0,0,3,0


In [13]:
# Save hyperparameters for SVM
svm_params = {
    'classifier_name': 'SVC',
    'library': 'sklearn',
    'test_size': 0.25,
    'C': svm_classifier.C,  # C parameter in SVM
    'kernel': svm_classifier.kernel,  # Kernel type in SVM
    'gamma': svm_classifier.gamma,  # Gamma parameter in SVM
    'random_state': svm_classifier.random_state
}
svm_params_df = pd.DataFrame(list(svm_params.items()), columns=['Parameter', 'Value'])
svm_params_df.to_csv('group062_parameters1.csv', sep=',', index=False)


In [14]:
svm_params_df

Unnamed: 0,Parameter,Value
0,classifier_name,SVC
1,library,sklearn
2,test_size,0.25
3,C,1.0
4,kernel,rbf
5,gamma,scale
6,random_state,42


# XGBClassifier

In [15]:
# Create an XGBoost Classifier
from xgboost import XGBClassifier
xgb_classifier = XGBClassifier(random_state=42)

# Train the classifier on the training data
xgb_classifier.fit(X_train, y_train)

# Predict on the test data
y_pred_xgb = xgb_classifier.predict(X_test)

# Evaluate the model
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
print("Accuracy:", accuracy_xgb)

# Display classification report
print("Classification Report:")
print(classification_report(y_test, y_pred_xgb))



Accuracy: 0.5288713910761155
Classification Report:
              precision    recall  f1-score   support

         0.0       0.22      0.41      0.29       123
         1.0       0.82      0.91      0.86       228
         2.0       0.70      0.84      0.76        55
         3.0       0.85      0.96      0.90       213
         4.0       0.71      0.86      0.77        14
         5.0       0.86      0.93      0.89       201
         6.0       0.33      0.14      0.20         7
         7.0       0.00      0.00      0.00         9
         8.0       0.50      0.08      0.13        13
         9.0       0.17      0.07      0.10        15
        10.0       0.20      0.07      0.10        15
        11.0       0.43      0.30      0.35        10
        12.0       0.31      0.44      0.37        25
        13.0       0.40      0.35      0.38        17
        14.0       0.25      0.20      0.22        10
        15.0       0.44      0.52      0.48        21
        16.0       0.23      

In [16]:
# Confusion Matrix for XGBoost
conf_matrix_xgb = confusion_matrix(y_test, y_pred_xgb)
conf_df_xgb = pd.DataFrame(conf_matrix_xgb, index=sorted(class_mapping.keys(), key=lambda x: x.lower()), columns=sorted(class_mapping.keys(), key=lambda x: x.lower()))
conf_df_xgb.columns.name = 'Class'
conf_df_xgb.to_csv('group062_result2.csv', sep=',', index_label='Class')
conf_df_xgb

Class,accordion,airplanes,anchor,ant,BACKGROUND_Google,barrel,bass,beaver,binocular,bonsai,...,tick,trilobite,umbrella,watch,water_lilly,wheelchair,wild_cat,windsor_chair,wrench,yin_yang
accordion,51,5,3,3,0,7,0,0,0,0,...,1,0,0,3,0,0,0,0,0,1
airplanes,5,208,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
anchor,1,0,46,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ant,3,0,1,204,0,2,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
BACKGROUND_Google,0,0,0,0,12,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wheelchair,5,0,1,0,0,0,0,0,1,0,...,0,0,0,1,0,3,0,0,0,0
wild_cat,0,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
windsor_chair,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,12,0,0
wrench,2,1,0,0,0,0,1,0,0,0,...,0,0,1,0,0,0,0,0,5,0


In [17]:
# Save hyperparameters for XGBoost
xgb_params = {
    'classifier_name': 'XGBClassifier',
    'library': 'xgboost',
    'test_size': 0.25,
    'n_estimators': xgb_classifier.n_estimators,
    'max_depth': xgb_classifier.max_depth,
    'learning_rate': xgb_classifier.learning_rate,
    'random_state': xgb_classifier.random_state
}
xgb_params_df = pd.DataFrame(list(xgb_params.items()), columns=['Parameter', 'Value'])
xgb_params_df.to_csv('group062_parameters2.csv', sep=',', index=False)

xgb_params_df


Unnamed: 0,Parameter,Value
0,classifier_name,XGBClassifier
1,library,xgboost
2,test_size,0.25
3,n_estimators,
4,max_depth,
5,learning_rate,
6,random_state,42


# Decision Tree

In [18]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Create a Decision Tree Classifier
dt_classifier = DecisionTreeClassifier(random_state=42)

# Train the classifier on the training data
dt_classifier.fit(X_train, y_train)

# Predict on the test data
y_pred_dt = dt_classifier.predict(X_test)
class_report_dt = classification_report(y_test, y_pred_dt)
print(class_report_dt)


              precision    recall  f1-score   support

         0.0       0.16      0.16      0.16       123
         1.0       0.79      0.72      0.76       228
         2.0       0.55      0.44      0.48        55
         3.0       0.79      0.78      0.79       213
         4.0       0.40      0.43      0.41        14
         5.0       0.83      0.80      0.82       201
         6.0       0.09      0.14      0.11         7
         7.0       0.08      0.11      0.10         9
         8.0       0.00      0.00      0.00        13
         9.0       0.00      0.00      0.00        15
        10.0       0.08      0.07      0.07        15
        11.0       0.00      0.00      0.00        10
        12.0       0.10      0.16      0.12        25
        13.0       0.13      0.24      0.17        17
        14.0       0.08      0.10      0.09        10
        15.0       0.12      0.14      0.13        21
        16.0       0.06      0.05      0.05        21
        17.0       0.00    

In [19]:
# Confusion Matrix for XGBoost
conf_matrix_dt = confusion_matrix(y_test, y_pred_dt)
conf_df_dt = pd.DataFrame(conf_matrix_dt, index=sorted(class_mapping.keys(), key=lambda x: x.lower()), columns=sorted(class_mapping.keys(), key=lambda x: x.lower()))
conf_df_dt.columns.name = 'Class'
conf_df_dt.to_csv('group062_result3.csv', sep=',', index_label='Class')
conf_df_dt

Class,accordion,airplanes,anchor,ant,BACKGROUND_Google,barrel,bass,beaver,binocular,bonsai,...,tick,trilobite,umbrella,watch,water_lilly,wheelchair,wild_cat,windsor_chair,wrench,yin_yang
accordion,20,0,1,3,1,2,1,0,0,1,...,1,1,1,3,1,3,1,1,0,3
airplanes,4,165,0,1,0,1,0,2,3,0,...,1,0,0,1,0,1,0,0,0,0
anchor,3,0,24,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ant,1,1,0,166,0,2,0,0,1,0,...,0,1,0,1,0,0,2,0,1,0
BACKGROUND_Google,0,0,0,0,6,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wheelchair,4,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
wild_cat,1,0,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
windsor_chair,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,6,0,0
wrench,0,0,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [20]:
# Save hyperparameters for Decision Tree
dt_params = {
    'classifier_name': 'DecisionTreeClassifier',
    'library': 'sklearn',
    'test_size': 0.25,
    'max_depth': dt_classifier.max_depth,
    'min_samples_split': dt_classifier.min_samples_split,
    'min_samples_leaf': dt_classifier.min_samples_leaf,
    'random_state': dt_classifier.random_state
}
dt_params_df = pd.DataFrame(list(dt_params.items()), columns=['Parameter', 'Value'])
dt_params_df.to_csv('group062_parameters3.csv', sep=',', index=False)

dt_params_df


Unnamed: 0,Parameter,Value
0,classifier_name,DecisionTreeClassifier
1,library,sklearn
2,test_size,0.25
3,max_depth,
4,min_samples_split,2
5,min_samples_leaf,1
6,random_state,42
