In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report

In [2]:
# Names assigned to the CSV columns, in file order: 18 shape features
# followed by the target label.
columns = [
    "compactness",
    "circularity",
    "distance_circularity",
    "radius_ratio",
    "pr.axis_aspect_ratio",
    "max_length_aspect_ratio",
    "scatter_ratio",
    "elongatedness",
    "pr.axis_rectangularity",
    "max_length_rectangularity",
    "scaled_variance_along_major_axis",
    "scaled_variance_along_minor_axis",
    "scaled_radius_of_gyration",
    "skewness_about_major_axis",
    "skewness_about_minor_axis",
    "kurtosis_about_minor_axis",
    "kurtosis_about_major_axis",
    "hollows_ratio",
    "class",
]


# Column subset kept when feature selection is switched on; the label
# column is included so the target survives the selection.
imp_col = [
    "distance_circularity",
    "scatter_ratio",
    "elongatedness",
    "pr.axis_rectangularity",
    "scaled_variance_along_major_axis",
    "scaled_variance_along_minor_axis",
    "class",
]

In [3]:
# Dataset path. Written as a raw string so the Windows backslashes are taken
# literally: '\O', '\C' and '\c' are not valid escape sequences and raise a
# SyntaxWarning on recent Python versions when used in a normal string.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
# The file is read with header=None, so the column names are supplied explicitly.
df_full = pd.read_csv(
    r'D:\OneDrive - NITT\Custom_Download\combine (1).csv',
    header=None,
    names=columns,
)

In [4]:
# Number of missing entries in each column.
missing_per_column = df_full.isna().sum()
missing_per_column

compactness                         0
circularity                         0
distance_circularity                0
radius_ratio                        0
pr.axis_aspect_ratio                0
max_length_aspect_ratio             0
scatter_ratio                       0
elongatedness                       0
pr.axis_rectangularity              0
max_length_rectangularity           0
scaled_variance_along_major_axis    0
scaled_variance_along_minor_axis    0
scaled_radius_of_gyration           0
skewness_about_major_axis           0
skewness_about_minor_axis           0
kurtosis_about_minor_axis           0
kurtosis_about_major_axis           0
hollows_ratio                       0
class                               0
dtype: int64

In [5]:
# How many rows carry each class label.
class_counts = df_full['class'].value_counts()
class_counts

bus     218
saab    217
opel    212
van     199
Name: class, dtype: int64

In [6]:
# Give every experiment its own independent copy of the full table, so that
# relabelling one of them cannot affect the others or df_full.
(
    df_all_classes,
    df_3_classes,
    df_van_no_van,
    df_bus_no_bus,
    df_two_cars,
) = (df_full.copy() for _ in range(5))

In [7]:
# 3-class task: fold 'saab' into 'opel' (the merged class keeps the label 'opel').
df_3_classes['class'] = df_3_classes['class'].replace({'saab': 'opel'})

# One-vs-rest tasks: collapse every non-target label into one negative label.
not_van = dict.fromkeys(['opel', 'bus', 'saab'], 'No_van')
not_bus = dict.fromkeys(['opel', 'van', 'saab'], 'No_bus')
df_van_no_van['class'] = df_van_no_van['class'].replace(not_van)
df_bus_no_bus['class'] = df_bus_no_bus['class'].replace(not_bus)

# Two-car task: keep only the rows labelled 'saab' or 'opel'.
is_saab_or_opel = df_two_cars['class'].isin(['saab', 'opel'])
df_two_cars = df_two_cars[is_saab_or_opel]

In [8]:
# Display the frame used for the two-car (saab vs. opel) task.
df_two_cars

Unnamed: 0,compactness,circularity,distance_circularity,radius_ratio,pr.axis_aspect_ratio,max_length_aspect_ratio,scatter_ratio,elongatedness,pr.axis_rectangularity,max_length_rectangularity,scaled_variance_along_major_axis,scaled_variance_along_minor_axis,scaled_radius_of_gyration,skewness_about_major_axis,skewness_about_minor_axis,kurtosis_about_minor_axis,kurtosis_about_major_axis,hollows_ratio,class
2,104,50,106,209,66,10,207,32,23,158,223,635,220,73,14,9,188,196,saab
9,93,44,98,197,62,11,183,36,22,146,202,505,152,64,4,14,195,204,saab
11,90,34,66,136,55,6,123,54,17,118,148,224,118,65,5,26,196,202,saab
15,96,55,103,201,65,9,204,32,23,166,227,624,246,74,6,2,186,194,opel
18,104,54,100,186,61,10,216,31,24,173,225,686,220,74,5,11,185,195,saab
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
837,94,46,77,169,60,8,158,42,20,148,181,373,181,67,12,2,193,199,saab
840,93,34,66,140,56,7,130,51,18,120,151,251,114,62,5,29,201,207,opel
841,93,39,87,183,64,8,169,40,20,134,200,422,149,72,7,25,188,195,saab
843,106,54,101,222,67,12,222,30,25,173,228,721,200,70,3,4,187,201,saab


In [9]:
def get_result(data, feature_engg=False, test_size=0.20, random_state=42):
    """Train a decision tree on ``data`` and report its hold-out metrics.

    The frame is split into train/test parts, the features are standardised
    using statistics from the training part only, an entropy-criterion
    decision tree is fitted, and the test-set classification report is
    printed and returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'class'`` column holding the target labels; every
        other column is used as a feature. The frame is not modified.
    feature_engg : bool, default False
        When True, only the columns listed in the module-level ``imp_col``
        (which includes ``'class'``) are kept before training.
    test_size : float, default 0.20
        Passed to ``train_test_split`` as the test-set size.
    random_state : int, default 42
        Seed used for both the train/test split and the decision tree.

    Returns
    -------
    dict
        ``classification_report(..., output_dict=True)`` for the test set.
    """
    # Column selection and drop() both return new objects, so the caller's
    # frame is never mutated and no defensive copy is required.
    df = data[imp_col] if feature_engg else data

    X = df.drop(columns='class')
    # 1-D target (a Series rather than a one-column DataFrame).
    y = df['class']

    # Splitting the dataset into the Training set and Test set
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Feature Scaling: fit on the training part only, then apply to both.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Fitting Decision Tree Classification to the Training set
    classifier = DecisionTreeClassifier(criterion='entropy', random_state=random_state)
    classifier.fit(X_train, y_train)

    # Predicting the Test set results
    y_pred = classifier.predict(X_test)

    # Print the human-readable report, then return the same metrics as a
    # nested dict so callers can tabulate them.
    print(classification_report(y_test, y_pred))
    return classification_report(y_test, y_pred, output_dict=True)

# Without feature engineering (all features)

# all_classes

In [10]:
# Four-class task on the full feature set.
all_class = get_result(df_all_classes, feature_engg=False)

              precision    recall  f1-score   support

         bus       0.98      0.94      0.96        52
        opel       0.47      0.63      0.54        30
        saab       0.68      0.54      0.60        48
         van       0.93      0.97      0.95        40

    accuracy                           0.78       170
   macro avg       0.77      0.77      0.76       170
weighted avg       0.80      0.78      0.78       170



# 3_classes

In [11]:
# Three-class task (saab merged into opel) on the full feature set.
class3 = get_result(df_3_classes, feature_engg=False)

              precision    recall  f1-score   support

         bus       0.98      0.96      0.97        52
        opel       0.97      0.90      0.93        78
         van       0.83      0.97      0.90        40

    accuracy                           0.94       170
   macro avg       0.93      0.94      0.93       170
weighted avg       0.94      0.94      0.94       170



# van_no_van

In [12]:
# van vs. No_van on the full feature set.
vanNoVan = get_result(df_van_no_van, feature_engg=False)

              precision    recall  f1-score   support

      No_van       0.95      0.95      0.95       130
         van       0.85      0.82      0.84        40

    accuracy                           0.92       170
   macro avg       0.90      0.89      0.89       170
weighted avg       0.92      0.92      0.92       170



# bus_no_bus

In [13]:
# bus vs. No_bus on the full feature set.
busNobus = get_result(df_bus_no_bus, feature_engg=False)

              precision    recall  f1-score   support

      No_bus       0.97      0.98      0.97       118
         bus       0.96      0.92      0.94        52

    accuracy                           0.96       170
   macro avg       0.96      0.95      0.96       170
weighted avg       0.96      0.96      0.96       170



# two_cars

In [14]:
# saab vs. opel on the full feature set.
twoCar = get_result(df_two_cars, feature_engg=False)

              precision    recall  f1-score   support

        opel       0.64      0.65      0.64        43
        saab       0.64      0.63      0.64        43

    accuracy                           0.64        86
   macro avg       0.64      0.64      0.64        86
weighted avg       0.64      0.64      0.64        86



# <font color='red'>For Table 1: Classification results [no feature selection]</font>

In [15]:
# Summary table: one row per task holding the overall accuracy and the
# 'weighted avg' precision and recall taken from each classification report.
result_1 = pd.DataFrame([], columns=['Classification Accuracy', 'Precision', 'Recall'])

for row_label, report in [
    ('all_classes', all_class),
    ('class3', class3),
    ('vanNoVan', vanNoVan),
    ('busNobus', busNobus),
    ('twoCar', twoCar),
]:
    weighted = report['weighted avg']
    result_1.loc[row_label] = [report['accuracy'], weighted['precision'], weighted['recall']]

In [16]:
# Display the summary table for the runs without feature selection.
result_1

Unnamed: 0,Classification Accuracy,Precision,Recall
all_classes,0.782353,0.795264,0.782353
class3,0.935294,0.941207,0.935294
vanNoVan,0.923529,0.922939,0.923529
busNobus,0.964706,0.964627,0.964706
twoCar,0.639535,0.63961,0.639535


# With feature engineering (selected feature subset)
#####################################################################################################

# all_classes

In [17]:
# Four-class task restricted to the imp_col feature subset.
# Note: this rebinds `all_class`, replacing the report from the earlier
# full-feature run.
all_class = get_result(df_all_classes, feature_engg=True)

              precision    recall  f1-score   support

         bus       0.94      0.87      0.90        52
        opel       0.45      0.57      0.50        30
        saab       0.60      0.54      0.57        48
         van       0.83      0.85      0.84        40

    accuracy                           0.72       170
   macro avg       0.70      0.71      0.70       170
weighted avg       0.73      0.72      0.72       170



# 3_classes

In [18]:
# Three-class task restricted to the imp_col feature subset.
# Note: this rebinds `class3`, replacing the report from the earlier
# full-feature run.
class3 = get_result(df_3_classes, feature_engg=True)

              precision    recall  f1-score   support

         bus       0.91      0.92      0.91        52
        opel       0.86      0.85      0.85        78
         van       0.80      0.80      0.80        40

    accuracy                           0.86       170
   macro avg       0.85      0.86      0.86       170
weighted avg       0.86      0.86      0.86       170



# van_no_van

In [19]:
# van vs. No_van restricted to the imp_col feature subset.
# Note: this rebinds `vanNoVan`, replacing the report from the earlier
# full-feature run.
vanNoVan = get_result(df_van_no_van, feature_engg=True)

              precision    recall  f1-score   support

      No_van       0.92      0.95      0.93       130
         van       0.81      0.72      0.76        40

    accuracy                           0.89       170
   macro avg       0.86      0.84      0.85       170
weighted avg       0.89      0.89      0.89       170



# bus_no_bus

In [20]:
# bus vs. No_bus restricted to the imp_col feature subset.
# Note: this rebinds `busNobus`, replacing the report from the earlier
# full-feature run.
busNobus = get_result(df_bus_no_bus, feature_engg=True)

              precision    recall  f1-score   support

      No_bus       0.96      0.96      0.96       118
         bus       0.90      0.90      0.90        52

    accuracy                           0.94       170
   macro avg       0.93      0.93      0.93       170
weighted avg       0.94      0.94      0.94       170



# two_cars

In [21]:
# saab vs. opel restricted to the imp_col feature subset.
# Note: this rebinds `twoCar`, replacing the report from the earlier
# full-feature run.
twoCar = get_result(df_two_cars, feature_engg=True)

              precision    recall  f1-score   support

        opel       0.59      0.60      0.60        43
        saab       0.60      0.58      0.59        43

    accuracy                           0.59        86
   macro avg       0.59      0.59      0.59        86
weighted avg       0.59      0.59      0.59        86



# <font color='red'>For Table 1: Classification results [with feature selection]</font>

In [22]:
# Summary table: one row per task holding the overall accuracy and the
# 'weighted avg' precision and recall taken from each classification report.
result_2 = pd.DataFrame([], columns=['Classification Accuracy', 'Precision', 'Recall'])

for row_label, report in [
    ('all_classes', all_class),
    ('class3', class3),
    ('vanNoVan', vanNoVan),
    ('busNobus', busNobus),
    ('twoCar', twoCar),
]:
    weighted = report['weighted avg']
    result_2.loc[row_label] = [report['accuracy'], weighted['precision'], weighted['recall']]

In [23]:
# Display the summary table for the runs with feature selection.
result_2

Unnamed: 0,Classification Accuracy,Precision,Recall
all_classes,0.717647,0.731559,0.717647
class3,0.858824,0.858538,0.858824
vanNoVan,0.894118,0.891474,0.894118
busNobus,0.941176,0.941176,0.941176
twoCar,0.593023,0.593074,0.593023
