In [1]:
import os
import warnings

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report
warnings.filterwarnings('ignore')

In [2]:
# Column names assigned to every loaded DataFrame: 18 feature columns
# followed by the target column 'class'.
columns = [
    'compactness', 'circularity', 'distance_circularity',
    'radius_ratio', 'pr.axis_aspect_ratio', 'max_length_aspect_ratio',
    'scatter_ratio', 'elongatedness',
    'pr.axis_rectangularity', 'max_length_rectangularity',
    'scaled_variance_along_major_axis', 'scaled_variance_along_minor_axis',
    'scaled_radius_of_gyration',
    'skewness_about_major_axis', 'skewness_about_minor_axis',
    'kurtosis_about_minor_axis', 'kurtosis_about_major_axis',
    'hollows_ratio',
    'class',
]

# Subset of columns kept when feature selection is enabled (the features
# reported as important in the paper), plus the target column 'class'.
feature_selected_col = [
    'distance_circularity',
    'scatter_ratio',
    'elongatedness',
    'pr.axis_rectangularity',
    'scaled_variance_along_major_axis',
    'scaled_variance_along_minor_axis',
    'class',
]

In [3]:
# Directory containing the five CSV files read below. Set the CAR_DATA_DIR
# environment variable to point at a different location; when it is unset the
# original hard-coded directory is used, so existing setups keep working.
DATA_DIR = os.environ.get("CAR_DATA_DIR", r"D:\OneDrive - NITT\Custom_Download\car_data_fina")

dfAllClasses = pd.read_csv(os.path.join(DATA_DIR, "4_classes.csv"))
dfBusNoBus = pd.read_csv(os.path.join(DATA_DIR, "bus vs no bus.csv"))
df3Classes = pd.read_csv(os.path.join(DATA_DIR, "three_classes_two_car_combined.csv"))
dfTwoCars = pd.read_csv(os.path.join(DATA_DIR, "two cars.csv"))
dfVanNoVan = pd.read_csv(os.path.join(DATA_DIR, "van Vs no van.csv"))

In [4]:
# Give every loaded frame the same header: the shared `columns` list.
for frame in (dfAllClasses, dfBusNoBus, df3Classes, dfTwoCars, dfVanNoVan):
    frame.columns = columns

In [5]:
def get_result(df,FeatureSelection = False):
    """Train a logistic-regression classifier on ``df`` and report test metrics.

    The frame is split 80/20 into train and test sets (``random_state=42``),
    the features are standardised with a scaler fitted on the training part
    only, and a ``LogisticRegression`` model is fitted and evaluated on the
    held-out part.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the feature columns and a target column named
        ``'class'``. The caller's frame is not modified.
    FeatureSelection : bool, default False
        When true, only the columns listed in the module-level
        ``feature_selected_col`` are used.

    Returns
    -------
    dict
        The result of ``classification_report(..., output_dict=True)`` for the
        test set. The text form of the same report is printed as a side effect.
    """
    if FeatureSelection:
        df = df[feature_selected_col]

    X = df.drop('class',axis=1)
    # Use a 1-D Series as the target: a single-column DataFrame is a column
    # vector, which scikit-learn only accepts after emitting a
    # DataConversionWarning and flattening it itself.
    y = df['class']

    # Splitting the dataset into the Training set and Test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42)

    # Feature scaling: fit on the training data only, then apply the same
    # transformation to the test data so no test statistics leak into training.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Fitting Logistic Regression to the Training set
    clf = LogisticRegression(n_jobs=-1,random_state = 100)
    clf.fit(X_train, y_train)

    # Predicting the Test set results
    y_prediction = clf.predict(X_test)

    # Show the human-readable report, then return the same metrics as a dict.
    print(classification_report(y_test, y_prediction))
    report = classification_report(y_test, y_prediction,output_dict=True)
    return report

In [6]:
def generate_table(AllClasses,ThreeClass,vanNoVan,busNobus,twoCar):
    """Collect headline metrics from five classification reports into a table.

    Each argument is a report dictionary providing an ``'accuracy'`` entry and
    a ``'weighted avg'`` entry with ``'precision'`` and ``'recall'`` keys.

    Returns a DataFrame with one row per report (indexed ``'all_classes'``,
    ``'class3'``, ``'vanNoVan'``, ``'busNobus'``, ``'twoCar'``) and the columns
    ``'Classification Accuracy'``, ``'Precision'`` and ``'Recall'``.
    """
    labelled_reports = (
        ('all_classes', AllClasses),
        ('class3', ThreeClass),
        ('vanNoVan', vanNoVan),
        ('busNobus', busNobus),
        ('twoCar', twoCar),
    )

    result = pd.DataFrame([],columns = ['Classification Accuracy','Precision','Recall'])

    for row_label, report in labelled_reports:
        accuracy = report['accuracy']
        weighted = report['weighted avg']
        result.loc[row_label] = [accuracy, weighted['precision'], weighted['recall']]

    return result

# Without feature selection

# all_classes

In [7]:
# Baseline on the four-class data: every feature column is used.
AllClasses = get_result(dfAllClasses, FeatureSelection=False)

              precision    recall  f1-score   support

           1       0.91      0.97      0.94        40
           2       0.75      0.56      0.64        48
           3       0.89      0.94      0.92        52
           4       0.56      0.67      0.61        30

    accuracy                           0.79       170
   macro avg       0.78      0.79      0.78       170
weighted avg       0.80      0.79      0.79       170



# 3_classes

In [8]:
# Baseline on the three-class data: every feature column is used.
ThreeClass = get_result(df3Classes, FeatureSelection=False)

              precision    recall  f1-score   support

           1       0.93      0.97      0.95        40
           2       0.92      0.94      0.93        52
           3       0.96      0.92      0.94        78

    accuracy                           0.94       170
   macro avg       0.94      0.95      0.94       170
weighted avg       0.94      0.94      0.94       170



# van_no_van

In [9]:
# Baseline on the van-vs-no-van data: every feature column is used.
vanNoVan = get_result(dfVanNoVan, FeatureSelection=False)

              precision    recall  f1-score   support

           1       0.95      0.95      0.95        40
           2       0.98      0.98      0.98       130

    accuracy                           0.98       170
   macro avg       0.97      0.97      0.97       170
weighted avg       0.98      0.98      0.98       170



# bus_no_bus

In [10]:
# Baseline on the bus-vs-no-bus data: every feature column is used.
busNobus = get_result(dfBusNoBus, FeatureSelection=False)

              precision    recall  f1-score   support

           1       0.92      0.92      0.92        52
           2       0.97      0.97      0.97       118

    accuracy                           0.95       170
   macro avg       0.94      0.94      0.94       170
weighted avg       0.95      0.95      0.95       170



# two_cars

In [11]:
# Baseline on the two-cars data: every feature column is used.
twoCar = get_result(dfTwoCars, FeatureSelection=False)

              precision    recall  f1-score   support

           1       0.56      0.71      0.63        35
           2       0.76      0.61      0.67        51

    accuracy                           0.65        86
   macro avg       0.66      0.66      0.65        86
weighted avg       0.67      0.65      0.65        86



# <font color='blue'>For Table 1: Classification results [no feature selection]</font>

In [12]:
# Table 1: summary of the five reports computed without feature selection.
generate_table(
    AllClasses=AllClasses,
    ThreeClass=ThreeClass,
    vanNoVan=vanNoVan,
    busNobus=busNobus,
    twoCar=twoCar,
)

Unnamed: 0,Classification Accuracy,Precision,Recall
all_classes,0.794118,0.795724,0.794118
class3,0.941176,0.941755,0.941176
vanNoVan,0.976471,0.976471,0.976471
busNobus,0.952941,0.952941,0.952941
twoCar,0.651163,0.674482,0.651163


# With feature selection
#####################################################################################################

# all_classes

In [13]:
# Four-class data restricted to the selected feature columns.
# NOTE: this rebinds AllClasses, replacing the baseline report computed earlier.
AllClasses = get_result(dfAllClasses, FeatureSelection=True)

              precision    recall  f1-score   support

           1       0.72      0.95      0.82        40
           2       0.66      0.40      0.49        48
           3       0.73      0.92      0.81        52
           4       0.45      0.33      0.38        30

    accuracy                           0.68       170
   macro avg       0.64      0.65      0.63       170
weighted avg       0.66      0.68      0.65       170



# 3_classes

In [14]:
# Three-class data restricted to the selected feature columns.
# NOTE: this rebinds ThreeClass, replacing the baseline report computed earlier.
ThreeClass = get_result(df3Classes, FeatureSelection=True)

              precision    recall  f1-score   support

           1       0.78      0.80      0.79        40
           2       0.75      0.69      0.72        52
           3       0.70      0.73      0.72        78

    accuracy                           0.74       170
   macro avg       0.74      0.74      0.74       170
weighted avg       0.74      0.74      0.74       170



# van_no_van

In [15]:
# Van-vs-no-van data restricted to the selected feature columns.
# NOTE: this rebinds vanNoVan, replacing the baseline report computed earlier.
vanNoVan = get_result(dfVanNoVan, FeatureSelection=True)

              precision    recall  f1-score   support

           1       0.82      0.70      0.76        40
           2       0.91      0.95      0.93       130

    accuracy                           0.89       170
   macro avg       0.87      0.83      0.84       170
weighted avg       0.89      0.89      0.89       170



# bus_no_bus

In [16]:
# Bus-vs-no-bus data restricted to the selected feature columns.
# NOTE: this rebinds busNobus, replacing the baseline report computed earlier.
busNobus = get_result(dfBusNoBus, FeatureSelection=True)

              precision    recall  f1-score   support

           1       0.78      0.60      0.67        52
           2       0.84      0.92      0.88       118

    accuracy                           0.82       170
   macro avg       0.81      0.76      0.78       170
weighted avg       0.82      0.82      0.82       170



# two_cars

In [17]:
# Two-cars data restricted to the selected feature columns.
# NOTE: this rebinds twoCar, replacing the baseline report computed earlier.
twoCar = get_result(dfTwoCars, FeatureSelection=True)

              precision    recall  f1-score   support

           1       0.43      0.83      0.56        35
           2       0.67      0.24      0.35        51

    accuracy                           0.48        86
   macro avg       0.55      0.53      0.46        86
weighted avg       0.57      0.48      0.44        86



# <font color='blue'>For Table 2: Classification results [with feature selection]</font>

In [18]:
# Table 2: the five names now hold the feature-selected reports from the
# cells directly above, so this summarises the runs with feature selection.
generate_table(
    AllClasses=AllClasses,
    ThreeClass=ThreeClass,
    vanNoVan=vanNoVan,
    busNobus=busNobus,
    twoCar=twoCar,
)

Unnamed: 0,Classification Accuracy,Precision,Recall
all_classes,0.676471,0.656365,0.676471
class3,0.735294,0.735932,0.735294
vanNoVan,0.894118,0.891003,0.894118
busNobus,0.823529,0.81905,0.823529
twoCar,0.476744,0.568912,0.476744
