In [8]:
import pandas as pd
import numpy as np
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report

In [2]:
glass_ds = pd.read_csv('../new_glass.csv')
X = glass_ds.drop(columns='Type')
Y = glass_ds['Type']

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.15, stratify=Y)

## KNN

In [None]:
knn = KNeighborsClassifier()

plain_sbs = SFS(knn, 
          k_features=(1, 100), 
          forward=False,
          floating=False, 
          scoring='accuracy',
          cv=10,
          n_jobs=-1)

plain_sbs.fit(X_train, y_train, custom_feature_names=X.columns)

In [None]:
plain_sbs.k_score_

In [None]:
selected_features = X.columns[list(plain_sbs.k_feature_idx_)]

In [None]:
x_t = X_train[selected_features]
x_t

In [None]:
clf = KNeighborsClassifier()

clf.fit(x_t, y_train)

In [None]:
x_test_filtered = X_test[selected_features]
y_test_pred = clf.predict(x_test_filtered)
print(classification_report(y_test, y_test_pred))
print(cross_val_score(clf, X_train, y_train, cv=10).mean())

## Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Sequential Backward Selection
gnb = GaussianNB()

nb_sbs = SFS(gnb, 
          k_features=(1, 10), 
          forward=False, 
          floating=False, 
          scoring='accuracy',
          cv=10)

nb_sbs.fit(X_train, y_train, custom_feature_names=X.columns)

In [50]:
selected_features = X.columns[list(nb_sbs.k_feature_idx_)]
x_nb = X_train[selected_features]

gnb_ = GaussianNB()
gnb_.fit(x_nb, y_train)

GaussianNB()

In [51]:
x_test_filtered = X_test[selected_features]
y_test_pred = gnb_.predict(x_test_filtered)
print(classification_report(y_test, y_test_pred))
print(cross_val_score(gnb_, X_train, y_train, cv=10).mean())

              precision    recall  f1-score   support

           1       0.64      0.82      0.72        11
           2       0.75      0.75      0.75        12
           3       0.00      0.00      0.00         3
           5       1.00      1.00      1.00         2
           6       0.50      1.00      0.67         1
           7       1.00      0.75      0.86         4

    accuracy                           0.73        33
   macro avg       0.65      0.72      0.67        33
weighted avg       0.68      0.73      0.70        33

0.375438596491228


## Logistic Regression

In [12]:
from sklearn.linear_model import LogisticRegression

In [13]:
# Sequential Backward Selection
lr_sbs = SFS(LogisticRegression(), 
          k_features=(1, 10), 
          forward=False, 
          floating=False, 
          scoring='accuracy',
          cv=10) # -1 means ALL CPU

lr_sbs.fit(X, Y, custom_feature_names=X.columns)

SequentialFeatureSelector(cv=10, estimator=LogisticRegression(), forward=False,
                          k_features=(1, 10), scoring='accuracy')

In [14]:
selected_features = X.columns[list(lr_sbs.k_feature_idx_)]
x_lr = X_train[selected_features]
lr = LogisticRegression()
lr.fit(x_lr, y_train)

LogisticRegression()

In [17]:
x_test_filtered = X_train[selected_features]
y_test_pred = lr.predict(x_test_filtered)
print(classification_report(y_train, y_test_pred))
print(cross_val_score(lr, X_train, y_train, cv=10).mean())

              precision    recall  f1-score   support

           1       0.62      0.73      0.67        59
           2       0.61      0.70      0.65        64
           3       0.00      0.00      0.00        14
           5       0.80      0.36      0.50        11
           6       0.89      1.00      0.94         8
           7       0.92      0.88      0.90        25

    accuracy                           0.67       181
   macro avg       0.64      0.61      0.61       181
weighted avg       0.63      0.67      0.65       181

0.6236842105263157


## SVM

In [4]:
from sklearn import svm
from sklearn.svm import LinearSVC

In [10]:
# Sequential Backward Selection
svm_sbs = SFS(svm.SVC(), 
          k_features=(1, 100), 
          forward=False, 
          floating=False, 
          scoring='accuracy',
          cv=10) # -1 means ALL CPU

svm_sbs.fit(X, Y, custom_feature_names=X.columns)

SequentialFeatureSelector(cv=10, estimator=SVC(), forward=False,
                          k_features=(1, 100), scoring='accuracy')

In [18]:
selected_features = X.columns[list(svm_sbs.k_feature_idx_)]
x_lr = X_train[selected_features]
svm_clf = svm.SVC()
svm_clf.fit(x_lr, y_train)

x_test_filtered = X_train[selected_features]
y_test_pred = svm_clf.predict(x_test_filtered)
print(classification_report(y_train, y_test_pred))
print(cross_val_score(svm_clf, X_train, y_train, cv=10).mean())

              precision    recall  f1-score   support

           1       0.62      0.81      0.71        59
           2       0.58      0.70      0.63        64
           3       0.00      0.00      0.00        14
           5       0.00      0.00      0.00        11
           6       0.00      0.00      0.00         8
           7       0.81      0.84      0.82        25

    accuracy                           0.63       181
   macro avg       0.33      0.39      0.36       181
weighted avg       0.52      0.63      0.57       181

0.4587719298245614
