In [19]:
import numpy as np
import pandas as pd

from sklearn.metrics import make_scorer, cohen_kappa_score, classification_report
from sklearn.model_selection import  GridSearchCV, StratifiedKFold, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.feature_selection import SequentialFeatureSelector,  SelectFromModel
from sklearn.svm import SVC, LinearSVC

import warnings
warnings.filterwarnings("ignore")

In [13]:
df_dme = pd.read_csv('Texture_DME.csv')
df_normal = pd.read_csv('Texture_NORMAL.csv')

n_dme = df_dme.shape[0]
n_normal = df_normal.shape[0]

label_one = pd.Series([1] * n_dme )
label_one.name = 'Label'
label_zero = pd.Series([0] * n_normal )
label_zero.name = 'Label'

df_dme.drop(df_dme.columns[0], axis = 1, inplace = True)
df_normal.drop(df_normal.columns[0], axis = 1, inplace = True)

pandas_normal = pd.concat([label_zero, df_normal], axis=1)
pandas_dme = pd.concat([label_one, df_dme], axis=1)

Data = pd.concat([pandas_normal, pandas_dme], axis=0, ignore_index=True)

X = Data.drop('Label', axis=1)
y = Data['Label']

In [3]:
# columns_n = np.arange(0, 144, 1)
# np.random.seed(123)
# selected_numbers = np.random.choice(columns_n, size=10, replace=False)
# print(selected_numbers)


# X_train, X_test, y_train, y_test = train_test_split(X.iloc[:, selected_numbers ], y, test_size=0.1, random_state=100, stratify=y)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=100, stratify=y)

k_fold = 8
kfold = StratifiedKFold(n_splits = k_fold, random_state=100, shuffle=True) # StratifiedKFold is an extension of KFold where it maintains the distribution of the target variable within each fold.

In [15]:
model = SelectFromModel(estimator = LinearSVC(dual=False, max_iter= 20000), prefit=False)

pipe = Pipeline(steps=[
                 ('scale', RobustScaler()),
                 ('SFM', model ), 
                 ('SVC', SVC() )
                  ])

param_grid = [
# Linear
{'SFM__estimator__C': 2.**np.arange(4, 15, 1), 
 'SFM__estimator__penalty': ['l1', 'l2'], 
 'SFM__max_features': [5, 10, 20, 30, 40, 50], 
 'SVC__kernel': ['linear'], 'SVC__C': 2.**np.arange(4, 15, 1)}, 
# radial basis function (rbf) kernel
{'SFM__estimator__C': 2.**np.arange(4, 15, 1),
 'SFM__estimator__penalty': ['l1', 'l2'],
 'SFM__max_features': [5, 10, 20, 30, 40, 50],
 'SVC__kernel': [ 'rbf'],
 'SVC__C': 2.**np.arange(-5,16, 1),
 'SVC__gamma': 2.**np.arange(-7,8,1)}
]

svc_model = GridSearchCV(estimator = pipe,
                  param_grid = param_grid,
                  scoring = make_scorer(cohen_kappa_score),
                  n_jobs = -1,
                  cv = k_fold,
                  refit = True,
                  verbose = 1)

svc_model.fit(X_train, y_train)

Fitting 8 folds for each of 43032 candidates, totalling 344256 fits


In [16]:
selected = svc_model.best_estimator_.named_steps["SFM"].get_support(indices=True) 

X_train_new, X_test_new,  y_train_new, y_test_new = X_train.iloc[:, selected], X_test.iloc[:, selected],  y_train * 1, y_test * 1

### Model final

In [17]:
pipe_final_svc = Pipeline(steps=[
                 ('scale', RobustScaler()),
                 ('SVC', SVC())
                 ])

best_paramsSVC = {key: value for key, value in svc_model.best_params_.items()  if key != 'SFM__estimator__C' and key != 'SFM__estimator__penalty' and key != 'SFM__max_features'}
pipe_final_svc.set_params(**best_paramsSVC).fit(X_train_new, y_train_new)
pipe_final_svc.fit(X_train_new, y_train_new)

predictions_train = pipe_final_svc.predict(X_train_new)
predictions_test = pipe_final_svc.predict(X_test_new)

metrics_svc = {'Model_train': cohen_kappa_score(predictions_train, y_train_new), 'Model_test':cohen_kappa_score(predictions_test, y_test_new)}

In [26]:
pd.DataFrame(classification_report(y_train, predictions_train, output_dict=True)).to_latex

# print(pd.DataFrame(classification_report(y_test, predictions_test, output_dict=True)))

<bound method NDFrame.to_latex of               0     1  accuracy  macro avg  weighted avg
precision   1.0   1.0       1.0        1.0           1.0
recall      1.0   1.0       1.0        1.0           1.0
f1-score    1.0   1.0       1.0        1.0           1.0
support    45.0  45.0       1.0       90.0          90.0>

In [24]:
print(classification_report(y_train, predictions_train))

print(classification_report(y_test, predictions_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        45
           1       1.00      1.00      1.00        45

    accuracy                           1.00        90
   macro avg       1.00      1.00      1.00        90
weighted avg       1.00      1.00      1.00        90

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      1.00      1.00         5

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10



### Save the results in csv 

In [18]:
selected_keys = list(svc_model.cv_results_.keys())[list(svc_model.cv_results_.keys()).index('params'):]
df = pd.DataFrame({key: svc_model.cv_results_[key] for key in selected_keys if key in svc_model.cv_results_})
df_params = pd.json_normalize(df['params'])
df_combined = pd.concat([df.drop(columns=['params']), df_params], axis=1)
# df_combined.query('mean_test_score >= 0.6902')

data_dict = {
    'Sheet1': df_combined,
    'Sheet2': {
        'best_params': svc_model.best_params_,
        'best_score': svc_model.best_score_,
        'best_params_final': best_paramsSVC,
        'selected_columns': np.array(X_train.columns)[selected],
        'model_train': metrics_svc
    }
}

with pd.ExcelWriter('Result_SVC_CV_Final.xlsx') as writer:
    for sheet_name, data in data_dict.items():
        if isinstance(data, pd.DataFrame):
            data.to_excel(writer, sheet_name=sheet_name)
        else:
            pd.DataFrame([data]).to_excel(writer, sheet_name=sheet_name, index=False, header=False)