In [None]:
import os
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from scipy.optimize import linprog

import warnings
warnings.filterwarnings('ignore')
from IPython.display import display, Markdown

# Directory that holds the preprocessed dataset files read by the loading cell.
# The path is relative, so it is resolved against the kernel's working directory.
data_directory='../datasets/preprocessed'

# Reference on methods for testing linear separability in Python:
# https://www.tarekatwan.com/index.php/2017/12/methods-for-testing-linear-separability-in-python/

In [None]:
# Data loading
def _read_frames(directory):
    """Read every regular, non-hidden file in ``directory`` as a CSV.

    Parameters
    ----------
    directory : str
        Folder to scan (not recursive).

    Returns
    -------
    dict[str, pandas.DataFrame]
        Maps the part of each file name before its first ``.`` to the
        DataFrame parsed from that file.
    """
    loaded = {}
    for filename in os.listdir(directory):
        path = os.path.join(directory, filename)
        # os.listdir also returns sub-directories and hidden entries
        # (e.g. `.ipynb_checkpoints`, `.DS_Store`); pd.read_csv cannot parse
        # those, so they are skipped instead of crashing the whole cell.
        if filename.startswith('.') or not os.path.isfile(path):
            continue
        loaded[filename.split('.')[0]] = pd.read_csv(path)
    return loaded


def _assemble_dataset(frames, name):
    """Build the train / test / combined views of one dataset.

    Parameters
    ----------
    frames : dict[str, pandas.DataFrame]
        Must contain the keys ``{name}_train_x``, ``{name}_train_y``,
        ``{name}_test_x`` and ``{name}_test_y``.
    name : str
        Dataset prefix, e.g. ``'adult'``.

    Returns
    -------
    tuple
        ``(train, test, full, X, y)`` where ``train`` / ``test`` are the
        features and target joined column-wise, ``full`` is ``train`` stacked
        on top of ``test``, ``X`` is ``full`` without the ``target`` column
        and ``y`` is a one-column DataFrame holding ``target``.

    Raises
    ------
    KeyError
        If one of the four expected frames is missing, or if the joined
        frame has no ``target`` column.
    """
    train = pd.concat([frames[f'{name}_train_x'], frames[f'{name}_train_y']], axis=1)
    test = pd.concat([frames[f'{name}_test_x'], frames[f'{name}_test_y']], axis=1)
    # Row indices are kept as loaded (no ignore_index), so `full` carries the
    # train and test indices one after the other.
    full = pd.concat([train, test], axis=0)
    return train, test, full, full.drop(columns=['target']), full[['target']]


frames = _read_frames(data_directory)

adult_train, adult_test, adult, adult_X, adult_y = _assemble_dataset(frames, 'adult')
credit_train, credit_test, credit, credit_X, credit_y = _assemble_dataset(frames, 'credit')
sick_train, sick_test, sick, sick_X, sick_y = _assemble_dataset(frames, 'sick')
titanic_train, titanic_test, titanic, titanic_X, titanic_y = _assemble_dataset(frames, 'titanic')

In [None]:
# Linear separability check
def _plot_confusion_matrix(cm):
    """Draw a 2x2 confusion matrix with TN/FP/FN/TP annotations.

    Parameters
    ----------
    cm : array-like
        Confusion matrix indexed as ``cm[true][predicted]``; only the
        top-left 2x2 cells are annotated.
    """
    class_names = ['Negative', 'Positive']
    cell_tags = [['TN', 'FP'], ['FN', 'TP']]

    plt.clf()
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)
    plt.title('Perceptron Confusion Matrix')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)

    for row in range(2):
        for col in range(2):
            # Small left shift so the annotation sits roughly centred in the cell.
            plt.text(col - 0.15, row, f"{cell_tags[row][col]} = {cm[row][col]}")
    plt.show()


def _is_linearly_separable(features, is_positive):
    """Check with a feasibility LP whether a hyperplane separates two groups.

    Looks for weights ``w`` and bias ``b`` such that
    ``t_i * (w . x_i + b) >= 1`` for every row, where ``t_i`` is +1 for rows
    flagged in ``is_positive`` and -1 otherwise. Such ``(w, b)`` exists
    exactly when the two groups are strictly linearly separable.

    Parameters
    ----------
    features : numpy.ndarray
        2-D array, one row per sample.
    is_positive : numpy.ndarray
        1-D boolean array with one entry per row of ``features``.

    Returns
    -------
    bool
        ``True`` when the solver reports success, i.e. the constraints are
        feasible.
    """
    signs = np.where(is_positive, 1.0, -1.0)
    # Extra column of ones so the bias is just one more LP variable.
    augmented = np.hstack([features, np.ones((features.shape[0], 1))])

    # linprog takes constraints as A_ub @ z <= b_ub, so t*(w.x + b) >= 1
    # is written as -t*(w.x + b) <= -1.
    A_ub = -signs[:, None] * augmented
    b_ub = -np.ones(A_ub.shape[0])

    # Zero objective: only feasibility matters. A non-zero objective over
    # unbounded variables could make the LP unbounded, and then `success`
    # would be False even though a separating hyperplane exists.
    c_obj = np.zeros(A_ub.shape[1])

    # bounds must be freed explicitly: linprog defaults every variable to
    # (0, None), which would restrict the search to non-negative weights/bias.
    res = linprog(c=c_obj, A_ub=A_ub, b_ub=b_ub, bounds=(None, None),
                  options={"disp": False})
    return res.success


datasets=[(credit_X, credit_y), (sick_X, sick_y), (titanic_X, titanic_y), (adult_X, adult_y)]
datasets_names=["Credit", "Sick", "Titanic", "Adult"]

for dataset_name, (x, y) in zip(datasets_names, datasets):
    display(Markdown(f'### {dataset_name} dataset'))

    # 1-D label vector: scikit-learn estimators expect shape (n_samples,),
    # not the (n_samples, 1) DataFrame stored in `y`.
    labels = y['target'].to_numpy()

    # Perform feature scaling
    x_scaled = StandardScaler().fit_transform(x)

    # Perceptron: fitted and evaluated on the same data, so a confusion
    # matrix without errors means the perceptron found a separating hyperplane.
    perceptron = Perceptron(random_state=0)
    perceptron.fit(x_scaled, labels)
    predicted = perceptron.predict(x_scaled)

    cm = confusion_matrix(labels, predicted)
    _plot_confusion_matrix(cm)

    # Linear-programming check, one class against the rest, using the same
    # full scaled feature matrix the perceptron was trained on.
    for i in [0, 1]:
        if _is_linearly_separable(x_scaled, labels == i):
            print('There is linear separability between {} and the other class'.format(i))
        else:
            print('No linear separability between {} and the other class'.format(i))