In [1]:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import matplotlib.pyplot as plt
import warnings
from feature_engineering import *
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
# assume X_train and y_train are the training data and labels respectively
df = pd.read_csv("../data/dataset1.csv")
df.head(-1)
df_imputed = mean_imputation(df)
X_train, X_test, y_train, y_test = train_test_split(df_imputed, shuffle = False)

In [3]:
# build the FLDM model
fldm = LinearDiscriminantAnalysis(n_components=1)
fldm.fit(X_train, y_train)

# project the training data onto the 1-dimensional FLDM space
X_train_lda = fldm.transform(X_train)

In [4]:
# find the decision boundary in the 1-dimensional FLDM space
mean_pos = np.mean(X_train_lda[y_train == 1])
mean_neg = np.mean(X_train_lda[y_train == -1])
std_pos = np.std(X_train_lda[y_train == 1])
std_neg = np.std(X_train_lda[y_train == -1])

threshold = (mean_pos + mean_neg) / 2

# assume X_test and y_test are the testing data and labels respectively

In [5]:
# project the testing data onto the 1-dimensional FLDM space
X_test_lda = fldm.transform(X_test)

# evaluate the performance of the model on the testing data
y_pred = np.where(X_test_lda > threshold, 1, -1)
evaluate(y_test, y_pred)

Confusion Matrix: {'true_positive': 2, 'true_negative': 3, 'false_positive': 142, 'false_negative': 41}
Accuracy: 2.6595744680851063%
Precision: 1.3888888888888888%
Recall: 4.651162790697675%


## Task 2

In [9]:
X_train.shape

(381, 31)

In [16]:
# randomly shuffle the order of features in the training data
np.random.seed(42)
n_features = X_train.shape[1]
feature_order = np.random.permutation(n_features)
X_train_shuffled = X_train[X_train.columns[feature_order]]

In [17]:
# build the FLDM model
fldm = LinearDiscriminantAnalysis(n_components=1)
fldm.fit(X_train_shuffled, y_train)

# project the training data onto the 1-dimensional FLDM space
X_train_lda = fldm.transform(X_train_shuffled)

In [19]:
# find the decision boundary in the 1-dimensional FLDM space
mean_pos = np.mean(X_train_lda[y_train == 1])
mean_neg = np.mean(X_train_lda[y_train == -1])
std_pos = np.std(X_train_lda[y_train == 1])
std_neg = np.std(X_train_lda[y_train == -1])

threshold = (mean_pos + mean_neg) / 2

# shuffle the order of features in the testing data
X_test_shuffled = X_test[X_test.columns[feature_order]]

# project the testing data onto the 1-dimensional FLDM space
X_test_lda = fldm.transform(X_test_shuffled)

# evaluate the performance of the model on the testing data
y_pred = np.where(X_test_lda > threshold, 1, -1)
evaluate(y_test, y_pred)

Confusion Matrix: {'true_positive': 41, 'true_negative': 142, 'false_positive': 3, 'false_negative': 2}
Accuracy: 97.34042553191489%
Precision: 93.18181818181819%
Recall: 95.34883720930233%
