In [None]:
# Import packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Global Matplotlib defaults applied to every figure drawn after this cell
plt.style.use('default')  # start from the stock style before overriding individual keys
plt.rcParams.update({
    # text sizes
    'axes.titlesize': 18,
    'axes.labelsize': 16,
    'legend.fontsize': "large",
    # fonts (regular text / math text)
    'font.family': 'Arial',
    'mathtext.fontset': 'stix',
    # figure geometry: (width, height) in inches and resolution
    'figure.figsize': (8, 6),
    'figure.dpi': 100,
})

In [None]:
# Load data
# Each CSV is read with its first row as the header and its first column as the
# index; that index is then dropped so the rows are simply numbered from 0.
training = pd.read_csv("./feature_training.csv", header=0, index_col=0).reset_index(drop=True)
testing = pd.read_csv("./feature_testing.csv", header=0, index_col=0).reset_index(drop=True)


$$ \text{Fisher's ratio} = \frac{(\mu_1 - \mu_2)^2}{\sigma_1^2 + \sigma_2^2} $$

In [None]:
# Fisher Ratio
# Pool both splits so the class statistics use every available sample.
# ignore_index=True renumbers the rows, because both frames were reset to start at 0.
Feature = pd.concat([testing, training], ignore_index=True)

# Separate the pooled samples into the three surface classes.
# This is done BEFORE the ""-initialised Fisher rows are added, so the class
# subsets contain only the measured data.
Fclean = Feature[Feature.Surface == "Clean"]
Fl1 = Feature[Feature.Surface == "L1"]
Fl2 = Feature[Feature.Surface == "L2"]


def fisher_ratio(class_a, class_b):
    """Return Fisher's ratio (mu_a - mu_b)^2 / (var_a + var_b) of two samples.

    Parameters
    ----------
    class_a, class_b : pandas.Series
        Values of one feature for each of the two classes being compared.

    Returns
    -------
    float
        The squared difference of the class means divided by the sum of the
        class variances. pandas' default sample variance (ddof=1) is used.
    """
    mean_gap = class_a.mean() - class_b.mean()
    return mean_gap ** 2 / (class_a.var() + class_b.var())


# New "rows" to store Fisher's ratio (non-feature columns stay as empty strings)
Feature.loc["Fisher 1"] = ""
Feature.loc["Fisher 2"] = ""

# Go through each feature column (everything after the first two columns).
# NOTE(review): the pairing used here -- "Fisher 1" = Clean vs L1 and
# "Fisher 2" = Clean vs L2 -- is an assumption; the template does not state which
# two classes each ratio compares. Confirm and swap the arguments if needed.
for feature in Feature.columns[2:]:
    Feature.loc["Fisher 1", feature] = fisher_ratio(Fclean[feature], Fl1[feature])
    Feature.loc["Fisher 2", feature] = fisher_ratio(Fclean[feature], Fl2[feature])

# New "row" for the sum of Fisher 1 and Fisher 2
Feature.loc["Fisher mix"] = Feature.loc["Fisher 1"] + Feature.loc["Fisher 2"]
Feature


In [None]:
# Fisher plot
# Grouped bar chart: for every feature, "Fisher 1" and "Fisher 2" side by side.
barwidth = 0.4
feature_names = Feature.columns[2:]      # same columns the ratios were computed for
x1 = np.arange(len(feature_names))       # left bar of each pair (no hard-coded feature count)
x2 = x1 + barwidth                       # right bar of each pair

# The Fisher rows live in object-dtype columns, so convert to float for plotting.
fisher_1 = Feature.loc["Fisher 1", feature_names].astype(float)
fisher_2 = Feature.loc["Fisher 2", feature_names].astype(float)

plt.bar(x1, fisher_1, width=barwidth, label="Fisher 1")
plt.bar(x2, fisher_2, width=barwidth, label="Fisher 2")

# Put one tick in the middle of each bar pair and label it with the feature name
plt.xticks(x1 + barwidth / 2, feature_names)
plt.xlabel("Feature")
plt.ylabel("Fisher's ratio")
plt.title("Fisher's ratio per feature")
plt.legend()
plt.grid(axis="y", alpha=0.3)

In [None]:
# LDA & QDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
 
selected = ["11", "12"] # Include your selected features

# Feature matrices restricted to the selected columns; "Surface" is the class label
x_train = training[selected]
y_train = training["Surface"]
x_test = testing[selected]
y_test = testing["Surface"]

# Fit both discriminant classifiers on the training split
LDA = LinearDiscriminantAnalysis()
QDA = QuadraticDiscriminantAnalysis()
LDA.fit(x_train, y_train)
QDA.fit(x_train, y_train)

class_label = ["Clean", "L1", "L2"]
classifier = QDA # Change for different classifier

# confusion_matrix expects (y_true, y_pred): rows are true classes and columns are
# predictions, which is what ConfusionMatrixDisplay's axis labels assume.
# Passing labels= pins the row/column order to class_label.
cm = confusion_matrix(y_test, classifier.predict(x_test), labels=class_label)
ConfusionMatrixDisplay(cm, display_labels=class_label).plot();

In [None]:
# Go through all combinations
from itertools import  combinations
selected = ["11", "12"] # Include your selected features

# The class labels do not depend on the feature subset, so look them up once
y_train = training["Surface"]
y_test = testing["Surface"]

# One record per feature subset: m (subset size), feature names, LDA error, QDA error
records = []

for n in range(2, 6):
    # All subsets of "selected" with n elements (yields nothing if n > len(selected))
    for combo in combinations(selected, n):
        c = list(combo)
        x_train = training[c]
        x_test = testing[c]

        # LDA: misclassification rate on the testing data
        lda = LinearDiscriminantAnalysis()
        lda.fit(x_train, y_train)
        lda_e = 1 - accuracy_score(y_test, lda.predict(x_test))

        # QDA: same procedure with the quadratic classifier
        qda = QuadraticDiscriminantAnalysis()
        qda.fit(x_train, y_train)
        qda_e = 1 - accuracy_score(y_test, qda.predict(x_test))

        records.append({"m": len(c), "feature": c, "LDA": lda_e, "QDA": qda_e})

# Build the table in one go; columns= keeps the layout even when no subset was evaluated
CombinList = pd.DataFrame(records, columns=["m", "feature", "LDA", "QDA"])
CombinList

In [None]:
# m-plot
# For each subset size m, draw every distinct LDA error as a translucent blue
# disc (larger when more subsets share that error) plus a green cross at the mean.
for m in range(2, 6):
    lda_errors = CombinList.loc[CombinList.m==m, "LDA"]

    # how many subsets produced each distinct error value
    count = lda_errors.value_counts()
    for error_value, n_repeats in count.items():
        marker_area = 200+(n_repeats-1)*800
        plt.scatter(m, error_value, s=marker_area, c="blue", alpha=0.2)

    # mean error over all subsets of this size
    error_mean = lda_errors.mean()
    plt.scatter(m, error_mean, color="green", marker="x")

# Proxy artists for the legend (the scatter calls above carry no labels)
error_handle = plt.Line2D(
    [0], [0],
    color="white", marker="o", markerfacecolor="blue",
    alpha=0.2, markersize=10, label="error",
)
mean_handle = plt.Line2D(
    [0], [0],
    color="white", marker="x", markeredgecolor="green",
    markersize=5, label="mean",
)
legend_elements = [error_handle, mean_handle]

plt.grid()
plt.legend(handles=legend_elements)
plt.title("LDA - Testing Data")
plt.xlabel("m")
plt.ylabel("Misclassification rate")