In [53]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [54]:
class _PairwiseLDA:
    """Two-class linear discriminant shared by the pairwise classifiers below.

    Subclasses only choose which two labels are separated by setting
    ``positive_label`` (predicted when the score is >= 0) and
    ``negative_label`` (predicted otherwise).

    With class means ``m_pos`` / ``m_neg`` and within-class scatter
    ``S_w = class_weight_ratio * S_pos + S_neg`` the model is::

        W = S_w^-1 (m_pos - m_neg)
        B = -0.5 * m_pos^T S_w^-1 m_pos + 0.5 * m_neg^T S_w^-1 m_neg
        score(x) = x . W + B
    """

    positive_label = None
    negative_label = None

    def __init__(self, class_weight_ratio=1.0):
        """Create an unfitted model.

        Args:
            class_weight_ratio: Factor applied to the positive class's
                scatter matrix before it is added to the negative class's.
        """
        self.W = None
        self.B = None
        self.class_weight_ratio = class_weight_ratio

    @staticmethod
    def _scatter(samples, mean):
        """Return the scatter matrix sum((x - mean)(x - mean)^T) of ``samples``."""
        centered = samples - mean
        return np.dot(centered.T, centered)

    def fit(self, X, y):
        """Estimate ``W`` and ``B`` from the samples of the two target classes.

        Rows of ``X`` whose label is neither of the two target labels are ignored.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of labels, one per row of ``X``.

        Raises:
            ValueError: If ``X`` is not 2-D or one of the two classes has no
                samples (the means would otherwise silently become NaN).
            numpy.linalg.LinAlgError: If the within-class scatter is singular.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")

        positive = X[y == self.positive_label]
        negative = X[y == self.negative_label]
        if len(positive) == 0 or len(negative) == 0:
            raise ValueError(
                "Training data must contain samples of both class "
                f"{self.positive_label} and class {self.negative_label}."
            )

        # Class mean vectors.
        mean_positive = np.mean(positive, axis=0)
        mean_negative = np.mean(negative, axis=0)

        # Within-class scatter; only the positive class's scatter is re-weighted.
        within_class_scatter = (
            self._scatter(positive, mean_positive) * self.class_weight_ratio
            + self._scatter(negative, mean_negative)
        )

        # Invert once and reuse for both the weights and the bias.
        scatter_inv = np.linalg.inv(within_class_scatter)

        self.W = np.dot(scatter_inv, mean_positive - mean_negative)
        self.B = (
            -0.5 * np.dot(mean_positive, np.dot(scatter_inv, mean_positive))
            + 0.5 * np.dot(mean_negative, np.dot(scatter_inv, mean_negative))
        )

    def predict(self, X):
        """Return the predicted label for every row of ``X``.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if self.W is None or self.B is None:
            raise ValueError("The model has not been trained yet.")
        scores = np.dot(X, self.W) + self.B
        return np.where(scores >= 0, self.positive_label, self.negative_label)

    def get_weights(self):
        """Return the fitted weight vector ``W`` (``None`` before ``fit``)."""
        return self.W

    def get_bias(self):
        """Return the fitted bias ``B`` (``None`` before ``fit``)."""
        return self.B


class CustomLDA12(_PairwiseLDA):
    """Linear discriminant separating class 1 (score >= 0) from class 2."""

    positive_label = 1
    negative_label = 2


class CustomLDA13(_PairwiseLDA):
    """Linear discriminant separating class 1 (score >= 0) from class 3."""

    positive_label = 1
    negative_label = 3


class CustomLDA23(_PairwiseLDA):
    """Linear discriminant separating class 2 (score >= 0) from class 3."""

    positive_label = 2
    negative_label = 3


In [55]:
# Load the pairwise splits. The files are whitespace-separated; sep=r'\s+'
# is the documented replacement for the deprecated delim_whitespace=True.
# The "tail" files are used for fitting below; the "head" files are held out.
data_head_12 = pd.read_csv('iris_head_12.txt', sep=r'\s+')
data_tail_12 = pd.read_csv('iris_tail_12.txt', sep=r'\s+')
data_head_23 = pd.read_csv('iris_head_23.txt', sep=r'\s+')
data_tail_23 = pd.read_csv('iris_tail_23.txt', sep=r'\s+')
data_head_13 = pd.read_csv('iris_head_13.txt', sep=r'\s+')
data_tail_13 = pd.read_csv('iris_tail_13.txt', sep=r'\s+')

# The label column and the two sepal measurements are excluded from the features.
columns_to_drop = ["species", "Sepal_length", "Sepal_width"]


def _split_features_labels(frame, drop=tuple(columns_to_drop)):
    """Return ``(features, labels)`` arrays for one loaded split.

    ``features`` holds every column of ``frame`` except those in ``drop``;
    ``labels`` is the ``species`` column.
    """
    return frame.drop(columns=list(drop)).values, frame['species'].values


x_head_12, y_head_12 = _split_features_labels(data_head_12)
x_tail_12, y_tail_12 = _split_features_labels(data_tail_12)
x_head_13, y_head_13 = _split_features_labels(data_head_13)
x_tail_13, y_tail_13 = _split_features_labels(data_tail_13)
x_head_23, y_head_23 = _split_features_labels(data_head_23)
x_tail_23, y_tail_23 = _split_features_labels(data_tail_23)

# Fit one LDA classifier per class pair on its "tail" split.
lda_classifier_12 = CustomLDA12(class_weight_ratio=1)
lda_classifier_12.fit(x_tail_12, y_tail_12)
lda_classifier_13 = CustomLDA13(class_weight_ratio=1)
lda_classifier_13.fit(x_tail_13, y_tail_13)
lda_classifier_23 = CustomLDA23(class_weight_ratio=1)
lda_classifier_23.fit(x_tail_23, y_tail_23)


In [56]:
# Classification rate of each pairwise classifier on its held-out "head" split.
predictions_12 = lda_classifier_12.predict(x_head_12)
predictions_13 = lda_classifier_13.predict(x_head_13)
predictions_23 = lda_classifier_23.predict(x_head_23)

# Share of samples whose predicted label equals the true label, in percent.
classification_rate_12 = 100 * np.mean(y_head_12 == predictions_12)
classification_rate_13 = 100 * np.mean(y_head_13 == predictions_13)
classification_rate_23 = 100 * np.mean(y_head_23 == predictions_23)

print("Classification Rate for Class 1 vs. Class 2:", classification_rate_12, "%")
print("Classification Rate for Class 1 vs. Class 3:", classification_rate_13, "%")
print("Classification Rate for Class 2 vs. Class 3:", classification_rate_23, "%")


Classification Rate for Class 1 vs. Class 2: 100.0 %
Classification Rate for Class 1 vs. Class 3: 100.0 %
Classification Rate for Class 2 vs. Class 3: 94.0 %


In [57]:
# Test data (whitespace-separated; sep=r'\s+' replaces the deprecated
# delim_whitespace=True).
data_head_3 = pd.read_csv('iris_head3.txt', sep=r'\s+')

y_head_3 = data_head_3['species'].values

columns_to_drop = ["species", "Sepal_length", "Sepal_width"]
x_head_3 = data_head_3.drop(columns=columns_to_drop).values

# Run every sample through all three pairwise classifiers.
predictions_12 = lda_classifier_12.predict(x_head_3)
predictions_13 = lda_classifier_13.predict(x_head_3)
predictions_23 = lda_classifier_23.predict(x_head_3)

# Label recorded when all three classifiers disagree. It matches no real
# class, so such samples always count as errors in the rate below.
DISAGREEMENT_LABEL = 4

final_predictions = []

# Majority vote. Iterate over the predictions themselves: the previous
# range(len(x_tail_3)) referred to a name this notebook never defines and
# raised NameError on a fresh kernel.
for votes in zip(predictions_12, predictions_13, predictions_23):
    votes = list(votes)

    if len(set(votes)) > 2:
        final_predictions.append(DISAGREEMENT_LABEL)
    else:
        final_predictions.append(max(set(votes), key=votes.count))

# Classification rate; convert the list so the comparison is element-wise.
classification_rate_final = np.mean(np.asarray(final_predictions) == y_head_3) * 100

print("Final Classification Rate (Including Disagreements as Errors):", classification_rate_final, "%")


Final Classification Rate (Including Disagreements as Errors): 96.0 %


In [58]:
# Display the voted label for every test sample; a value of 4 marks a sample
# on which all three pairwise classifiers disagreed.
final_predictions

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3]