In [2]:
import os
import csv
import cv2 as cv
import numpy as np
from sklearn.preprocessing import StandardScaler

img_size = 300
classes = ['Alzheimer', 'COVID', 'Brazilian_seeds', 'Brazilian_leaves', 'skin_cancer']
data_dir = './image_dataset/'
case1_csv_filename = 'case1_statistics.csv'
case2_csv_filename = 'case2_statistics.csv'
reg_param = 1e-5

def load_images_from_folder(folder_path):
    images = []
    for img_file in os.listdir(folder_path):
        img = cv.imread(os.path.join(folder_path, img_file), cv.IMREAD_GRAYSCALE)
        img_resized = cv.resize(img, (img_size, int(img.shape[0] * img_size / img.shape[1])), cv.INTER_AREA)
        images.append(img_resized)
    return images

def calculate_statistics(image, bins=8):
    hist, bin_edges = np.histogram(image.flatten(), bins=np.arange(0, 257, 256 // bins))
    hist = hist / np.sum(hist)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
    var_exp = np.sum(hist * bin_centers)
    var_median = bin_centers[np.searchsorted(np.cumsum(hist), 0.5)]
    var_mode = bin_centers[np.argmax(hist)]
    var_variance = np.sum(((bin_centers - var_exp) ** 2) * hist)
    var_skewness = np.sum(((bin_centers - var_exp) ** 3) * hist)
    var_kurtosis = np.sum(((bin_centers - var_exp) ** 4) * hist)
    var_entropy = -np.sum(hist * np.log2(hist + 1e-27))
    return hist, var_exp, var_median, var_mode, var_variance, var_skewness, var_kurtosis, var_entropy

def save_statistics_to_csv(images, class_label):
    with open(case1_csv_filename, 'a', newline='') as f1, open(case2_csv_filename, 'a', newline='') as f2:
        writer1, writer2 = csv.writer(f1), csv.writer(f2)
        for image in images:
            stats = calculate_statistics(image)
            writer1.writerow(list(stats[0]) + [class_label])
            writer2.writerow(stats[1:] + (class_label,))

def load_data_from_csv(filename):
    data, labels = [], []
    with open(filename, 'r') as file:
        reader = csv.reader(file)
        next(reader)  # Skip the header
        for row in reader:
            if row:
                data.append([float(x) for x in row[:-1]])
                labels.append(int(row[-1]))
    return np.array(data), np.array(labels)

def train_test_split(data, labels, test_size=0.1):
    indices = np.random.permutation(len(data))
    split = int(len(data) * (1 - test_size))
    train_idx, test_idx = indices[:split], indices[split:]
    return data[train_idx], data[test_idx], labels[train_idx], labels[test_idx]

def confusion_matrix(y_true, y_pred, num_classes):
    cm = np.zeros((num_classes, num_classes), dtype=int)
    for i in range(len(y_true)):
        cm[y_true[i]][y_pred[i]] += 1
    return cm

def accuracy_per_class(cm):
    return np.diag(cm) / np.sum(cm, axis=1)

def overall_accuracy(cm):
    return np.sum(np.diag(cm)) / np.sum(cm)

def evaluate_classifier(classifier, X_train, y_train, X_test, y_test, classifier_name):
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    cm = confusion_matrix(y_test, y_pred, len(classes))
    print(f"{classifier_name} Confusion Matrix:\n", cm)
    print(f"{classifier_name} Accuracy per Class:\n", accuracy_per_class(cm))
    print(f"{classifier_name} Accuracy:", overall_accuracy(cm))
    print("\n----------------------------------------\n")

class NaiveBayesClassifier:
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mean = {cls: X[y == cls].mean(axis=0) for cls in self.classes}
        self.var = {cls: X[y == cls].var(axis=0) + reg_param for cls in self.classes}
        self.priors = {cls: len(X[y == cls]) / len(X) for cls in self.classes}

    def predict(self, X):
        return np.array([self._predict(x) for x in X])

    def _predict(self, x):
        posteriors = [np.log(self.priors[cls]) + np.sum(np.log(self._pdf(cls, x) + 1e-10)) for cls in self.classes]
        return self.classes[np.argmax(posteriors)]

    def _pdf(self, cls, x):
        mean, var = self.mean[cls], self.var[cls]
        return np.exp(- (x - mean) ** 2 / (2 * var)) / np.sqrt(2 * np.pi * var)

class QuadraticDiscriminantAnalysis:
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mean = {cls: X[y == cls].mean(axis=0) for cls in self.classes}
        self.cov = {cls: np.cov(X[y == cls].T) + np.eye(X.shape[1]) * reg_param for cls in self.classes}
        self.priors = {cls: len(X[y == cls]) / len(X) for cls in self.classes}

    def predict(self, X):
        return np.array([self._predict(x) for x in X])

    def _predict(self, x):
        discriminants = [self._quadratic_discriminant(cls, x) for cls in self.classes]
        return self.classes[np.argmax(discriminants)]

    def _quadratic_discriminant(self, cls, x):
        mean, cov = self.mean[cls], self.cov[cls]
        inv_cov = np.linalg.inv(cov)
        W = -0.5 * inv_cov
        w = inv_cov @ mean
        w0 = -0.5 * (mean @ inv_cov @ mean.T) - 0.5 * np.log(np.linalg.det(cov)) + np.log(self.priors[cls])
        return x @ W @ x.T + w @ x + w0

class LinearDiscriminantAnalysis:
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mean = {cls: X[y == cls].mean(axis=0) for cls in self.classes}
        self.cov = np.cov(X.T) + np.eye(X.shape[1]) * reg_param
        self.inv_cov = np.linalg.inv(self.cov)
        self.priors = {cls: len(X[y == cls]) / len(X) for cls in self.classes}

    def predict(self, X):
        return np.array([self._predict(x) for x in X])

    def _predict(self, x):
        discriminants = [self._linear_discriminant(cls, x) for cls in self.classes]
        return self.classes[np.argmax(discriminants)]

    def _linear_discriminant(self, cls, x):
        mean = self.mean[cls]
        w = self.inv_cov @ mean
        w0 = -0.5 * (mean @ self.inv_cov @ mean.T) + np.log(self.priors[cls])
        return w @ x + w0

# Initialize CSV files
with open(case1_csv_filename, 'w', newline='') as f1, open(case2_csv_filename, 'w', newline='') as f2:
    writer1, writer2 = csv.writer(f1), csv.writer(f2)
    writer1.writerow([f'h[{i}]' for i in range(8)] + ['class'])
    writer2.writerow(['expectancy', 'mode', 'median', 'variance', 'skewness', 'kurtosis', 'entropy', 'class'])

# Process images and save statistics
for class_id, class_name in enumerate(classes):
    folder_path = os.path.join(data_dir, class_name)
    images = load_images_from_folder(folder_path)
    save_statistics_to_csv(images, class_id)

# Evaluate CASE 1
data_case1, labels_case1 = load_data_from_csv(case1_csv_filename)
X_train_case1, X_test_case1, y_train_case1, y_test_case1 = train_test_split(data_case1, labels_case1)
scaler = StandardScaler()
X_train_case1 = scaler.fit_transform(X_train_case1)
X_test_case1 = scaler.transform(X_test_case1)
print("\n--------------------CASE1--------------------\n")
evaluate_classifier(NaiveBayesClassifier(), X_train_case1, y_train_case1, X_test_case1, y_test_case1, "Bayes")
evaluate_classifier(QuadraticDiscriminantAnalysis(), X_train_case1, y_train_case1, X_test_case1, y_test_case1, "QDA")
evaluate_classifier(LinearDiscriminantAnalysis(), X_train_case1, y_train_case1, X_test_case1, y_test_case1, "LDA")

# Evaluate CASE 2
data_case2, labels_case2 = load_data_from_csv(case2_csv_filename)
X_train_case2, X_test_case2, y_train_case2, y_test_case2 = train_test_split(data_case2, labels_case2)
X_train_case2 = scaler.fit_transform(X_train_case2)
X_test_case2 = scaler.transform(X_test_case2)
print("\n--------------------CASE2--------------------\n")
evaluate_classifier(NaiveBayesClassifier(), X_train_case2, y_train_case2, X_test_case2, y_test_case2, "Bayes")
evaluate_classifier(QuadraticDiscriminantAnalysis(), X_train_case2, y_train_case2, X_test_case2, y_test_case2, "QDA")
evaluate_classifier(LinearDiscriminantAnalysis(), X_train_case2, y_train_case2, X_test_case2, y_test_case2, "LDA")



--------------------CASE1--------------------

Bayes Confusion Matrix:
 [[10  0  0  0  0]
 [ 0  5  0  1  2]
 [ 0  0  1  0  0]
 [ 0  1  0  4  0]
 [ 0  0  0  0  2]]
Bayes Accuracy per Class:
 [1.    0.625 1.    0.8   1.   ]
Bayes Accuracy: 0.8461538461538461

----------------------------------------

QDA Confusion Matrix:
 [[10  0  0  0  0]
 [ 0  5  0  1  2]
 [ 0  0  1  0  0]
 [ 0  1  0  4  0]
 [ 0  0  0  0  2]]
QDA Accuracy per Class:
 [1.    0.625 1.    0.8   1.   ]
QDA Accuracy: 0.8461538461538461

----------------------------------------

LDA Confusion Matrix:
 [[10  0  0  0  0]
 [ 0  7  0  0  1]
 [ 0  0  1  0  0]
 [ 0  0  0  5  0]
 [ 0  0  0  0  2]]
LDA Accuracy per Class:
 [1.    0.875 1.    1.    1.   ]
LDA Accuracy: 0.9615384615384616

----------------------------------------


--------------------CASE2--------------------

Bayes Confusion Matrix:
 [[7 0 0 0 0]
 [0 3 0 2 4]
 [0 0 4 0 0]
 [0 0 0 3 0]
 [0 0 0 0 3]]
Bayes Accuracy per Class:
 [1.         0.33333333 1.         1.   

  posteriors = [np.log(self.priors[cls]) + np.sum(np.log(self._pdf(cls, x))) for cls in self.classes]
  posteriors = [np.log(self.priors[cls]) + np.sum(np.log(self._pdf(cls, x))) for cls in self.classes]
