# Q1

In [33]:
from matplotlib.image import imread
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import cross_val_score
import numpy as np
import pandas

In [34]:
def load_images(path):
    img = imread(path)
    images = []
    for i in range(34):
        for j in range(33):
            m = img[i * 16:(i + 1) * 16, j * 16:(j + 1) * 16]
            if not (j == 32 and i >= 12):
                images.append(np.squeeze(m.T.reshape(-1)))
    return images

In [35]:
def get_data():
    trains = pandas.DataFrame()
    tests = pandas.DataFrame()
    for i in range(0, 5):
        image_data = load_images(f"usps_{i}.jpg")
        df = pandas.DataFrame(image_data)
        train_df = pandas.DataFrame(image_data[0:int(0.5 * len(image_data))])
        train_df["label"] = i
        test_df = pandas.DataFrame(image_data[int(0.5 * len(image_data)):])
        test_df["label"] = i
        df.to_csv(path_or_buf=f"{i}.csv")
        
        trains = pandas.concat([trains.copy(), train_df])
        tests = pandas.concat([tests.copy(), test_df])
    
    return trains, tests

In [36]:
train, test = get_data()

X_train, y_train = train.iloc[:, :-1], train['label']
X_test, y_test = test.iloc[:, :-1], test['label']

# Q2

In [37]:
# Import necessary libraries
import numpy as np

# Define NaiveBayes class
class NaiveBayes:
    def __init__(self):
        self.num_classes = None
        self.prior = None
        self.mean = None
        self.var = None
    
    def fit(self, X_train, y_train):
        # Calculate the number of classes and the number of features
        self.num_classes = len(np.unique(y_train))
        num_features = X_train.shape[1]
        
        # Initialize the prior, mean, and variance arrays
        self.prior = np.zeros(self.num_classes)
        self.mean = np.zeros((self.num_classes, num_features))
        self.var = np.zeros((self.num_classes, num_features))
        
        # Calculate the prior probabilities for each class
        for i in range(self.num_classes):
            self.prior[i] = np.sum(y_train == i) / len(y_train)
        
        # Calculate the mean and variance for each feature in each class
        for i in range(self.num_classes):
            X_class = X_train[y_train == i]
            self.mean[i] = np.mean(X_class, axis=0)
            self.var[i] = np.var(X_class, axis=0)
    
    def predict(self, X_test):
        # Calculate the log-likelihood for each class
        log_likelihood = np.zeros(self.num_classes)
        for i in range(self.num_classes):
            
            log_likelihood[i] = -0.5 * np.sum(np.log(2 * np.pi * self.var[i])
                                               + ((X_test - self.mean[i]) ** 2) / self.var[i])
        
        # Calculate the log-posterior probability for each class
        log_posterior = np.zeros(self.num_classes)
        for i in range(self.num_classes):
            log_posterior[i] = np.log(self.prior[i]) + log_likelihood[i]
        
        # Return the predicted class
        return np.argmax(log_posterior)
    
    def score(self, X_test, y_test):

        y_pred = []
        
        # loop through each 1D array in the 2D array
        for i in range(len(X_test)):
            x = X_test.iloc[i]
            # call the predict function on the reshaped array
            y_pre = nb.predict(x)
            y_pred.append(y_pre)
            # print the predicted value

        t = np.array(y_test)
        p = np.array(y_pred)

        acc = np.mean(p==t) * 100
        
        return acc



In [38]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

accs = []
for i in range(10):
    for train_index, test_index in skf.split(X_test, y_test):
        X_train, X_val = X_test.iloc[train_index], X_test.iloc[test_index]
        y_train, y_val = y_test.iloc[train_index], y_test.iloc[test_index]

        # Create an instance of NaiveBayes
        nb = NaiveBayes()

        # Fit the model using your training data and labels
        nb.fit(X_train, y_train)

        # Compute the accuracy on the validation set
        acc = nb.score(X_val, y_val)
        accs.append(acc)
        
        
        
print("Accuracy: {:.2f}%".format(np.mean(accs) ))


Accuracy: 91.89%


# Q3

In [39]:
from sklearn.model_selection import StratifiedKFold
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import train_test_split

# Train the QDA classifier using maximum likelihood parameter estimation
qda = QuadraticDiscriminantAnalysis(store_covariance=True)



skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

accs = []
for i in range(10):
    for train_index, test_index in skf.split(X_test, y_test):
        X_train, X_val = X_test.iloc[train_index], X_test.iloc[test_index]
        y_train, y_val = y_test.iloc[train_index], y_test.iloc[test_index]

        # Create an instance of QDA
        # Fit the model using your training data and labels
        qda.fit(X_train, y_train)

        # Compute the accuracy on the validation set
        acc = qda.score(X_val, y_val)
        accs.append(acc)
        
print("Accuracy: {:.2f}%".format(np.mean(accs) ))


Accuracy: 0.91%


# Q4

In [40]:
from sklearn.model_selection import StratifiedKFold
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

lda = LinearDiscriminantAnalysis()
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

accs = []
for i in range(10):
    for train_index, test_index in skf.split(X_test, y_test):
        X_train, X_val = X_test.iloc[train_index], X_test.iloc[test_index]
        y_train, y_val = y_test.iloc[train_index], y_test.iloc[test_index]

        # Fit the model using your training data and labels
        lda.fit(X_train, y_train)

        # Compute the accuracy on the validation set
        acc = lda.score(X_val, y_val)
        accs.append(acc)
        
print("Accuracy: {:.2f}%".format(np.mean(accs) ))


Accuracy: 0.94%


# Q5

In [41]:
from sklearn.model_selection import StratifiedKFold
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier()
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

accs = []
for i in range(10):
    for train_index, test_index in skf.split(X_test, y_test):
        X_train, X_val = X_test.iloc[train_index], X_test.iloc[test_index]
        y_train, y_val = y_test.iloc[train_index], y_test.iloc[test_index]

        # Fit the model using your training data and labels
        dt.fit(X_train, y_train)

        # Compute the accuracy on the validation set
        acc = dt.score(X_val, y_val)
        accs.append(acc)

print("Accuracy: {:.2f}%".format(np.mean(accs)))


Accuracy: 0.89%


# Q6

In [42]:
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC

svm = SVC()
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

accs = []
for i in range(10):
    for train_index, test_index in skf.split(X_test, y_test):
        X_train, X_val = X_test.iloc[train_index], X_test.iloc[test_index]
        y_train, y_val = y_test.iloc[train_index], y_test.iloc[test_index]

        # Fit the model using your training data and labels
        svm.fit(X_train, y_train)

        # Compute the accuracy on the validation set
        acc = svm.score(X_val, y_val)
        accs.append(acc)

print("Accuracy: {:.2f}%".format(np.mean(accs)))


Accuracy: 0.99%


# Q7

# a

In [43]:
from sklearn.model_selection import StratifiedKFold
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# استفاده از Stratified K-Fold برای ارزیابی دقت
lda = LinearDiscriminantAnalysis()
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

accs = []
for i in range(10):
    for train_index, test_index in skf.split(X_test, y_test):

        X_train, X_val = X_test.iloc[train_index], X_test.iloc[test_index]
        y_train, y_val = y_test.iloc[train_index], y_test.iloc[test_index]

        # ایجاد یک نمونه از LDA
        lda = LinearDiscriminantAnalysis(n_components=2)

        # ساخت LDA object و اعمال آن بر روی داده ها
        X_lda_train = lda.fit_transform(X_train, y_train)
        X_lda_val = lda.transform(X_val)

        # Fit the model using your training data and labels
        lda.fit(X_train, y_train)

        # Compute the accuracy on the validation set
        acc = lda.score(X_val, y_val)
        accs.append(acc)

print("Accuracy: {:.2f}%".format(np.mean(accs) ))


Accuracy: 0.94%


# b

In [44]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.decomposition import PCA



# استفاده از Stratified K-Fold برای ارزیابی دقت
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

accs = []
for i in range(10):
    for train_index, test_index in skf.split(X_test, y_test):

        X_train, X_val = X_test.iloc[train_index], X_test.iloc[test_index]
        y_train, y_val = y_test.iloc[train_index], y_test.iloc[test_index]

        # ایجاد یک نمونه از PCA با تعداد ابعاد دلخواه
        pca = PCA(n_components=2)

        # کاهش بعد داده های آموزشی و تست
        X_pca_train = pca.fit_transform(X_train)
        X_pca_val = pca.transform(X_val)

        # فیت مدل LDA
        lda = LinearDiscriminantAnalysis()
        lda.fit(X_pca_train, y_train)

        # Compute the accuracy on the validation set
        acc = lda.score(X_pca_val, y_val)
        accs.append(acc)

print("Accuracy: {:.2f}%".format(np.mean(accs) ))


Accuracy: 0.64%


# Q8