<a href="https://colab.research.google.com/github/VivekanandaMudelli/hand_drawn_sketch_recognition/blob/main/MultivariateGaussian.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


we are using  scipy.stats  to import multivariate_normal to calculate the likelihood probabilities (which is gaussian distribution have mean : mean_l,covariances : cov_l, for each  label)

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from scipy.stats import multivariate_normal # Importing multivariate_normal0

In [4]:
class MultivariateGaussianClassifier:
    def __init__(self):
        self.priors_prob = {}  # creating a  empty dictionary for storing the probabilities of labels.
        self.means = {}        # creating a  empty dictionary for storing the mean of the same type labels.
        self.covariances = {}  #  creating a  empty dictionary for storing the covariances for same type of labels.
        #self.log_dets = {}  # Store log determinants of covariance matrices

    def fit(self, X, y):
        n_samp,n_fea = X.shape  # variables for storing  length samples and features
        self.labels = np.unique(y)  # storing different labels
        n_labels = len(self.labels) # total no.of different type of labels

        for label in self.labels:
            X_l = X[y == label]
            mean_l = np.mean(X_l, axis=0)
            #(np.eye(X_l.shape[1]) * 1e-6) it adds a small constant to the diagonal of the covariance matrix to
            #prevent it from being singular (non-invertible).
            cov_l = np.cov(X_l.T) + np.eye(X_l.shape[1]) * 1e-6  # Regularization :

            # Precompute log determinant
            #log_det_l = np.log(np.linalg.det(cov_l))

            self.means[label] = mean_l #storing the mean of the each labels.
            self.covariances[label] = cov_l #storing the covariances for each labels
            #self.log_dets[label] = log_det_l
            self.priors_prob[label] = X_l.shape[0] / n_samp # Fixed: probabilities (prior) for each label

    def predict(self, X):

        #calculating log of likelihood probabilities by using multivariate_normal (taking total mean and covariances matrix and )
        #using pdf(gaussian distribution ) returning all probabilities for each labels
        log_likelihood = [multivariate_normal.logpdf(X, mean=self.means[c], cov=self.covariances[c]) for c in self.labels]
        log_post_probs = [self.priors_prob[l] + log_likelihood[l]  for l in self.labels]  # Discriminant function

        return self.labels[np.argmax(log_post_probs, axis=0)]  # Vectorized label selection # Fixed: Indentation and axis is changed to 0

In [7]:
df = pd.read_csv('/content/drive/MyDrive/prml/pca_hog_data.csv')
X = df.drop('label', axis=1).values
y = df['label'].values

In [8]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Train classifier
classifier = MultivariateGaussianClassifier()
classifier.fit(X_train, y_train)

# Predict and evaluate
y_pred = classifier.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")

Accuracy: 38.31%


In [9]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train classifier
classifier = MultivariateGaussianClassifier()
classifier.fit(X_train, y_train)

# Predict and evaluate
y_pred = classifier.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")

Accuracy: 37.19%
