<a href="https://colab.research.google.com/github/Yash-11want/Find-your_destination/blob/main/multivariategaussian_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from scipy.stats import multivariate_normal # Importing multivariate_normal0
from sklearn.decomposition import PCA
import joblib

In [12]:
class MultivariateGaussianClassifier:
    def __init__(self):
        self.priors_prob = {}  # creating a  empty dictionary for storing the probabilities of labels.
        self.means = {}        # creating a  empty dictionary for storing the mean of the same type labels.
        self.covariances = {}  #  creating a  empty dictionary for storing the covariances for same type of labels.
        #self.log_dets = {}  # Store log determinants of covariance matrices

    def fit(self, X, y):
        n_samp,n_fea = X.shape  # variables for storing  length samples and features
        self.labels = np.unique(y)  # storing different labels
        n_labels = len(self.labels) # total no.of different type of labels

        for label in self.labels:
            X_l = X[y == label]
            mean_l = np.mean(X_l, axis=0)
            #(np.eye(X_l.shape[1]) * 1e-6) it adds a small constant to the diagonal of the covariance matrix to
            #prevent it from being singular (non-invertible).
            cov_l = np.cov(X_l.T) + np.eye(X_l.shape[1]) * 1e-6  # Regularization :

            # Precompute log determinant
            #log_det_l = np.log(np.linalg.det(cov_l))

            self.means[label] = mean_l #storing the mean of the each labels.
            self.covariances[label] = cov_l #storing the covariances for each labels
            #self.log_dets[label] = log_det_l
            self.priors_prob[label] = X_l.shape[0] / n_samp # Fixed: probabilities (prior) for each label

    def predict(self, X):

        #calculating log of likelihood probabilities by using multivariate_normal (taking total mean and covariances matrix and )
        #using pdf(gaussian distribution ) returning all probabilities for each labels
        log_likelihood = [multivariate_normal.logpdf(X, mean=self.means[c], cov=self.covariances[c]) for c in self.labels]
        log_post_probs = [self.priors_prob[l] + log_likelihood[l]  for l in self.labels]  # Discriminant function

        return self.labels[np.argmax(log_post_probs, axis=0)]  # Vectorized label selection # Fixed: Indentation and axis is changed to 0

In [None]:
df = pd.read_csv('/content/drive/MyDrive/prml/cnn_features_train.csv')
df

Unnamed: 0.1,Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_2040,feature_2041,feature_2042,feature_2043,feature_2044,feature_2045,feature_2046,feature_2047,extracted_part,encoded_part
0,0,0.096372,0.290125,0.023769,0.020769,0.128336,0.088538,0.351039,0.036722,0.311688,...,0.015574,0.120894,0.689553,0.089658,0.014231,0.157314,0.053497,0.050435,violin,241
1,1,0.697527,0.069318,0.256686,0.117609,0.198656,0.568475,0.901217,0.272297,0.680210,...,0.023181,0.407349,1.032929,0.018647,0.344770,0.277606,0.556503,0.006328,violin,241
2,2,0.023048,0.386680,0.000749,0.000000,0.425920,0.129222,0.672938,0.055085,0.085282,...,0.018148,0.008317,0.678952,0.015125,0.041926,0.114108,0.002703,0.059334,violin,241
3,3,0.314665,0.133836,0.076663,0.017955,0.466687,0.076860,0.976595,0.076357,0.105185,...,0.003540,0.320753,0.768330,0.044698,0.123987,0.379935,0.234035,0.196416,violin,241
4,4,0.045428,0.375057,0.027606,0.000000,0.251049,0.371285,0.109352,0.076543,0.369173,...,0.000000,0.037904,0.408619,0.017229,0.247702,0.295875,0.010435,0.002851,violin,241
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15995,15995,0.173298,0.288684,0.038316,0.046567,0.349442,0.125054,1.187389,0.365123,0.330671,...,0.078170,0.045820,0.278177,0.116832,0.096903,0.166981,0.208617,0.388443,apple,4
15996,15996,0.200062,0.145902,0.015686,0.084857,0.419497,0.125800,0.780225,0.246931,0.607631,...,0.140179,0.036867,0.610447,0.119481,0.191841,0.403482,0.099671,0.178655,apple,4
15997,15997,0.078571,0.066972,0.024510,0.017365,0.781149,0.114321,0.892347,0.525485,0.490710,...,0.158688,0.057387,0.573591,0.131755,0.327526,0.420424,0.082641,0.224793,apple,4
15998,15998,0.014721,0.152039,0.107902,0.000000,0.311134,0.093912,0.952523,0.152570,0.485024,...,0.000000,0.345436,0.864554,0.029831,0.261657,0.497965,0.282121,0.129772,apple,4


In [None]:
X = df.drop(df.columns[0], axis=1).drop(['extracted_part','encoded_part'], axis=1).values
y = df['encoded_part'].values

In [None]:
# Split data

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)


# Train classifier
classifier = MultivariateGaussianClassifier()
classifier.fit(X_train, y_train)

# Assuming your trained classifier is named 'classifier'
joblib.dump(classifier, 'multivariate_gaussian_classifier.joblib')

# Predict and evaluate
y_pred = classifier.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")

Accuracy: 53.87%
