<a href="https://colab.research.google.com/github/VivekanandaMudelli/hand_drawn_sketch_recognition/blob/main/multivariategaussian_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab-only setup: mount Google Drive at /content/drive so that the data
# file read later in this notebook (under /content/drive/MyDrive/...) is
# reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.stats import multivariate_normal
import joblib
import os

class BDT:
    """Gaussian Bayes classifier (Bayesian decision theory).

    Every class is modelled by one multivariate normal distribution with its
    own mean vector and full covariance matrix.  A sample is assigned to the
    class that maximises ``log P(class) + log p(sample | class)``.

    Parameters
    ----------
    reg_covar : float, default 1e-6
        Non-negative constant added to the diagonal of every class covariance
        matrix to keep it from being singular.
    """

    def __init__(self, reg_covar=1e-6):
        if reg_covar < 0:
            raise ValueError("reg_covar must be non-negative")
        self.reg_covar = reg_covar
        self.priors_prob = {}  # label -> prior probability of that label
        self.means = {}        # label -> mean feature vector of that label
        self.covariances = {}  # label -> regularised covariance matrix of that label
        self.labels = None     # unique labels seen by fit(); None until fitted

    def fit(self, X, y):
        """Estimate prior, mean and covariance for every label in ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Label of each training sample.

        Returns
        -------
        BDT
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` does not hold exactly one
            label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Coerce y as well: comparing a plain list with ``==`` yields a single
        # bool instead of a per-sample mask.
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be 1-D with one label per row of X")
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty data set")

        n_samp, n_fea = X.shape
        labels = np.unique(y)
        ridge = np.eye(n_fea) * self.reg_covar

        # Build into fresh dictionaries so a refit never keeps statistics of
        # labels that only existed in an earlier training set.
        priors, means, covariances = {}, {}, {}
        for label in labels:
            X_l = X[y == label]
            if X_l.shape[0] > 1:
                # atleast_2d: np.cov returns a 0-d array when n_fea == 1.
                scatter = np.atleast_2d(np.cov(X_l, rowvar=False))
            else:
                # The sample covariance of a single point divides by zero and
                # is NaN; fall back to "no spread" so only the ridge remains.
                scatter = np.zeros((n_fea, n_fea))

            means[label] = X_l.mean(axis=0)
            covariances[label] = scatter + ridge
            priors[label] = X_l.shape[0] / n_samp

        self.labels = labels
        self.priors_prob = priors
        self.means = means
        self.covariances = covariances
        return self

    def _as_samples(self, X):
        """Return ``X`` as a float array of shape (n_samples, n_features)."""
        X = np.asarray(X, dtype=float)
        n_fea = np.atleast_1d(self.means[self.labels[0]]).shape[0]
        if X.ndim == 1:
            # A 1-D input is one sample, unless the model has a single
            # feature, in which case it is a column of samples.
            X = X.reshape(-1, 1) if n_fea == 1 else X.reshape(1, -1)
        if X.ndim != 2 or X.shape[1] != n_fea:
            raise ValueError(
                f"X must have shape (n_samples, {n_fea}); got {X.shape}"
            )
        return X

    def _joint_log_likelihood(self, X):
        """Return ``log prior + log likelihood``, shape (n_samples, n_labels)."""
        log_probs = np.empty((X.shape[0], len(self.labels)))
        for i, label in enumerate(self.labels):
            log_likelihood = multivariate_normal.logpdf(
                X,
                mean=self.means[label],
                cov=self.covariances[label]
            )
            # logpdf returns a scalar for a single sample; the assignment
            # broadcasts it into the one-row column.
            log_probs[:, i] = np.log(self.priors_prob[label]) + log_likelihood
        return log_probs

    def predict(self, X):
        """Predict the most probable label for every sample in ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.  A 1-D input is treated as a single sample
            (or as a column of samples when the model has one feature).

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Predicted labels, taken from the labels seen during ``fit``.

        Raises
        ------
        ValueError
            If the model has not been fitted or the feature count of ``X``
            differs from the training data.
        """
        if self.labels is None:
            raise ValueError("Model not fitted yet!")

        log_probs = self._joint_log_likelihood(self._as_samples(X))
        # Label with the highest posterior score for each sample.
        return self.labels[np.argmax(log_probs, axis=1)]

    def save_model(self, filename):
        """Save the model to a file."""
        model_data = {
            'priors_prob': self.priors_prob,
            'means': self.means,
            'covariances': self.covariances,
            'labels': self.labels,
            'reg_covar': self.reg_covar
        }
        joblib.dump(model_data, filename)
        print(f"Model saved to {filename}")

    @classmethod
    def load_model(cls, filename):
        """Load the model from a file.

        Raises
        ------
        FileNotFoundError
            If ``filename`` does not exist.
        """
        if not os.path.exists(filename):
            raise FileNotFoundError(f"Model file {filename} not found")

        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code -- only load model files from a trusted source.
        model_data = joblib.load(filename)

        model = cls()
        model.priors_prob = model_data['priors_prob']
        model.means = model_data['means']
        model.covariances = model_data['covariances']
        model.labels = model_data['labels']
        # Files written before reg_covar was stored keep the default value.
        model.reg_covar = model_data.get('reg_covar', model.reg_covar)

        return model


In [None]:
if __name__ == "__main__":
    # Read the feature table (cnn_features_train.csv) from the mounted Drive.
    data = pd.read_csv('/content/drive/MyDrive/prml/cnn_features_train.csv')

    # The target is the "encoded_part" column.  The feature matrix is what
    # remains after dropping the first column and both label columns.
    y = data["encoded_part"]
    X = (
        data
        .drop(columns=data.columns[0])
        .drop(columns=["extracted_part", "encoded_part"])
    )

    # Hold out 10% of the rows for testing; the fixed seed keeps the split
    # reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, random_state=42
    )

    # Instantiate the classifier with its defaults and train it on the
    # underlying NumPy arrays (fit returns the model itself).
    model = BDT().fit(X_train.values, y_train.values)

    # Score the held-out split.
    y_pred = model.predict(X_test.values)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy: {accuracy * 100:.2f}%")

    # Persist the trained model, then reload it and classify the first test
    # row as a round-trip check.
    model_path = 'BDT_model.pkl'
    model.save_model(model_path)

    loaded_model = BDT.load_model(model_path)
    sample_prediction = loaded_model.predict(X_test.iloc[:1].values)
    print(f"Sample prediction: {sample_prediction[0]}")


Test Accuracy: 53.87%
Model saved to BDT_model.pkl
