# In [4]:
import abc
import csv

class ContinuousDistribution(metaclass=abc.ABCMeta):
    """Abstract interface for continuous probability distributions.

    Every method below is abstract: a subclass can only be instantiated
    once it overrides all six of them. The base implementations do
    nothing and return ``None``.
    """

    @abc.abstractmethod
    def import_data(self, file_path):
        """Hook for reading data from ``file_path``; subclasses must override."""

    @abc.abstractmethod
    def export_data(self, data, file_path):
        """Hook for writing ``data`` to ``file_path``; subclasses must override."""

    @abc.abstractmethod
    def compute_mean(self, data):
        """Hook for computing the mean of ``data``; subclasses must override."""

    @abc.abstractmethod
    def compute_standard_deviation(self, data):
        """Hook for computing the spread of ``data``; subclasses must override."""

    @abc.abstractmethod
    def visualize(self, data=None):
        """Hook for plotting; ``data`` is optional. Subclasses must override."""

    @abc.abstractmethod
    def generate_samples(self, n_samples):
        """Hook for drawing ``n_samples`` samples; subclasses must override."""

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import multivariate_normal
import csv

class GaussDistribution(ContinuousDistribution):
    """Multivariate Gaussian fitted to data, with CSV I/O and plotting.

    Attributes (all set to ``None`` until computed):
        dim: number of dimensions used to choose the plot type (1, 2 or 3).
        mean: per-column mean, set by ``compute_mean``.
        cov: covariance, set by ``compute_standard_deviation``.
        samples: last data loaded by ``import_data`` or drawn by
            ``generate_samples``.
    """

    # Grid points per axis used when evaluating the density for plotting.
    # A fixed count (instead of a fixed step) keeps the grid size independent
    # of the scale of the data; the 3-D value is small because the total
    # number of points grows with its cube.
    _GRID_POINTS = {1: 100, 2: 60, 3: 15}

    def __init__(self, dim=2):
        self.dim = dim
        self.mean = None
        self.cov = None
        self.samples = None

    def import_data(self, file_path):
        """Load numeric rows from the CSV file ``file_path`` into ``self.samples``.

        Blank lines are skipped. If the first non-blank row cannot be parsed
        as floats it is treated as a header and skipped; a non-numeric row
        anywhere else raises ``ValueError``.
        """
        rows = []
        first_row = True
        with open(file_path, 'r', newline='') as file:
            for row in csv.reader(file):
                if not row:
                    continue
                try:
                    values = [float(value) for value in row]
                except ValueError:
                    # Only the very first non-blank row may be a header.
                    if not first_row:
                        raise
                else:
                    rows.append(values)
                first_row = False
        self.samples = np.array(rows)

    def export_data(self, data, file_path):
        """Write ``data`` (an iterable of rows) to ``file_path`` as CSV."""
        with open(file_path, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerows(data)

    def compute_mean(self, data):
        """Store the column-wise mean of ``data`` in ``self.mean``."""
        self.mean = np.mean(data, axis=0)

    def compute_standard_deviation(self, data):
        """Store the covariance of ``data`` (columns are variables) in ``self.cov``.

        Despite the name inherited from the abstract interface, the stored
        value is the full covariance computed by ``np.cov``, not a
        standard deviation.
        """
        self.cov = np.cov(data, rowvar=False)

    def _require_fitted(self):
        """Raise ``ValueError`` unless both mean and covariance are set."""
        if self.mean is None or self.cov is None:
            raise ValueError(
                "distribution is not fitted: call fit() or set mean and cov first"
            )

    def _density_grid(self):
        """Evaluate the fitted density on a regular grid for plotting.

        The grid spans mean +/- 3 standard deviations along each axis.
        Returns ``(grids, density)`` where ``grids`` is a list with one
        coordinate array per dimension (matrix 'ij' indexing, as produced
        by ``np.mgrid``) and ``density`` has the same shape as each grid.
        """
        self._require_fitted()
        # np.cov returns a 0-d array for a single variable; normalise shapes.
        mean = np.atleast_1d(self.mean)
        cov = np.atleast_2d(self.cov)
        half_width = 3 * np.sqrt(np.diag(cov))
        points = self._GRID_POINTS[self.dim]
        axes = [
            np.linspace(centre - width, centre + width, points)
            for centre, width in zip(mean, half_width)
        ]
        grids = np.meshgrid(*axes, indexing='ij')
        positions = np.stack(grids, axis=-1)
        density = multivariate_normal.pdf(positions, mean=mean, cov=cov)
        return grids, density

    def visualize(self, data=None):
        """Plot ``data`` if given, otherwise the fitted density.

        * ``dim == 1``: histogram of ``data`` or the density curve.
        * ``dim == 2``: scatter of ``data`` or the density as a 3-D surface.
        * ``dim == 3``: 3-D scatter of ``data`` or a 3-D scatter of grid
          points coloured by density.

        Any other ``dim`` draws nothing. ``plt.show()`` is left to the
        caller. Raises ``ValueError`` when ``data`` is ``None`` and the
        distribution has not been fitted.
        """
        if data is not None:
            data = np.asarray(data)

        if self.dim == 1:
            if data is not None:
                plt.hist(data, bins=30, density=True)
            else:
                (x,), density = self._density_grid()
                plt.plot(x, density)
        elif self.dim == 2:
            if data is not None:
                plt.scatter(data[:, 0], data[:, 1], s=5)
            else:
                (x, y), density = self._density_grid()
                fig = plt.figure()
                # Figure.gca() no longer accepts keyword arguments in recent
                # matplotlib; add_subplot is the supported way to request 3-D.
                ax = fig.add_subplot(111, projection='3d')
                ax.plot_surface(x, y, density, cmap='viridis', linewidth=0)
        elif self.dim == 3:
            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            if data is not None:
                ax.scatter(data[:, 0], data[:, 1], data[:, 2], s=5)
            else:
                # A density over 3-D space cannot be drawn as a surface, so
                # show the grid points coloured by their density value.
                (x, y, z), density = self._density_grid()
                ax.scatter(x.ravel(), y.ravel(), z.ravel(),
                           c=density.ravel(), cmap='viridis', s=5, alpha=0.3)

    def generate_samples(self, num_samples):
        """Draw ``num_samples`` samples from the fitted Gaussian.

        The result, an array of shape ``(num_samples, n_dimensions)``, is
        stored in ``self.samples`` (replacing any previously loaded data).
        Raises ``ValueError`` if the distribution has not been fitted.
        """
        self._require_fitted()
        self.samples = np.random.multivariate_normal(
            mean=np.atleast_1d(self.mean),
            cov=np.atleast_2d(self.cov),
            size=num_samples,
        )

    def fit(self, data):
        """Estimate mean and covariance from ``data``."""
        self.compute_mean(data)
        self.compute_standard_deviation(data)

    def fit_from_file(self, file_path):
        """Load the CSV file ``file_path`` and fit the distribution to it."""
        self.import_data(file_path)
        self.fit(self.samples)

    def plot_samples_and_distribution(self, file_path):
        """Plot the file's samples next to the distribution fitted to them.

        Loads and fits ``file_path``, then draws as many new samples as the
        file has rows. The left panel shows the samples read from the file;
        the right panel shows the fitted density (``dim == 2``: filled
        contours; ``dim == 3``: the generated samples plus grid points
        coloured by density; other ``dim``: left empty). The file needs at
        least two columns. On return ``self.samples`` holds the generated
        samples. ``plt.show()`` is left to the caller.
        """
        self.fit_from_file(file_path)
        # generate_samples() overwrites self.samples, so keep the file data.
        file_samples = self.samples
        self.generate_samples(len(file_samples))
        generated = self.samples

        fig = plt.figure(figsize=(12, 6))
        if self.dim == 3:
            left = fig.add_subplot(121, projection='3d')
            left.scatter(file_samples[:, 0], file_samples[:, 1],
                         file_samples[:, 2], s=5)

            right = fig.add_subplot(122, projection='3d')
            right.scatter(generated[:, 0], generated[:, 1], generated[:, 2], s=5)
            # plot_surface needs 2-D arrays, so the 3-D density is shown as
            # faint grid points coloured by density instead.
            (x, y, z), density = self._density_grid()
            right.scatter(x.ravel(), y.ravel(), z.ravel(),
                          c=density.ravel(), cmap='viridis', s=5, alpha=0.1)
            right.set_xlabel('x')
            right.set_ylabel('y')
            right.set_zlabel('z')
            right.set_title('Multivariate Gaussian Distribution and Sampled Data')
        else:
            left = fig.add_subplot(121)
            left.scatter(file_samples[:, 0], file_samples[:, 1], s=5)

            right = fig.add_subplot(122)
            if self.dim == 2:
                (x, y), density = self._density_grid()
                right.contourf(x, y, density, cmap='viridis')
                right.set_xlabel('x')
                right.set_ylabel('y')
                right.set_title('Multivariate Gaussian Distribution')

# Read the numeric table from the CSV file, skipping its first row.
data = np.loadtxt('MGD.csv', delimiter=',', skiprows=1)

# Use one distribution dimension per CSV column and estimate the
# distribution's parameters from the loaded data.
n_columns = data.shape[1]
gd = GaussDistribution(dim=n_columns)
gd.fit(data)

# Draw as many samples from the learned distribution as there are data rows.
n_rows = len(data)
gd.generate_samples(num_samples=n_rows)

# Plot the samples of the MGD.csv file and the learned distribution,
# then display the figure.
gd.plot_samples_and_distribution(file_path='MGD.csv')
plt.show()

# This displays a figure with two panels: one with a scatter plot of the
# samples from the MGD.csv file, and one with the learned multivariate
# Gaussian distribution and the sampled data. The number of dimensions of the
# learned distribution depends on the number of columns in the MGD.csv file.



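# Recorded notebook output:
# TypeError: Can't instantiate abstract class GaussDistribution with abstract method generate_samples
# (raised whenever generate_samples is not defined as a method of GaussDistribution)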