# Gaussian Code Exercise

Read through the code below and fill out the TODOs. You'll find a cell at the end of the Jupyter notebook containing unit tests. After you've run the code cell with the Gaussian class, you can run the final cell to check that your code functions as expected.

This exercise includes a file called 'numbers.txt', which you can see if you click on the 'Jupyter' icon at the top of the workspace and then go into the folder titled 3.OOP_code_gaussian_class. The 'numbers.txt' file is read in by the read_data_file() method. There is also a solution in the 3.OOP_code_gaussian_class folder in a file called answer.py.

In [2]:
import math
import matplotlib.pyplot as plt

class Gaussian():
    """Gaussian distribution class for calculating and
    visualizing a Gaussian distribution.

    Attributes:
        mean (float) representing the mean value of the distribution
        stdev (float) representing the standard deviation of the distribution
        data (list of floats) a list of floats extracted from the data file

    """

    def __init__(self, mu=0, sigma=1):
        """Create a distribution with the given parameters and no data.

        Args:
            mu (float): mean value of the distribution
            sigma (float): standard deviation of the distribution
        """
        self.mean = mu
        self.stdev = sigma
        self.data = []

    def calculate_mean(self):
        """Method to calculate the mean of the data set.

        When data is present, the mean attribute is updated with the result.
        When the data list is empty, 0 is returned and the mean attribute is
        left untouched.

        Args:
            None

        Returns:
            float: mean of the data set (0 if there is no data)
        """
        if len(self.data) == 0:
            return 0

        self.mean = sum(self.data) / len(self.data)
        return self.mean

    def calculate_stdev(self, sample=True):
        """Method to calculate the standard deviation of the data set.

        The mean is recomputed from the current data first (updating the mean
        attribute), so the result is correct even if data was assigned
        directly without calling calculate_mean(). When data is present, the
        stdev attribute is updated with the result.

        Args:
            sample (bool): whether the data represents a sample or population

        Returns:
            float: standard deviation of the data set. 0 is returned when the
                data list is empty (stdev attribute left untouched), and 0.0
                when a sample holds a single value, because the n - 1 divisor
                would be zero.
        """
        n = len(self.data)
        if n == 0:
            return 0

        # Always measure deviations from the mean of the data actually held,
        # not from a possibly stale self.mean (e.g. the constructor's mu).
        mean = self.calculate_mean()

        # A sample uses Bessel's correction (n - 1); a population uses n.
        divisor = n - 1 if sample else n
        if divisor == 0:
            # A single-value sample has no spread to estimate; avoid dividing
            # by zero and report 0.0, in line with the empty-data behaviour.
            self.stdev = 0.0
            return self.stdev

        variance = sum((x - mean) ** 2 for x in self.data) / divisor
        self.stdev = math.sqrt(variance)
        return self.stdev

    def read_data_file(self, file_name, sample=True):
        """Method to read in data from a txt file. The txt file should have
        one number (float) per line. The numbers are stored in the data attribute.
        After reading in the file, the mean and standard deviation are calculated.
        Blank (whitespace-only) lines are ignored.

        Args:
            file_name (string): name of a file to read from
            sample (bool): whether the data represents a sample or population

        Returns:
            None

        Raises:
            OSError: if the file cannot be opened
            ValueError: if a non-blank line cannot be converted to float
        """
        with open(file_name) as file:
            # Skip blank lines (e.g. a trailing empty line), which float()
            # would otherwise reject.
            data_list = [float(line) for line in file if line.strip()]

        self.data = data_list

        # Assign the return values explicitly so that an empty file resets
        # mean and stdev to 0 rather than leaving the previous values behind.
        self.mean = self.calculate_mean()
        self.stdev = self.calculate_stdev(sample)

    def plot_histogram(self, file_name=None):
        """Method to output a histogram of the instance variable data using
        matplotlib pyplot library.

        Args:
            file_name (string): optional name of a file to read from. When
                given, the file is loaded with read_data_file() (treated as a
                sample) before plotting; when omitted, the data already held
                by the instance is plotted.

        Returns:
            None
        """
        if file_name is not None:
            self.read_data_file(file_name, sample=True)

        plt.hist(self.data)
        plt.title('Histogram of Data')
        plt.xlabel('data')
        plt.ylabel('count')
        plt.show()

    def pdf(self, x):
        """Probability density function calculator for the gaussian distribution.

        Args:
            x (float): point for calculating the probability density function

        Returns:
            float: probability density function output (0 when the standard
                deviation is 0, to avoid dividing by zero)
        """
        if self.stdev == 0:
            return 0

        exponent = -(x - self.mean) ** 2 / (2 * self.stdev ** 2)
        return (1 / (math.sqrt(2 * math.pi) * self.stdev)) * math.exp(exponent)

    def plot_histogram_pdf(self, n_spaces=50):
        """Method to plot the normalized histogram of the data and a plot of the
        probability density function along the same range

        Args:
            n_spaces (int): number of data points

        Returns:
            list: x values for the pdf plot
            list: y values for the pdf plot

        Raises:
            ValueError: if the instance holds no data to plot
        """
        if len(self.data) == 0:
            raise ValueError('cannot plot: the data list is empty')

        min_range = min(self.data)
        max_range = max(self.data)

        # calculates the interval between x values
        interval = 1.0 * (max_range - min_range) / n_spaces

        # x values start at the minimum and advance one interval at a time;
        # y holds the pdf evaluated at each of them
        x = [min_range + interval * i for i in range(n_spaces)]
        y = [self.pdf(value) for value in x]

        # make the plots
        fig, axes = plt.subplots(2, sharex=True)
        fig.subplots_adjust(hspace=.5)
        axes[0].hist(self.data, density=True)
        axes[0].set_title('Normed Histogram of Data')
        axes[0].set_ylabel('Density')

        axes[1].plot(x, y)
        axes[1].set_title('Normal Distribution for \n Sample Mean and Sample Standard Deviation')
        axes[1].set_ylabel('Density')
        plt.show()

        return x, y


In [3]:
# Unit tests to check your solution

import unittest

class TestGaussianClass(unittest.TestCase):
    """Unit tests for the Gaussian class.

    The data-driven tests open 'numbers.txt' by relative path, so that file
    must be reachable from the current working directory.
    """

    def setUp(self):
        # Every test starts from a fresh distribution with mean 25, stdev 2
        # and no data loaded.
        self.gaussian = Gaussian(25, 2)

    def test_initialization(self):
        """The constructor stores the supplied mean and standard deviation."""
        self.assertEqual(self.gaussian.mean, 25, 'incorrect mean')
        self.assertEqual(self.gaussian.stdev, 2, 'incorrect standard deviation')

    def test_pdf(self):
        """The density at the mean matches the expected value to 5 places."""
        self.assertEqual(round(self.gaussian.pdf(25), 5), 0.19947,
                         'pdf function does not give expected result')

    def test_meancalculation(self):
        """calculate_mean() returns 0 without data and the average with data."""
        # Freshly constructed instance: no data yet, so the mean is reported as 0.
        self.assertEqual(self.gaussian.calculate_mean(), 0,
                         'calculated mean should be 0 for empty data')

        # Load the data unconditionally; guarding on self.gaussian.data would
        # skip this check because setUp always leaves the data list empty.
        self.gaussian.read_data_file('numbers.txt', True)
        expected_mean = sum(self.gaussian.data) / float(len(self.gaussian.data))
        self.assertEqual(self.gaussian.calculate_mean(), expected_mean,
                         'calculated mean not as expected')

    def test_stdevcalculation(self):
        """read_data_file() computes sample and population standard deviations."""
        self.gaussian.read_data_file('numbers.txt', True)
        self.assertEqual(round(self.gaussian.stdev, 2), 92.87, 'sample standard deviation incorrect')
        self.gaussian.read_data_file('numbers.txt', False)
        self.assertEqual(round(self.gaussian.stdev, 2), 88.55, 'population standard deviation incorrect')
                
# Build the suite directly from the TestCase class. loadTestsFromModule expects
# a module object; handing it a TestCase instance only works incidentally.
tests_loaded = unittest.TestLoader().loadTestsFromTestCase(TestGaussianClass)

unittest.TextTestRunner().run(tests_loaded)

....
----------------------------------------------------------------------
Ran 4 tests in 0.003s

OK


<unittest.runner.TextTestResult run=4 errors=0 failures=0>