# Testing if a Distribution is Normal

## Imports

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import quiz_tests

# Set plotting options
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size for matplotlib figures: 16 x 9 (width, height; inches per matplotlib's figsize convention).
plt.rc('figure', figsize=(16, 9))

## Create normal and non-normal distributions

In [2]:
# Seed one random number generator so both samples (and every statistic
# derived from them) are identical on each Restart & Run All.
# A single shared generator is used on purpose: passing the same integer seed
# to each `rvs` call separately would restart the random stream for each
# draw, making sample B depend on the very same underlying draws as sample A.
# `RandomState` is used because `rvs(random_state=...)` accepts it across
# SciPy versions.
RANDOM_SEED = 42
rng = np.random.RandomState(RANDOM_SEED)

# Sample A: Normal distribution (mean 0, standard deviation 1), 1000 draws
sample_a = stats.norm.rvs(loc=0.0, scale=1.0, size=(1000,), random_state=rng)

# Sample B: Non-normal distribution (log-normal with shape s=0.5), 1000 draws
sample_b = stats.lognorm.rvs(s=0.5, loc=0.0, scale=1.0, size=(1000,), random_state=rng)

## Quiz Solution

# Quiz

To use the Kolmogorov-Smirnov test, complete the function `is_normal_ks`.

To set the variable `normal_args`, create a tuple with two values: the first is the mean of the sample, and the second is the standard deviation of the sample.
(An example of a tuple is `("apple", "banana")`.)

**Hint:** NumPy has the functions `np.mean()` and `np.std()`.

In [3]:
def is_normal_ks(sample, test=stats.kstest, p_level=0.05, **kwargs):
    """
    Decide whether `sample` looks normally distributed.

    The sample is compared against a normal distribution whose parameters are
    estimated from the sample itself: its mean (np.mean) and its standard
    deviation (np.std). The test statistic, the p-value and the verdict are
    printed before the verdict is returned.

    sample: a sample distribution
    test: a function that tests for normality; it is called as
        test(sample, 'norm', (mean, std), **kwargs) and must return a
        (test statistic, p-value) pair
    p_level: if the test returns a p-value greater than p_level, assume normality
    **kwargs: extra keyword arguments passed through to `test`

    return: True if distribution is normal, False otherwise
    """
    # Parameters of the reference normal distribution, taken from the sample.
    sample_mean = np.mean(sample)
    sample_std = np.std(sample)

    statistic, p_val = test(sample, 'norm', (sample_mean, sample_std), **kwargs)
    print("Test statistic: {}, p-value: {}".format(statistic, p_val))

    # Normality is assumed unless the p-value falls to p_level or below.
    likely_normal = p_val > p_level
    print("Is the distribution Likely Normal? {}".format(likely_normal))
    return likely_normal

# Check the implementation with the checker from the imported `quiz_tests` module.
# NOTE(review): the checker's assertions are not visible in this notebook; the
# recorded output shows two is_normal_ks reports followed by "Tests Passed".
quiz_tests.test_is_normal_ks(is_normal_ks)

Test statistic: 0.014325774203138214, p-value: 0.9864397892403253
Is the distribution Likely Normal? True
Test statistic: 0.11398973140410906, p-value: 8.942402374145786e-12
Is the distribution Likely Normal? False
Tests Passed


In [4]:
# Using Kolmogorov-Smirnov test
# Run the same check on each sample, printing a heading first; ending the
# cell with a loop (not an expression) keeps the return values out of the
# cell output, just like the original trailing semicolons did.
for heading, data in (("Sample A:-", sample_a), ("Sample B:-", sample_b)):
    print(heading)
    is_normal_ks(data)

Sample A:-
Test statistic: 0.017666627639972288, p-value: 0.9138477723499415
Is the distribution Likely Normal? True
Sample B:-
Test statistic: 0.09043674061494456, p-value: 1.4433763562138324e-07
Is the distribution Likely Normal? False
