### Hypothesis Tests 

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

##### Normal Distribution

In [None]:
# Evaluation grid: 1000 evenly spaced points running from the 1st to the
# 99th percentile of the standard normal distribution.
lower_bound = stats.norm.ppf(0.01)
upper_bound = stats.norm.ppf(0.99)
x = np.linspace(lower_bound, upper_bound, num=1000)

# Standard normal probability density evaluated at every grid point.
distribution = stats.norm.pdf(x)

In [None]:
# Plot the standard normal density over the grid `x`. The explicit Axes
# interface is used so the figure carries axis labels, and the trailing
# semicolon keeps the return value's repr out of the cell output.
fig, ax = plt.subplots()
ax.plot(x, distribution)
ax.set(title='A Normal Distribution', xlabel='x', ylabel='Probability density');

In [None]:
# Single-column frame of the density values ('heights'), indexed by the grid x.
df = pd.DataFrame(distribution, index=x, columns=['heights'])

In [None]:
# Coin example: simulate one flip of a fair coin
# (a single binomial trial with success probability 0.5).
np.random.binomial(n=1, p=0.5)

In [None]:
# Fraction of successes in 1000 fair-coin flips.
np.random.binomial(n=1000, p=0.5) / 1000

In [None]:
# Repeat the "100 fair flips" experiment 1000 times; each entry is the
# number of successes in one experiment.
distribution_of_flips = np.random.binomial(n=100, p=0.5, size=1000)

In [None]:
# Density curve (histogram turned off) of the simulated success counts.
sns.distplot(a=distribution_of_flips, hist=False)

In [None]:
# Gaussian example: density curves for two samples of 1000 normal draws,
# the second one shifted to a mean of 0.75.
sns.distplot(np.random.normal(size = 1000), hist = False, label = 'Group 1')
sns.distplot(np.random.normal(loc = 0.75, size = 1000), hist = False, label = 'Group 2')
# Draw the legend explicitly so the 'Group 1' / 'Group 2' labels are displayed.
plt.legend();

#### Hypothesis Tests

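- Null hypothesis: which distribution am I imagining the data came from? Here it's a normal distribution with mean 0.  
- p-value: the probability, if that imagined distribution were true, of seeing a result at least as extreme as the one observed -- e.g. 0.001.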

In [None]:
# One-sample t-test: is the mean of the 'heights' column plausibly equal
# to the hypothesised value 0?
# NOTE(review): 'heights' holds the pdf values computed earlier, not random
# samples -- confirm this is the intended input for the test.
stats.ttest_1samp(df['heights'], popmean=0)

In [None]:
# Two groups of 1000 normal draws: g1 with the default mean, g2 shifted to a
# mean of 0.75. A seeded generator is used so that the t-tests and the
# regression run on these arrays give the same numbers on every
# Restart & Run All.
rng = np.random.default_rng(24)
g1 = rng.normal(size = 1000)
g2 = rng.normal(loc = 0.75, size = 1000)

In [None]:
# Independent two-sample t-test comparing the means of g1 and g2.
stats.ttest_ind(a=g1, b=g2)

In [None]:
def p_value_experiment(location):
    """Plot two simulated groups and title the figure with their t-test p-value.

    Group 1 is 1000 normal draws with the default mean; Group 2 is 1000
    normal draws with mean ``location``. An independent two-sample t-test is
    run on the two groups, both density curves are drawn on the current
    figure, and the p-value is written into the title.

    Parameters
    ----------
    location : float
        Mean (``loc``) of the normal distribution Group 2 is drawn from.

    Returns
    -------
    None
        The function only draws on the current matplotlib figure.
    """
    # A private, fixed-seed generator yields the same draws that
    # ``np.random.seed(24)`` followed by ``np.random.normal`` would, but does
    # not reset the global NumPy random state as a side effect of plotting.
    rng = np.random.RandomState(24)
    g1 = rng.normal(size = 1000)
    g2 = rng.normal(loc = location, size = 1000)
    # Only the p-value is needed; the t statistic is discarded.
    _, pval = stats.ttest_ind(g1, g2)
    sns.distplot(g1, hist = False, label = 'Group 1')
    sns.distplot(g2, hist = False, label = 'Group 2')
    # Draw the legend explicitly so the group labels are displayed.
    plt.legend()
    plt.title(f'The p-value is {pval: .6f}')

In [None]:
from ipywidgets import interact
import ipywidgets as widgets

In [None]:
# Slider controlling the Group 2 mean: from -2 to 2 in steps of 0.05.
location_slider = widgets.FloatSlider(min=-2, max=2, step=0.05)
# Re-run the experiment whenever the slider value changes.
interact(p_value_experiment, location=location_slider)

In [None]:
# What if the two groups are not independent? Use the related-samples t-test.
stats.ttest_rel(a=g1, b=g2)

In [None]:
# One-sample t-test of the element-wise differences g1 - g2 against a mean of 0.
stats.ttest_1samp(a=g1 - g2, popmean=0)

In [None]:
# One frame per group: the sampled values in 'height' plus a constant
# 'label' column naming the group each row belongs to.
df1 = pd.DataFrame({'height': g1, 'label': ['g1'] * len(g1)})
df2 = pd.DataFrame({'height': g2, 'label': ['g2'] * len(g2)})

In [None]:
# Stack the two labelled frames into one long frame. pd.concat is used
# because DataFrame.append was removed in pandas 2.0; like append's default
# behaviour, it keeps each frame's original index.
df = pd.concat([df1, df2])

In [None]:
# Preview the first five rows of the combined frame.
df.head(n=5)

In [None]:
from statsmodels.formula.api import ols

In [None]:
# Ordinary least squares fit of 'height' on the group 'label'.
model = ols(formula='height ~ label', data=df).fit()

In [None]:
# Print the text rendering of the fitted model's summary.
regression_summary = model.summary()
print(regression_summary)

In [None]:
# Independent two-sample t-test on g1 and g2 (same call as earlier in the notebook).
stats.ttest_ind(a=g1, b=g2)