# Proving hypothesis testing one sample

In [115]:
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt

In [189]:
x = np.linspace(stats.norm.ppf(0.01), stats.norm.ppf(0.99), 1600)
population = pd.Series(stats.norm.pdf(x))

In [244]:
population.describe()

count    1600.000000
mean        0.210516
std         0.127380
min         0.026652
25%         0.087016
50%         0.202651
75%         0.336655
max         0.398942
dtype: float64

In [245]:
print("population skew:", population.skew())
print("population mean:", population.mean())

population skew: 0.08038656863163338
population mean: 0.21051557382826455


In [217]:
sample = population[:-400]

In [243]:
print("Hypothesis Testing\n"
      "H0: μ <= 0.21\n"
      "H1: μ > 0.21")

t_stat, p_value = stats.ttest_1samp(a = sample, popmean=0.24, alternative="greater")

print("sample mean:", sample.mean())
print("t-statistic: {0:.2f} p-value: {1:.2f}".format(t_stat, p_value))

print("the popmean argument is our estimation of the population mean. "
      "we can change the argumen as we like until we get enough evidence to reject "
      "the null hypothesis.")

print("if we want the confidence interval to be 95%, "
      "therefore the critical value is 2.5%")

print("because p-value is below 0.025, we have enough evidence to reject the null hypothesis. "
      "therefore, we can conclude that the mean population is more than 0.24.")

print("estimated population standard deviation {0:.2f}".format(sample.std(ddof=0)))

print("if we imagine the sample as the weight of waste per day, the conclusion is, "
      "95% of the time, the average weight of waste is less than 0.24. "
      "therefore, if we make a drop off site, the capacity per day should be "
      "estimated mean population + 3 * estimated mean standard deviation, "
      "which is: 0.24 + 3 * 0.12. "
      "with that capacity, the drop off site can handle 99.73% of the time without issue.")

Hypothesis Testing
H0: μ <= 0.21
H1: μ > 0.21
sample mean: 0.24853389422796615
t-statistic: 2.42 p-value: 0.01
popmean argument is our estimation of the population mean. we can change the argumen as we like until we get enough evidence to reject the null hypothesis
if we want the confidence interval to be 95%, therefore the critical value is 2.5%
because p-value is below 0.025, we have enough evidence to reject the null hypothesis. therefore, we can conclude that the mean population is more than 0.24
estimated population standard deviation 0.12
if we imagine the sample as the weight of waste per day, the conclusion is, 95% of the time, the average weight of waste is less than 0.24. therefore, if we make a drop off site, the capacity per day should be estimated mean population + 3 * estimated mean standard deviation, which is: 0.24 + 3 * 0.12. with that capacity, the drop off site can handle 99.73% of waste delivery without issue.
