#### *SOURCE: CLARUSWAY   /   INSTRUCTOR: JasonTimberlake (Statistics)*

In [1]:
# Basic Library
import numpy as np
import pandas as pd

# Libraries for Visualization
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import scipy.stats as stats

import warnings;
warnings.filterwarnings('ignore')

***

<div class="alert alert-block alert-danger">
    
<b>One Sample T Test *(One-Tailed)*</b>
    
</div>

***

![smallsamples.png](attachment:smallsamples.png)

In [None]:
# ASSUMPTIONS
# The three basic assumptions of a test about a mean are as follows:

- The variable is quantitive
- The data production employed randomization
- The population distribution is approximately normal

In [None]:
# HYPOTHESIS

- H0: mu = 3000
- H1: mu < 3000

In [2]:
# TEST STATISTIC

mu = 3000
s = 39.1
n = 8
x = 2959

![ttest_equations.png](attachment:ttest_equations.png)

In [3]:
t = (x - mu) / (s / n**0.5)
t

-2.9658698750535497

![example1.jpg](attachment:example1.jpg)

In [4]:
t < -2.365

True

In [None]:
# Since t = -2.966 is less than -2.365, so we fail to reject the null hypothesis.

In [5]:
# P-VALUE

pValue = stats.t.cdf(t, 7)
pValue

0.010464999141960496

In [None]:
# P(t < -2.966 | H0 true) = .01046

In [6]:
# CONCLUSION (DECISION)

alpha = 0.025 # significance level

if pValue < alpha:
    print("Reject the Null")
else:
    print('Fail to Reject the Null')

Reject the Null


In [None]:
# We conclude that sufficient evidence exists to contradict the manufacturer's claim and that the true mean muzzle
# velocity is less than 3000 feet per second at the .025 level of significance

***

<div class="alert alert-block alert-danger">
    
<b>One Sample T Test *(One-Tailed)*</b>
    
</div>

- According to Reynolds Intellectual Ability Scales, the average VIQ (Verbal IQ scores based on the four Wechsler (1981) subtests) is about 109.

- In our sample data, we have a sample of 40 cases. 
- Let's test if the average VIQ of people is significantly bigger than 109.

In [7]:
# Brain size and weight and IQ data (Willerman et al. 1991)
df = pd.read_csv("Data/brain_size.csv", sep=";", na_values = ".", index_col = 0)

In [8]:
df.head()

Unnamed: 0,Gender,FSIQ,VIQ,PIQ,Weight,Height,MRI_Count
1,Female,133,132,124,118.0,64.5,816932
2,Male,140,150,124,,72.5,1001121
3,Male,139,123,150,143.0,73.3,1038437
4,Male,133,129,128,172.0,68.8,965353
5,Female,137,132,134,147.0,65.0,951545


In [None]:
# H0: mean = 109
# H1: mean > 109

In [9]:
# Calculate the mean of VIQ
xbar = df.VIQ.mean()
xbar

112.35

In [10]:
# Calculate the std of VIQ
s = df.VIQ.std()
s

23.616107063199742

In [11]:
df.shape

(40, 7)

In [12]:
# Calculate the test statistic
t_test = (xbar-109) / (s/np.sqrt(df.shape[0]))

In [13]:
#test statistic
t_test

0.8971529586323551

In [14]:
# Calculate p-value
pValue = 1 - stats.t.cdf(t_test, 39)
pValue

0.18757115929257173

In [15]:
# Use stats.ttest_1samp() to calculate the test statistic and p-value

oneSamp = stats.ttest_1samp(df.VIQ, 109, alternative='greater')  # default is two-sided
oneSamp

Ttest_1sampResult(statistic=0.897152958632355, pvalue=0.1875711592925718)

In [16]:
#Display p-value
oneSamp.pvalue

0.1875711592925718

In [17]:
# Compare p-value and alpha
alpha = 0.05

if oneSamp.pvalue < alpha:
    print('Reject The Null')
else:
    print('Fail to Reject The Null')

Fail to Reject The Null


***

<div class="alert alert-block alert-danger">
    
<b>One Sample T Test *(Two-Tailed)*</b>
    
</div>

In the population, the average IQ is 100. A team of scientists wants to test a new medication to see if it has a either a positive or negative effect on intelligence, or no effect at all. A sample of 30 participants who have taken the medication has a mean of 140 with a standard deviation of 20. Did the medication affect intelligence? (alpha=0.05)

In [18]:
mu = 100
s = 20
x = 140
n = 30

alpha = 0.05
dof = n-1

In [None]:
# HYPOTHESIS

- H0: mu = 100
- H1: mu != 100

![Inkedexample4_LI.jpg](attachment:Inkedexample4_LI.jpg)

In [None]:
# STATE DECISION RULE

# If t is less than -2.045, or greater than 2.045, reject the null hypothesis.

In [19]:
# TEST STATISTIC

t = (x-mu) / (s / (n**0.5))
t

10.954451150103322

In [20]:
t > 2.045

True

In [None]:
# Since t=10.96 is greater than 2.045, so we can reject the null hypothesis.

In [21]:
# P-VALUE

pValue = 2 * (1 - stats.t.cdf(t, dof))
pValue

8.021361352916756e-12

In [22]:
# CONCLUSION (DECISION)

alpha = 0.05 # significance level

if pValue < alpha:
    print("Reject the Null")
else:
    print('Fail to Reject the Null')

Reject the Null


***

<div class="alert alert-block alert-danger">
    
<b>One Sample Z Test *(Two-Tailed)*</b>
    
</div>

In the population, the average IQ is 100 with a standard deviation of 15. A team of scientists wants to test a new medication to see if it has a either a positive or negative effect on intelligence, or no effect at all. A sample of 30 participants who have taken the medication has a mean of 140. Did the medication affect intelligence?

In [23]:
mu = 100
sigma = 15
x = 140
n = 30

In [None]:
# HYPOTHESIS

- H0: mu = 100
- H1: mu != 100

In [24]:
# STATE DECISION RULE

alpha = 0.05

![z-score%20for%20pvalue_LI_2.jpg](attachment:z-score%20for%20pvalue_LI_2.jpg)

In [None]:
# If z is less than -1.96, or greater than 1.96, reject the null hypothesis.

In [25]:
# TEST STATISTIC

z = (x-mu) / (sigma / (n**0.5))
z

14.60593486680443

In [26]:
z > 1.96

True

In [None]:
# Since z=14.61 is greater than 1.96, so we can reject the null hypothesis.

In [27]:
# P-VALUE

pValue = 2 * (1-stats.norm.cdf(z))
pValue

0.0

In [28]:
# CONCLUSION (DECISION)

alpha = 0.05 # significance level

if pValue < alpha:
    print("Reject the Null")
else:
    print('Fail to Reject the Null')

Reject the Null


***

<div class="alert alert-block alert-danger">
    
<b>One Sample Z Test *(Two-Tailed)*</b>
    
</div>

Suppose the IQ in a certain population is normally distributed with a mean of μ = 100 and standard deviation of σ = 15.

A researcher wants to know if a new drug affects IQ levels, so he recruits 20 patients to try it and records their IQ levels.

In [29]:
from statsmodels.stats.weightstats import ztest as ztest

#enter IQ levels for 20 patients
data = [88, 92, 94, 94, 96, 97, 97, 97, 99, 99,
        105, 109, 109, 109, 110, 112, 112, 113, 114, 115]

#perform one sample z-test
result = ztest(data, value=100)
result

(1.5976240527147705, 0.1101266701438426)

In [30]:
# CONCLUSION (DECISION)

alpha = 0.05 # significance level

if result[1] < alpha:
    print("Reject the Null")
else:
    print('Fail to Reject the Null')

Fail to Reject the Null


In [137]:
# Since this p-value is not less than .05, we do not have sufficient evidence to reject the null hypothesis. 
# In other words, the new drug does not significantly affect IQ level.

***

<div class="alert alert-block alert-danger">
    
<b>One Sample Z Test *(One-Tailed)*</b>
    
</div>

Suppose that a beach is safe to swim if the mean level of lead in the water is 10.0 parts/million with standard deviation 1.5.
Water safety is going to be determined by taking 40 water samples (samples mean: 10.5) and using the test statistic. (alpha: 0.05)

In [31]:
mu = 10
sigma = 1.5
x = 10.5
n = 40

In [None]:
# HYPOTHESIS

- H0: mu = 10.0
- H1: mu > 10.0

In [None]:
# STATE DECISION RULE

# If z is greater than 1.645, reject the null hypothesis.

In [32]:
# TEST STATISTIC

z = (x-mu) / (sigma / (n**0.5))
z

2.1081851067789197

In [33]:
z > 1.645

True

In [None]:
# Since z=2.11 is greater than 1.645, so we can reject the null hypothesis.

In [34]:
# P-VALUE

pValue = 1 - stats.norm.cdf(z)
pValue

0.017507490509831247

In [35]:
# CONCLUSION (DECISION)

alpha = 0.05 # significance level

if pValue < alpha:
    print("Reject the Null")
else:
    print('Fail to Reject the Null')

Reject the Null


In [None]:
# There is sufficient evidence to close the beach

***

<div class="alert alert-block alert-danger">
    
<b>One Sample Z Test *(One-Tailed)*</b>
    
</div>

A department store manager determines that a new billing system will be cost-effective only if the mean monthly account is more than \\$170. 
A random sample of 400 monthly accounts is drawn, for which the sample mean is \\$178. The accounts are approximately normally distributed with a standard deviation of \\$65.
Can we conclude that the new system will be cost-effective?

In [36]:
mu = 170
sigma = 65
x = 178
n = 400

In [None]:
# HYPOTHESIS

- H0: mu = 170
- H1: mu > 170

In [None]:
# STATE DECISION RULE

# If z is greater than 1.645, reject the null hypothesis.

In [37]:
# TEST STATISTIC

z = (x-mu) / (sigma / (n**0.5))
z

2.4615384615384617

In [38]:
z > 1.645

True

In [None]:
# Since z=2.46 is greater than 1.645, so we can reject the null hypothesis.

In [39]:
# P-VALUE

pValue = 1 - stats.norm.cdf(z)
pValue

0.006917128192854505

![Inkedz-score%20for%20pvalue_LI.jpg](attachment:Inkedz-score%20for%20pvalue_LI.jpg)

In [40]:
p = 1 - 0.9931
p

0.006900000000000017

In [41]:
# CONCLUSION (DECISION)

alpha = 0.05 # significance level

if pValue < alpha:
    print("Reject the Null")
else:
    print('Fail to Reject the Null')

Reject the Null


In [None]:
# So it is cost effective to install the new billing system