In [1]:
import numpy as np
from scipy.stats import norm

# One-sample, right-tailed z-test for a proportion.
#   H0: at most 30% of customers are dissatisfied with the new system (p <= 0.30).
#   H1: more than 30% of customers are dissatisfied with the new system (p > 0.30).

n = 250       # sample size
x = 65        # dissatisfied customers observed in the sample
p = 0.30      # proportion assumed under H0
p_hat = x / n  # observed sample proportion

# Standard error of the sample proportion, evaluated at the hypothesized p.
standard_error = np.sqrt((p * (1 - p)) / n)

# Test statistic: distance of p_hat from p in standard-error units.
Z = (p_hat - p) / standard_error
print('Test statistic:', Z)

# Right-tailed p-value: area of the standard normal above Z.
p_value = 1 - norm.cdf(Z)
print('p-value:', p_value)

# Significance level
alpha = 0.05

# Reject H0 only when the p-value is below the significance level.
verdict = 'Reject the null hypothesis.' if p_value < alpha else 'Fail to reject the null hypothesis.'
print(verdict)

Test statistic: -1.3801311186847078
p-value: 0.9162268612556912
Fail to reject the null hypothesis.


In [2]:
import statsmodels.api as sm
import numpy as np

# Observed data for the two web pages, ordered as [Web Page A, Web Page B].
conversions = np.array([50, 30])  # conversions per page (x_1, x_2)
visits = np.array([1000, 500])    # visits per page (n_1, n_2)

# Two-sided z-test for the equality of the two conversion proportions.
z_stat, p_value = sm.stats.proportions_ztest(
    count=conversions,
    nobs=visits,
    alternative='two-sided',
)

# Report the test statistic and its p-value.
print(f"Z-statistic = {z_stat}")
print(f"P-value = {p_value}")

Z-statistic = -0.8125338562826986
P-value = 0.4164853677823287


In [3]:
import numpy as np
import scipy.stats as stats

# Step 1: Observed data
# Old web page (Web Page A)
x_1 = 50    # conversions_1
n_1 = 1000  # visits_1

# New web page (Web Page B)
x_2 = 30   # conversions_2
n_2 = 500  # visits_2

# Step 2: Hypotheses (two-tailed test)
#   H0: both pages have the same conversion rate.
#   Ha: the conversion rates differ.
p_1_hat = x_1 / n_1
p_2_hat = x_2 / n_2

# Pooled sample proportion across both pages, used for the variance under H0.
p_hat = (x_1 + x_2) / (n_1 + n_2)

# Step 3: Test statistic
# Standard error of the difference in proportions under H0.
pooled_standard_error = np.sqrt(p_hat * (1 - p_hat) * ((1 / n_1) + (1 / n_2)))
Z = (p_1_hat - p_2_hat) / pooled_standard_error

# Step 4/5: Z is compared against the standard normal distribution; the
# two-tailed p-value is twice the area beyond |Z|.
upper_tail_area = 1 - stats.norm.cdf(abs(Z))
p_value = 2 * upper_tail_area

# Report the results
print(f"Z = {Z}")
print(f"P-value = {p_value}")

Z = -0.8125338562826986
P-value = 0.4164853677823288


In [4]:
  # Make a decision and conclusion for the two-page conversion test.
  # The significance level is set here explicitly so the decision does not
  # depend on whichever value another cell last assigned to `alpha`.
  alpha = 0.05

  # `p_value` is expected to be the two-tailed p-value computed in the preceding cell.
  reject_null = p_value < alpha

  if reject_null:
      decision = "Reject the null hypothesis"
      evidence = "enough"
  else:
      decision = "Fail to reject the null hypothesis"
      evidence = "not enough"

  # Both outcomes share the same sentence; only the strength of evidence differs.
  conclusion = (
      f"There is {evidence} evidence to conclude that the conversion rates of "
      "the new web page (Web Page B) are statistically different from the old "
      "web page (Web Page A)."
  )

  # Print the results
  print(f"Decision: {decision}")
  print(f"Conclusion: {conclusion}")

Decision: Fail to reject the null hypothesis
Conclusion: There is not enough evidence to conclude that the conversion rates of the new web page (Web Page B) are statistically different from the old web page (Web Page A).


In [5]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

A French cake shop claims that the average number of pastries it can produce in a day exceeds 500.
The average number of pastries produced per day over a 70-day period was found to be 530.
Assume that the population standard deviation of the number of pastries produced per day is 125.

Test the claim using a right-tailed z-test at the significance level alpha = 0.05 (critical z-value = 1.64), and state your interpretation.


In [6]:
# z-statistic for the pastry claim: sample mean 530 vs. claimed mean 500,
# population standard deviation 125, sample of 70 days.
standard_error = 125 / np.sqrt(70)
z_stat = (530 - 500) / standard_error
z_stat

2.007984063681781

In [7]:
from scipy.stats import norm

# Right-tailed p-value: area of the standard normal distribution above z_stat.
cdf_at_z_stat = norm.cdf(z_stat)
pvalue = 1 - cdf_at_z_stat
pvalue

0.022322492581293485

In [8]:
alpha = 0.05  # significance level

# Reject H0 only when the p-value falls below the significance level.
verdict = 'Reject H0' if pvalue < alpha else 'Fail to Reject H0'
print(verdict)

Reject H0


In [9]:
# Critical z-value: the standard-normal quantile with 95% of the area below it.
lower_tail_area = 0.95
z = norm.ppf(lower_tail_area)
z

1.6448536269514722

In [10]:
# Sample mean that sits exactly z standard errors above the claimed mean of 500
# (population standard deviation 125, sample of 70 days).
standard_error = 125 / np.sqrt(70)
x = 500 + (z * standard_error)
x

524.574701413748

In [11]:
# IQ test results of the few people who were given the pill.
iq_scores = [
    110, 105, 98, 102,
    99, 104, 115, 95,
]

Testing the Pill's Effectiveness:

Case 1: The new pill may increase the average IQ score.

By enhancing cognitive functions, individuals taking the pill may show improved performance in IQ tests.

Case 2: The new pill may have no significant effect on IQ scores.

The researchers need to verify whether the pill is effective or not before widespread use.

In [12]:
# Sample mean of the IQ scores observed after taking the pill.
np.asarray(iq_scores).mean()

103.5

In [13]:
from scipy.stats import ttest_1samp

In [15]:
# One-sample t-test of the observed IQ scores against a population mean of 100.
# NOTE(review): no `alternative` is passed, so SciPy's default applies
# (two-sided per the SciPy docs) — confirm this matches the intended hypothesis.
t_stat, pvalue = ttest_1samp(iq_scores, popmean=100)
t_stat, pvalue

(1.5071573172061195, 0.1754994493585011)

In [16]:
alpha = 0.01  # significance level (99% confidence)

# Reject H0 only when the p-value falls below the significance level.
# NOTE(review): failing to reject H0 means the data give no significant evidence
# of an effect; it does not prove the absence of one, so the "NO effect" wording
# below is stronger than the test supports.
outcome = 'Reject H0; Pill has effect' if pvalue < alpha else 'Fail to Reject H0; Pill has NO effect'
print(outcome)

Fail to Reject H0; Pill has NO effect


### **Two-Sample t-Test**

In [18]:
# Load the IQ scores of the two schools and preview the first rows.
# NOTE(review): the recorded preview shows an "Unnamed: 0" column, presumably a
# row index saved into the CSV — confirm, and consider reading with index_col=0.
iq_csv_path = "iq_two_schools.csv"
df_iq = pd.read_csv(iq_csv_path)
df_iq.head()

Unnamed: 0,School,iq
0,school_1,91
1,school_1,95
2,school_1,110
3,school_1,112
4,school_1,115


In [19]:
# Average IQ for each school.
iq_grouped_by_school = df_iq.groupby("School")["iq"]
iq_grouped_by_school.mean()

School
school_1    101.153846
school_2    109.416667
Name: iq, dtype: float64

In [20]:
from scipy.stats import ttest_ind

In [21]:
# Split the IQ scores into one Series per school using boolean row masks.
is_school_1 = df_iq["School"] == "school_1"
is_school_2 = df_iq["School"] == "school_2"

iq_1 = df_iq.loc[is_school_1, "iq"]
iq_2 = df_iq.loc[is_school_2, "iq"]
iq_1, iq_2

(0      91
 1      95
 2     110
 3     112
 4     115
 5      94
 6      82
 7      84
 8      85
 9      89
 10     91
 11     91
 12     92
 13     94
 14     99
 15     99
 16    105
 17    109
 18    109
 19    109
 20    110
 21    112
 22    112
 23    113
 24    114
 25    114
 Name: iq, dtype: int64,
 26    112
 27    115
 28     95
 29     92
 30     91
 31     95
 32     91
 33     99
 34    111
 35    115
 36    108
 37    109
 38    109
 39    114
 40    115
 41    116
 42    117
 43    117
 44    128
 45    129
 46    130
 47    133
 48     95
 49     90
 Name: iq, dtype: int64)

In [23]:
# Independent two-sample t-test comparing the IQ scores of the two schools.
school_comparison = ttest_ind(iq_1, iq_2)
t_score, pvalue = school_comparison
t_score, pvalue

(-2.4056474861512704, 0.02004552710936217)

In [24]:
# Compare the two-sample p-value against the 5% significance level.
print("Rejecting the null hypothesis" if pvalue < 0.05 else "Failed to Reject Null Hypothesis")

Rejecting the null hypothesis
