<a href="https://colab.research.google.com/github/francji1/01NAEX/blob/main/code/01NAEX_Exercise01_solution_Khol.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 01NAEX - Exercise 01
Data and exercises come from D.C. Montgomery: Design and Analysis of Experiments


## Setup

In [None]:
!pip install rpy2

In [None]:
%load_ext rpy2.ipython

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from scipy.stats import norm, t, f, shapiro


# Example from the Lecture

In [None]:
# Tension bond strength measurements used in the lecture example
y1 = np.array([16.85,16.40,17.21,16.35,16.52,17.04,16.96,17.15,16.59,16.57])  # Modified Mortar
y2 = np.array([16.62,16.75,17.37,17.12,16.98,16.87,17.34,17.02,17.08,17.27])  # Unmodified Mortar

# Unbiased (ddof=1) sample variance of each group
s1_squared = y1.var(ddof=1)
s2_squared = y2.var(ddof=1)

# Numerator / denominator degrees of freedom of the variance ratio
dfn = y1.size - 1
dfd = y2.size - 1

# Variance-ratio statistic
F = s1_squared / s2_squared

# Two-tailed p-value: twice the smaller tail probability of F(dfn, dfd)
lower_tail = f.cdf(F, dfn, dfd)
p_value = 2 * min(lower_tail, 1 - lower_tail)

print('F-statistic:', F)
print('Degrees of freedom:', dfn, 'and', dfd)
print('p-value:', p_value)

In [None]:
# Student's two-sample t-test with a pooled variance estimate
pooled_test = stats.ttest_ind(y1, y2, equal_var=True)
t_statistic = pooled_test.statistic
p_value = pooled_test.pvalue

print(f't-statistic: {t_statistic}')
print(f'p-value: {p_value}')

In [None]:
# Two-sample t-test without the equal-variance assumption (Welch)
welch_test = stats.ttest_ind(y1, y2, equal_var=False)
t_stat = welch_test.statistic
p_value = welch_test.pvalue

print('Welch\'s t-statistic:', t_stat)
print('p-value:', p_value)

In [None]:
# Shared ingredients: group sizes, unbiased variances and the observed mean difference
n1, n2 = len(y1), len(y2)
var_y1 = np.var(y1, ddof=1)
var_y2 = np.var(y2, ddof=1)
mean_diff = np.mean(y1) - np.mean(y2)

# 1. Pooled-variance t-test (var.equal = TRUE in R) and its 95% interval
t_stat_equal_var, p_value_equal_var = stats.ttest_ind(y1, y2, equal_var=True)
pooled_std = np.sqrt(((n1 - 1) * var_y1 + (n2 - 1) * var_y2) / (n1 + n2 - 2))
se_pooled = pooled_std * np.sqrt(1/n1 + 1/n2)
conf_interval_equal_var = stats.t.interval(0.95, df=n1 + n2 - 2, loc=mean_diff, scale=se_pooled)

# 2. Welch's t-test (var.equal = FALSE in R)
t_stat_unequal_var, p_value_unequal_var = stats.ttest_ind(y1, y2, equal_var=False)

# Per-group variance of the sample mean; both the Welch-Satterthwaite
# degrees of freedom and the unpooled standard error are built from these
mean_var_y1 = var_y1/n1
mean_var_y2 = var_y2/n2
df_unequal_var = ((mean_var_y1 + mean_var_y2)**2) / \
                 ((mean_var_y1)**2/(n1-1) + (mean_var_y2)**2/(n2-1))
se_unequal = np.sqrt(mean_var_y1 + mean_var_y2)
conf_interval_unequal_var = stats.t.interval(0.95, df=df_unequal_var, loc=mean_diff, scale=se_unequal)

# Report: pooled-variance test
print("Two-Sample T-Test Assuming Equal Variances")
print(f"t-statistic: {t_stat_equal_var}")
print(f"p-value: {p_value_equal_var}")
print(f"95% confidence interval: {conf_interval_equal_var}")
print(f"Mean of y1: {np.mean(y1)}, Mean of y2: {np.mean(y2)}")
print()

# Report: Welch test
print("Welch's Two-Sample T-Test (Assuming Unequal Variances)")
print(f"t-statistic: {t_stat_unequal_var}")
print(f"p-value: {p_value_unequal_var}")
print(f"Degrees of freedom: {df_unequal_var}")
print(f"95% confidence interval: {conf_interval_unequal_var}")
print(f"Mean of y1: {np.mean(y1)}, Mean of y2: {np.mean(y2)}")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# One filled kernel density curve per mortar type, drawn on the same axes
for sample, group_label in ((y1, 'Modified Mortar'), (y2, 'Unmodified Mortar')):
    sns.kdeplot(sample, fill=True, label=group_label)

plt.title('Kernel Density Estimation of Mortar Data')
plt.xlabel('Tension Bond Strength')
plt.legend()
plt.show()

In [None]:
# Normal QQ-plot for each sample, each in its own figure (y1 first, then y2)
for sample, plot_title in (
    (y1, 'Normal QQ-Plot for Modified Mortar'),
    (y2, 'Normal QQ-Plot for Unmodified Mortar'),
):
    plt.figure()
    stats.probplot(sample, dist="norm", plot=plt)
    plt.title(plot_title)
    plt.show()

In [None]:
# Shapiro-Wilk normality test, modified mortar
shapiro_y1 = shapiro(y1)
statistic_y1, p_value_y1 = shapiro_y1.statistic, shapiro_y1.pvalue
print(f'Shapiro-Wilk Test for y1: Statistic={statistic_y1}, p-value={p_value_y1}')

# Shapiro-Wilk normality test, unmodified mortar
shapiro_y2 = shapiro(y2)
statistic_y2, p_value_y2 = shapiro_y2.statistic, shapiro_y2.pvalue
print(f'Shapiro-Wilk Test for y2: Statistic={statistic_y2}, p-value={p_value_y2}')

In [None]:
from statsmodels.stats.power import TTestIndPower

# Significance level and target power for the sample-size calculation
alpha = 0.05
power = 0.95

# Standardized effect: observed mean difference divided by the square root
# of the average of the two sample variances
observed_difference = np.mean(y1) - np.mean(y2)
average_sd = np.sqrt((s1_squared + s2_squared) / 2)
effect_size = observed_difference / average_sd

# Power calculator for the independent two-sample t-test
analysis = TTestIndPower()

# Per-group sample size needed to reach the target power
sample_size = analysis.solve_power(
    effect_size=effect_size,
    alpha=alpha,
    power=power,
    alternative='two-sided',
)
print(f'Required sample size per group: {np.ceil(sample_size)}')

# Power achieved with 10 observations per group
actual_power = analysis.power(
    effect_size=effect_size,
    nobs1=10,
    alpha=alpha,
    alternative='two-sided',
)
print(f'Power of the test with n=10 per group: {actual_power}')

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.stats.power import TTestIndPower

# Parameters: significance level, target power, and the standard deviation
# used to standardize each difference below
alpha = 0.05
power = 0.80
sd = 0.284  # Standard deviation
# Differences in means to be detected, on the raw measurement scale
# (they are divided by `sd` inside the loop before being passed to solve_power)
effect_sizes = np.array([0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6])

# Calculate the per-group sample size required for each difference
analysis = TTestIndPower()
sample_sizes = []
for delta in effect_sizes:
    # Standardized effect = raw difference / standard deviation
    effect_size = delta / sd
    n = analysis.solve_power(effect_size=effect_size, alpha=alpha, power=power, alternative='two-sided')
    sample_sizes.append(n)

# Plotting
# NOTE(review): the x-axis shows the raw differences in `effect_sizes`,
# not the standardized values `delta / sd` passed to solve_power.
plt.plot(effect_sizes, sample_sizes, marker='o')
plt.xlabel('Effect Size')
plt.ylabel('Sample Size per Group')
plt.title('Sample Size vs. Effect Size')
plt.grid(True)
plt.show()


## Assignment:

* Run the notebook and familiarize yourself with Python.
* Solve problems 2.20, 2.26, and 2.30 from the following slides
  (originally from Montgomery - Design and Analysis of Experiments).


### Exercise 2.20

The shelf life of a carbonated beverage is of interest. Ten bottles are randomly
selected and tested, and the following results are obtained:

| Days ||
|--------------------------------------------||
| 108      |  138  |
| 124      |  163  |
| 124      |  159  |
| 106      |  134  |
| 115      |  139  |

* We would like to demonstrate that the mean shelf life exceeds 120 days.
Set up appropriate hypotheses for investigating this claim.
* Test these hypotheses using significance level $\alpha = 0.01$. Find the P-value
for the test. What are your conclusions?
* Construct a 99 percent confidence interval on the mean shelf life.

In [None]:
# Read the data from the URL
url_20 = "https://raw.githubusercontent.com/francji1/01NAEX/main/data/Ex02_20.csv"
df20 = pd.read_csv(url_20, sep=";")

# Display the first few rows of the dataframe. A bare `df20.head()` in the
# middle of a cell shows nothing, so it is displayed explicitly.
display(df20.head())

# Collect every observation into a single 1-D sample, whatever the column
# layout of the file is, and drop missing entries. Reducing the DataFrame
# directly with np.mean / np.std yields per-column Series or scalars depending
# on the pandas version, and len(df20) counts rows rather than observations.
# NOTE(review): assumes every column of the CSV holds shelf-life values — confirm.
shelf_life = df20.to_numpy(dtype=float).ravel()
shelf_life = shelf_life[~np.isnan(shelf_life)]

# Calculate sample mean and (unbiased, ddof=1) standard deviation
sample_mean = shelf_life.mean()
sample_std = shelf_life.std(ddof=1)
n = shelf_life.size

# Hypothesized mean: H0: mu <= 120 against H1: mu > 120
mu_0 = 120

# One-sample t statistic
t_statistic = (sample_mean - mu_0) / (sample_std / np.sqrt(n))
df = n - 1  # degrees of freedom

# Upper-tail p-value for the one-tailed test; the survival function is
# 1 - cdf computed without the subtraction's loss of precision
p_value = t.sf(t_statistic, df)

# Two-sided 99% confidence interval on the mean
confidence_level = 0.99
t_critical = t.ppf((1 + confidence_level) / 2, df)
margin_error = t_critical * (sample_std / np.sqrt(n))
ci_lower = sample_mean - margin_error
ci_upper = sample_mean + margin_error

(sample_mean, sample_std, t_statistic, p_value, (ci_lower, ci_upper))


In [None]:
# Report the one-sample results computed in the previous cell, one per line
report_lines = [
    f"Sample Mean: {sample_mean}",
    f"Sample Standard Deviation: {sample_std}",
    f"t-Statistic: {t_statistic}",
    f"P-Value: {p_value}",
    f"Confidence Interval: ({ci_lower}, {ci_upper})",
]
print("\n".join(report_lines))

SOLUTION:


### Exercise 2.26

The following are the burning times (in minutes) of chemical flares of two different formulations. The design engineers are interested in both the mean and
variance of the burning times.

|Type1|| Type2 ||
|--------------------------------||||
| 65 | 82 | 64 | 56 |
| 81 | 67 | 71 | 69 |
| 57 | 59 | 83 | 74 |
| 66 | 75 | 59 | 82 |
| 82 | 70 | 65 | 79 |


1. Test the hypothesis that the two variances are equal. Use $\alpha = 0.05$.
2. Using the results of part 1), test the hypothesis that the mean burning
times are equal. Use $\alpha = 0.05$. What is the P-value for this test?
3. Discuss the role of the normality assumption in this problem. Check the
assumption of normality for both types of flares.
4. If the mean burning times of the two flares differ by as much as 2 minutes, find the power of the test. What sample size would be required to detect an actual difference in mean burning time of 1 minute with a power of at least 0.9?

In [None]:
# Read the flare burning-time data from the course repository
# (the file is parsed with ";" as the field separator)
url_26 = "https://raw.githubusercontent.com/francji1/01NAEX/main/data/Ex02_26.csv"
df26 = pd.read_csv(url_26, sep=";")

# Display the first few rows of the dataframe (last expression of the cell)
df26.head()

SOLUTION:

In [None]:
# Raw observations for each flare formulation
flare_columns = ("Type1", "Type2")
for column in flare_columns:
    print(f"{column} Data:", df26[column])

# Descriptive statistics, printed statistic by statistic for both columns
summaries = (
    ("Variance of", lambda values: np.var(values, ddof=1)),
    ("Mean of", np.mean),
    ("Number of observations in", len),
)
for prefix, summarise in summaries:
    for column in flare_columns:
        print(f"{prefix} {column}:", summarise(df26[column]))

In [None]:
from scipy.stats import levene, ttest_ind, shapiro, norm, bartlett
from statsmodels.stats.power import TTestIndPower

# Burning times per formulation as plain arrays, missing entries removed
# (assumes the columns are named 'Type1' and 'Type2')
type1, type2 = (df26[column].dropna().values for column in ('Type1', 'Type2'))

# 1. Test for equality of variances
alpha = 0.05
bartlett_result = bartlett(type1, type2)
stat, p_value = bartlett_result.statistic, bartlett_result.pvalue
print(f'Bartlett’s test for equal variances: Statistic = {stat}, p-value = {p_value}')


In [None]:
# Unbiased (ddof=1) sample variance of each formulation
type1_squared = type1.var(ddof=1)
type2_squared = type2.var(ddof=1)
print(f'type1_squared: {type1_squared}')
print(f'type2_squared: {type2_squared}')

# Numerator / denominator degrees of freedom of the variance ratio
dfn = type1.size - 1
dfd = type2.size - 1

# Variance-ratio statistic
F = type1_squared / type2_squared
print(f'F-statistic: {F}')

# Two-tailed p-value: twice the smaller tail probability of F(dfn, dfd)
lower_tail = f.cdf(F, dfn, dfd)
p_value = 2 * min(lower_tail, 1 - lower_tail)
print(f'p-value: {p_value}')

In [None]:
# 2. Test for equality of means
# Pool the variances only when the variance test whose p-value is currently
# stored in `p_value` did not reject equality at level alpha; otherwise the
# unequal-variance (Welch) form of the test is used
pool_variances = bool(p_value > alpha)
t_stat, p_mean = ttest_ind(type1, type2, equal_var=pool_variances)

print(f'T-test for equal means: t-statistic = {t_stat}, p-value = {p_mean}')


In [None]:
# 3. Shapiro-Wilk normality check for each formulation
shapiro_type1 = shapiro(type1)
shapiro_type2 = shapiro(type2)
stat1, p_norm1 = shapiro_type1.statistic, shapiro_type1.pvalue
stat2, p_norm2 = shapiro_type2.statistic, shapiro_type2.pvalue
print(f'Normality test for Type1: Statistic = {stat1}, p-value = {p_norm1}')
print(f'Normality test for Type2: Statistic = {stat2}, p-value = {p_norm2}')


In [None]:
# Side-by-side normal QQ plots: Type1 in the left panel, Type2 in the right
plt.figure(figsize=(12, 6))

panels = ((type1, 'QQ Plot of Type1'), (type2, 'QQ Plot of Type2'))
for panel_index, (sample, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_index)
    stats.probplot(sample, dist="norm", plot=plt)
    plt.title(panel_title)

plt.tight_layout()
plt.show()

In [None]:
# 4. Power of the two-sided two-sample t-test to detect a 2-minute difference

# Observed mean difference, printed for reference only; the power calculation
# below uses the hypothesised 2-minute difference from the exercise instead
print(f'actual mean difference {np.mean(type1) - np.mean(type2)}')

# Standard deviation from the average of the two sample variances
# (this equals the pooled standard deviation when both groups have the same size)
S_p = np.sqrt((type1_squared + type2_squared) / 2)
print(f'actual S {S_p}')

mean_difference = 2

# Standardized effect size for the hypothesised difference
effect_size = (mean_difference/ S_p)
print(f'effect_size: {effect_size}')

alpha = 0.05

# Create the power calculator in this cell instead of relying on an
# `analysis` object left over from an earlier, unrelated cell
analysis = TTestIndPower()

# Take the per-group sample size from the data rather than hard-coding it.
# NOTE(review): assumes both groups have the same number of observations — confirm.
n_per_group = len(type1)
actual_power = analysis.power(effect_size=effect_size, nobs1=n_per_group, alpha=alpha, alternative='two-sided')
print(f'Power of the test with n={n_per_group} per group: {actual_power}')

In [None]:
# 4. Power analysis to determine sample size
# Standardize a 2-minute difference by the square root of the average variance
average_variance = (np.var(type1, ddof=1) + np.var(type2, ddof=1)) / 2
effect_size = 2 / np.sqrt(average_variance)

power_analysis = TTestIndPower()
sample_size = power_analysis.solve_power(
    effect_size=effect_size,
    alpha=alpha,
    power=0.9,
    alternative='two-sided',
)
print(f'Required sample size to detect a 2 minute difference with 90% power: {sample_size:.2f}')


In [None]:
# Same sample-size calculation, now for a 1 minute difference
target_difference = 1
effect_size = target_difference / S_p
sample_size = power_analysis.solve_power(
    effect_size=effect_size,
    alpha=alpha,
    power=0.9,
    alternative='two-sided',
)
print(f'Required sample size to detect a 1 minute difference with 90% power: {sample_size:.2f}')

Neparametrické testy a Levene mi tak trochu selhávají...

In [None]:
from scipy.stats import fligner

# Fligner-Killeen test for equal variances of the two formulations
fligner_result = fligner(type1, type2)
stat, p_value = fligner_result.statistic, fligner_result.pvalue
print(f'Fligner-Killeen test for equal variances: Statistic = {stat}, p-value = {p_value}')

In [None]:
from scipy.stats import levene
# Levene's test for equal variances; the result object is the cell's
# last expression, so the notebook displays it directly
levene(type1, type2)

### Exercise 2.30

Front housings for cell phones are manufactured in an injection molding process. The time the part is allowed to cool in the mold before removal is thought to influence the occurrence of a particularly troublesome cosmetic defect, flow lines, in the finished housing. After manufacturing, the housings are inspected visually and assigned a score between 1 and 10 based on their appearance, with 10 corresponding to a perfect part and 1 corresponding to a completely defective part. An experiment was conducted using two cool-down times, 10 and 20 seconds, and 20 housings were evaluated at each level of cool-down time. All 40 observations in this experiment were run in random order.


| 10s |||| 20s ||||
|--------------------------------||||||||
| 1 | 3 | 2 | 6 | 7 | 6 | 8 | 9 |
| 1 | 5 | 3 | 3 | 5 | 5 | 9 | 7 |
| 5 | 2 | 1 | 1 | 5 | 4 | 8 | 6 |
| 5 | 6 | 2 | 8 | 6 | 8 | 4 | 5 |
| 3 | 2 | 5 | 3 | 6 | 8 | 7 | 7 |


* Is there evidence to support the claim that the longer cool-down time
results in fewer appearance defects? Use $\alpha = 0.05$.
* What is the P-value for the test conducted in the previous part?
* Find a 95 percent confidence interval on the difference in means. Provide
a practical interpretation of this interval.
* Compute the power of the test.


In [None]:
# Read the cool-down experiment data from the course repository
# (the file is parsed with ";" as the field separator)
url_30 = "https://raw.githubusercontent.com/francji1/01NAEX/main/data/Ex02_30.csv"
df30 = pd.read_csv(url_30, sep=";")

# Display the first few rows of the dataframe (last expression of the cell)
df30.head()

SOLUTION:

In [None]:
from scipy.stats import ttest_ind, t
from statsmodels.stats.power import TTestIndPower

# Appearance scores per cool-down time as NumPy arrays, missing entries removed
# (expects columns named '10 seconds' and '20 seconds' in df30)
cooldown_10s = df30['10 seconds'].dropna().to_numpy()
cooldown_20s = df30['20 seconds'].dropna().to_numpy()


In [None]:
# Welch two-sample t-test (unequal variances assumed) comparing the mean scores.
# The exercise asks whether the longer cool-down gives FEWER defects, i.e. a
# HIGHER appearance score, so the alternative is one-sided:
#   H0: mu_20s <= mu_10s   against   H1: mu_20s > mu_10s
# `alternative='greater'` tests that the mean of the first sample exceeds the
# mean of the second; the default two-sided p-value would answer a different question.
t_stat, p_value = ttest_ind(cooldown_20s, cooldown_10s, equal_var=False, alternative='greater')
print(f'T-statistic: {t_stat}, P-value: {p_value}')


In [None]:
# Observed difference in mean score (20 s minus 10 s)
mean_diff = cooldown_20s.mean() - cooldown_10s.mean()

# Unbiased (ddof=1) sample standard deviation of each group
std_10s = cooldown_10s.std(ddof=1)
std_20s = cooldown_20s.std(ddof=1)

# Unpooled standard error of the difference: sqrt(s1^2/n1 + s2^2/n2)
size_10s, size_20s = len(cooldown_10s), len(cooldown_20s)
se_diff = np.sqrt((std_10s**2 / size_10s) + (std_20s**2 / size_20s))


In [None]:
# 95% Confidence Interval
# Use the Student-t quantile with Welch-Satterthwaite degrees of freedom rather
# than the normal quantile 1.96: the variances are estimated from the samples,
# and for small groups 1.96 makes the interval too narrow.
mean_var_10s = std_10s**2 / len(cooldown_10s)
mean_var_20s = std_20s**2 / len(cooldown_20s)
welch_df = (mean_var_10s + mean_var_20s)**2 / (
    mean_var_10s**2 / (len(cooldown_10s) - 1)
    + mean_var_20s**2 / (len(cooldown_20s) - 1)
)
t_quantile = t.ppf(0.975, welch_df)  # two-sided 95% level

ci_lower = mean_diff - t_quantile * se_diff
ci_upper = mean_diff + t_quantile * se_diff
print(f'95% Confidence Interval for the mean difference: ({ci_lower}, {ci_upper})')


In [None]:
from scipy.stats import t
# Degrees of freedom n1 + n2 - 2, i.e. those of the pooled-variance t-test
df = len(cooldown_20s) + len(cooldown_10s) - 2
t_crit = t.ppf(0.975, df)  # 95% confidence level (two-tailed)

# Confidence interval
# NOTE(review): `mean_diff` and `se_diff` are taken from whichever cell set them
# last. Combining n1 + n2 - 2 degrees of freedom with an unpooled standard error
# is only consistent when both groups have the same size — confirm.
ci_low = mean_diff - t_crit * se_diff
ci_high = mean_diff + t_crit * se_diff
print(f"95% Confidence Interval: ({ci_low}, {ci_high})")


In [None]:
# Compute the power of the test
# Standardized effect: observed mean difference (20 s minus 10 s) divided by
# the square root of the average of the two sample variances
effect_size = (np.mean(cooldown_20s) - np.mean(cooldown_10s)) / np.sqrt(((std_10s**2 + std_20s**2) / 2))
analysis = TTestIndPower()
# The exercise's hypothesis is directional (longer cool-down -> higher score),
# so the power is evaluated for the one-sided alternative 'larger' instead of
# the two-sided default.
# NOTE(review): nobs1 is the size of the 10 s group and the default ratio of 1
# assumes the 20 s group has the same size — confirm.
power = analysis.power(effect_size=effect_size, nobs1=len(cooldown_10s), alpha=0.05, alternative='larger')
print(f'Power of the test: {power}')

In [None]:
import numpy as np
import statsmodels.stats.api as sms
from scipy.stats import ttest_ind

# Weighted-statistics wrappers for each group, 20 s first so that the
# difference is (20 s minus 10 s)
stats_20s = sms.DescrStatsW(cooldown_20s)
stats_10s = sms.DescrStatsW(cooldown_10s)
cm = sms.CompareMeans(stats_20s, stats_10s)

# Confidence interval for the difference in means at the method's default
# level, without assuming equal variances (Welch)
conf_int = cm.tconfint_diff(usevar='unequal')

# Welch t-test on the same two samples
welch_test = ttest_ind(cooldown_20s, cooldown_10s, equal_var=False)
t_stat, p_value = welch_test.statistic, welch_test.pvalue

print(f"T-statistic: {t_stat}, P-value: {p_value}")
print(f"95% Confidence Interval: {conf_int}")


In [None]:
import numpy as np
from scipy.stats import t, ttest_ind

# Group sizes and unbiased (ddof=1) sample variances; group 1 is the
# 20 s sample and group 2 the 10 s sample
n1 = len(cooldown_20s)
n2 = len(cooldown_10s)
s1_squared = np.var(cooldown_20s, ddof=1)
s2_squared = np.var(cooldown_10s, ddof=1)

# Observed difference in means (20 s minus 10 s)
mean_diff = np.mean(cooldown_20s) - np.mean(cooldown_10s)

# Variance of each sample mean; both the degrees of freedom and the
# standard error are functions of these two terms
mean_var_1 = s1_squared/n1
mean_var_2 = s2_squared/n2

# Welch-Satterthwaite degrees of freedom
df = ((mean_var_1 + mean_var_2)**2) / ((mean_var_1)**2 / (n1 - 1) + (mean_var_2)**2 / (n2 - 1))

# Welch t-test (two-sided)
t_stat, p_value = ttest_ind(cooldown_20s, cooldown_10s, equal_var=False)

# Unpooled standard error of the difference
se_diff = np.sqrt(mean_var_1 + mean_var_2)

# Two-tailed 95% critical value and the resulting interval
t_crit = t.ppf(0.975, df)
margin = t_crit * se_diff
ci_low = mean_diff - margin
ci_high = mean_diff + margin

print(f"T-statistic: {t_stat}, P-value: {p_value}")
print(f"95% Confidence Interval: ({ci_low}, {ci_high})")


In [None]:
import numpy as np
from scipy.stats import t, ttest_ind

# Group sizes and unbiased (ddof=1) sample variances; group 1 is the
# 20 s sample and group 2 the 10 s sample
n1 = len(cooldown_20s)
n2 = len(cooldown_10s)
s1_squared = np.var(cooldown_20s, ddof=1)
s2_squared = np.var(cooldown_10s, ddof=1)

# Observed difference in means (20 s minus 10 s)
mean_diff = np.mean(cooldown_20s) - np.mean(cooldown_10s)

# Degrees of freedom of the pooled-variance test
df = n1 + n2 - 2

# Pooled variance: the two sample variances weighted by their own degrees of freedom
s_p_squared = ((n1 - 1) * s1_squared + (n2 - 1) * s2_squared) / (n1 + n2 - 2)

# Student's t-test with equal variances assumed (two-sided)
t_stat, p_value = ttest_ind(cooldown_20s, cooldown_10s, equal_var=True)

# Standard error of the difference based on the pooled variance
se_diff = np.sqrt(s_p_squared * (1/n1 + 1/n2))

# Two-tailed 95% critical value and the resulting interval
t_crit = t.ppf(0.975, df)
margin = t_crit * se_diff
ci_low = mean_diff - margin
ci_high = mean_diff + margin

print(f"T-statistic: {t_stat}, P-value: {p_value}")
print(f"95% Confidence Interval: ({ci_low}, {ci_high})")
