# In [5]:
# Q1: Assumptions of ANOVA
"""
The assumptions for ANOVA include:
1. Independence: Observations are independent of each other.
   Example Violation: Participants in different groups influencing each other.
2. Normality: The data within each group should follow a normal distribution.
   Example Violation: Skewed data in one group.
3. Homogeneity of Variance: Variances across groups should be equal.
   Example Violation: One group having significantly higher variance than others.

Violations can lead to incorrect conclusions. Non-parametric tests like Kruskal-Wallis can be alternatives when assumptions are violated.
"""

# Q2: Types of ANOVA
"""
1. One-Way ANOVA: Tests for differences among the means of three or more groups defined by a single factor.
   Example: Comparing the test scores of students in three different schools.
2. Two-Way ANOVA: Tests for main effects and interaction effects of two independent variables.
   Example: Comparing test scores by teaching method and gender.
3. Repeated Measures ANOVA: Tests for differences in means within the same subjects over time or conditions.
   Example: Measuring blood pressure before, during, and after a treatment.
"""

# Q3: Partitioning of Variance in ANOVA
"""
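Partitioning variance involves decomposing the total sum of squares (SST) into:
1. Explained Sum of Squares (SSE): Variation due to differences between group means (between-group).
2. Residual Sum of Squares (SSR): Variation within groups (within-group), so that SST = SSE + SSR.
   Note: many textbooks instead use SSE for the error (residual) sum of squares; the naming here matches the Q4 code below.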

Understanding this helps determine whether differences between groups are significant or due to random variation.
"""

# Q4: Calculate SST, SSE, and SSR in Python
import numpy as np

# Example data: three groups with three observations each.
data = {'Group A': [5, 6, 7], 'Group B': [8, 9, 10], 'Group C': [11, 12, 13]}
all_data = np.concatenate(list(data.values()))
overall_mean = np.mean(all_data)

# SST: total sum of squares -- squared deviation of every observation
# from the grand mean.
sst = np.sum((all_data - overall_mean) ** 2)

# SSE: explained (between-group) sum of squares -- squared deviation of each
# group mean from the grand mean, weighted by the group size.
# NOTE: this file uses "SSE" for the explained part, not the error part.
sse = sum(len(group) * (np.mean(group) - overall_mean) ** 2 for group in data.values())

# SSR: residual (within-group) sum of squares, obtained from SST = SSE + SSR.
ssr = sst - sse

# Actually display the results; a bare string literal here would be a no-op.
print(f"SST: {sst}, SSE: {sse}, SSR: {ssr}")

# Q5: Main Effects and Interaction Effects in Two-Way ANOVA
import statsmodels.api as sm
from statsmodels.formula.api import ols
import pandas as pd

# Example dataset for Two-Way ANOVA.
# The Program pattern repeats every 6 rows and Experience alternates every row,
# so each of the 6 Program x Experience cells receives exactly 5 observations
# (a balanced design). Time values are random integers in [10, 20).
data = pd.DataFrame({
    'Program': ['A', 'A', 'B', 'B', 'C', 'C'] * 5,
    'Experience': ['Novice', 'Experienced'] * 15,
    'Time': np.random.randint(10, 20, 30)
})

# Fit both main effects plus their interaction, then build the ANOVA table
# using Type II sums of squares (typ=2).
model = ols('Time ~ C(Program) + C(Experience) + C(Program):C(Experience)', data=data).fit()
anova_results = sm.stats.anova_lm(model, typ=2)

# Actually display the table; a bare string literal here would be a no-op.
print(f"ANOVA Results:\n{anova_results}")

# Q6: One-Way ANOVA Example
from scipy.stats import f_oneway

# Example data: one sample of four observations per diet.
diet_a = [4, 5, 6, 7]
diet_b = [8, 9, 10, 11]
diet_c = [12, 13, 14, 15]

# One-way ANOVA across the three diets.
f_stat, p_value = f_oneway(diet_a, diet_b, diet_c)

# Actually display the results; a bare string literal here would be a no-op.
print(f"F-Statistic: {f_stat}, p-value: {p_value}")

# Q7: Missing Data in Repeated Measures ANOVA
"""
Handling missing data options:
1. Listwise Deletion: Removes every subject (row) that has any missing measurement.
   Potential Issue: Reduces sample size and statistical power.
2. Imputation: Replaces missing data with estimated values.
   Potential Issue: Introduces bias if not done carefully.
"""

# Q8: Post-Hoc Tests After ANOVA
"""
Examples:
1. Tukey's HSD: For pairwise comparisons.
   Example: Comparing mean differences among multiple groups.
2. Bonferroni: Adjusts the significance level for the number of comparisons to control the family-wise Type I error rate.
   Example: Post-hoc comparisons in clinical trials.

When to use: After finding a significant ANOVA result.
"""

# Q9: One-Way ANOVA for Weight Loss
# Simulated weight-loss samples: 50 draws per group from normal distributions
# with means 5, 6 and 7 and a common standard deviation of 1.
weights = {
    'A': np.random.normal(5, 1, 50),
    'B': np.random.normal(6, 1, 50),
    'C': np.random.normal(7, 1, 50)
}

# One-way ANOVA across the three groups.
f_stat, p_value = f_oneway(weights['A'], weights['B'], weights['C'])

# Actually display the results; a bare string literal here would be a no-op.
print(f"F-Statistic: {f_stat}, p-value: {p_value}")

# Q10: Two-Way ANOVA for Software Programs
"""
Refer to Q5 for example implementation.
"""

# Q11: Two-Sample t-Test for Teaching Methods
# `stats` was referenced without ever being imported, which raised NameError.
from scipy import stats

# Simulated scores: 100 draws per group from normal distributions with
# means 70 and 75 and a common standard deviation of 10.
group_a = np.random.normal(70, 10, 100)
group_b = np.random.normal(75, 10, 100)

# Independent two-sample t-test (ttest_ind assumes equal variances by default).
t_stat, p_value = stats.ttest_ind(group_a, group_b)

# Actually display the results; a bare string literal here would be a no-op.
print(f"t-Statistic: {t_stat}, p-value: {p_value}")

# Q12: Repeated Measures ANOVA for Retail Sales
"""
Example implementation requires specific repeated measures data.
"""


'\nExample implementation requires specific repeated measures data.\n'