In [1]:
import numpy as np
import pandas as pd
from scipy import stats


# Seed the legacy global NumPy RNG so every draw below is reproducible.
np.random.seed(42)

# Synthetic "teacher ratings" sample. All draws share the one global RNG
# stream, so the order of the calls below (gender, age, tenure, beauty,
# noise) determines the generated values and must not be rearranged.
n = 200
gender = np.random.choice(['Male', 'Female'], size=n)
age = np.random.randint(25, 65, size=n)  # upper bound is exclusive
tenure = np.random.choice(['Tenured', 'Non-Tenured'], size=n)

# Beauty score: normal with mean 5 and std dev 1.5, clamped to [1, 10].
beauty_score = np.random.normal(loc=5, scale=1.5, size=n).clip(1, 10)

# Teaching evaluation = 0.3 * beauty + 0.2 bump for 'Female' + noise + 4,
# then clamped to the same [1, 10] range.
female_bonus = np.where(gender == 'Female', 0.2, 0)
noise = np.random.normal(0, 0.5, n)
teaching_evaluation = np.clip(
    0.3 * beauty_score + female_bonus + noise + 4,
    1,
    10,
)

# Assemble the columns into a single frame (column order as listed).
df = pd.DataFrame(dict(
    gender=gender,
    age=age,
    tenure=tenure,
    beauty_score=beauty_score,
    teaching_evaluation=teaching_evaluation,
))

preview = df.head()
print("First 5 rows of the dataset:\n", preview)

First 5 rows of the dataset:
    gender  age       tenure  beauty_score  teaching_evaluation
0    Male   56      Tenured      3.282577             5.046026
1  Female   63      Tenured      5.886974             6.952823
2    Male   56  Non-Tenured      3.675382             4.913086
3    Male   28  Non-Tenured      3.950933             4.655490
4    Male   54      Tenured      4.372237             5.402424


Q1. T-Test: Using the teachers' rating data set, does gender affect teaching evaluation rates?

In [2]:
# Q1: independent two-sample t-test of teaching evaluations, split by gender.
is_male = df['gender'] == 'Male'
is_female = df['gender'] == 'Female'
male_eval = df.loc[is_male, 'teaching_evaluation']
female_eval = df.loc[is_female, 'teaching_evaluation']

# ttest_ind is called with SciPy's default settings; it yields the test
# statistic and the p-value.
t_stat, p_val = stats.ttest_ind(male_eval, female_eval)

print("\nQ1: T-Test — Gender vs Teaching Evaluation")
print(f"T-statistic = {t_stat:.3f}, P-value = {p_val:.3f}")

# Decide at the 5% significance level.
q1_verdict = (
    "✅ Significant difference: Gender affects evaluation rates."
    if p_val < 0.05
    else "❌ No significant difference: Gender does not affect evaluation rates."
)
print(q1_verdict)


Q1: T-Test — Gender vs Teaching Evaluation
T-statistic = -1.776, P-value = 0.077
❌ No significant difference: Gender does not affect evaluation rates.


Q2. ANOVA: Using the teachers' rating data set, does beauty score for instructors differ by age?

In [3]:
# Q2: one-way ANOVA of beauty score across three age bands.
# pd.cut assigns NaN to any age that falls outside the bin edges below.
df['age_group'] = pd.cut(df['age'], bins=[20, 35, 50, 65],
                         labels=['Young', 'Middle-aged', 'Old'])

# Build one beauty-score sample per age band that actually occurs in the data.
# Iterating df['age_group'].unique() would also yield NaN for out-of-range
# ages; comparing against NaN selects no rows, and the resulting empty sample
# makes f_oneway return NaN. groupby(observed=True) skips NaN keys and
# unobserved categories, so every sample passed to the test is non-empty.
groups = [
    scores
    for _label, scores in df.groupby('age_group', observed=True)['beauty_score']
]
f_stat, p_val_anova = stats.f_oneway(*groups)

print("\nQ2: ANOVA — Beauty Score vs Age Group")
print(f"F-statistic = {f_stat:.3f}, P-value = {p_val_anova:.3f}")
# Decide at the 5% significance level.
if p_val_anova < 0.05:
    print("✅ Significant difference: Beauty scores differ by age group.")
else:
    print("❌ No significant difference: Beauty scores do not differ by age group.")


Q2: ANOVA — Beauty Score vs Age Group
F-statistic = 0.483, P-value = 0.618
❌ No significant difference: Beauty scores do not differ by age group.


Q3: Chi-square (Is there an association between tenure and gender?)

In [4]:
# Q3: chi-square test of independence on the gender x tenure count table.
contingency = pd.crosstab(index=df['gender'], columns=df['tenure'])

# chi2_contingency yields the statistic, p-value, degrees of freedom and the
# expected counts; only the first two are reported below.
chi2, p_val_chi, dof, expected = stats.chi2_contingency(contingency)

print("\nQ3: Chi-Square — Tenure vs Gender")
print(f"Chi2 = {chi2:.3f}, P-value = {p_val_chi:.3f}")

# Decide at the 5% significance level.
is_associated = p_val_chi < 0.05
print(
    "✅ Significant association: Tenure and Gender are related."
    if is_associated
    else "❌ No significant association between tenure and gender."
)


Q3: Chi-Square — Tenure vs Gender
Chi2 = 1.290, P-value = 0.256
❌ No significant association between tenure and gender.


Q4: Correlation (Is teaching evaluation correlated with beauty score?)

In [5]:
# Q4: Pearson correlation between teaching evaluation and beauty score.
corr, p_val_corr = stats.pearsonr(df['teaching_evaluation'], df['beauty_score'])
print("\nQ4: Correlation — Teaching Evaluation vs Beauty Score")
print(f"Correlation Coefficient = {corr:.3f}, P-value = {p_val_corr:.3f}")
# Decide at the 5% significance level. The p-value only says the correlation
# is non-zero, so the reported direction is taken from the sign of the
# coefficient instead of being assumed positive.
if p_val_corr < 0.05:
    direction = "positive" if corr > 0 else "negative"
    print(f"✅ Significant {direction} correlation.")
else:
    print("❌ No significant correlation.")


Q4: Correlation — Teaching Evaluation vs Beauty Score
Correlation Coefficient = 0.659, P-value = 0.000
✅ Significant positive correlation.
