In [1]:
# Shared imports for the four hypothesis tests in this notebook.
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind
from scipy.stats import f_oneway
import scipy.stats as stats
from scipy.stats import pearsonr
import warnings
# NOTE(review): this silences every warning category for the whole session,
# which also hides any pandas/scipy warnings raised by the analysis cells.
warnings.filterwarnings('ignore')

QUES-1:T-Test: Using the teachers' rating data set, does gender affect teaching evaluation rates?
ANS- Objective:
The objective of this analysis is to determine whether gender has a significant effect on teaching evaluation ratings.

Hypotheses
Null Hypothesis (H₀):
There is no significant difference in the mean teaching evaluation ratings between male and female teachers.
Alternative Hypothesis (H₁):
There is a significant difference in the mean teaching evaluation ratings between male and female teachers.

Test Used:
Independent Samples T-Test (Welch's Two-Sample T-Test, which does not assume equal variances in the two groups)

In [5]:
import pandas as pd
from scipy.stats import ttest_ind

# Teaching-ratings data set analysed in the cells below.
# NOTE(review): the path is specific to one machine.
ratings_csv = r"C:\Users\lavan\Downloads\TeachingRatings.csv"
df = pd.read_csv(ratings_csv)

In [7]:
# Preview the first five rows to confirm the file loaded with the expected columns.
df.head(n=5)

Unnamed: 0,rownames,minority,age,gender,credits,beauty,eval,division,native,tenure,students,allstudents,prof
0,1,yes,36,female,more,0.289916,4.3,upper,yes,yes,24,43,1
1,2,no,59,male,more,-0.737732,4.5,upper,yes,yes,17,20,2
2,3,no,51,male,more,-0.571984,3.7,upper,yes,yes,55,55,3
3,4,no,40,female,more,-0.677963,4.3,upper,yes,yes,40,46,4
4,5,no,31,female,more,1.509794,4.4,upper,yes,yes,42,48,5


In [9]:
# Normalise the headers and the gender labels so the filters below match even
# if the CSV carries stray whitespace or mixed capitalisation.
df.columns = df.columns.str.strip().str.lower()
df['gender'] = df['gender'].str.lower().str.strip()

# Teaching-evaluation scores ('eval') for each gender. Missing scores are
# dropped first: ttest_ind propagates NaN by default, and a NaN p-value would
# otherwise fall through to the "fail to reject" branch below.
male_eval = df.loc[df['gender'] == 'male', 'eval'].dropna()
female_eval = df.loc[df['gender'] == 'female', 'eval'].dropna()

# Welch's two-sample t-test (equal_var=False: the group variances may differ).
# Sign convention: a positive statistic means the male mean is the larger one.
t_stat, p_value = ttest_ind(male_eval, female_eval, equal_var=False)

print("T-statistic:", t_stat)
print("P-value:", p_value)

# Interpret the result at the 5% significance level
alpha = 0.05
if pd.isna(p_value):
    # The statistic is undefined (e.g. a group has too few valid scores),
    # so neither decision about H₀ is justified.
    print("Test inconclusive: not enough valid evaluation scores in one or both gender groups.")
elif p_value < alpha:
    print("Reject H₀: Gender significantly affects teaching evaluation scores.")
else:
    print("Fail to reject H₀: Gender does not significantly affect teaching evaluation scores.")

T-statistic: 3.2667110010009375
P-value: 0.0011761043559350435
Reject H₀: Gender significantly affects teaching evaluation scores.


Conclusion:
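Since the p-value (≈ 0.0012) is below the 0.05 significance level, we reject the null hypothesis: gender has a statistically significant effect on teaching evaluation ratings. The positive t-statistic (male group minus female group) indicates that male instructors received higher mean evaluation scores than female instructors.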

Ques-2: ANOVA: Using the teachers' rating data set, does beauty score for instructors differ by age?
ANS- Objective:
To determine whether there is a statistically significant difference in the beauty scores of instructors across different age groups using One-Way ANOVA.

Hypotheses:
Null Hypothesis (H₀):
There is no significant difference in beauty scores among different age groups of instructors.

Alternative Hypothesis (H₁):
There is a significant difference in beauty scores among different age groups of instructors.

Methodology:
A One-Way ANOVA (Analysis of Variance) test is used to compare the mean beauty scores between multiple age groups.

In [13]:
import pandas as pd
from scipy.stats import f_oneway

# Load the data set and normalise the headers (strip spaces, lowercase)
df = pd.read_csv(r"C:\Users\lavan\Downloads\TeachingRatings.csv")
df.columns = df.columns.str.strip().str.lower()

# Rows without an age or a beauty score cannot take part in the comparison
df = df.dropna(subset=['age', 'beauty'])

# Age groups as left-closed intervals: [0, 35), [35, 45), [45, 55), [55, inf).
# The edges line up with the labels for any non-negative age: a fractional age
# such as 34.5 stays in '<35', and there is no upper cut-off that would turn
# very old ages into NaN. For whole-number ages up to 100 the grouping is the
# same as with right-closed edges 0/34/44/54/100.
bins = [0, 35, 45, 55, float('inf')]
labels = ['<35', '35-44', '45-54', '55+']

# Create new 'age_group' column
df['age_group'] = pd.cut(df['age'], bins=bins, labels=labels, right=False)

# Check group sizes
print("Number of teachers in each age group:")
print(df['age_group'].value_counts().sort_index())

# One series of beauty scores per age group (missing scores were already
# removed above); empty groups are skipped so they do not enter the F-test.
groups = [df.loc[df['age_group'] == label, 'beauty'] for label in labels]
groups = [scores for scores in groups if len(scores) > 0]

# Run one-way ANOVA (F-test) across the remaining groups
f_stat, p_val = f_oneway(*groups)

# Display results
print("\nF-statistic:", f_stat)
print("P-value:", p_val)

# Interpretation at the 5% significance level
alpha = 0.05
if p_val < alpha:
    print("Reject H₀: Beauty scores significantly differ by age group.")
else:
    print("Fail to reject H₀: Beauty scores do not significantly differ by age group.")


Number of teachers in each age group:
age_group
<35       52
35-44    113
45-54    168
55+      130
Name: count, dtype: int64

F-statistic: 13.000225994361115
P-value: 3.655185418182397e-08
Reject H₀: Beauty scores significantly differ by age group.


Conclusion:
Based on the One-Way ANOVA analysis (F ≈ 13.00, p ≈ 3.66e-08, which is below the 0.05 significance level), we reject the null hypothesis.
Therefore, the mean beauty score of instructors differs significantly across age groups.

Q3. Chi-square: Using the teachers' rating data set, is there an association between tenure and gender?
ANS- Objective:
To determine whether there is a significant association between an instructor’s tenure status (whether they are tenured or not) and their gender using the Chi-square test of independence.

Hypotheses:
Null Hypothesis (H₀):
There is no association between gender and tenure status among instructors.

Alternative Hypothesis (H₁):
There is an association between gender and tenure status among instructors.

Methodology:
The Chi-square test of independence checks whether two categorical variables are related.

In [17]:
import pandas as pd
from scipy.stats import chi2_contingency

# Read the ratings file and normalise the headers (trimmed, lowercase)
df = pd.read_csv(r"C:\Users\lavan\Downloads\TeachingRatings.csv")
df.columns = df.columns.str.strip().str.lower()

# Normalise both categorical columns, then keep only rows where both are present
df = df.assign(
    gender=df['gender'].str.lower().str.strip(),
    tenure=df['tenure'].str.lower().str.strip(),
).dropna(subset=['gender', 'tenure'])

# Cross-tabulate gender (rows) against tenure status (columns)
contingency_table = pd.crosstab(df['gender'], df['tenure'])
print("Contingency Table:\n", contingency_table)

# Chi-square test of independence on the observed counts.
# NOTE(review): scipy applies Yates' continuity correction by default when the
# table has one degree of freedom - confirm that this is intended.
chi2, p_val, dof, expected = chi2_contingency(contingency_table)

# Report the test results
print("\nChi-square Statistic:", chi2)
print("Degrees of Freedom:", dof)
print("P-value:", p_val)

# Decision at the 5% significance level
alpha = 0.05
print(
    "\nReject H₀: There is a significant association between gender and tenure."
    if p_val < alpha
    else "\nFail to reject H₀: There is no significant association between gender and tenure."
)

Contingency Table:
 tenure  no  yes
gender         
female  50  145
male    52  216

Chi-square Statistic: 2.20678166999886
Degrees of Freedom: 1
P-value: 0.1374050603563787

Fail to reject H₀: There is no significant association between gender and tenure.


Conclusion:
Based on the Chi-square test of independence, we conclude that there is no significant association between gender and tenure status of instructors.

QUES-4: Correlation: Using the teachers rating dataset, Is teaching evaluation score correlated with beauty score?
ANS- Objective:
To determine whether there is a significant relationship between an instructor’s teaching evaluation score and their beauty score,
using a correlation analysis.

Hypotheses:
Null Hypothesis (H₀):
There is no correlation between beauty score and teaching evaluation score.

Alternative Hypothesis (H₁):
There is a correlation between beauty score and teaching evaluation score.

Methodology:
We will use the Pearson correlation coefficient (r) to measure the strength and direction of the linear relationship between the two continuous variables.

In [21]:
import pandas as pd
from scipy.stats import pearsonr

# Load the data set and normalise the headers, as the other analysis cells do,
# so the column lookups below do not depend on the CSV's exact header casing.
df = pd.read_csv(r"C:\Users\lavan\Downloads\TeachingRatings.csv")
df.columns = df.columns.str.strip().str.lower()

# Pearson's r needs complete (beauty, eval) pairs; drop rows missing either
# value so NaNs cannot contaminate the coefficient.
df = df.dropna(subset=['beauty', 'eval'])

x = df['beauty']
y = df['eval']
corr_coeff, p_val = pearsonr(x, y)

print("Correlation Coefficient (r):", corr_coeff)
print("P-value:", p_val)

# Decision at the 5% significance level
alpha = 0.05
if pd.isna(p_val):
    # r is undefined (e.g. one of the variables is constant), so neither
    # decision about H₀ is justified.
    print("Test inconclusive: the correlation could not be computed from the available data.")
elif p_val < alpha:
    print("Reject H₀: There is a significant correlation between beauty and teaching evaluation.")
else:
    print("Fail to reject H₀: There is no significant correlation between beauty and teaching evaluation.")


Correlation Coefficient (r): 0.18903909062278274
P-value: 4.247115507786092e-05
Reject H₀: There is a significant correlation between beauty and teaching evaluation.


Conclusion:
Based on the Pearson correlation analysis (r ≈ 0.19, p ≈ 4.25e-05, which is below the 0.05 significance level), we reject the null hypothesis.
There is a statistically significant but weak positive correlation between an instructor’s beauty score and their teaching evaluation score.