In [2]:
import pandas as pd
import scipy.stats as stats
import numpy as np

encoded_file_path = 'StudentPerformanceFactors_Encoded.csv'
target = 'Exam_Score'


def run_hypothesis_test(data, feature, target, alpha=0.05):
    """Test whether ``feature`` is associated with ``target`` in ``data``.

    Rows where either column is missing are dropped first, so the choice of
    test and the test itself are based on the same observations.

    * exactly two distinct feature values -> Welch's two-sample t-test on
      the target values of the two groups;
    * more than two distinct values and a numeric dtype -> Pearson
      correlation between feature and target;
    * anything else (constant column, non-numeric column with several
      levels) -> a 'Not applicable' record.

    Args:
        data: DataFrame containing both columns.
        feature: Name of the predictor column.
        target: Name of the outcome column.
        alpha: Significance threshold compared against the p-value.

    Returns:
        A dict with the keys 'Test', 'p-value' and 'Significant', plus
        'Correlation coefficient' or 't-statistic' for the respective test.
    """
    # Pairwise-complete observations only: a NaN in the feature must not be
    # picked up as one of the two "groups", and a NaN in the target would
    # otherwise turn the whole statistic into NaN.
    complete = data[[feature, target]].dropna()
    values = complete[feature]
    outcome = complete[target]
    levels = values.unique()

    if len(levels) == 2:
        first_group = outcome[values == levels[0]]
        second_group = outcome[values == levels[1]]
        # equal_var=False selects Welch's t-test (no equal-variance
        # assumption); the 'Test' label is kept as 't-test' so existing
        # consumers of the results table see the same value.
        t_stat, p_value = stats.ttest_ind(
            first_group, second_group, equal_var=False
        )
        return {
            'Test': 't-test',
            't-statistic': t_stat,
            'p-value': p_value,
            'Significant': p_value < alpha,
        }

    # Accept every numeric dtype (int32, float32, ...), not only
    # float64/int64.
    # NOTE(review): for columns that are integer encodings of categories,
    # Pearson assumes the codes are meaningfully ordered and spaced --
    # confirm against how the CSV was encoded.
    if len(levels) > 2 and pd.api.types.is_numeric_dtype(values):
        corr, p_value = stats.pearsonr(values, outcome)
        return {
            'Test': 'Pearson correlation',
            'Correlation coefficient': corr,
            'p-value': p_value,
            'Significant': p_value < alpha,
        }

    # Constant columns and multi-level non-numeric columns are reported
    # explicitly instead of being silently left out of the results.
    return {
        'Test': 'Not applicable',
        'p-value': None,
        'Significant': None,
    }


# The guard only matters if this code is imported as a module; executed as a
# notebook cell or script, __name__ is '__main__' and the names below are
# defined at module level.
if __name__ == '__main__':
    encoded_data = pd.read_csv(encoded_file_path)
    features = [col for col in encoded_data.columns if col != target]

    # One result record per feature, keyed by column name.
    hypothesis_test_results = {
        feature: run_hypothesis_test(encoded_data, feature, target)
        for feature in features
    }

    # Transpose so that each feature is a row and each statistic a column.
    results_df = pd.DataFrame(hypothesis_test_results).T
    print(results_df)

                                           Test Correlation coefficient  \
Hours_Studied               Pearson correlation                0.511714   
Attendance                  Pearson correlation                0.680785   
Parental_Involvement        Pearson correlation               -0.100786   
Access_to_Resources         Pearson correlation               -0.108123   
Extracurricular_Activities               t-test                     NaN   
Sleep_Hours                 Pearson correlation                -0.01648   
Previous_Scores             Pearson correlation                0.205875   
Motivation_Level            Pearson correlation               -0.012155   
Internet_Access                  Not applicable                     NaN   
Tutoring_Sessions           Pearson correlation                0.166892   
Family_Income               Pearson correlation               -0.021116   
Teacher_Quality             Pearson correlation               -0.063931   
School_Type              