# Computational Statistics - Project
## Authors: Aurélie Wasem & Marcelo Tavares

In [4]:
import pandas as pd
import numpy as np
from scipy import stats

In [3]:
# Load data/train.csv into the DataFrame `train_df`.
# The path is relative, so it resolves against the kernel's current working directory.
train_df = pd.read_csv('data/train.csv')

In [5]:
# Column names looked up in `train_df` by the per-variable statistics loop.
# Order matters: it fixes the row order of the resulting summary table.
lst = [
    'SalePrice',
    'GrLivArea',
    'LotArea',
    'OverallQual',
    'YearBuilt',
    'TotalBsmtSF',
    'GarageArea',
    '1stFlrSF',
    '2ndFlrSF',
    'FullBath',
    'HalfBath',
    'BedroomAbvGr',
    'TotRmsAbvGrd',
    'WoodDeckSF',
    'ScreenPorch',
    'PoolArea',
]

In [None]:
def _summarise_column(name, values):
    """Return one summary row (as a dict) for a single column.

    Parameters
    ----------
    name : label stored under the 'Variable' key.
    values : pandas Series; missing values are dropped before any statistic
        is computed.

    Returns
    -------
    dict with the sample mean, sample variance, the bounds of a 95%
    Student-t confidence interval for the mean, and the statistic and
    p-value of a one-sample t-test of the mean against the sample median.
    """
    sample = values.dropna()  # discard missing values
    sample_mean = sample.mean()
    dof = len(sample) - 1

    # 95% confidence interval for the mean, built on the standard error.
    lower, upper = stats.t.interval(
        0.95, df=dof, loc=sample_mean, scale=stats.sem(sample)
    )

    # Hypothesis test, H0: mean = median.
    # NOTE(review): the median is computed from the same sample and then
    # passed to ttest_1samp as if it were a fixed reference value.
    t_value, p_val = stats.ttest_1samp(sample, np.median(sample))

    return {
        'Variable': name,
        'Mean': sample_mean,
        'Variance': sample.var(),
        'CI 95% Lower': lower,
        'CI 95% Upper': upper,
        'T-statistic (mean=median)': t_value,
        'P-value': p_val,
    }


# One summary row per variable listed in `lst`.
results = [_summarise_column(col, train_df[col]) for col in lst]

# Collect the rows in a DataFrame and flag p-values below 0.05.
# NOTE(review): each p-value is compared to 0.05 on its own; no adjustment
# for running one test per variable is applied.
results_df = pd.DataFrame(results)
results_df['Significatif ?'] = (
    results_df['P-value'].lt(0.05).map({True: 'Oui', False: 'Non'})
)
results_df

Unnamed: 0,Variable,Mean,Variance,CI 95% Lower,CI 95% Upper,T-statistic (mean=median),P-value,Significatif ?
0,SalePrice,180921.19589,6311111000.0,176842.841041,184999.55074,8.619667,1.7183990000000002e-17,Oui
1,GrLivArea,1515.463699,276129.6,1488.487012,1542.440385,3.742148,0.0001895319,Oui
2,LotArea,10516.828082,99625650.0,10004.41799,11029.238175,3.974893,7.385395e-05,Oui
3,OverallQual,6.099315,1.912679,6.028316,6.170314,2.743914,0.006145517,Oui
4,YearBuilt,1971.267808,912.2154,1969.717276,1972.81834,-2.19141,0.02857975,Oui
5,TotalBsmtSF,1057.429452,192462.4,1034.907554,1079.951351,5.742262,1.134867e-08,Oui
6,GarageArea,472.980137,45712.51,462.003997,483.956277,-1.254549,0.2098436,Non
7,1stFlrSF,1162.626712,149450.1,1142.780384,1182.47304,7.474869,1.326539e-13,Oui
8,2ndFlrSF,346.992466,190557.1,324.582323,369.402609,30.372737,1.9530890000000002e-157,Oui
9,FullBath,1.565068,0.3035082,1.536786,1.593351,-30.165607,9.227600000000001e-156,Oui


: 