In [1]:
import pandas as pd
from scipy.stats import pointbiserialr
import plotly.express as px

In [2]:
# Read the voice dataset from disk, then replace the textual target column
# with a binary encoding (male -> 1, female -> 0) so it can be correlated
# against the numeric acoustic features.
file_path = "data/voice.csv"
data = pd.read_csv(file_path).assign(
    label=lambda frame: frame['label'].map({'male': 1, 'female': 0})
)


In [3]:
# Point-biserial correlation between the binary label and each numeric
# feature. The label column itself is skipped.
numerical_columns = data.select_dtypes(include=['float64', 'int64']).columns

correlations = {}
for feature in numerical_columns:
    if feature == 'label':
        continue
    r_value, significance = pointbiserialr(data['label'], data[feature])
    correlations[feature] = {'correlation': r_value, 'p_value': significance}

# One row per feature, with 'correlation' and 'p_value' as columns.
correlation_df = pd.DataFrame(correlations).T
print(correlation_df)

          correlation        p_value
meanfreq    -0.337415   3.368951e-85
sd           0.479539  6.654756e-182
median      -0.283919   8.259210e-60
Q25         -0.511455  9.140832e-211
Q75          0.066906   1.642021e-04
IQR          0.618916   0.000000e+00
skew         0.036627   3.926293e-02
kurt         0.087195   8.869557e-07
sp.ent       0.490552  1.614016e-191
sfm          0.357499   3.877715e-96
mode        -0.171775   2.097044e-22
centroid    -0.337415   3.368951e-85
meanfun     -0.833921   0.000000e+00
minfun      -0.136692   1.101400e-14
maxfun      -0.166461   4.044625e-21
meandom     -0.191067   1.992966e-27
mindom      -0.194974   1.636130e-28
maxdom      -0.195657   1.050986e-28
dfrange     -0.192213   9.626061e-28
modindx      0.030801   8.303136e-02


In [4]:
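# best features: interquartile range (IQR), spectral entropy (sp.ent), spectral flatness (sfm)
# NOTE: by absolute correlation in the table above, meanfun (-0.83) and Q25 (-0.51) are also strong candidates.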

In [5]:
# Box plot of the IQR feature, one box per encoded label value.
fig = px.box(
    data, x="label", y="IQR", color="label",
    title="Boxplot de IQR par Label",
    labels={"label": "Label", "IQR": "IQR"},
    template="plotly_white",
)
fig.show()

In [6]:
# Box plot of spectral flatness (column "sfm"), one box per encoded label value.
fig = px.box(
    data,
    x="label",
    y="sfm",
    color="label",
    title="Boxplot de Spectral Flatness par Label",
    # Keys of `labels` must be DataFrame column names; the previous key "SFM"
    # did not match the "sfm" column, so the axis title override was ignored.
    labels={"label": "Label", "sfm": "SFM"},
    template="plotly_white"
)

fig.show()

In [7]:
# Box plot of spectral entropy (column "sp.ent"), one box per encoded label value.
fig = px.box(
    data,
    x="label",
    y="sp.ent",
    color="label",
    title="Boxplot de Spectral Entropy par Label",
    # Keys of `labels` must be DataFrame column names; the previous key
    # "Spectral entropy" did not match the "sp.ent" column, so the readable
    # axis title was never applied.
    labels={"label": "Label", "sp.ent": "Spectral entropy"},
    template="plotly_white"
)

fig.show()