In [2]:
import pandas as pd

df = pd.read_csv("https://raw.githubusercontent.com/brobers0339/Musical_Impact_on_Mental_Health_Senior_Thesis/refs/heads/main/Music%26MentalHealthDataset.csv")

df_cleaned = df.drop(['Timestamp', 'Age', 'Primary streaming service', 'Exploratory', 'Permissions', 'BPM', 'Foreign languages'], axis=1)

df_cleaned['obs_count'] = range(1, len(df_cleaned) + 1)


In [26]:
print(df_cleaned.columns)

Index(['Hours per day', 'While working', 'Instrumentalist', 'Composer',
       'Fav genre', 'Frequency [Classical]', 'Frequency [Country]',
       'Frequency [EDM]', 'Frequency [Folk]', 'Frequency [Gospel]',
       'Frequency [Hip hop]', 'Frequency [Jazz]', 'Frequency [K pop]',
       'Frequency [Latin]', 'Frequency [Lofi]', 'Frequency [Metal]',
       'Frequency [Pop]', 'Frequency [R&B]', 'Frequency [Rap]',
       'Frequency [Rock]', 'Frequency [Video game music]', 'Anxiety',
       'Depression', 'Insomnia', 'OCD', 'Music effects', 'obs_count',
       'Effects', 'Effects_remapped'],
      dtype='object')


In [3]:
def convert_likert_to_numeric(df, freq_cols, mapping=None):
    if mapping is None:
        mapping = {
            "Never" : 0,
            "Rarely" : 1,
            "Sometimes" : 2,
            "Often" : 3,
            "Always" : 4,

        }
    df_converted = df.copy()
    for col in freq_cols:
        df_converted[col] = df_converted[col].map(mapping).fillna(0)
    
    return df_converted

In [14]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

#Ordinal Logit Regressional Model
df_cleaned['Effects_remapped'] = df_cleaned['Music effects'].map({
    'Improve' : 'Improved',
    'No effect' : 'No Effect',
    'Worsen' : 'Worsened',
    })
df_cleaned['Effects_remapped'] = df_cleaned['Effects_remapped'].fillna('Unknown')
df_cleaned['Effects_remapped_cat'] = df_cleaned['Effects_remapped'].map({
    'Improved' : 1,
    'No Effect' : 2,
    'Worsened' : 3,
    'Unknown' : 0
})

freq_cols = ['Frequency [Classical]', 
             'Frequency [Country]', 
             'Frequency [EDM]', 
             'Frequency [Folk]', 
             'Frequency [Gospel]', 
             'Frequency [Hip hop]', 
             'Frequency [Jazz]', 
             'Frequency [K pop]', 
             'Frequency [Latin]', 
             'Frequency [Lofi]', 
             'Frequency [Metal]', 
             'Frequency [Pop]', 
             'Frequency [R&B]', 
             'Frequency [Rap]', 
             'Frequency [Rock]', 
             'Frequency [Video game music]']

df_explanatory_cols = convert_likert_to_numeric(df_cleaned, freq_cols)
df_explanatory_cols = df_explanatory_cols.drop(['While working', 'Instrumentalist', 'Composer', 'Fav genre', 'Music effects', 'Effects_remapped', 'obs_count', 'Effects_remapped_cat'], axis=1)

model = OrderedModel(
    df_cleaned['Effects_remapped_cat'],
    df_explanatory_cols,
    distr='logit'
)

res = model.fit(method='bfgs')
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.676787
         Iterations: 39
         Function evaluations: 41
         Gradient evaluations: 41
                              OrderedModel Results                              
Dep. Variable:     Effects_remapped_cat   Log-Likelihood:                -498.12
Model:                     OrderedModel   AIC:                             1044.
Method:              Maximum Likelihood   BIC:                             1155.
Date:                  Thu, 02 Oct 2025                                         
Time:                          19:41:17                                         
No. Observations:                   736                                         
Df Residuals:                       712                                         
Df Model:                            21                                         
                                   coef    std err          z      P>|z|      [0.025      0.975]
--

In [15]:
#Spearman's
from scipy.stats import spearmanr

for col in df_explanatory_cols:
    corr, pval = spearmanr(df_cleaned['Effects_remapped_cat'], df_explanatory_cols[col])
    print(col, corr, pval)

Hours per day -0.07391006661895753 0.04502039465651582
Frequency [Classical] 0.015536134688809678 0.673905682670318
Frequency [Country] -0.04875197720908459 0.1864528221090422
Frequency [EDM] -0.0399276532450674 0.27934124899912105
Frequency [Folk] -0.011766926987839486 0.74995616252175
Frequency [Gospel] -0.07253474054133079 0.04917570779823887
Frequency [Hip hop] -0.08620206816973518 0.019336280033101227
Frequency [Jazz] -0.037382173262331245 0.31116373107712125
Frequency [K pop] 0.0016418661291255697 0.964532130381327
Frequency [Latin] -0.05309459161289488 0.1501542139267174
Frequency [Lofi] -0.06176678749855759 0.0940438058341394
Frequency [Metal] 0.003067292385951862 0.9337940400103282
Frequency [Pop] 0.020614355263660726 0.5765970721584182
Frequency [R&B] -0.07348633602538635 0.04626792002794286
Frequency [Rap] -0.017046370406820905 0.6442936401979953
Frequency [Rock] -0.012648220446445636 0.7319234102544904
Frequency [Video game music] -0.0631108362414013 0.08709038614949821
Anx