# Terminological Preferences Project

Import libraries

In [2]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.stats import mannwhitneyu, norm
from statsmodels.miscmodels.ordinal_model import OrderedModel
from sklearn.preprocessing import minmax_scale

Import data

In [18]:
# Load the survey responses; every 999 in the sheet is replaced with NaN.
# np.nan is used because the np.NAN alias no longer exists in NumPy >= 2.0.
term_df = pd.read_excel('word_data.xls', engine='xlrd').replace(999, np.nan)
# Keep only rows with a recorded Age. The explicit copy gives later cells an
# independent frame to recode in place (no SettingWithCopyWarning on a slice).
term_df = term_df[~term_df.Age.isna()].copy()

Dataframe preview

In [7]:
# Preview the loaded responses (the bare expression is rendered as a table).
term_df

Unnamed: 0,date,Q1a,Q1b,Q1c,Q1d,Q1e,Q1f,Q1g,Q1h,Q2a,...,Q4g,Q4h,comments,Age,Gender,Hispanic,Race,Counseling,White,Threegroup
0,,4,3,4,4,5,4,3,1,4,...,5,4,,61.0,1,2,3,1,0,2.0
1,44244.0,1,6,6,6,6,6,6,6,6,...,4,4,,40.0,1,1,6,0,0,3.0
2,44403.0,1,7,1,1,1,1,4,4,4,...,7,7,,51.0,2,1,6,1,0,3.0
3,44243.0,1,6,1,1,5,3,3,3,1,...,2,1,,59.0,1,1,6,0,0,3.0
4,44662.0,1,1,1,1,1,6,7,5,4,...,4,6,,29.0,1,2,4,1,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,44251.0,7,6,7,7,3,5,7,7,6,...,3,7,,35.0,2,2,4,1,1,1.0
346,44544.0,7,7,7,7,7,7,7,7,7,...,5,7,,43.0,2,2,5,1,0,
347,44735.0,7,6,7,2,7,1,1,1,7,...,7,7,,44.0,1,2,3,1,0,2.0
348,44699.0,7,4,4,7,7,7,7,4,7,...,4,6,,44.0,1,1,4,1,1,1.0


Helper functions

In [8]:
# Rows of DataFrame.describe() that the summary tables do not report.
drop_rows = ['25%', '50%', '75%', 'min', 'max']


def trichotomize(df, df_stat):
    """Append agree / neutral / disagree tallies to a summary table.

    For every column of ``df`` the ratings are split into disagreement
    (value < 4), neutral (3 < value < 5) and agreement (value > 4). Each
    group's count and its percentage of *all* rows in the column (missing
    values are included in the denominator) are collected, rounded to two
    decimals and concatenated below ``df_stat``.

    Parameters
    ----------
    df : pandas.DataFrame
        One column of numeric ratings per term.
    df_stat : pandas.DataFrame
        Existing summary table with the same columns as ``df``.

    Returns
    -------
    pandas.DataFrame
        ``df_stat`` followed by the six tally rows.
    """
    row_labels = ['agreement N', 'agreement %', 'neutral N', 'neutral %',
                  'disagreement N', 'disagreement %']
    tallies = {label: [] for label in row_labels}

    for name in df.columns:
        ratings = df[name]
        n_rows = ratings.size
        # NaN compares False everywhere, so missing answers join no group.
        n_disagree = ratings[ratings < 4].size
        n_neutral = ratings[(ratings > 3) & (ratings < 5)].size
        n_agree = ratings[ratings > 4].size

        tallies['disagreement N'].append(n_disagree)
        tallies['disagreement %'].append(n_disagree * 100 / n_rows)
        tallies['neutral N'].append(n_neutral)
        tallies['neutral %'].append(n_neutral * 100 / n_rows)
        tallies['agreement N'].append(n_agree)
        tallies['agreement %'].append(n_agree * 100 / n_rows)

    tally_df = pd.DataFrame(
        tuple(tallies[label] for label in row_labels),
        columns=df.columns,
        index=row_labels,
    ).round(2)
    return pd.concat([df_stat, tally_df])

***

# Demographics

### Age

In [14]:
# Descriptive statistics for Age, with the count/mean rows relabelled.
age_df = (
    term_df.Age
    .describe()
    .to_frame()
    .rename(index={'count': 'responses N', 'mean': 'mean age'})
)
display(age_df)

Unnamed: 0,Age
responses N,348.0
mean age,42.795977
std,11.215535
min,21.0
25%,34.0
50%,41.0
75%,51.0
max,76.0


### Gender

In [19]:
# Recode Gender from the survey coding (1 -> 0 reported as male, 2 -> 1
# reported as female). The raw codes are preserved once in 'Gender_raw' and the
# recode is always derived from them, so re-running this cell is idempotent.
# (Recoding 'Gender' from itself would turn every 1 into 0 on a second run.)
if 'Gender_raw' not in term_df.columns:
    term_df['Gender_raw'] = term_df['Gender']
raw_gender = term_df['Gender_raw']
term_df.loc[raw_gender == 1, 'Gender'] = 0
term_df.loc[raw_gender == 2, 'Gender'] = 1

# Count and percentage (of all rows) for each recoded gender.
n_rows = len(term_df.Gender)
gen_df = pd.DataFrame(index=['N', '%'])
for label, code in (('male', 0), ('female', 1)):
    n_code = np.sum(term_df.Gender == code)
    gen_df[label] = [n_code, n_code * 100 / n_rows]
gen_df['TOTAL'] = gen_df['male'] + gen_df['female']
display(gen_df.round(2))

Unnamed: 0,male,female,TOTAL
N,213.0,135.0,348.0
%,61.21,38.79,100.0


### Race

White

In [21]:
# Count and percentage (of all rows) of respondents flagged White == 1 vs == 0.
n_rows = len(term_df.White)
white_counts = {
    'white': np.sum(term_df.White == 1),
    'non white': np.sum(term_df.White == 0),
}
white_df = pd.DataFrame(
    {label: [n, n * 100 / n_rows] for label, n in white_counts.items()},
    index=['N', '%'],
)
white_df['TOTAL'] = white_df['white'] + white_df['non white']
display(white_df.round(2))

Unnamed: 0,white,non white,TOTAL
N,241.0,107.0,348.0
%,69.25,30.75,100.0


Hispanic

In [22]:
# Recode Hispanic so that code 2 becomes 0 (code 1 is left as is), then
# tabulate the count and percentage (of all rows) for each value.
term_df.loc[term_df['Hispanic'].eq(2), 'Hispanic'] = 0

n_rows = len(term_df.Hispanic)
hispanic_df = pd.DataFrame(index=['N', '%'])
for label, code in (('hispanic', 1), ('non hispanic', 0)):
    n_code = np.sum(term_df.Hispanic == code)
    hispanic_df[label] = [n_code, n_code * 100 / n_rows]
hispanic_df['TOTAL'] = hispanic_df['hispanic'] + hispanic_df['non hispanic']
display(hispanic_df.round(2))

Unnamed: 0,hispanic,non hispanic,TOTAL
N,45.0,303.0,348.0
%,12.93,87.07,100.0


Other race

In [24]:
# Tabulate respondents by Race code: count and percentage of all rows.
race_labels = {
    1: 'american indian | native alaskan',
    2: 'asian',
    3: 'black | african american',
    4: 'white',
    5: 'native hawaiian',
    6: 'multiracial | other',
}
n_rows = len(term_df.Race)
race_df = pd.DataFrame(index=['N', '%'])
for code, label in race_labels.items():
    n_code = np.sum(term_df.Race == code)
    race_df[label] = [n_code, n_code * 100 / n_rows]

# Add the category columns left to right to obtain the TOTAL column.
running_total = race_df[race_labels[1]]
for code in range(2, 7):
    running_total = running_total + race_df[race_labels[code]]
race_df['TOTAL'] = running_total
display(race_df.round(2))

Unnamed: 0,american indian | native alaskan,asian,black | african american,white,native hawaiian,multiracial | other,TOTAL
N,1.0,1.0,56.0,241.0,3.0,46.0,348.0
%,0.29,0.29,16.09,69.25,0.86,13.22,100.0


***

## Question 1
**People may suddenly have vivid images of drug use even if they have not used drugs for months.  How do you prefer counselors to describe this experience?  Listed below are different phrases that counselors might use. For each phrase, write the appropriate number (1-7) in the column on the right to indicate your preference.**

1. Strongly disagree
2. Disagree
3. Somewhat disagree
4. Don't agree or disagree
5. Somewhat agree
6. Agree
7. Strongly agree

In [27]:
# Select the Question 1 rating columns in this cell so it runs on a fresh
# kernel; q1df is otherwise first assigned only in the next cell, which makes
# this cell fail with a NameError under Restart & Run All.
q1df = term_df.filter(items=['Q1a', 'Q1b', 'Q1c', 'Q1d', 'Q1e', 'Q1f', 'Q1g', 'Q1h'])
q1df.describe().round(2)

Unnamed: 0,Q1a,Q1b,Q1c,Q1d,Q1e,Q1f,Q1g,Q1h
count,348.0,348.0,348.0,348.0,348.0,348.0,348.0,348.0
mean,4.89,4.01,4.9,4.61,3.84,4.28,4.64,3.94
std,1.86,1.92,1.79,1.91,2.05,1.96,1.9,1.96
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,4.0,2.0,4.0,4.0,2.0,3.0,4.0,2.0
50%,5.0,4.0,5.0,5.0,4.0,4.0,5.0,4.0
75%,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
max,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0


In [26]:
# Question 1: response column -> term shown in the summary table.
q1cols = {'Q1a': 'craving', 'Q1b': 'hunger', 'Q1c': 'urge', 'Q1d': 'desire', 'Q1e': 'jonesing', 'Q1f': 'compulsion', 'Q1g': 'impulse', 'Q1h': 'longing'}
q1df = term_df.filter(items=list(q1cols))
# count / mean / std plus the agree / neutral / disagree tallies, by term.
q1df_stat = trichotomize(
    q1df,
    q1df.describe().round(2).drop(drop_rows),
).rename(columns=q1cols, index={'count': 'responses N'})
display(q1df_stat)

Unnamed: 0,craving,hunger,urge,desire,jonesing,compulsion,impulse,longing
responses N,348.0,348.0,348.0,348.0,348.0,348.0,348.0,348.0
mean,4.89,4.01,4.9,4.61,3.84,4.28,4.64,3.94
std,1.86,1.92,1.79,1.91,2.05,1.96,1.9,1.96
agreement N,207.0,131.0,202.0,176.0,127.0,158.0,181.0,124.0
agreement %,59.48,37.64,58.05,50.57,36.49,45.4,52.01,35.63
neutral N,72.0,96.0,80.0,86.0,79.0,83.0,87.0,99.0
neutral %,20.69,27.59,22.99,24.71,22.7,23.85,25.0,28.45
disagreement N,69.0,121.0,66.0,86.0,142.0,107.0,80.0,125.0
disagreement %,19.83,34.77,18.97,24.71,40.8,30.75,22.99,35.92


**Top 3 terms**

In [12]:
# Three terms with the highest mean rating (ascending sort, reversed).
q1df_stat.loc['mean'].sort_values().iloc[::-1].to_frame().head(3)

Unnamed: 0,mean
urge,4.9
craving,4.89
impulse,4.64


***

## Question 2
**How do you prefer counselors to describe the experience people may have after nonmedical opioid use (e.g., using heroin)? Listed below are different phrases that counselors might use. For each phrase, write the appropriate number (1-7) in the column on the right to indicate your preference.**

1. Strongly disagree
2. Disagree
3. Somewhat disagree
4. Don't agree or disagree
5. Somewhat agree
6. Agree
7. Strongly agree

In [28]:
# Question 2: response column -> term shown in the summary table.
q2cols = {'Q2a': 'high', 'Q2b': 'euphoria', 'Q2c': 'buzzed', 'Q2d': 'elated', 'Q2e': 'doped', 'Q2f': 'nodded out', 'Q2g': 'pleasureable feeling', 'Q2h': 'stoned', 'Q2i': 'intoxicated', 'Q2j': 'lit up'}
q2df = term_df.filter(items=list(q2cols))
# count / mean / std plus the agree / neutral / disagree tallies, by term.
q2df_stat = trichotomize(
    q2df,
    q2df.describe().round(2).drop(drop_rows),
).rename(columns=q2cols, index={'count': 'responses N'})
display(q2df_stat)

Unnamed: 0,high,euphoria,buzzed,elated,doped,nodded out,pleasureable feeling,stoned,intoxicated,lit up
responses N,348.0,348.0,348.0,348.0,348.0,347.0,347.0,348.0,348.0,346.0
mean,5.03,4.38,4.2,3.91,3.61,4.0,4.28,4.21,4.58,3.5
std,1.88,1.93,1.87,1.88,1.96,2.04,1.92,1.93,1.92,1.94
agreement N,220.0,156.0,143.0,115.0,102.0,138.0,152.0,140.0,177.0,96.0
agreement %,63.22,44.83,41.09,33.05,29.31,39.66,43.68,40.23,50.86,27.59
neutral N,70.0,98.0,103.0,115.0,94.0,88.0,94.0,102.0,92.0,92.0
neutral %,20.11,28.16,29.6,33.05,27.01,25.29,27.01,29.31,26.44,26.44
disagreement N,58.0,94.0,102.0,118.0,152.0,121.0,101.0,106.0,79.0,158.0
disagreement %,16.67,27.01,29.31,33.91,43.68,34.77,29.02,30.46,22.7,45.4


**Top 3 terms**

In [29]:
# Three terms with the highest mean rating (ascending sort, reversed).
q2df_stat.loc['mean'].sort_values().iloc[::-1].to_frame().head(3)

Unnamed: 0,mean
high,5.03
intoxicated,4.58
euphoria,4.38


***

## Question 3
**Patients who miss a daily methadone dose may experience flu-like symptoms. Listed below are different phrases that counselors might use to describe this experience. For each phrase, write the appropriate number (1-7) in the column on the right to indicate your preference.**

1. Strongly disagree
2. Disagree
3. Somewhat disagree
4. Don't agree or disagree
5. Somewhat agree
6. Agree
7. Strongly agree

In [30]:
# Question 3: response column -> term shown in the summary table.
q3cols = {'Q3a': 'withdrawal', 'Q3b': 'withdrawal symptoms', 'Q3c': 'sick', 'Q3d': 'agonies', 'Q3e': 'jonesing','Q3f': 'dope sick'}
# NOTE(review): 'Q3g' and 'Q3h' are requested but have no label in q3cols;
# filter(items=...) keeps only labels that exist, so they are presumably absent
# from the data -- confirm and drop them from this list if so.
q3_items = ['Q3a', 'Q3b', 'Q3c', 'Q3d', 'Q3e', 'Q3f', 'Q3g', 'Q3h']
q3df = term_df.filter(items=q3_items)
# count / mean / std plus the agree / neutral / disagree tallies, by term.
q3df_stat = trichotomize(
    q3df,
    q3df.describe().round(2).drop(drop_rows),
).rename(columns=q3cols, index={'count': 'responses N'})
display(q3df_stat)

Unnamed: 0,withdrawal,withdrawal symptoms,sick,agonies,jonesing,dope sick
responses N,348.0,348.0,348.0,348.0,347.0,346.0
mean,5.31,5.32,5.11,3.9,3.67,4.25
std,1.8,1.79,1.86,1.99,2.01,2.15
agreement N,237.0,238.0,218.0,120.0,110.0,155.0
agreement %,68.1,68.39,62.64,34.48,31.61,44.54
neutral N,64.0,61.0,74.0,104.0,86.0,71.0
neutral %,18.39,17.53,21.26,29.89,24.71,20.4
disagreement N,47.0,49.0,56.0,124.0,151.0,120.0
disagreement %,13.51,14.08,16.09,35.63,43.39,34.48


**Top 3 terms**

In [31]:
# Three terms with the highest mean rating (ascending sort, reversed).
q3df_stat.loc['mean'].sort_values().iloc[::-1].to_frame().head(3)

Unnamed: 0,mean
withdrawal symptoms,5.32
withdrawal,5.31
sick,5.11


***

## Question 4
**There are different ways to understand and treat the problems you are seeking help for. Listed below are different phrases describing beliefs about your problems and treatment.  For each phrase, write the appropriate number (1-7) in the column on the right to indicate your level of agreement.**

1. Strongly disagree
2. Disagree
3. Somewhat disagree
4. Don't agree or disagree
5. Somewhat agree
6. Agree
7. Strongly agree

<br>

| Key      | Description |
| :---     |    :----  |
| A        | “My problem is a chronic medical condition similar to high blood pressure”       |
| B        | “My problem is a brain disease”       |
| C        | “There is no explanation for my problem”       |
| D        | “My treatment at APT is likely to be effective”       |
| E        | “Taking methadone daily is important for my treatment”       |
| F        | “Counseling is an important part of my treatment”       |
| G        | “12-step (e.g., AA, NA)  is the best treatment for me”       |
| H        | “Methadone is a life-saving medication”       |


In [32]:
# Question 4: response column -> statement key (A-H) from the table above.
q4cols = {'Q4a': 'A', 'Q4b': 'B', 'Q4c': 'C', 'Q4d': 'D', 'Q4e': 'E', 'Q4f': 'F', 'Q4g': 'G', 'Q4h': 'H'}
q4df = term_df.filter(items=list(q4cols))
# count / mean / std plus the agree / neutral / disagree tallies, by statement.
q4df_stat = trichotomize(
    q4df,
    q4df.describe().round(2).drop(drop_rows),
).rename(columns=q4cols, index={'count': 'responses N'})
display(q4df_stat)

Unnamed: 0,A,B,C,D,E,F,G,H
responses N,346.0,347.0,348.0,348.0,348.0,348.0,348.0,348.0
mean,3.86,4.07,2.98,5.54,5.87,5.56,4.09,5.63
std,2.18,2.13,1.88,1.72,1.62,1.71,1.97,1.76
agreement N,140.0,153.0,71.0,266.0,281.0,266.0,137.0,260.0
agreement %,40.23,43.97,20.4,76.44,80.75,76.44,39.37,74.71
neutral N,63.0,72.0,72.0,40.0,35.0,41.0,96.0,48.0
neutral %,18.1,20.69,20.69,11.49,10.06,11.78,27.59,13.79
disagreement N,143.0,122.0,205.0,42.0,32.0,41.0,115.0,40.0
disagreement %,41.09,35.06,58.91,12.07,9.2,11.78,33.05,11.49


In [33]:
# Three statements with the highest mean agreement (ascending sort, reversed).
q4df_stat.loc['mean'].sort_values().iloc[::-1].to_frame().head(3)

Unnamed: 0,mean
E,5.87
H,5.63
F,5.56


***

# Advanced statistics

### Additional preprocessing

<b>Preprocessing function</b>
<br><code>df</code>: pandas dataframe containing all data
<br><code>col_dict</code>: dictionary mapping each response column name (key, e.g. 'Q1c') to its term (value, e.g. 'urge')
<br><code>addl_cols</code>: variables of interest for regression

Run via <code>run_logit</code>, described below.

In [68]:
# Trichotomize responses into disagree (1), neutral (2), agree (3) and
# min-max scale the age data.
def preprocess_stat(df, col_dict, addl_cols):
    """Build the frame used by the ordinal regression.

    Parameters
    ----------
    df : pandas.DataFrame
        Survey data. Must contain 'Age', 'Race', every key of ``col_dict`` and
        every name in ``addl_cols`` other than the derived 'Black' / 'Other'.
    col_dict : dict
        Maps response column names to term labels; only the keys are used.
    addl_cols : list of str
        Additional columns to keep (the regression's independent variables).
        Must include 'Age', which is rescaled below.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the ``col_dict`` key columns recoded to 1/2/3 and
        the ``addl_cols`` columns, with 'Age' min-max scaled. ``df`` itself is
        not modified.
    """
    # Explicit copies: the frame is mutated below, and assigning into a plain
    # boolean/column slice triggers SettingWithCopyWarning.
    new_df = df[~df.Age.isna()].copy()
    # Indicator columns derived from the Race code (3 and 4 are tabulated
    # elsewhere in this notebook as black and white respectively).
    new_df['Black'] = (new_df['Race'] == 3).astype(int)
    new_df['Other'] = ((new_df['Race'] != 3) & (new_df['Race'] != 4)).astype(int)
    col_names = list(col_dict.keys())
    new_df = new_df[col_names + addl_cols].copy()
    # Trichotomize. All three masks are taken from the original 1-7 ratings
    # before any value is overwritten, so the recoding cannot cascade.
    for col in col_names:
        ratings = new_df[col]
        is_disagree = ratings < 4
        is_neutral = ratings == 4
        is_agree = ratings > 4
        new_df.loc[is_disagree, col] = 1
        new_df.loc[is_neutral, col] = 2
        new_df.loc[is_agree, col] = 3
    # Rescale age with sklearn's minmax_scale (default feature range).
    new_df['Age'] = minmax_scale(new_df['Age'])
    return new_df

# term_df.Q1a.value_counts().sort_index().plot.bar()

<b>Regression function</b>: 
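<br>Fits an ordered regression (<code>OrderedModel</code> with <code>distr='probit'</code>, i.e. an ordered probit rather than a logit, despite the function name) for a <code>term</code> of interest on the independent variables listed in <code>vars</code>, and prints the model summary. Utilizes <code>preprocess_stat</code>, described above.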
<br>
<br><code>df</code>: data (dataframe)
<br><code>term</code>: term of interest (e.g. 'urge')
<br><code>col_dict</code>: dictionary mapping each response column name (key, e.g. 'Q1c') to its term (value, e.g. 'urge')
<br><code>vars</code>: list of independent variables of interest for regression

In [79]:
def run_logit(df, term, col_dict, vars, distr='probit'):
    """Fit an ordered regression for one term and print its summary.

    Despite the name, the default link is probit; pass ``distr='logit'`` for
    an ordered logit.

    Parameters
    ----------
    df : pandas.DataFrame
        Survey data, forwarded to ``preprocess_stat``.
    term : str
        Term of interest; must be one of the values of ``col_dict``.
    col_dict : dict
        Maps response column names to terms.
    vars : list of str
        Independent variables of the regression.
    distr : str, optional
        Distribution passed to ``OrderedModel``; defaults to 'probit'.

    Returns
    -------
    pandas.DataFrame
        The preprocessed frame the model was fitted on.

    Raises
    ------
    ValueError
        If ``term`` is not a value of ``col_dict``.
    """
    # Reverse lookup: first response column whose label equals the term.
    matching_cols = [col for col, label in col_dict.items() if label == term]
    if not matching_cols:
        raise ValueError(f"{term!r} is not a term in col_dict")
    dep_var = matching_cols[0]

    logit_df = preprocess_stat(df, col_dict, vars)
    model = OrderedModel(logit_df[dep_var], logit_df[vars], distr=distr)
    result = model.fit(method='bfgs')
    print(result.summary())
    return logit_df


***

<h2>Question 1</h2>

<b>'Urge' regression</b>

In [80]:
# Predictors for the model, then the fit for the 'urge' rating.
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(df=term_df, term='urge', col_dict=q1cols, vars=ind_vars)

Optimization terminated successfully.
         Current function value: 0.963107
         Iterations: 28
         Function evaluations: 29
         Gradient evaluations: 29
                             OrderedModel Results                             
Dep. Variable:                    Q1c   Log-Likelihood:                -335.16
Model:                   OrderedModel   AIC:                             684.3
Method:            Maximum Likelihood   BIC:                             711.3
Date:                Wed, 08 May 2024                                         
Time:                        13:14:22                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

<b>'Craving' regression</b>

In [81]:
# Predictors for the model, then the fit for the 'craving' rating.
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(df=term_df, term='craving', col_dict=q1cols, vars=ind_vars)

Optimization terminated successfully.
         Current function value: 0.949846
         Iterations: 28
         Function evaluations: 29
         Gradient evaluations: 29
                             OrderedModel Results                             
Dep. Variable:                    Q1a   Log-Likelihood:                -330.55
Model:                   OrderedModel   AIC:                             675.1
Method:            Maximum Likelihood   BIC:                             702.1
Date:                Wed, 08 May 2024                                         
Time:                        13:14:24                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

<b>'Impulse' regression</b>

In [82]:
# Predictors for the model, then the fit for the 'impulse' rating.
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(df=term_df, term='impulse', col_dict=q1cols, vars=ind_vars)

Optimization terminated successfully.
         Current function value: 1.012575
         Iterations: 27
         Function evaluations: 28
         Gradient evaluations: 28
                             OrderedModel Results                             
Dep. Variable:                    Q1g   Log-Likelihood:                -352.38
Model:                   OrderedModel   AIC:                             718.8
Method:            Maximum Likelihood   BIC:                             745.7
Date:                Wed, 08 May 2024                                         
Time:                        13:14:26                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

<h2>Question 2</h2>

<b>'High' regression</b>

In [83]:
# Predictors for the model, then the fit for the 'high' rating.
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(df=term_df, term='high', col_dict=q2cols, vars=ind_vars)

Optimization terminated successfully.
         Current function value: 0.872336
         Iterations: 29
         Function evaluations: 30
         Gradient evaluations: 30
                             OrderedModel Results                             
Dep. Variable:                    Q2a   Log-Likelihood:                -303.57
Model:                   OrderedModel   AIC:                             621.1
Method:            Maximum Likelihood   BIC:                             648.1
Date:                Wed, 08 May 2024                                         
Time:                        13:14:28                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

<b>'Intoxicated' regression</b>

In [84]:
# Predictors for the model, then the fit for the 'intoxicated' rating.
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(df=term_df, term='intoxicated', col_dict=q2cols, vars=ind_vars)

Optimization terminated successfully.
         Current function value: 1.012143
         Iterations: 28
         Function evaluations: 29
         Gradient evaluations: 29
                             OrderedModel Results                             
Dep. Variable:                    Q2i   Log-Likelihood:                -352.23
Model:                   OrderedModel   AIC:                             718.5
Method:            Maximum Likelihood   BIC:                             745.4
Date:                Wed, 08 May 2024                                         
Time:                        13:14:35                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

<b>'Euphoria' regression</b>

In [86]:
# Predictors for the model, then the fit for the 'euphoria' rating.
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(df=term_df, term='euphoria', col_dict=q2cols, vars=ind_vars)

Optimization terminated successfully.
         Current function value: 1.028101
         Iterations: 28
         Function evaluations: 29
         Gradient evaluations: 29
                             OrderedModel Results                             
Dep. Variable:                    Q2b   Log-Likelihood:                -357.78
Model:                   OrderedModel   AIC:                             729.6
Method:            Maximum Likelihood   BIC:                             756.5
Date:                Wed, 08 May 2024                                         
Time:                        13:15:06                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

<h2>Question 3</h2>

<b>'Withdrawal symptoms' regression</b>

In [88]:
# Predictors for the model, then the fit for the 'withdrawal symptoms' rating.
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(df=term_df, term='withdrawal symptoms', col_dict=q3cols, vars=ind_vars)

Optimization terminated successfully.
         Current function value: 0.808609
         Iterations: 29
         Function evaluations: 30
         Gradient evaluations: 30
                             OrderedModel Results                             
Dep. Variable:                    Q3b   Log-Likelihood:                -281.40
Model:                   OrderedModel   AIC:                             576.8
Method:            Maximum Likelihood   BIC:                             603.8
Date:                Wed, 08 May 2024                                         
Time:                        13:17:24                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

<b>'Withdrawal' regression</b>

In [90]:
# Predictors for the model, then the fit for the 'withdrawal' rating.
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(df=term_df, term='withdrawal', col_dict=q3cols, vars=ind_vars)

Optimization terminated successfully.
         Current function value: 0.819902
         Iterations: 29
         Function evaluations: 30
         Gradient evaluations: 30
                             OrderedModel Results                             
Dep. Variable:                    Q3a   Log-Likelihood:                -285.33
Model:                   OrderedModel   AIC:                             584.7
Method:            Maximum Likelihood   BIC:                             611.6
Date:                Wed, 08 May 2024                                         
Time:                        13:17:52                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

<b>'Sick' regression</b>

In [89]:
# Predictors for the model, then the fit for the 'sick' rating.
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(df=term_df, term='sick', col_dict=q3cols, vars=ind_vars)

Optimization terminated successfully.
         Current function value: 0.898459
         Iterations: 28
         Function evaluations: 29
         Gradient evaluations: 29
                             OrderedModel Results                             
Dep. Variable:                    Q3c   Log-Likelihood:                -312.66
Model:                   OrderedModel   AIC:                             639.3
Method:            Maximum Likelihood   BIC:                             666.3
Date:                Wed, 08 May 2024                                         
Time:                        13:17:50                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

***

# Footnotes

Sullivan GM, Artino AR Jr. Analyzing and interpreting data from likert-type scales. J Grad Med Educ. 2013 Dec;5(4):541-2. doi: 10.4300/JGME-5-4-18. PMID: 24454995; PMCID: PMC3886444.
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3886444/

https://www.nacaa.com/file.ashx?id=c1f53033-a29a-4b7b-b9f0-a00b1fe3ff74