# Terminological Preferences Project

Import libraries

In [52]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.stats import mannwhitneyu, norm
from statsmodels.miscmodels.ordinal_model import OrderedModel
from sklearn.preprocessing import minmax_scale

Import data

In [53]:
term_df = pd.read_excel('word_data.xls', engine='xlrd').replace(999, np.NAN)
term_df = term_df[~term_df.Age.isna()]

Dataframe preview

In [54]:
term_df

Unnamed: 0,date,Q1a,Q1b,Q1c,Q1d,Q1e,Q1f,Q1g,Q1h,Q2a,...,Q4g,Q4h,comments,Age,Gender,Hispanic,Race,Counseling,White,Threegroup
0,,4,3,4,4,5,4,3,1,4,...,5,4,,61.0,1,2,3,1,0,2.0
1,44244.0,1,6,6,6,6,6,6,6,6,...,4,4,,40.0,1,1,6,0,0,3.0
2,44403.0,1,7,1,1,1,1,4,4,4,...,7,7,,51.0,2,1,6,1,0,3.0
3,44243.0,1,6,1,1,5,3,3,3,1,...,2,1,,59.0,1,1,6,0,0,3.0
4,44662.0,1,1,1,1,1,6,7,5,4,...,4,6,,29.0,1,2,4,1,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,44251.0,7,6,7,7,3,5,7,7,6,...,3,7,,35.0,2,2,4,1,1,1.0
346,44544.0,7,7,7,7,7,7,7,7,7,...,5,7,,43.0,2,2,5,1,0,
347,44735.0,7,6,7,2,7,1,1,1,7,...,7,7,,44.0,1,2,3,1,0,2.0
348,44699.0,7,4,4,7,7,7,7,4,7,...,4,6,,44.0,1,1,4,1,1,1.0


Helper functions

In [55]:
drop_rows = ['25%', '50%', '75%', 'min', 'max']

def trichotomize(df, df_stat):
    N_agree_list, N_neut_list, N_dis_list = [], [], []
    per_agree_list,  per_neut_list, per_dis_list = [], [], []
    for col in df.columns:
        N_dis_list.append(df[col][df[col]<4].size)
        per_dis_list.append(df[col][df[col]<4].size*100/df[col].size)
        N_neut_list.append(df[col][(df[col]>3)&(df[col]<5)].size)
        per_neut_list.append(df[col][(df[col]>3)&(df[col]<5)].size*100/df[col].size)
        N_agree_list.append(df[col][df[col]>4].size)
        per_agree_list.append(df[col][df[col]>4].size*100/df[col].size)
    L = N_agree_list, per_agree_list, N_neut_list, per_neut_list, N_dis_list, per_dis_list
    L_df = pd.DataFrame(L, columns=df.columns, index=['agreement N', 'agreement %', 'neutral N', 'neutral %', 'disagreement N', 'disagreement %']).round(2)
    return pd.concat([df_stat, L_df])

def subgroup(df):
    new_df = pd.concat([df, term_df.filter(items=['Age', 'Gender', 'White', 'Hispanic', 'Race'])], axis=1)
    new_df['Black'] = (new_df['Race']==3).astype(int)
    new_df['Other'] = ((new_df['Race']!=3)&(new_df['Race']!=4)).astype(int)
    return new_df

***

# Demographics

### Age

In [56]:
age_df = pd.DataFrame(term_df.Age.describe())
age_df.rename(index={'count': 'responses N', 'mean': 'mean age'}, inplace=True)
display(age_df)

Unnamed: 0,Age
responses N,348.0
mean age,42.795977
std,11.215535
min,21.0
25%,34.0
50%,41.0
75%,51.0
max,76.0


### Gender

In [57]:
gen_df = pd.DataFrame(index=['N', '%'])
term_df.loc[term_df.Gender==1, 'Gender'] = 0
term_df.loc[term_df.Gender==2, 'Gender'] = 1
m = [np.sum(term_df.Gender==0), np.sum(term_df.Gender==0)*100/len(term_df.Gender)]
f = [np.sum(term_df.Gender==1), np.sum(term_df.Gender==1)*100/len(term_df.Gender)]
gen_df['male'] = m
gen_df['female'] = f
gen_df['TOTAL'] = gen_df['male'] + gen_df['female']
display(gen_df.round(2))

Unnamed: 0,male,female,TOTAL
N,213.0,135.0,348.0
%,61.21,38.79,100.0


### Race

White

In [58]:
white_df = pd.DataFrame(index=['N', '%'])
w = [np.sum(term_df.White==1), np.sum(term_df.White==1)*100/len(term_df.White)]
nw = [np.sum(term_df.White==0), np.sum(term_df.White==0)*100/len(term_df.White)]
white_df['white'] = w
white_df['non white'] = nw
white_df['TOTAL'] = white_df['white'] + white_df['non white']
display(white_df.round(2))

Unnamed: 0,white,non white,TOTAL
N,241.0,107.0,348.0
%,69.25,30.75,100.0


Hispanic

In [59]:
hispanic_df = pd.DataFrame(index=['N', '%'])
term_df.loc[term_df.Hispanic==2, 'Hispanic'] = 0
h = [np.sum(term_df.Hispanic==1), np.sum(term_df.Hispanic==1)*100/len(term_df.Hispanic)]
nh = [np.sum(term_df.Hispanic==0), np.sum(term_df.Hispanic==0)*100/len(term_df.Hispanic)]
hispanic_df['hispanic'] = h
hispanic_df['non hispanic'] = nh
hispanic_df['TOTAL'] = hispanic_df['hispanic'] + hispanic_df['non hispanic']
display(hispanic_df.round(2))

Unnamed: 0,hispanic,non hispanic,TOTAL
N,45.0,303.0,348.0
%,12.93,87.07,100.0


Other race

In [60]:
race_df = pd.DataFrame(index=['N', '%'])
r1 = [np.sum(term_df.Race==1), np.sum(term_df.Race==1)*100/len(term_df.Race)]
r2 = [np.sum(term_df.Race==2), np.sum(term_df.Race==2)*100/len(term_df.Race)]
r3 = [np.sum(term_df.Race==3), np.sum(term_df.Race==3)*100/len(term_df.Race)]
r4 = [np.sum(term_df.Race==4), np.sum(term_df.Race==4)*100/len(term_df.Race)]
r5 = [np.sum(term_df.Race==5), np.sum(term_df.Race==5)*100/len(term_df.Race)]
r6 = [np.sum(term_df.Race==6), np.sum(term_df.Race==6)*100/len(term_df.Race)]
race_df['american indian | native alaskan'], race_df['asian'], race_df['black | african american'] = r1, r2, r3
race_df['white'], race_df['native hawaiian'], race_df['multiracial | other'] = r4, r5, r6
race_df['TOTAL'] = race_df['american indian | native alaskan'] + race_df['asian'] + race_df['black | african american'] + race_df['white'] + race_df['native hawaiian'] + race_df['multiracial | other']
display(race_df.round(2))

Unnamed: 0,american indian | native alaskan,asian,black | african american,white,native hawaiian,multiracial | other,TOTAL
N,1.0,1.0,56.0,241.0,3.0,46.0,348.0
%,0.29,0.29,16.09,69.25,0.86,13.22,100.0


***

## Question 1
**People may suddenly have vivid images of drug use even if they have not used drugs for months.  How do you prefer counselors to describe this experience?  Listed below are different phrases that counselors might use. For each phrase, write the appropriate number (1-7) in the column on the right to indicate your preference.**

1. Strongly disagree
2. Disagree
3. Somewhat disagree
4. Don't agree or disagree
5. Somewhat agree
6. Agree
7. Strongly agree

In [61]:
q1df.describe().round(2)

Unnamed: 0,Q1a,Q1b,Q1c,Q1d,Q1e,Q1f,Q1g,Q1h
count,348.0,348.0,348.0,348.0,348.0,348.0,348.0,348.0
mean,4.89,4.01,4.9,4.61,3.84,4.28,4.64,3.94
std,1.86,1.92,1.79,1.91,2.05,1.96,1.9,1.96
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,4.0,2.0,4.0,4.0,2.0,3.0,4.0,2.0
50%,5.0,4.0,5.0,5.0,4.0,4.0,5.0,4.0
75%,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
max,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0


In [62]:
q1df = term_df.filter(items=['Q1a', 'Q1b', 'Q1c', 'Q1d', 'Q1e', 'Q1f', 'Q1g', 'Q1h'])
q1df_stat = q1df.describe().round(2).drop(drop_rows)
q1df_stat = trichotomize(q1df, q1df_stat)
q1cols = {'Q1a': 'craving', 'Q1b': 'hunger', 'Q1c': 'urge', 'Q1d': 'desire', 'Q1e': 'jonesing', 'Q1f': 'compulsion', 'Q1g': 'impulse', 'Q1h': 'longing'}
q1df_stat.rename(columns=q1cols, index={'count': 'responses N'}, inplace=True)
display(q1df_stat)

Unnamed: 0,craving,hunger,urge,desire,jonesing,compulsion,impulse,longing
responses N,348.0,348.0,348.0,348.0,348.0,348.0,348.0,348.0
mean,4.89,4.01,4.9,4.61,3.84,4.28,4.64,3.94
std,1.86,1.92,1.79,1.91,2.05,1.96,1.9,1.96
agreement N,207.0,131.0,202.0,176.0,127.0,158.0,181.0,124.0
agreement %,59.48,37.64,58.05,50.57,36.49,45.4,52.01,35.63
neutral N,72.0,96.0,80.0,86.0,79.0,83.0,87.0,99.0
neutral %,20.69,27.59,22.99,24.71,22.7,23.85,25.0,28.45
disagreement N,69.0,121.0,66.0,86.0,142.0,107.0,80.0,125.0
disagreement %,19.83,34.77,18.97,24.71,40.8,30.75,22.99,35.92


**Top 3 terms**

In [63]:
pd.DataFrame(q1df_stat.loc['mean'].sort_values()[::-1])[0:3]

Unnamed: 0,mean
urge,4.9
craving,4.89
impulse,4.64


***

## Question 2
**How do you prefer counselors to describe the experience people may have after nonmedical opioid use (e.g., using heroin)? Listed below are different phrases that counselors might use. For each phrase, write the appropriate number (1-7) in the column on the right to indicate your preference.**

1. Strongly disagree
2. Disagree
3. Somewhat disagree
4. Don't agree or disagree
5. Somewhat agree
6. Agree
7. Strongly agree

In [64]:
q2df = term_df.filter(items=['Q2a', 'Q2b', 'Q2c', 'Q2d', 'Q2e', 'Q2f', 'Q2g', 'Q2h', 'Q2i', 'Q2j'])
q2df_stat = q2df.describe().round(2).drop(drop_rows)
q2df_stat = trichotomize(q2df, q2df_stat)
q2cols = {'Q2a': 'high', 'Q2b': 'euphoria', 'Q2c': 'buzzed', 'Q2d': 'elated', 'Q2e': 'doped', 'Q2f': 'nodded out', 'Q2g': 'pleasureable feeling', 'Q2h': 'stoned', 'Q2i': 'intoxicated', 'Q2j': 'lit up'}
q2df_stat.rename(columns=q2cols, index={'count': 'responses N'}, inplace=True)
display(q2df_stat)

Unnamed: 0,high,euphoria,buzzed,elated,doped,nodded out,pleasureable feeling,stoned,intoxicated,lit up
responses N,348.0,348.0,348.0,348.0,348.0,347.0,347.0,348.0,348.0,346.0
mean,5.03,4.38,4.2,3.91,3.61,4.0,4.28,4.21,4.58,3.5
std,1.88,1.93,1.87,1.88,1.96,2.04,1.92,1.93,1.92,1.94
agreement N,220.0,156.0,143.0,115.0,102.0,138.0,152.0,140.0,177.0,96.0
agreement %,63.22,44.83,41.09,33.05,29.31,39.66,43.68,40.23,50.86,27.59
neutral N,70.0,98.0,103.0,115.0,94.0,88.0,94.0,102.0,92.0,92.0
neutral %,20.11,28.16,29.6,33.05,27.01,25.29,27.01,29.31,26.44,26.44
disagreement N,58.0,94.0,102.0,118.0,152.0,121.0,101.0,106.0,79.0,158.0
disagreement %,16.67,27.01,29.31,33.91,43.68,34.77,29.02,30.46,22.7,45.4


**Top 3 terms**

In [65]:
pd.DataFrame(q2df_stat.loc['mean'].sort_values()[::-1])[0:3]

Unnamed: 0,mean
high,5.03
intoxicated,4.58
euphoria,4.38


**Subgroup analysis: 'high' and gender**

In [66]:
q2sub = subgroup(q2df)
print("Male mean agreement for 'high'")
display(q2sub.Q2a[q2sub.Gender==0].describe().round(2).drop(drop_rows))
print("Female mean agreement for 'high'")
display(q2sub.Q2a[q2sub.Gender==1].describe().round(2).drop(drop_rows))

Male mean agreement for 'high'


count    213.00
mean       5.16
std        1.82
Name: Q2a, dtype: float64

Female mean agreement for 'high'


count    135.00
mean       4.81
std        1.96
Name: Q2a, dtype: float64

**Subgroup analysis: 'intoxicated' and hispanic**

In [67]:
print("Non-hispanic mean agreement for 'intoxicated'")
display(q2sub.Q2i[q2sub.Hispanic==0].describe().round(2).drop(drop_rows))
print("Hispanic mean agreement for 'intoxicated'")
display(q2sub.Q2i[q2sub.Hispanic==1].describe().round(2).drop(drop_rows))

Non-hispanic mean agreement for 'intoxicated'


count    303.00
mean       4.56
std        1.92
Name: Q2i, dtype: float64

Hispanic mean agreement for 'intoxicated'


count    45.00
mean      4.71
std       1.93
Name: Q2i, dtype: float64

**Subgroup analysis: 'intoxicated' and race**

In [100]:
print("White race mean agreement for 'intoxicated'")
display(q2sub.Q2i[q2sub.White==1].describe().round(2).drop(drop_rows))
print("All other race mean agreement for 'intoxicated'")
display(q2sub.Q2i[q2sub.Other==1].describe().round(2).drop(drop_rows))

White race mean agreement for 'intoxicated'


count    241.00
mean       4.74
std        1.89
Name: Q2i, dtype: float64

All other race mean agreement for 'intoxicated'


count    51.00
mean      4.29
std       2.00
Name: Q2i, dtype: float64

**Subgroup analysis: 'euphoria' and gender**

In [70]:
print("Male mean agreement for 'euphoria'")
display(q2sub.Q2b[q2sub.Gender==0].describe().round(2).drop(drop_rows))
print("Female mean agreement for 'euphoria'")
display(q2sub.Q2b[q2sub.Gender==1].describe().round(2).drop(drop_rows))

Male mean agreement for 'euphoria'


count    213.00
mean       4.54
std        1.94
Name: Q2b, dtype: float64

Female mean agreement for 'euphoria'


count    135.00
mean       4.12
std        1.89
Name: Q2b, dtype: float64

**Subgroup analysis: 'euphoria' and race**

In [71]:
print("White race mean agreement for 'euphoria'")
display(q2sub.Q2b[q2sub.White==1].describe().round(2).drop(drop_rows))
print("All other race mean agreement for 'euphoria'")
display(q2sub.Q2b[q2sub.Other==1].describe().round(2).drop(drop_rows))

White race mean agreement for 'euphoria'


count    241.00
mean       4.71
std        1.88
Name: Q2b, dtype: float64

All other race mean agreement for 'euphoria'


count    51.00
mean      3.78
std       2.02
Name: Q2b, dtype: float64

***

## Question 3
**Patients who miss a daily methadone dose may experience flu-like symptoms. Listed below are different phrases that counselors might use to describe this experience. For each phrase, write the appropriate number (1-7) in the column on the right to indicate your preference.**

1. Strongly disagree
2. Disagree
3. Somewhat disagree
4. Don't agree or disagree
5. Somewhat agree
6. Agree
7. Strongly agree

In [72]:
q3df = term_df.filter(items=['Q3a', 'Q3b', 'Q3c', 'Q3d', 'Q3e', 'Q3f', 'Q3g', 'Q3h'])
q3df_stat = q3df.describe().round(2).drop(drop_rows)
q3df_stat = trichotomize(q3df, q3df_stat)
q3cols = {'Q3a': 'withdrawal', 'Q3b': 'withdrawal symptoms', 'Q3c': 'sick', 'Q3d': 'agonies', 'Q3e': 'jonesing','Q3f': 'dope sick'}
q3df_stat.rename(columns=q3cols, index={'count': 'responses N'}, inplace=True)
display(q3df_stat)

Unnamed: 0,withdrawal,withdrawal symptoms,sick,agonies,jonesing,dope sick
responses N,348.0,348.0,348.0,348.0,347.0,346.0
mean,5.31,5.32,5.11,3.9,3.67,4.25
std,1.8,1.79,1.86,1.99,2.01,2.15
agreement N,237.0,238.0,218.0,120.0,110.0,155.0
agreement %,68.1,68.39,62.64,34.48,31.61,44.54
neutral N,64.0,61.0,74.0,104.0,86.0,71.0
neutral %,18.39,17.53,21.26,29.89,24.71,20.4
disagreement N,47.0,49.0,56.0,124.0,151.0,120.0
disagreement %,13.51,14.08,16.09,35.63,43.39,34.48


**Top 3 terms**

In [73]:
pd.DataFrame(q3df_stat.loc['mean'].sort_values()[::-1])[0:3]

Unnamed: 0,mean
withdrawal symptoms,5.32
withdrawal,5.31
sick,5.11


**Subgroup analysis: 'sick' and race**

In [75]:
q3sub = subgroup(q3df)
print("White race mean agreement for 'sick'")
display(q3sub.Q3c[q3sub.White==1].describe().round(2).drop(drop_rows))
print("All other race mean agreement for 'sick'")
display(q3sub.Q3c[q3sub.Other==1].describe().round(2).drop(drop_rows))

White race mean agreement for 'sick'


count    241.00
mean       5.28
std        1.83
Name: Q3c, dtype: float64

All other race mean agreement for 'sick'


count    51.00
mean      4.88
std       1.91
Name: Q3c, dtype: float64

***

## Question 4
**There are different ways to understand and treat the problems you are seeking help for. Listed below are different phrases describing beliefs about your problems and treatment.  For each phrase, write the appropriate number (1-7) in the column on the right to indicate your level of agreement.**

1. Strongly disagree
2. Disagree
3. Somewhat disagree
4. Don't agree or disagree
5. Somewhat agree
6. Agree
7. Strongly agree

<br>

| Key      | Description |
| :---     |    :----  |
| A        | “My problem is a chronic medical condition similar to high blood pressure”       |
| B        | “My problem is a brain disease”       |
| C        | “There is no explanation for my problem”       |
| D        | “My treatment at APT is likely to be effective”       |
| E        | “Taking methadone daily is important for my treatment”       |
| F        | “Counseling is an important part of my treatment”       |
| G        | “12-step (e.g., AA, NA)  is the best treatment for me”       |
| H        | “Methadone is a life-saving medication”       |


In [76]:
q4df = term_df.filter(items=['Q4a', 'Q4b', 'Q4c', 'Q4d', 'Q4e', 'Q4f', 'Q4g', 'Q4h'])
q4df_stat = q4df.describe().round(2).drop(drop_rows)
q4df_stat = trichotomize(q4df, q4df_stat)
q4cols = {'Q4a': 'A', 'Q4b': 'B', 'Q4c': 'C', 'Q4d': 'D', 'Q4e': 'E', 'Q4f': 'F', 'Q4g': 'G', 'Q4h': 'H'}
q4df_stat.rename(columns=q4cols, index={'count': 'responses N'}, inplace=True)
display(q4df_stat)

Unnamed: 0,A,B,C,D,E,F,G,H
responses N,346.0,347.0,348.0,348.0,348.0,348.0,348.0,348.0
mean,3.86,4.07,2.98,5.54,5.87,5.56,4.09,5.63
std,2.18,2.13,1.88,1.72,1.62,1.71,1.97,1.76
agreement N,140.0,153.0,71.0,266.0,281.0,266.0,137.0,260.0
agreement %,40.23,43.97,20.4,76.44,80.75,76.44,39.37,74.71
neutral N,63.0,72.0,72.0,40.0,35.0,41.0,96.0,48.0
neutral %,18.1,20.69,20.69,11.49,10.06,11.78,27.59,13.79
disagreement N,143.0,122.0,205.0,42.0,32.0,41.0,115.0,40.0
disagreement %,41.09,35.06,58.91,12.07,9.2,11.78,33.05,11.49


In [77]:
pd.DataFrame(q4df_stat.loc['mean'].sort_values()[::-1])[0:3]

Unnamed: 0,mean
E,5.87
H,5.63
F,5.56


***

# Regression statistics

### Additional preprocessing

<b>Preprocessing function</b>
<br><code>df</code>: pandas dataframe containing all data
<br><code>col_dict</code>: dictionary with column name (key, e.g. 'q1a') / response (value, e.g. 'urge') pair
<br><code>addl_cols</code>: variables of interest for regression

Run via <code>run_logit</code>, described below.

In [78]:
# trichotomize responses into disagree (1), neutral (2), agree (3) and standardize age data
def preprocess_stat(df, col_dict, addl_cols):
    new_df = df[~df.Age.isna()]
    new_df['Black'] = (new_df['Race']==3).astype(int)
    new_df['Other'] = ((new_df['Race']!=3)&(new_df['Race']!=4)).astype(int)
    col_names = list(col_dict.keys())
    new_df = new_df[col_names+addl_cols]
    #trichotomize
    for col in col_names:
        new_df.loc[new_df[col] < 4, col] = 1
        new_df.loc[new_df[col] == 4, col] = 2
        new_df.loc[new_df[col] > 4, col] = 3
    new_df = new_df
    #standardize age data
    new_df.Age = minmax_scale(new_df.Age)
    return new_df

# term_df.Q1a.value_counts().sort_index().plot.bar()

<b>Regression function</b>: 
<br>Runs logit regression for a <code>term</code> of interest on indpendent variables in list of <code>vars</code>. Utilizes <code>preprocess_stat</code>, described above.
<br>
<br><code>df</code>: data (dataframe)
<br><code>term</code>: term of interest (e.g. 'urge')
<br><code>col_dict</code>: dictionary with column name (key, e.g. 'q1a') / response (value, e.g. 'urge') pair
<br><code>vars</code>: list of independent variables of interest for regression

In [79]:
def run_logit(df, term, col_dict, vars):
    logit_df = preprocess_stat(df, col_dict, vars)
    dep_var = list(col_dict.keys())[list(col_dict.values()).index(term)]
    mod_prob = OrderedModel(logit_df[dep_var], logit_df[vars], distr='probit')
    res_prob = mod_prob.fit(method='bfgs')
    logodds_df = pd.concat([np.exp(res_prob.params), res_prob.pvalues, np.exp(res_prob.conf_int())], axis=1).round(3)
    logodds_df.columns = ['Odds Ratio', 'p-value','5%', '95%']
    print(res_prob.summary())
    display(logodds_df)
    return logit_df

****

### Subanalyses

#### Question 1

<b>'Urge'

In [87]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'urge', q1cols, ind_vars)

Optimization terminated successfully.
         Current function value: 0.963107
         Iterations: 28
         Function evaluations: 29
         Gradient evaluations: 29
                             OrderedModel Results                             
Dep. Variable:                    Q1c   Log-Likelihood:                -335.16
Model:                   OrderedModel   AIC:                             684.3
Method:            Maximum Likelihood   BIC:                             711.3
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,0.835,0.591,0.433,1.611
Gender,0.999,0.992,0.77,1.296
Hispanic,0.973,0.928,0.538,1.758
White,1.086,0.662,0.751,1.571
Other,0.788,0.469,0.413,1.502
1/2,0.391,0.0,0.237,0.645
2/3,0.682,0.0,0.56,0.831


<b> 'Craving'

In [88]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'craving', q1cols, ind_vars)

Optimization terminated successfully.
         Current function value: 0.949846
         Iterations: 28
         Function evaluations: 29
         Gradient evaluations: 29
                             OrderedModel Results                             
Dep. Variable:                    Q1a   Log-Likelihood:                -330.55
Model:                   OrderedModel   AIC:                             675.1
Method:            Maximum Likelihood   BIC:                             702.1
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,0.643,0.19,0.331,1.246
Gender,1.163,0.26,0.894,1.512
Hispanic,1.194,0.569,0.649,2.199
White,1.02,0.915,0.705,1.477
Other,0.789,0.478,0.409,1.519
1/2,0.38,0.0,0.23,0.627
2/3,0.613,0.0,0.498,0.756


<b>'Impulse'

In [89]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'impulse', q1cols, ind_vars)

Optimization terminated successfully.
         Current function value: 1.012575
         Iterations: 27
         Function evaluations: 28
         Gradient evaluations: 28
                             OrderedModel Results                             
Dep. Variable:                    Q1g   Log-Likelihood:                -352.38
Model:                   OrderedModel   AIC:                             718.8
Method:            Maximum Likelihood   BIC:                             745.7
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,0.732,0.348,0.382,1.403
Gender,1.22,0.129,0.944,1.576
Hispanic,1.477,0.208,0.805,2.711
White,1.128,0.515,0.784,1.623
Other,0.612,0.138,0.32,1.171
1/2,0.479,0.004,0.293,0.785
2/3,0.7,0.0,0.58,0.844


***

#### Question 2

<b>'High'

In [90]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'high', q2cols, ind_vars)

Optimization terminated successfully.
         Current function value: 0.872336
         Iterations: 29
         Function evaluations: 30
         Gradient evaluations: 30
                             OrderedModel Results                             
Dep. Variable:                    Q2a   Log-Likelihood:                -303.57
Model:                   OrderedModel   AIC:                             621.1
Method:            Maximum Likelihood   BIC:                             648.1
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,0.485,0.036,0.247,0.955
Gender,0.737,0.025,0.564,0.961
Hispanic,1.117,0.727,0.599,2.083
White,1.253,0.238,0.862,1.821
Other,0.588,0.116,0.303,1.14
1/2,0.264,0.0,0.157,0.442
2/3,0.669,0.0,0.541,0.827


<b>'Intoxicated'

In [91]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'intoxicated', q2cols, ind_vars)

Optimization terminated successfully.
         Current function value: 1.012143
         Iterations: 28
         Function evaluations: 29
         Gradient evaluations: 29
                             OrderedModel Results                             
Dep. Variable:                    Q2i   Log-Likelihood:                -352.23
Model:                   OrderedModel   AIC:                             718.5
Method:            Maximum Likelihood   BIC:                             745.4
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,0.823,0.553,0.432,1.568
Gender,1.017,0.898,0.789,1.31
Hispanic,2.277,0.013,1.186,4.37
White,1.245,0.234,0.868,1.786
Other,0.442,0.019,0.224,0.873
1/2,0.496,0.005,0.304,0.81
2/3,0.747,0.002,0.623,0.895


<b>'Euphoria'

In [92]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'euphoria', q2cols, ind_vars)

Optimization terminated successfully.
         Current function value: 1.028101
         Iterations: 28
         Function evaluations: 29
         Gradient evaluations: 29
                             OrderedModel Results                             
Dep. Variable:                    Q2b   Log-Likelihood:                -357.78
Model:                   OrderedModel   AIC:                             729.6
Method:            Maximum Likelihood   BIC:                             756.5
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,0.987,0.969,0.519,1.879
Gender,0.733,0.016,0.57,0.943
Hispanic,1.118,0.707,0.624,2.006
White,2.041,0.0,1.422,2.929
Other,1.1,0.769,0.582,2.078
1/2,0.78,0.316,0.479,1.268
2/3,0.785,0.007,0.659,0.935


***

#### Question 3

<b>'Withdrawal symptoms'

In [93]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'withdrawal symptoms', q3cols, ind_vars)

Optimization terminated successfully.
         Current function value: 0.808609
         Iterations: 29
         Function evaluations: 30
         Gradient evaluations: 30
                             OrderedModel Results                             
Dep. Variable:                    Q3b   Log-Likelihood:                -281.40
Model:                   OrderedModel   AIC:                             576.8
Method:            Maximum Likelihood   BIC:                             603.8
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,0.32,0.001,0.158,0.645
Gender,1.037,0.798,0.784,1.373
Hispanic,0.973,0.933,0.51,1.854
White,1.368,0.103,0.938,1.994
Other,0.909,0.786,0.458,1.805
1/2,0.254,0.0,0.149,0.432
2/3,0.629,0.0,0.5,0.79


<b>'Withdrawal'

In [94]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'withdrawal', q3cols, ind_vars)

Optimization terminated successfully.
         Current function value: 0.819902
         Iterations: 29
         Function evaluations: 30
         Gradient evaluations: 30
                             OrderedModel Results                             
Dep. Variable:                    Q3a   Log-Likelihood:                -285.33
Model:                   OrderedModel   AIC:                             584.7
Method:            Maximum Likelihood   BIC:                             611.6
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,0.38,0.006,0.189,0.762
Gender,1.024,0.866,0.776,1.351
Hispanic,1.29,0.444,0.672,2.48
White,1.05,0.803,0.717,1.536
Other,0.527,0.067,0.266,1.046
1/2,0.213,0.0,0.125,0.363
2/3,0.655,0.0,0.524,0.819


<b>'Sick'

In [95]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'sick', q3cols, ind_vars)

Optimization terminated successfully.
         Current function value: 0.898459
         Iterations: 28
         Function evaluations: 29
         Gradient evaluations: 29
                             OrderedModel Results                             
Dep. Variable:                    Q3c   Log-Likelihood:                -312.66
Model:                   OrderedModel   AIC:                             639.3
Method:            Maximum Likelihood   BIC:                             666.3
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,0.688,0.277,0.35,1.351
Gender,1.059,0.675,0.811,1.383
Hispanic,1.58,0.152,0.845,2.953
White,1.472,0.039,1.019,2.126
Other,0.771,0.438,0.4,1.487
1/2,0.426,0.001,0.257,0.705
2/3,0.685,0.0,0.558,0.841


***

#### Question 4

E: <b>"Taking methadone daily is important for my treatment"

In [96]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'E', q4cols, ind_vars)

Optimization terminated successfully.
         Current function value: 0.599868
         Iterations: 32
         Function evaluations: 33
         Gradient evaluations: 33
                             OrderedModel Results                             
Dep. Variable:                    Q4e   Log-Likelihood:                -208.75
Model:                   OrderedModel   AIC:                             431.5
Method:            Maximum Likelihood   BIC:                             458.5
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,1.797,0.145,0.817,3.949
Gender,1.246,0.182,0.902,1.72
Hispanic,1.261,0.505,0.638,2.49
White,1.862,0.004,1.219,2.842
Other,0.83,0.606,0.408,1.688
1/2,0.527,0.028,0.298,0.933
2/3,0.48,0.0,0.352,0.654


H: <b>"Methadone is a life-saving medication"

In [97]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'H', q4cols, ind_vars)

Optimization terminated successfully.
         Current function value: 0.721531
         Iterations: 31
         Function evaluations: 32
         Gradient evaluations: 32
                             OrderedModel Results                             
Dep. Variable:                    Q4h   Log-Likelihood:                -251.09
Model:                   OrderedModel   AIC:                             516.2
Method:            Maximum Likelihood   BIC:                             543.2
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,0.786,0.515,0.381,1.623
Gender,1.203,0.218,0.896,1.614
Hispanic,0.842,0.627,0.42,1.688
White,1.669,0.011,1.127,2.473
Other,1.344,0.433,0.642,2.811
1/2,0.413,0.001,0.241,0.711
2/3,0.556,0.0,0.428,0.722


F: <b>"Counseling is an important part of my treatment"

In [98]:
ind_vars = ['Age', 'Gender', 'Hispanic', 'White', 'Other']
logit_df = run_logit(term_df, 'F', q4cols, ind_vars)

Optimization terminated successfully.
         Current function value: 0.703050
         Iterations: 32
         Function evaluations: 33
         Gradient evaluations: 33
                             OrderedModel Results                             
Dep. Variable:                    Q4f   Log-Likelihood:                -244.66
Model:                   OrderedModel   AIC:                             503.3
Method:            Maximum Likelihood   BIC:                             530.3
Date:                Thu, 25 Jul 2024                                         
Time:                        16:33:10                                         
No. Observations:                 348                                         
Df Residuals:                     341                                         
Df Model:                           5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------

Unnamed: 0,Odds Ratio,p-value,5%,95%
Age,1.237,0.579,0.584,2.618
Gender,1.258,0.135,0.931,1.698
Hispanic,1.184,0.631,0.594,2.359
White,1.202,0.383,0.795,1.818
Other,0.791,0.529,0.38,1.643
1/2,0.402,0.001,0.23,0.703
2/3,0.471,0.0,0.354,0.626


***

# Subgroup analyses

# Footnotes

Sullivan GM, Artino AR Jr. Analyzing and interpreting data from likert-type scales. J Grad Med Educ. 2013 Dec;5(4):541-2. doi: 10.4300/JGME-5-4-18. PMID: 24454995; PMCID: PMC3886444.
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3886444/

https://www.nacaa.com/file.ashx?id=c1f53033-a29a-4b7b-b9f0-a00b1fe3ff74