# Logistic Regression Analysis

### Import data and libraries

In [None]:
## Load modules
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
import os
from matplotlib import pyplot as plt

## Specify WD
## All relative paths below are resolved against this directory.
#wd = "/home/abf/BINF667013_Final_Project/"
wd = (
    "/Users/adam/Documents/BINF667013_BigDataAnalyticsHealthcare/"
    "Final_Project/TEDS_Study"
)
os.chdir(wd)

## Load data
## Only the teds_imp_laws table is read; the teds_laws read is left disabled.
# teds_laws = pd.read_csv("analysis_objects/teds_laws.csv")
teds_imp_laws = pd.read_csv(
    "analysis_objects/teds_imp_laws.csv"
)


### Define A function to flag relapses
def relapse(x, **kwargs):
    """Flag a single record as a relapse (1) or not (0) for one substance.

    A record is flagged when ``x['SUB1']`` equals the target code AND at
    least one of ``x['SUB1_D']``, ``x['SUB2_D']``, ``x['SUB3_D']`` equals
    that same code.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must support lookup by 'SUB1'; the three ``*_D`` keys are only
        read when ``x['SUB1']`` matches the target code.
    **kwargs
        Must contain ``drug``, the substance code to compare against.

    Returns
    -------
    int
        1 when the record is flagged, otherwise 0.
    """
    target = kwargs['drug']

    # SUB1 does not match: the *_D fields are never consulted.
    if not (x['SUB1'] == target):
        return 0

    # NOTE(review): the "_D" suffix presumably marks discharge-time
    # fields -- confirm against the data codebook.
    d_fields = ('SUB1_D', 'SUB2_D', 'SUB3_D')
    return 1 if any(x[field] == target for field in d_fields) else 0


## Add Relapse Columns
## The flags are computed with whole-column comparisons instead of a
## row-by-row ``apply(..., axis=1)``; the resulting values and dtypes are
## the same (bool for *_cases, int64 0/1 for *_relapse) but no Python
## function is called once per row.
def _relapse_flags(df, drug):
    """Return a 0/1 int64 Series flagging relapse rows for ``drug``.

    A row is flagged when ``SUB1`` equals ``drug`` and at least one of
    ``SUB1_D``, ``SUB2_D``, ``SUB3_D`` equals ``drug`` as well.
    """
    sub1_matches = df['SUB1'].eq(drug)
    any_d_matches = df[['SUB1_D', 'SUB2_D', 'SUB3_D']].eq(drug).any(axis=1)
    return (sub1_matches & any_d_matches).astype('int64')


## SUB1 codes used below: 2 -> alc_*, 5 -> hrn_*, 10 -> met_* columns.
teds_imp_laws = teds_imp_laws.assign(
    alc_cases=lambda df: df['SUB1'].eq(2),
    hrn_cases=lambda df: df['SUB1'].eq(5),
    met_cases=lambda df: df['SUB1'].eq(10),
    alc_relapse=lambda df: _relapse_flags(df, drug=2),
    hrn_relapse=lambda df: _relapse_flags(df, drug=5),
    met_relapse=lambda df: _relapse_flags(df, drug=10),
)




### Logit model to estimate influence of IC_laws on relapse rate

In [None]:
## Boolean mask: rows flagged as alcohol cases (alc_cases column equals 1)
alc_cases = teds_imp_laws["alc_cases"].eq(1)

## Fit a logit of alc_relapse on IC_law using only the masked rows
alc_result = smf.logit(
    formula="alc_relapse ~ IC_law",
    data=teds_imp_laws[alc_cases],
).fit()
print(alc_result.summary())



In [None]:
## Boolean mask: rows flagged as heroin cases (hrn_cases column equals 1)
hrn_cases = teds_imp_laws["hrn_cases"].eq(1)

## Fit a logit of hrn_relapse on IC_law using only the masked rows
hrn_result = smf.logit(
    formula="hrn_relapse ~ IC_law",
    data=teds_imp_laws[hrn_cases],
).fit()
print(hrn_result.summary())



In [None]:
## Boolean mask: rows flagged as methamphetamine cases (met_cases column equals 1)
met_cases = teds_imp_laws["met_cases"].eq(1)

## Fit a logit of met_relapse on IC_law using only the masked rows
met_result = smf.logit(
    formula="met_relapse ~ IC_law",
    data=teds_imp_laws[met_cases],
).fit()
print(met_result.summary())



### Ordinal logistic regression to estimate influence of IC_laws on length of stay

In [None]:
## Convert Length of Stay to Ordinal -- this may require Biomix
## Failed to Converge after almost an hour
from statsmodels.miscmodels.ordinal_model import OrderedModel

## Ordered categorical with integer levels 0..37. Any LOS value that is not
## one of these levels is turned into NaN by the astype conversion.
los_dtype = pd.CategoricalDtype(categories=list(range(38)), ordered=True)
teds_imp_laws['LOS'] = teds_imp_laws['LOS'].astype(los_dtype)

## Ordinal model fit left disabled (see the convergence note above).
#results = OrderedModel(
#    teds_imp_laws['LOS'],
#    teds_imp_laws['IC_law'],
#    distr='logit'
#).fit(method='bfgs')


## Chi Square Test for significance -- obviously low p value
tab = pd.crosstab(teds_imp_laws['IC_law'], teds_imp_laws['LOS'])
table = sm.stats.Table(tab)

## Run the association test once and reuse the result for both printouts
## rather than recomputing it for the p-value.
nominal_assoc = table.test_nominal_association()
print(nominal_assoc)
print(nominal_assoc.pvalue)
