In [1]:
# Required to access the database
import os
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

import sys
import numpy
numpy.set_printoptions(threshold=sys.maxsize)

# Data analysis tools
import pandas as pd
import numpy as np
import seaborn as sns

# Models available in our application
from datasets.models import RawFlower, RawUNM, RawDAR
from django.contrib.auth.models import User


from datasets.models import RawNEU
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels

!pip install lxml


Defaulting to user installation because normal site-packages is not writeable


In [2]:
from api import adapters
from api import analysis

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
def crude_reg(df_merged, x_feature, y_feature, adjust_dilution, use_covars):
    """Fit an OLS model of ``y_feature`` on ``log(x_feature)`` and return its coefficient table.

    Parameters
    ----------
    df_merged : pandas.DataFrame
        Input data. It is NOT modified; all work happens on a copy.
    x_feature : str
        Exposure column; it is log-transformed before fitting.
    y_feature : str
        Outcome column.
    adjust_dilution : bool
        When ``True`` the exposure is divided by the ``UDR`` column first.
    use_covars : bool
        When true, every column except ``CohortType``, ``PIN_Patient`` and the
        outcome is used as a regressor; otherwise only the exposure is used.

    Returns
    -------
    pandas.DataFrame
        The coefficient table of the statsmodels summary (``summary().tables[1]``),
        indexed by term name.

    Raises
    ------
    ValueError
        If ``df_merged`` has two rows or fewer (no model is fitted in that case).
    """
    from io import StringIO

    # Work on a private copy so the caller's frame keeps its columns and values
    # (repeated calls must not divide by UDR twice or lose 'CohortType').
    data = df_merged.copy()

    # Explicit comparison kept on purpose: a truthy non-boolean such as the
    # string 'False' must not switch the adjustment on.
    if adjust_dilution == True:
        data[x_feature] = data[x_feature] / data['UDR']

    if use_covars:
        data = data.drop(['CohortType'], axis=1)
    else:
        data = data[[x_feature, y_feature]].copy()

    data['intercept'] = 1

    # Regressors: everything except the outcome and the patient identifier.
    regressors = [col for col in data.columns if col != y_feature and col != 'PIN_Patient']
    X = data[regressors].copy()
    Y = data[y_feature]

    X[x_feature] = np.log(X[x_feature])

    if df_merged.shape[0] <= 2:
        raise ValueError(
            'crude_reg needs more than 2 rows to fit a model, got {}'.format(df_merged.shape[0])
        )

    reg = sm.OLS(Y, X).fit()
    summary = reg.summary()

    # Wrap the HTML in a file-like object: passing literal HTML strings to
    # read_html is deprecated in recent pandas releases.
    coef_html = summary.tables[1].as_html()
    return pd.read_html(StringIO(coef_html), header=0, index_col=0)[0]

def crude_logreg(df_merged, x_feature, y_feature, adjust_dilution, use_covars):
    """Fit a logistic model of ``y_feature`` on ``log(x_feature)`` and return its coefficient table.

    Parameters
    ----------
    df_merged : pandas.DataFrame
        Input data. It is NOT modified; all work happens on a copy.
    x_feature : str
        Exposure column; it is log-transformed before fitting.
    y_feature : str
        Binary outcome column.
    adjust_dilution : bool
        When ``True`` the exposure is divided by the ``UDR`` column first.
    use_covars : bool
        When true, every column except ``CohortType``, ``PIN_Patient`` and the
        outcome is used as a regressor; otherwise only the exposure is used.

    Returns
    -------
    pandas.DataFrame
        The coefficient table of the statsmodels summary (``summary().tables[1]``),
        indexed by term name.

    Raises
    ------
    ValueError
        If ``df_merged`` has one row or fewer (no model is fitted in that case).
    """
    from io import StringIO

    # Work on a private copy so the caller's frame keeps its columns and values.
    data = df_merged.copy()

    # Explicit comparison kept on purpose: a truthy non-boolean such as the
    # string 'False' must not switch the adjustment on.
    if adjust_dilution == True:
        data[x_feature] = data[x_feature] / data['UDR']

    if use_covars:
        data = data.drop(['CohortType'], axis=1)
    else:
        data = data[[x_feature, y_feature]].copy()

    data['intercept'] = 1

    # Regressors: everything except the outcome and the patient identifier.
    regressors = [col for col in data.columns if col != y_feature and col != 'PIN_Patient']
    X = data[regressors].copy()
    Y = data[y_feature]

    X[x_feature] = np.log(X[x_feature])

    # fit the model
    print('columns going into logreg')
    print(X.columns)

    if df_merged.shape[0] <= 1:
        raise ValueError(
            'crude_logreg needs more than 1 row to fit a model, got {}'.format(df_merged.shape[0])
        )

    log_reg = sm.Logit(Y, X).fit()
    summary = log_reg.summary()

    # Wrap the HTML in a file-like object: passing literal HTML strings to
    # read_html is deprecated in recent pandas releases.
    coef_html = summary.tables[1].as_html()
    return pd.read_html(StringIO(coef_html), header=0, index_col=0)[0]

In [4]:
def dummy_code(df, covars_cat, contin):
    """Dummy-code the categorical covariates and return them with the continuous columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data. It is NOT modified (the categorical conversion is done on
        temporary columns, not written back into ``df``).
    covars_cat : list of str
        Columns to expand into indicator columns. Each indicator is named
        ``<column>_<level>`` and the first level of every column is dropped.
    contin : list of str
        Columns to pass through unchanged.

    Returns
    -------
    pandas.DataFrame
        The indicator columns (in ``covars_cat`` order) followed by ``contin``.
    """
    n_rows = df.shape[0]

    # One indicator frame per categorical variable, built without touching df.
    dummy_frames = [
        pd.get_dummies(df[var].astype('category'), prefix=var, drop_first=True)
        for var in covars_cat
    ]
    coded_covars = [col for frame in dummy_frames for col in frame.columns.tolist()]

    # Replace the raw categorical columns with their indicator columns.
    coded = pd.concat([df.drop(columns=list(covars_cat))] + dummy_frames, axis=1)

    # Dummy coding must never add or remove rows.
    assert coded.shape[0] == n_rows

    return coded[coded_covars + list(contin)]


In [5]:

from api import dilutionproc   



def printsummary(df):
    """Placeholder for a summary printer; currently does nothing and returns None.

    TODO: separate the data into categorical and continuous summaries.
    """
    return None
    

In [6]:
# Get the data

## Model 1: Restricted to participants with no fish/seafood consumption.

## Get NEU data with no fish
df_NEU = adapters.neu.get_dataframe_orig()
df_NEU = df_NEU[df_NEU['TimePeriod']==2] # Visit 2

# Attach the NEU covariates; the merge keys are patient, cohort and time period.
df_NEU_covars = adapters.neu.get_dataframe_covars()
df_NEU = df_NEU_covars.merge(df_NEU, on = ['PIN_Patient','CohortType','TimePeriod']) #Merge the covariates

# Keep only rows where both fish columns are 0.
df_NEU = df_NEU[(df_NEU['fish_pu_v2'] == 0) & (df_NEU['fish'] == 0)] #No fish consumption

## Get DAR data with no fish
# NOTE: df_DAR is loaded here but is not placed in either frame list below.
df_DAR = adapters.dar.get_dataframe_nofish()
## Get UNM data (the no-fish filter below is commented out, so UNM is NOT restricted here)
df_UNM = adapters.unm.get_dataframe_orig()
#df_UNM = df_UNM[df_UNM['fish']==0]
df_UNM_covars = adapters.unm.get_dataframe_covars()

# NOTE: df_UNM is merged with its covariates but is not placed in either frame list below.
df_UNM = df_UNM_covars.merge(df_UNM, on = ['PIN_Patient','CohortType','TimePeriod']) #Merge the covariates

# Replace -9 (numeric and string form) with NaN; presumably -9 is a missing-value code — TODO confirm.
df_NEU = df_NEU.replace(-9,np.nan).replace('-9', np.nan)
#df_ALL = analysis.merge3CohortFrames(df_UNM,df_NEU,df_DAR)
# With the three-cohort merge commented out, df_ALL is the same object as df_NEU.
df_ALL = df_NEU

# Cohorts for which the dilution adjustment is computed.
frames_for_adjust = [
    ('NEU', df_NEU)
    
]


# Cohorts on which the regression models are run ('ALL' currently duplicates 'NEU').
frames_for_analysis = [
    ('NEU', df_NEU),
    ('ALL', df_ALL)
    
]

# Print the name and (rows, columns) shape of every analysis frame.
for name, df in frames_for_analysis:
    print('Data Stats')
    print(name)
    print(df.shape)



Data Stats
NEU
(379, 49)
Data Stats
ALL
(379, 49)


In [7]:
## Run the dilution adjustment for every cohort in frames_for_adjust.
## Result: keep_adj, a list with one frame per adjusted cohort holding the
## columns PIN_Patient, UDR and CohortType.

# Created once, before the loop, so the adjustments of ALL cohorts are kept
# (resetting it inside the loop would retain only the last cohort).
keep_adj = []

for name, df_coh in frames_for_adjust:
    print('Working on ', name)

    # variables for the fitting procedure
    x_feature = 'UTAS'
    cat_vars = ['babySex','smoking','education','race']
    contin_vars = ['PIN_Patient','BMI','UTAS']

    # dummy code
    df_coh_coded_model = dummy_code(df_coh, cat_vars, contin_vars)

    ## variables for the adjustment procedure
    adjust_cat_vars = ['babySex','smoking','education','race']
    adjust_contin_vars = ['PIN_Patient','CohortType','BMI', 'ga_collection','birth_year','age']

    # add the cohort-specific column used by the adjustment
    if name == 'NEU':
        adjust_contin_vars = adjust_contin_vars + ['SPECIFICGRAVITY_V2']

    if name == 'UNM':
        adjust_contin_vars = adjust_contin_vars + ['cratininemgl']

    if name == 'DAR':
        adjust_contin_vars = adjust_contin_vars + ['darvar']

    ## adjustment procedure
    # NOTE(review): the original list was ['NEU', 'UNM', 'NEU']; the duplicate
    # may have been meant to be 'DAR' — confirm before adding it.
    if name in ['NEU', 'UNM']:
        # dummy code
        df_coh_coded_adjust_model = dummy_code(df_coh, adjust_cat_vars, adjust_contin_vars)

        # the dilution model is run on complete cases only
        d_test = df_coh_coded_adjust_model.dropna()

        # Pass the cohort being processed instead of a hard-coded 'NEU'.
        dil_adj = dilutionproc.predict_dilution(d_test, name)

        # Not used further in this cell; kept so the merged frame can be inspected.
        fin = df_coh_coded_model.merge(dil_adj[['PIN_Patient','UDR']], on = ['PIN_Patient'])

        # assign() returns a new frame, so no value is set on a slice of dil_adj.
        adjs = dil_adj[['PIN_Patient','UDR']].assign(CohortType = name)

        keep_adj.append(adjs)
        print('Done')

Working on  NEU
Model out 358. afterocnf 358. check ids 358
Done


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [8]:
# Stack the per-cohort adjustment frames collected in keep_adj into one table.
# NOTE: the variable name is misspelled ("adjustmets"); a later cell refers to
# it by this exact name, so it is left unchanged.
cohort_adjustmets = pd.concat(keep_adj)

# Last expression of the cell: shown as the cell output.
cohort_adjustmets

Unnamed: 0,PIN_Patient,UDR,CohortType
0,2509,1.00255,NEU
1,2510,0.910953,NEU
2,2511,0.916386,NEU
3,2513,0.655646,NEU
4,2514,1.21662,NEU
5,2515,1.175886,NEU
6,2517,1.520292,NEU
7,2520,0.617405,NEU
8,2521,0.945163,NEU
9,2524,1.230321,NEU


In [21]:

## Fit the adjusted and crude models for every outcome and every cohort in
## frames_for_analysis. Every coefficient table is appended to outputs_conf and
## tagged with the columns 'y' (outcome), 'crude' (True/False) and 'model'.

Y_features_continuous = ['Outcome_weeks','birthWt', 'headCirc', 'birthLen']
Y_features_binary    =  ['LGA','SGA','Outcome']

outputs_conf = []
# Never filled: crude results also go to outputs_conf (flagged by 'crude').
# The name is kept in case other cells refer to it.
outputs_crude = []

# Dilution adjustments of all cohorts, used for the pooled 'ALL' frame.
# Built unconditionally so it is defined whatever the number of cohorts.
df_adj_all = pd.concat(keep_adj)

for outcome in Y_features_binary + Y_features_continuous:

    for name, df_coh in frames_for_analysis:
        print('Working on ', name)

        # variables for the fitting procedure
        x_feature = 'UTAS'
        cat_vars = ['babySex','smoking','education','race']
        contin_vars = ['PIN_Patient','BMI','UTAS'] + [outcome]

        # dummy code
        df_coh_coded_model = dummy_code(df_coh, cat_vars, contin_vars)

        ## variables for the adjustment procedure (not used by the models below)
        adjust_cat_vars = ['babySex','smoking','education','race']
        adjust_contin_vars = ['PIN_Patient','CohortType','BMI', 'ga_collection','birth_year','age']

        # Attach the dilution ratio (UDR) to the coded frame.
        if name in ['NEU', 'UNM']:
            print("go")
            fin = df_coh_coded_model.merge(cohort_adjustmets, on = ['PIN_Patient'])
            print(fin.columns)
        elif name in ['ALL']:
            fin = df_coh_coded_model.merge(df_adj_all, on = ['PIN_Patient'])
        else:
            # Without this guard the frame of the PREVIOUS cohort would be
            # silently reused for the models below.
            print('No dilution adjustment available for', name, '- skipping')
            continue

        # models are fitted on complete cases only
        fin = fin.dropna()

        if outcome in Y_features_continuous:
            fit_model, model_label = crude_reg, 'OLS'
        else:
            fit_model, model_label = crude_logreg, 'Logit'

        # adjusted model first (use_covars=True), then the crude one
        for use_covars in (True, False):
            # Each fit gets its own copy so neither call can alter the frame
            # seen by the other.
            output = fit_model(fin.copy(), x_feature, outcome, False, use_covars)

            if model_label == 'Logit':
                # Relabel the Logit table with the OLS column names so both
                # kinds of table line up when they are concatenated.
                output.columns = ['coef', 'std err', 't', 'P>|t|', '[0.025','0.975]']

            output['y'] = outcome
            output['crude'] = not use_covars
            output['model'] = model_label

            outputs_conf.append(output)
    
    




# set output paths for results:

#utput_path_model1_adj = '/usr/src/app/mediafiles/analysisresults/model1adj/'
#utput_path_model1_noadj = '/usr/src/app/mediafiles/analysisresults/model1noadj/'

#ry:
#   os.mkdir(output_path_model1_adj)
#   os.mkdir(output_path_model1_noadj)
#xcept:
#   print('Exists')


# start analysis






Working on  NEU
go
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'PIN_Patient', 'BMI', 'UTAS', 'LGA', 'UDR', 'CohortType'],
      dtype='object')
columns going into logreg
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'BMI', 'UTAS', 'UDR', 'intercept'],
      dtype='object')
         Current function value: 0.261179
         Iterations: 35
columns going into logreg
Index(['UTAS', 'intercept'], dtype='object')
Optimization terminated successfully.
         Current function value: 0.288120
         Iterations 7
Working on  ALL
columns going into logreg
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'BMI', 'UTAS', 'UDR', 'intercept']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[x_feature]= np.log(X[x_feature])
  warn("Maximum Likelihood optimization failed to converge. "
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['intercept'] = 1
  warn("Maximum Likelihood optimization failed to converge. "


go
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'PIN_Patient', 'BMI', 'UTAS', 'SGA', 'UDR', 'CohortType'],
      dtype='object')
columns going into logreg
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'BMI', 'UTAS', 'UDR', 'intercept'],
      dtype='object')
         Current function value: 0.286599
         Iterations: 35
columns going into logreg
Index(['UTAS', 'intercept'], dtype='object')
Optimization terminated successfully.
         Current function value: 0.319352
         Iterations 6
Working on  ALL
columns going into logreg
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'BMI', 'UTAS', 'UDR', 'intercept'],
      dtype='o

  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "


go
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'PIN_Patient', 'BMI', 'UTAS', 'Outcome', 'UDR',
       'CohortType'],
      dtype='object')
columns going into logreg
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'BMI', 'UTAS', 'UDR', 'intercept'],
      dtype='object')
         Current function value: 0.226992
         Iterations: 35
columns going into logreg
Index(['UTAS', 'intercept'], dtype='object')
Optimization terminated successfully.
         Current function value: 0.265271
         Iterations 7
Working on  ALL
columns going into logreg
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'BMI', 'UTAS', 'UDR', 'intercept'],
   

  warn("Maximum Likelihood optimization failed to converge. "
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[x_feature]= np.log(X[x_feature])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['intercept'] = 1


Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'PIN_Patient', 'BMI', 'UTAS', 'Outcome_weeks', 'UDR',
       'CohortType'],
      dtype='object')
Working on  ALL
Working on  NEU
go
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'PIN_Patient', 'BMI', 'UTAS', 'birthWt', 'UDR',
       'CohortType'],
      dtype='object')
Working on  ALL
Working on  NEU
go
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_3', 'race_6', 'race_888', 'race_97',
       'race_999', 'PIN_Patient', 'BMI', 'UTAS', 'headCirc', 'UDR',
       'CohortType'],
      dtype='object')
Working on  ALL
Working on  NEU
go
Index(['babySex_2', 'education_2.0', 'education_3.0', 'education_4.0',
       'education_5.0', 'race_2', 'race_

In [22]:
# Stack every coefficient table collected in outputs_conf into a single frame;
# as the last expression of the cell it is rendered as the cell output.
pd.concat(outputs_conf)

Unnamed: 0,coef,std err,t,P>|t|,[0.025,0.975],y,crude,model
babySex_2,-0.5507,0.416,-1.325,0.185,-1.365,0.264,LGA,False,Logit
education_2.0,-0.7282,0.917,-0.794,0.427,-2.526,1.069,LGA,False,Logit
education_3.0,-0.7565,0.978,-0.774,0.439,-2.673,1.16,LGA,False,Logit
education_4.0,-0.0457,0.842,-0.054,0.957,-1.695,1.604,LGA,False,Logit
education_5.0,0.7213,0.879,0.82,0.412,-1.002,2.444,LGA,False,Logit
race_2,1.1003,1.242,0.886,0.376,-1.334,3.535,LGA,False,Logit
race_3,-17.6251,19000.0,-0.001,0.999,-37200.0,37100.0,LGA,False,Logit
race_6,0.2427,0.41,0.591,0.554,-0.562,1.047,LGA,False,Logit
race_888,-22.4844,215000.0,-0.0,1.0,-422000.0,422000.0,LGA,False,Logit
race_97,-21.7456,208000.0,-0.0,1.0,-408000.0,408000.0,LGA,False,Logit


In [None]:
# Crude (exposure-only) models for every frame in frames_for_analysis.
# crude_reg / crude_logreg take five arguments
# (df_merged, x_feature, y_feature, adjust_dilution, use_covars); the calls
# below now supply the last two, which were missing.
# NOTE(review): adjust_dilution=False and use_covars=False are assumed from the
# "crude" intent of this cell — confirm.
for name, frame in frames_for_analysis:

    print('Min: {} Max: {}'.format(frame['UTAS'].min(), frame['UTAS'].max()))
    # log(UTAS) is taken inside the model functions, so keep positive values only
    frame = frame[(frame['UTAS'] > 0) & (~frame['UTAS'].isna())]
    print('Min: {} Max: {}'.format(frame['UTAS'].min(), frame['UTAS'].max()))

    for y_feature in Y_features_continuous:
        # complete cases for this exposure/outcome pair, as in the main model loop
        frame_xy = frame[[x_feature, y_feature]].dropna()
        output = crude_reg(frame_xy, x_feature, y_feature, False, False)

        print(output)

    for y_feature in Y_features_binary:
        frame_xy = frame[[x_feature, y_feature]].dropna()
        output = crude_logreg(frame_xy, x_feature, y_feature, False, False)

        print(output)


In [None]:
from api import dilutionproc

# Select the identifier, cohort and covariate columns handed to the dilution model.
d_test = df_NEU[['PIN_Patient','CohortType','race', 'education','babySex','BMI', 'ga_collection','birth_year','age','SPECIFICGRAVITY_V2']]
# Drop every row that has a missing value in any of the selected columns.
d_test = d_test.dropna()

# Run the dilution prediction for the NEU cohort; as the last expression of the
# cell its return value is shown as the cell output.
dilutionproc.predict_dilution(d_test, 'NEU')

In [None]:
#Model 2: Restricted to participants with arsenic speciation data.

## Get data with fish
df_UNM = adapters.unm.get_dataframe()
df_DAR = adapters.dar.get_dataframe_pred()

## merge data frames
# NOTE(review): the helper is assumed to live in api.analysis (imported as
# `analysis`), like the merge3CohortFrames call referenced in the data-loading
# cell — confirm.
df_UNMDAR = analysis.merge2CohortFrames(df_UNM,df_DAR)

# NOTE: this rebinds frames_for_analysis, replacing the list built for Model 1.
frames_for_analysis = [
    ('UNM', df_UNM),
    ('DAR', df_DAR),
    ('UNMDAR', df_UNMDAR)
]

for name, df in frames_for_analysis:
    print('Data Stats')
    print(name)
    print(df.shape)

x_feature = 'UTAS'
covars = 'babySex|BMI|parity|smoking|education'
all_vars = covars.split('|') + [x_feature]
Y_features_continous = ['Outcome_weeks','birthWt', 'headCirc', 'birthLen']
Y_features_binary    = ['LGA','SGA','Outcome']

output_path_model2_adj = '/usr/src/app/mediafiles/analysisresults/model2adj/'
output_path_model2_noadj = '/usr/src/app/mediafiles/analysisresults/model2noadj/'

# Create both result directories. exist_ok=True makes an existing directory a
# no-op while real failures (e.g. permissions) still raise; previously a bare
# `except` hid them and skipped the second directory when the first existed.
os.makedirs(output_path_model2_adj, exist_ok=True)
os.makedirs(output_path_model2_noadj, exist_ok=True)

# NOTE(review): the crude_reg / crude_logreg calls below pass seven positional
# arguments, but the definitions in this notebook take five, and text_writing
# is not defined in this notebook — confirm which helpers this cell targets.
# First pass: with adjustment ('True'); second pass: without ('False').
for adjust_flag, output_dir in (('True', output_path_model2_adj),
                                ('False', output_path_model2_noadj)):

    for name, frame in frames_for_analysis:

        print('Min: {} Max: {}'.format(frame['UTAS'].min(), frame['UTAS'].max()))
        # keep rows with a positive, non-missing exposure
        frame = frame[(frame['UTAS'] > 0) & (~frame['UTAS'].isna())]
        print('Min: {} Max: {}'.format(frame['UTAS'].min(), frame['UTAS'].max()))

        for y_feature in Y_features_continous:
            output = crude_reg(frame, x_feature, y_feature, covars, adjust_flag, 'csv', True)
            text_writing(name, frame, x_feature, y_feature, all_vars, output_dir, output, "linear_reg_{}_{}_log({}).txt".format(name, y_feature, x_feature), 'Linear Regression')

        for y_feature in Y_features_binary:
            output = crude_logreg(frame, x_feature, y_feature, covars, adjust_flag, 'csv', True)
            text_writing(name, frame, x_feature, y_feature, all_vars, output_dir, output, "logistic_reg_{}_{}_log({}).txt".format(name, y_feature, x_feature), 'Logistic Regression')



#Model 3: Restricted to arsenic speciation data with AsB ≤1 µg/L.

x_feature = 'UTAS'
covars = 'babySex|BMI|parity|smoking|education'
all_vars = covars.split('|') + [x_feature]
Y_features_continous = ['Outcome_weeks','birthWt', 'headCirc', 'birthLen']
Y_features_binary    = ['LGA','SGA','Outcome']

## Output directories
output_path_model3_adj = '/usr/src/app/mediafiles/analysisresults/model3adj/'
output_path_model3_noadj = '/usr/src/app/mediafiles/analysisresults/model3noadj/'

# Create both result directories. exist_ok=True makes an existing directory a
# no-op while real failures (e.g. permissions) still raise; previously a bare
# `except` hid them and skipped the second directory when the first existed.
os.makedirs(output_path_model3_adj, exist_ok=True)
os.makedirs(output_path_model3_noadj, exist_ok=True)

# Keep only the rows with AsB (UASB) <= 1.
df_UNM = df_UNM[df_UNM['UASB'] <= 1]
df_DAR = df_DAR[df_DAR['UASB'] <= 1]

df_UNMDAR_UASB = df_UNMDAR[df_UNMDAR['UASB'] <= 1]

frames_for_analysis3 = [
    ('UNM', df_UNM),
    ('DAR', df_DAR),
    # Use the AsB-filtered merge; the unfiltered df_UNMDAR was listed here
    # before, so the pooled model ignored the restriction.
    ('UNMDAR', df_UNMDAR_UASB)
]

# NOTE(review): the crude_reg / crude_logreg calls below pass seven positional
# arguments, but the definitions in this notebook take five, and text_writing
# is not defined in this notebook — confirm which helpers this cell targets.
# First pass: with adjustment ('True'); second pass: without ('False').
for adjust_flag, output_dir in (('True', output_path_model3_adj),
                                ('False', output_path_model3_noadj)):

    for name, frame in frames_for_analysis3:

        print('Min: {} Max: {}'.format(frame['UTAS'].min(), frame['UTAS'].max()))
        # keep rows with a positive, non-missing exposure
        frame = frame[(frame['UTAS'] > 0) & (~frame['UTAS'].isna())]
        print('Min: {} Max: {}'.format(frame['UTAS'].min(), frame['UTAS'].max()))

        for y_feature in Y_features_continous:
            output = crude_reg(frame, x_feature, y_feature, covars, adjust_flag, 'csv', True)
            text_writing(name, frame, x_feature, y_feature, all_vars, output_dir, output, "linear_reg_{}_{}_log({}).txt".format(name, y_feature, x_feature), 'Linear Regression')

        for y_feature in Y_features_binary:
            output = crude_logreg(frame, x_feature, y_feature, covars, adjust_flag, 'csv', True)
            text_writing(name, frame, x_feature, y_feature, all_vars, output_dir, output, "logistic_reg_{}_{}_log({}).txt".format(name, y_feature, x_feature), 'Logistic Regression')


#Model 4: Sensitivity analysis

x_feature = 'UTAS'
covars = 'babySex|BMI|parity|smoking|education'
all_vars = covars.split('|') + [x_feature]
Y_features_continous = ['Outcome_weeks','birthWt', 'headCirc', 'birthLen']
Y_features_binary    = ['LGA','SGA','Outcome']

## Output directories
output_path_model4_adj = '/usr/src/app/mediafiles/analysisresults/model4adj/'
output_path_model4_noadj = '/usr/src/app/mediafiles/analysisresults/model4noadj/'

# Create both result directories. exist_ok=True makes an existing directory a
# no-op while real failures (e.g. permissions) still raise; previously a bare
# `except` hid them and skipped the second directory when the first existed.
os.makedirs(output_path_model4_adj, exist_ok=True)
os.makedirs(output_path_model4_noadj, exist_ok=True)

## Get data all
df_NEU = adapters.neu.get_dataframe()
df_UNM = adapters.unm.get_dataframe()
df_DAR = adapters.dar.get_dataframe_pred()

## merge data frames
# NOTE(review): the merge helpers are assumed to live in api.analysis (imported
# as `analysis`), like the merge3CohortFrames call referenced in the
# data-loading cell — confirm.
df_NEUUNM = analysis.merge2CohortFrames(df_NEU,df_UNM)
df_NEUDAR = analysis.merge2CohortFrames(df_NEU,df_DAR)
df_UNMDAR = analysis.merge2CohortFrames(df_UNM,df_DAR)
df_merged_3 = analysis.merge3CohortFrames(df_NEU,df_UNM,df_DAR)

frames_for_analysis4 = [
    ('NEU', df_NEU),
    ('UNM', df_UNM),
    ('DAR', df_DAR),
    ('NEUUNM', df_NEUUNM),
    ('NEUDAR', df_NEUDAR),
    ('UNMDAR', df_UNMDAR),
    ('UNMDARNEU', df_merged_3),
]

# NOTE(review): the crude_reg / crude_logreg calls below pass seven positional
# arguments, but the definitions in this notebook take five, and text_writing
# is not defined in this notebook — confirm which helpers this cell targets.
# First pass: with adjustment ('True'); second pass: without ('False').
# Both passes run over frames_for_analysis4 (the unadjusted pass used to
# iterate the Model 3 frame list by mistake).
for adjust_flag, output_dir in (('True', output_path_model4_adj),
                                ('False', output_path_model4_noadj)):

    for name, frame in frames_for_analysis4:

        print('Min: {} Max: {}'.format(frame['UTAS'].min(), frame['UTAS'].max()))
        # keep rows with a positive, non-missing exposure
        frame = frame[(frame['UTAS'] > 0) & (~frame['UTAS'].isna())]
        print('Min: {} Max: {}'.format(frame['UTAS'].min(), frame['UTAS'].max()))

        for y_feature in Y_features_continous:
            output = crude_reg(frame, x_feature, y_feature, covars, adjust_flag, 'csv', True)
            text_writing(name, frame, x_feature, y_feature, all_vars, output_dir, output, "linear_reg_{}_{}_log({}).txt".format(name, y_feature, x_feature), 'Linear Regression')

        for y_feature in Y_features_binary:
            output = crude_logreg(frame, x_feature, y_feature, covars, adjust_flag, 'csv', True)
            # File name now carries the underscore after "logistic_reg", matching
            # every other result file written by this notebook.
            text_writing(name, frame, x_feature, y_feature, all_vars, output_dir, output, "logistic_reg_{}_{}_log({}).txt".format(name, y_feature, x_feature), 'Logistic Regression')
