In [1]:
# Required to access the database
import os
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

import sys
import numpy
numpy.set_printoptions(threshold=sys.maxsize)

# Data analysis tools
import pandas as pd
import numpy as np
import seaborn as sns

# Models available in our application
from datasets.models import RawFlower, RawUNM, RawDAR
from django.contrib.auth.models import User
from datasets.models import RawDictionary


from datasets.models import RawNEU
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels

!pip install lxml

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [2]:
from api import adapters
from api import analysis

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
def dummy_code(df, covars_cat, contin):
    coded_covars = []
    orig_shape = df.shape[0]
    for var in covars_cat:

        df[var] = pd.Categorical(df[var])

        dummies_df = pd.get_dummies(df[var], prefix = var, drop_first=True)

        coded_covars = coded_covars + [ x for x in dummies_df.columns.tolist()]

        df = pd.concat([df, dummies_df], axis = 1)
        df.drop([var], inplace = True, axis = 1)
        
        assert df.shape[0] == orig_shape
    
    #print(coded_covars + contin)
    return df[coded_covars + contin]


In [4]:
from api import dilutionproc   

In [5]:
def merge3CohortFrames2(df1,df2,df3):
    'merge on feature intersections'

    for as_feature in ['UASB', 'UDMA', 'UAS5', 'UIAS', 'UAS3', 'UMMA']:
        if as_feature not in df1.columns:
            df1[as_feature] = np.nan
        if as_feature not in df2.columns:
            df2[as_feature] = np.nan
        if as_feature not in df3.columns:
            df3[as_feature] = np.nan

    s1 = set(df1.columns)
    s2 = set(df2.columns)
    s3 = set(df3.columns)

    cc = set.intersection(s1, s2, s3)

    df_all = pd.concat([df1[cc],df2[cc],df3[cc]])

    return df_all

In [6]:
## decide whether to exclude fish consumption (True = include fish, False = No fish)
#Model1 = False
#Model4 = True
fish_consumption = True
## decide whether to adjust the model using the dilution procedure (True = run dilution adj, False = no)
run_adjustment = True

In [7]:
## Get the data

## Get NEU data with no fish
df_NEU = adapters.neu.get_dataframe_orig()
df_NEU = df_NEU[df_NEU['TimePeriod']==2] # Visit 2

df_NEU_covars = adapters.neu.get_dataframe_covars()
df_NEU = df_NEU_covars.merge(df_NEU, on = ['PIN_Patient','CohortType','TimePeriod']) #Merge the covariates

## Get DAR data
#df_DAR = adapters.unm.get_dataframe_orig()

df_DAR = df_NEU.copy()
df_DAR['CohortType'] = 'DAR'

## Get UNM data with no fis
'''
df_UNM = adapters.unm.get_dataframe_orig()
df_UNM_covars = adapters.unm.get_dataframe_covars()

df_UNM = df_UNM_covars.merge(df_UNM, on = ['PIN_Patient','CohortType','TimePeriod']) #Merge the covariates

df_NEU = df_NEU.replace(-9,np.nan).replace('-9', np.nan)
'''

df_UNM = df_NEU.copy()
df_UNM['CohortType'] = 'UNM'

# Remove fish
if fish_consumption == False:
    df_NEU = df_NEU[(df_NEU['fish_pu_v2'] == 0) & (df_NEU['fish'] == 0)] #No fish consumption
    df_UNM = df_UNM[df_UNM['fish']==0]
    df_DAR = adapters.dar.get_dataframe_nofish()
    

df_ALL = merge3CohortFrames2(df_NEU, df_UNM, df_DAR)

frames_for_adjust = [
    ('NEU', df_NEU),
    ('UNM', df_UNM),
    ('DAR', df_DAR)
]


#df_ALL = analysis.merge3CohortFrames(df_NEU, df_UNM, df_DAR)
frames_for_analysis = [
    ('NEU', df_NEU),
    ('UNM', df_UNM),
    ('DAR', df_DAR),
    ('ALL', df_ALL)

]

for name, df in frames_for_analysis:
    print('Data Stats')
    print(name)
    print(df.shape)

Data Stats
NEU
(570, 55)
Data Stats
UNM
(570, 55)
Data Stats
DAR
(570, 55)
Data Stats
ALL
(1710, 55)


In [8]:
df_UNM.columns

Index(['PIN_Patient', 'CohortType', 'TimePeriod', 'Outcome_weeks', 'age',
       'ethnicity', 'race', 'Outcome', 'BMI', 'smoking', 'parity',
       'preg_complications', 'ga_collection', 'folic_acid_supp', 'fish',
       'babySex', 'birthWt', 'birthLen', 'headCirc', 'WeightCentile', 'LGA',
       'SGA', 'education', 'birth_year', 'SPECIFICGRAVITY_V2', 'fish_pu_v2',
       'Member_c', 'UBA', 'UBE', 'UCD', 'UCO', 'UCR', 'UCS', 'UCU', 'UHG',
       'UMN', 'UMO', 'UNI', 'UPB', 'UPT', 'USB', 'USE', 'USN', 'UTAS', 'UTL',
       'UTU', 'UUR', 'UVA', 'UZN', 'UASB', 'UDMA', 'UAS5', 'UIAS', 'UAS3',
       'UMMA'],
      dtype='object')

In [9]:
##Run the adjustment
keep_adj = []
for name, df_coh in frames_for_adjust:
    print('Working on ', name)

    
    #variables for fitting procedure
    x_feature = 'UTAS'
    cat_vars = ['babySex','smoking','education']
    contin_vars = ['PIN_Patient','BMI','UTAS'] 

    # dummy code
    df_coh_coded_model =  dummy_code(df_coh, cat_vars, contin_vars)

    ## variables for addjustment procedure
    adjust_cat_vars =  ['babySex','smoking','education','race']
    adjust_contin_vars = ['PIN_Patient','CohortType','BMI', 'ga_collection','birth_year','age']
    
    #add proper variable depending on cohort
    if name == 'NEU':

        adjust_contin_vars= adjust_contin_vars + ['SPECIFICGRAVITY_V2']

    if name == 'UNM':
        adjust_contin_vars= adjust_contin_vars + ['SPECIFICGRAVITY_V2']
        #adjust_contin_vars = adjust_contin_vars + ['creatininemgdl']

    if name == 'DAR':
        adjust_contin_vars= adjust_contin_vars + ['SPECIFICGRAVITY_V2']
        #adjust_contin_vars = adjust_contin_vars + ['urine_specific_gravity']

    ## adjustment procedure
    if name in ['NEU', 'UNM', 'DAR']:
        #dummy code 
        df_coh_coded_adjust_model =  dummy_code(df_coh, adjust_cat_vars, adjust_contin_vars)

        d_test = df_coh_coded_adjust_model.dropna()
        
        print(d_test.shape)
    
        dil_adj = dilutionproc.predict_dilution(d_test, name)

        fin = df_coh_coded_model.merge(dil_adj[['PIN_Patient','UDR']], on = ['PIN_Patient'])

        adjs = dil_adj[['PIN_Patient','UDR']]
        adjs.loc[:,'CohortType'] = name
        print(adjs.shape)
        keep_adj.append(adjs)
        print('Done')

Working on  NEU
(522, 25)
522
Model out 522. afterocnf 522. check ids 522
(522, 3)
Done
Working on  UNM
(522, 25)
522
Model out 522. afterocnf 522. check ids 522
(522, 3)
Done
Working on  DAR
(522, 25)
522
Model out 522. afterocnf 522. check ids 522
(522, 3)
Done


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexe

In [10]:
cohort_adjustments = pd.concat(keep_adj)

cohort_adjustments


Unnamed: 0,PIN_Patient,UDR,CohortType
0,1001,1.026949,NEU
1,1006,1.359343,NEU
2,1014,1.106104,NEU
3,1015,1.337326,NEU
4,1021,0.917676,NEU
5,1023,1.518021,NEU
6,1025,0.953188,NEU
7,1026,1.167092,NEU
8,1033,1.439561,NEU
9,1034,0.911934,NEU


In [11]:
import os
import shutil


In [12]:
#directories to store the results based on combination of fish and adjustment procedurs. If exists, delete.


directories_for_output = ['rresultslme','rresultslmer','rresultsglm','rresultsglmer']

for directory in directories_for_output:

    try:
        shutil.rmtree(directory + '_' + str(run_adjustment) + '_' + str(fish_consumption))
    except OSError as e:
        print("Error: %s - %s." % (e.filename, e.strerror))
    
    os.mkdir(directory + '_' + str(run_adjustment) + '_' + str(fish_consumption))

In [13]:
# dictonaries to hold unadjusted data frames for analysis
frames_to_r_indv = dict()
bin_frames_to_r_indv = dict()
frames_to_r_all = dict()
bin_frames_to_r_all = dict()



#d_test = df_NEU[['PIN_Patient','CohortType','race', 'education','babySex','BMI', 'ga_collection','birth_year','age','SPECIFICGRAVITY_V2']]
#all_vars = covars + [x_feature] 
Y_features_continuous = ['Outcome_weeks','birthWt', 'headCirc', 'birthLen']
Y_features_binary    =  ['LGA','Outcome','SGA']


outputs_conf = []
outputs_crude = []


for outcome in Y_features_binary + Y_features_continuous:
    
    
    for name, df_coh in frames_for_analysis:
        print('Working on ', name)
    

        #variables for fitting procedure
        x_feature = 'UTAS'
        cat_vars = ['babySex','smoking','education']
        
        if outcome in Y_features_binary:
            contin_vars = ['PIN_Patient','BMI','UTAS','parity'] + [outcome]
        if outcome in Y_features_continuous:
            contin_vars = ['PIN_Patient','BMI','UTAS','parity'] + [outcome]
        
        # dummy code
        
        df_coh_coded_model =  dummy_code(df_coh, cat_vars, contin_vars)
        
        ## variables for addjustment procedure
        adjust_cat_vars =  ['babySex','smoking','education','race']
        adjust_contin_vars = ['PIN_Patient','CohortType','BMI', 'ga_collection','birth_year','age']
        
            
        if name in ['NEU', 'UNM', 'DAR']:

            fin = df_coh_coded_model.merge(cohort_adjustments, on = ['PIN_Patient'])
            
            if outcome in Y_features_continuous and name != 'ALL':

                fin = fin.dropna()
                
                if run_adjustment == True:
                    fin['UTAS'] = fin['UTAS'] / fin['UDR']
                    frames_to_r_indv[name + '|' + outcome ] = fin
                if run_adjustment == False:
                    frames_to_r_indv[name + '|' + outcome ] = fin
                    
                
            if outcome in Y_features_binary and name != 'ALL':

                fin = fin.dropna()
                
                if run_adjustment == True:
                    fin['UTAS'] = fin['UTAS'] / fin['UDR']
                    bin_frames_to_r_indv[name + '|' + outcome] = fin
                if run_adjustment == False:
                    bin_frames_to_r_indv[name + '|' + outcome] = fin
            
            
        if name in ['ALL']:
            
            if len(keep_adj) == 3: df_adj_all = pd.concat(keep_adj)
                
            fin = df_coh_coded_model.merge(df_adj_all, on = ['PIN_Patient'])
          
            if outcome in Y_features_continuous and name == 'ALL':

                fin = fin.dropna()
                if run_adjustment == True:
                    fin['UTAS']
                    frames_to_r_all[name + '|' + outcome ] = fin
                if run_adjustment == False:
                    frames_to_r_all[name + '|' + outcome ] = fin
                    
            if outcome in Y_features_binary and name == 'ALL':

                fin = fin.dropna()
                
                if run_adjustment == True:
                    fin['UTAS']
                    bin_frames_to_r_all[name + '|' + outcome] = fin
                if run_adjustment == False:    
                    bin_frames_to_r_all[name + '|' + outcome] = fin

Working on  NEU
Working on  UNM
Working on  DAR
Working on  ALL
Working on  NEU
Working on  UNM
Working on  DAR
Working on  ALL
Working on  NEU
Working on  UNM
Working on  DAR
Working on  ALL
Working on  NEU
Working on  UNM
Working on  DAR
Working on  ALL
Working on  NEU
Working on  UNM
Working on  DAR
Working on  ALL
Working on  NEU
Working on  UNM
Working on  DAR
Working on  ALL
Working on  NEU
Working on  UNM
Working on  DAR
Working on  ALL


In [14]:
frames_to_r_indv.keys()

dict_keys(['NEU|Outcome_weeks', 'UNM|Outcome_weeks', 'DAR|Outcome_weeks', 'NEU|birthWt', 'UNM|birthWt', 'DAR|birthWt', 'NEU|headCirc', 'UNM|headCirc', 'DAR|headCirc', 'NEU|birthLen', 'UNM|birthLen', 'DAR|birthLen'])

In [15]:
bin_frames_to_r_indv.keys()

dict_keys(['NEU|LGA', 'UNM|LGA', 'DAR|LGA', 'NEU|Outcome', 'UNM|Outcome', 'DAR|Outcome', 'NEU|SGA', 'UNM|SGA', 'DAR|SGA'])

In [16]:
frames_to_r_indv['UNM|Outcome_weeks'].shape

(1563, 17)

# Start R stuff

In [17]:
%load_ext rpy2.ipython


In [18]:
# Analysis for individual data

In [19]:
# Set up output directories for results

outputdir_lme = directories_for_output[0] + '_' + str(run_adjustment) + '_' + str(fish_consumption)
outputdir_lmer = directories_for_output[1] + '_' + str(run_adjustment) + '_' + str(fish_consumption)
outputdir_glm = directories_for_output[2]  + '_' +  str(run_adjustment) + '_' + str(fish_consumption)
outputdir_glmer = directories_for_output[3]  + '_' +  str(run_adjustment) + '_' + str(fish_consumption)


%Rpush outputdir_lme
%Rpush outputdir_lmer
%Rpush outputdir_glm
%Rpush outputdir_glmer

In [20]:
data_outcome_weeks_NEU = frames_to_r_indv['NEU|Outcome_weeks']
data_birthWt_NEU  = frames_to_r_indv['NEU|birthWt']
data_headCirc_NEU = frames_to_r_indv['NEU|headCirc']
data_birthLen_NEU = frames_to_r_indv['NEU|birthLen']

%Rpush data_outcome_weeks_NEU
%Rpush data_birthWt_NEU
%Rpush data_headCirc_NEU
%Rpush data_birthLen_NEU


data_outcome_weeks_DAR = frames_to_r_indv['DAR|Outcome_weeks']
data_birthWt_DAR  = frames_to_r_indv['DAR|birthWt']
data_headCirc_DAR = frames_to_r_indv['DAR|headCirc']
data_birthLen_DAR = frames_to_r_indv['DAR|birthLen']

%Rpush data_outcome_weeks_DAR
%Rpush data_birthWt_DAR
%Rpush data_headCirc_DAR
%Rpush data_birthLen_DAR


data_outcome_weeks_UNM = frames_to_r_indv['UNM|Outcome_weeks']
data_birthWt_UNM  = frames_to_r_indv['UNM|birthWt']
data_headCirc_UNM = frames_to_r_indv['UNM|headCirc']
data_birthLen_UNM = frames_to_r_indv['UNM|birthLen']

%Rpush data_outcome_weeks_UNM
%Rpush data_birthWt_UNM
%Rpush data_headCirc_UNM
%Rpush data_birthLen_UNM

In [21]:
fit_str_outcome_weeks_indv = 'Outcome_weeks ~ parity + babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS' 

fit_str_birthWt_indv = 'birthWt ~ parity +  babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS' 

fit_str_headCirc_indv = 'headCirc ~ parity +  babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS' 

fit_str_birthLen_indv = 'birthLen ~ parity +  babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS' 


%Rpush fit_str_outcome_weeks_indv
%Rpush fit_str_birthWt_indv
%Rpush fit_str_headCirc_indv
%Rpush fit_str_birthLen_indv

In [22]:
analysis_info = [[fit_str_outcome_weeks_indv, data_outcome_weeks_NEU, 'NEU_cohorts_outcome_UTAS.txt'],
                [fit_str_birthWt_indv, data_birthWt_NEU, 'NEU_cohorts_birthWt_UTAS.txt'],
                [fit_str_headCirc_indv, data_headCirc_NEU, 'NEU_cohorts_headCirc_UTAS.txt'],
                [fit_str_birthLen_indv, data_birthLen_NEU, 'NEU_cohorts_birthLen_UTAS.txt'],
                [fit_str_outcome_weeks_indv, data_outcome_weeks_DAR, 'DAR_cohorts_outcome_UTAS.txt'],
                [fit_str_birthWt_indv, data_birthWt_DAR, 'DAR_cohorts_birthWt_UTAS.txt'],
                [fit_str_headCirc_indv, data_headCirc_DAR, 'DAR_cohorts_headCirc_UTAS.txt'],
                [fit_str_birthLen_indv, data_birthLen_DAR, 'DAR_cohorts_birthLen_UTAS.txt'],
                [fit_str_outcome_weeks_indv, data_outcome_weeks_UNM, 'UNM_cohorts_outcome_UTAS.txt'],
                [fit_str_birthWt_indv, data_birthWt_UNM, 'UNM_cohorts_birthWt_UTAS.txt'],
                [fit_str_headCirc_indv, data_headCirc_UNM, 'UNM_cohorts_headCirc_UTAS.txt'],
                [fit_str_birthLen_indv, data_birthLen_UNM, 'UNM_cohorts_birthLen_UTAS.txt']]

In [23]:
%%R

library(lme4)

run_lm <- function(fit_str, df, fileout ) {
    m<-lm(fit_str, data=df)
    sink(paste(outputdir_lme, fileout, sep = '/'))
    print(fit_str)
    print(summary(m))
    print(paste(toString(NROW(df)), " observations.", sep = " "))
    sink()
    print('Done')
}


R[write to console]: Loading required package: Matrix



In [24]:
for fit_str, df, fileout in analysis_info:
    
    %Rpush fit_str
    %Rpush df
    %Rpush fileout

    %R run_lm(fit_str, df, fileout)

[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"


In [25]:
print(data_outcome_weeks_DAR.shape)

(1563, 17)


In [26]:
# Analysis for individual logistic regression results
data_Outcome_NEU = bin_frames_to_r_indv['NEU|Outcome']
data_LGA_NEU = bin_frames_to_r_indv['NEU|LGA']
data_SGA_NEU = bin_frames_to_r_indv['NEU|SGA']

data_Outcome_DAR = bin_frames_to_r_indv['DAR|Outcome']
data_LGA_DAR = bin_frames_to_r_indv['DAR|LGA']
data_SGA_DAR = bin_frames_to_r_indv['DAR|SGA']

data_Outcome_UNM = bin_frames_to_r_indv['UNM|Outcome']
data_LGA_UNM = bin_frames_to_r_indv['UNM|LGA']
data_SGA_UNM = bin_frames_to_r_indv['UNM|SGA']


%Rpush data_Outcome_NEU
%Rpush data_LGA_NEU
%Rpush data_SGA_NEU

%Rpush data_Outcome_DAR
%Rpush data_LGA_DAR
%Rpush data_SGA_DAR

%Rpush data_Outcome_UNM
%Rpush data_LGA_UNM
%Rpush data_SGA_UNM


fit_str_outcome_indv = 'Outcome ~ parity + babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS' 

fit_str_SGA_indv = 'SGA ~ parity + babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS' 

fit_str_LGA_indv = 'LGA ~ parity + babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS' 



%Rpush fit_str_outcome_indv
%Rpush fit_str_SGA_indv
%Rpush fit_str_LGA_indv

%Rpush outputdir_glm

In [27]:
analysis_info = [[fit_str_outcome_indv, data_Outcome_NEU, 'NEU_outcome_UTAS.txt'],
                [fit_str_LGA_indv, data_LGA_NEU, 'NEU_LGA_UTAS.txt'],
                [fit_str_SGA_indv, data_SGA_NEU, 'NEU_SGA_UTAS.txt'],
                [fit_str_outcome_indv, data_Outcome_DAR, 'DAR_outcome_UTAS.txt'],
                [fit_str_LGA_indv, data_LGA_DAR, 'DAR_LGA_UTAS.txt'],
                [fit_str_SGA_indv, data_SGA_DAR, 'DAR_SGA_UTAS.txt'],
                [fit_str_outcome_indv, data_Outcome_UNM, 'UNM_outcome_UTAS.txt'],
                [fit_str_LGA_indv, data_LGA_UNM, 'UNM_LGA_UTAS.txt'],
                [fit_str_SGA_indv, data_SGA_UNM, 'UNM_SGA_UTAS.txt']]

In [28]:
#NEU

In [29]:
%%R

library(lme4)


run_glm <- function(fit_str, df, fileout ) {
    m<-glm(fit_str, data=df, family = 'binomial')
    sink(paste(outputdir_glm, fileout, sep = '/'))
    print(fit_str)
    print(summary(m))
    print(paste(toString(NROW(df)), " observations.", sep = " "))
    sink()
    print('Done')
}


In [30]:
for fit_str, df, fileout in analysis_info:
    
    %Rpush fit_str
    %Rpush df
    %Rpush fileout

    %R run_glm(fit_str, df, fileout)

[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"
[1] "Done"


In [31]:
# Analysis for combined DATA

In [32]:
data_outcome_weeks = frames_to_r_all['ALL|Outcome_weeks']
data_birthWt = frames_to_r_all['ALL|birthWt']
data_headCirc = frames_to_r_all['ALL|headCirc']
data_birthLen = frames_to_r_all['ALL|birthLen']

%Rpush data_outcome_weeks
%Rpush data_birthWt
%Rpush data_headCirc
%Rpush data_birthLen

In [33]:
%Rpush outputdir_lmer

In [34]:
fit_str_outcome_weeks = 'Outcome_weeks ~ parity + babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS + (1|CohortType)' 

fit_str_birthWt = 'birthWt ~ parity +  babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS + (1|CohortType)' 

fit_str_headCirc = 'headCirc ~ parity +  babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS + (1|CohortType)' 

fit_str_birthLen = 'birthLen ~ parity +  babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS + (1|CohortType)' 



%Rpush fit_str_outcome_weeks
%Rpush fit_str_birthWt
%Rpush fit_str_headCirc
%Rpush fit_str_birthLen



In [35]:
analysis_info = [[fit_str_outcome_weeks, data_outcome_weeks, 'all_cohorts_outcome_weeks_UTAS.txt'],
                [fit_str_birthWt, data_birthWt, 'all_cohorts_birthWt_UTAS.txt'],
                [fit_str_headCirc, data_headCirc, 'all_cohorts_headCirc_UTAS.txt'],
                [fit_str_birthLen, data_birthLen, 'all_cohorts_birthLen_UTAS.txt']]

In [36]:
%%R

library(lme4)


run_lmer <- function(fit_str, df, fileout ) {
    m<-lmer(fit_str, data=df)
    sink(paste(outputdir_lmer, fileout, sep = '/'))
    print(fit_str)
    print(summary(m))
    print(paste(toString(NROW(df)), " observations.", sep = " "))
    sink()
    print('Done')
}


In [37]:
for fit_str, df, fileout in analysis_info:
    
    %Rpush fit_str
    %Rpush df
    %Rpush fileout

    %R run_lmer(fit_str, df, fileout)

R[write to console]: boundary (singular) fit: see help('isSingular')



[1] "Done"


R[write to console]: boundary (singular) fit: see help('isSingular')



[1] "Done"


R[write to console]: boundary (singular) fit: see help('isSingular')



[1] "Done"


R[write to console]: boundary (singular) fit: see help('isSingular')



[1] "Done"


In [38]:
# binomial 

## TODO

necessariy imports:

import rpy2
import pandas as pd
%load_ext rpy2.ipython
%R require("ggplot2")
creating dummy data frames:

df1 = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]})
df2 = pd.DataFrame({"A": [3, 2, 1], "B": [1, 2, 3]})
writing the plotting function:

%%R
plot_gg <- function(df) {
    p <- ggplot(data=df) + geom_line(aes(x=A, y=B))
    print(p)
}
and finally plot with only two lines of code:

for df in df1, df2:
    %Rpush df
    %R plot_gg(df)

In [39]:

data_Outcome = bin_frames_to_r_all['ALL|Outcome']
data_LGA = bin_frames_to_r_all['ALL|LGA']
data_SGA = bin_frames_to_r_all['ALL|SGA']

#data_Outcome.loc[0:50,'CohortType'] = 'NEU2'

%Rpush data_Outcome
%Rpush data_LGA
%Rpush data_SGA


fit_str_outcome = 'Outcome ~parity +  babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS + (1|CohortType)' 

fit_str_SGA = 'SGA ~ parity +  babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS + (1|CohortType)' 

fit_str_LGA = 'LGA ~ parity +  babySex_2.0 + education_2.0 + education_3.0 + education_4.0 + \
education_5.0 + BMI + UTAS + (1|CohortType)' 


%Rpush fit_str_outcome
%Rpush fit_str_SGA
%Rpush fit_str_LGA
%Rpush outputdir_glmer


analysis_info = [[fit_str_outcome, data_Outcome, 'all_cohorts_Outcome_UTAS.txt'],
                [fit_str_SGA, data_SGA, 'all_cohorts_SGA_UTAS.txt'],
                [fit_str_LGA, data_LGA, 'all_cohorts_LGA_UTAS.txt']]

In [40]:
%%R

library(lme4)


run_glmer <- function(fit_str, df, fileout ) {
    m<-glmer(fit_str, data=df, family = binomial)
    sink(paste(outputdir_glmer, fileout, sep = '/'))
    print(fit_str)
    print(summary(m))
    print(paste(toString(NROW(df)), " observations.", sep = " "))
    sink()
}


In [41]:
for fit_str, df, fileout in analysis_info:
    
    %Rpush fit_str
    %Rpush df
    %Rpush fileout

    %R run_glmer(fit_str, df, fileout)

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

