# Double Logistic Regression with the Titanic Dataset

In [21]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

np.random.seed(0)

# Load the Titanic dataset
data = pd.read_csv('titanic.csv')

# Create a binary indicator for the treatment variable (class)
data['class_1st'] = (data['Pclass'] == 1).astype(int)
data['Sex_male'] = (data['Sex'] == 'male').astype(int)
data = data.dropna(subset = ['Survived', 'class_1st', 'SibSp', 'Age', 'Parch', 'Sex_male']).reset_index()

print(len(data))
data.head()

714


Unnamed: 0,index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,class_1st,Sex_male
0,0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0,1
1,1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,1,0
2,2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0,0
3,3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,1,0
4,4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0,1


In [22]:
data = data[['Survived', 'class_1st', 'SibSp', 'Age', 'Parch', 'Sex_male']]
data.head()

Unnamed: 0,Survived,class_1st,SibSp,Age,Parch,Sex_male
0,0,0,1,22.0,0,1
1,1,1,1,38.0,0,0
2,1,0,0,26.0,0,0
3,1,1,1,35.0,0,0
4,0,0,0,35.0,0,1


In [23]:
# Specify the response and treatment variables
y = data['Survived']
x = data[['class_1st', 'SibSp', 'Age', 'Parch', 'Sex_male']]

# Fit a logistic regression model
model = sm.Logit(y, x)
result = model.fit()

# Print the treatment effect estimates
print(result.params)

Optimization terminated successfully.
         Current function value: 0.500794
         Iterations 6
class_1st    1.751649
SibSp       -0.046076
Age          0.003590
Parch        0.143530
Sex_male    -2.034606
dtype: float64


The output will give you the estimated treatment effects for each covariate in the model. The treatment effect for class_1st (the binary indicator for first-class passengers) represents the difference in the odds of survival between first-class passengers and passengers in other classes, controlling for the effects of age, number of siblings/spouses on board, number of parents/children on board, fare price, and gender.

A positive estimate for the treatment effect of class_1st would indicate that first-class passengers had a higher odds of survival compared to passengers in other classes, while a negative estimate would indicate the opposite. The magnitude of the estimate will give you a sense of how large the difference in odds of survival was between first-class passengers and passengers in other classes, controlling for the other covariates.

<hr>

In the first stage, a logistic regression model is fit to predict the probability of the main outcome as a function of the risk factor and other covariates. In the second stage, a logistic regression model is fit to predict the probability of the secondary outcome as a function of the risk factor, other covariates, and the predicted probability of the main outcome from the first stage.

So, double logistic regression is not just fitting two separate logistic regression models, but rather fitting two logistic regression models in two stages, with the results from the first stage being used as a predictor in the second stage.

In [30]:
model1 = sm.Logit(y, x).fit()
y_hat = np.array(model1.predict(x)).reshape(len(x), 1)
x_new = np.hstack((x, y_hat))
model2 = sm.Logit(y, x_new).fit()
print(model2.params)

Optimization terminated successfully.
         Current function value: 0.500794
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.464138
         Iterations 6
x1    0.774058
x2   -0.337805
x3   -0.041707
x4   -0.191545
x5   -1.065680
x6    4.042893
dtype: float64


<hr>

In [32]:
# Define the number of bootstrap resamples
n_resamples = 1000

# Initialize a matrix to store the treatment effect estimates
treat_effects = np.zeros((n_resamples, model2.params.shape[0] - 1))

# Use bootstrapping to estimate the standard error of the treatment effects
i = 0
while i < n_resamples:
    resample_index = np.random.choice(data.index, size = data.index.size, replace = True)
    resample = data.iloc[resample_index]
    x_resample = x.iloc[resample_index]
    y_resample = y.iloc[resample_index]
    model1 = sm.Logit(y_resample, x_resample).fit()
    y_hat = np.array(model1.predict(x_resample)).reshape(len(y_hat), 1)
    x_new = np.hstack((x_resample, y_hat))
    model2 = sm.Logit(y_resample, x_new).fit()
    treat_effects[i, :] = model2.params[:-1]
    i += 1

# Calculate the standard error of the treatment effects
treat_effects_se = treat_effects.std(axis=0)

# Print the standard errors of the treatment effect estimates
print('Standard errors of the treatment effects:')
print(treat_effects_se)

Optimization terminated successfully.
         Current function value: 0.474510
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.433489
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.475444
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.443791
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.464648
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.444162
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.498589
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.468372
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.495590
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.459258
  

Optimization terminated successfully.
         Current function value: 0.491983
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.482286
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.431330
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.501114
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.477839
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.509687
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.472572
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.496472
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.452875
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.504869
  

Optimization terminated successfully.
         Current function value: 0.497487
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.456162
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.506797
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.468120
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.494340
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.443764
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.524476
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.493471
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.498385
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.440843
  

Optimization terminated successfully.
         Current function value: 0.467053
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.438122
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.507978
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.462894
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.543868
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.525511
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.480353
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.448997
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.471422
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.432776
  

Optimization terminated successfully.
         Current function value: 0.451560
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.487007
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.464051
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.497768
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.470883
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.522773
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.481299
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.524455
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.479386
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.535110
  

Optimization terminated successfully.
         Current function value: 0.422883
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.522381
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.488693
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.520531
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.492230
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.506089
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.470950
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.502714
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.478664
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.493360
  

Optimization terminated successfully.
         Current function value: 0.475719
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.435850
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.457983
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.421168
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.486545
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.460458
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.529500
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.475564
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.511519
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.492184
  

Optimization terminated successfully.
         Current function value: 0.474539
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.451014
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.471556
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.450804
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.477649
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.417170
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.489100
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.456057
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.515774
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.481548
  

Optimization terminated successfully.
         Current function value: 0.427471
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.473529
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.454543
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.489904
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.462160
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.479968
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.427637
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.494546
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.431164
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.519441
  

Optimization terminated successfully.
         Current function value: 0.486143
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.510710
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.466224
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.479678
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.450114
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.509143
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.461674
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.501331
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.445626
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.483497
  

Optimization terminated successfully.
         Current function value: 0.476267
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.447014
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.501381
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.468982
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.461887
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.428990
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.492576
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.475412
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.518021
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.477752
  

Optimization terminated successfully.
         Current function value: 0.505872
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.469156
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.484981
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.465123
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.503426
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.452127
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.522216
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.483964
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.483054
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.455779
  

Optimization terminated successfully.
         Current function value: 0.488592
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.465428
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.527813
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.497785
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.521049
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.483126
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.486352
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.459779
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.519662
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.478639
  

Optimization terminated successfully.
         Current function value: 0.484137
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.451082
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.486669
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.465306
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.472333
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.437582
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.511903
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.481012
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.488001
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.465381
  

Optimization terminated successfully.
         Current function value: 0.433169
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.512397
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.474692
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.510409
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.466939
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.520491
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.487649
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.488442
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.445655
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.479014
  

Optimization terminated successfully.
         Current function value: 0.470000
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.427628
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.465616
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.444972
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.506038
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.464454
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.487135
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.458657
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.475836
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.444514
  

Optimization terminated successfully.
         Current function value: 0.513629
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.484387
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.531727
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.496331
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.501962
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.476184
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.488145
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.459642
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.507308
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.464861
  

Optimization terminated successfully.
         Current function value: 0.486363
         Iterations 6
Standard errors of the treatment effects:
[0.19250351 0.06717248 0.00764765 0.06701075 0.16259901]


We generate 1000 resamples of the data, fit the double logistic regression model to each resample, and store the estimated treatment effects for each covariate. Finally, we calculate the standard deviation of the treatment effects across the resamples as an estimate of the standard error. The output will give you the standard errors for each covariate in the model, representing the difference in the log-odds of survival between first-class passengers and passengers in other classes, controlling for the effects of age, number, and other covariates.