In [1]:
import pandas as pd
import numpy as np
from scipy import stats as st
import statsmodels.stats.api as sms
from statsmodels.stats.proportion import proportion_confint, confint_proportions_2indep

In [2]:
df = pd.read_csv('resp1.csv', sep='@')

In [3]:
df

Unnamed: 0,gender,SITEID,SUBJID,TRTPN,responseCategory
0,MALE,1,27,2,SD
1,FEMALE,1,39,1,PD
2,MALE,1,126,2,PD
3,MALE,1,154,1,SD
4,FEMALE,1,161,1,PD
...,...,...,...,...,...
577,FEMALE,97,758,2,SD
578,FEMALE,98,293,2,SD
579,MALE,99,176,2,SD
580,MALE,99,564,2,PR


In [4]:
df.rename(columns={'SITEID': 'site_id', 'SUBJID': 'subj_id', 'TRTPN': 'treatment', 'responseCategory': 'response_cat'}, inplace=True)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 582 entries, 0 to 581
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   gender        582 non-null    object
 1   site_id       582 non-null    int64 
 2   subj_id       582 non-null    int64 
 3   treatment     582 non-null    int64 
 4   response_cat  582 non-null    object
dtypes: int64(3), object(2)
memory usage: 22.9+ KB


In [6]:
df.isna().sum().mul(100)

gender          0
site_id         0
subj_id         0
treatment       0
response_cat    0
dtype: int64

In [7]:
rc = []
tr = []

val_1 = 1
val_0 = 0

for responce in df['response_cat']:
  if responce in ('CR', 'PR'):
    rc.append(val_1)
  else:
    rc.append(val_0)

for treat in df['treatment']:
    if treat == 1:
        tr.append(val_1)
    else:
        tr.append(val_0)
        
df['response'] = pd.Series(rc)
df['treatment'] = pd.Series(tr)

In [18]:
matrix = df.groupby(['treatment', 'response']) \
    .agg({'response_cat': 'count'}) \
    .groupby(level=['treatment', 'response']) \
    .sum().values

table = np.array(
    [
        [matrix[-1][0], matrix[-2][0]],
        [matrix[-3][0], matrix[-4][0]]
    ]
)

oddsratio, pvalue = st.fisher_exact(table)

print(f"OddsR: {round(oddsratio, 2)}")

wald_CI = confint_proportions_2indep(
    count1=matrix[-1][0], nobs1=matrix[-2][0],
    count2=matrix[-3][0], nobs2=matrix[-4][0],
    compare='or',
    correction=True
)

print(f"OddsR with CI: {round(oddsratio, 2)} {round(wald_CI[0], 2), round(wald_CI[1], 2)}")

OddsR: 1.67
OddsR with CI: 1.67 (1.18, 2.97)


In [9]:
matrix_male = df.query("gender == 'MALE'") \
    .groupby(['treatment', 'response']) \
    .agg({'response_cat': 'count'}) \
    .groupby(level=['treatment', 'response']) \
    .sum().values

table_male = np.array(
    [
        [matrix_male[-1][0], matrix_male[-2][0]],
        [matrix_male[-3][0], matrix_male[-4][0]]
    ]
)

oddsratio_male, pvalue_male = st.fisher_exact(table_male)

print(f"OddsR (Male): {round(oddsratio_male, 2)}")

wald_CI_male = confint_proportions_2indep(
    count1=matrix_male[-1][0], nobs1=matrix_male[-2][0],
    count2=matrix_male[-3][0], nobs2=matrix_male[-4][0],
    compare='or',
    correction=True
)

print(f"OddsR (Male) with CI: {round(oddsratio_male, 2)} {round(wald_CI_male[0], 2), round(wald_CI_male[1], 2)}")

OddsR (Male): 1.99
OddsR (Male) with CI: 1.99 (1.26, 4.31)


In [10]:
matrix_female = df.query("gender == 'FEMALE'") \
    .groupby(['treatment', 'response']) \
    .agg({'response_cat': 'count'}) \
    .groupby(level=['treatment', 'response']) \
    .sum().values

table_female = np.array(
    [
        [matrix_female[-1][0], matrix_female[-2][0]],
        [matrix_female[-3][0], matrix_female[-4][0]]
    ]
)

oddsratio_female, pvalue_female = st.fisher_exact(table_female)

print(f"OddsR (Female): {round(oddsratio_female, 2)}")

wald_CI_female = confint_proportions_2indep(
    count1=matrix_female[-1][0], nobs1=matrix_female[-2][0],
    count2=matrix_female[-3][0], nobs2=matrix_female[-4][0],
    compare='or',
    correction=True
)

print(f"OddsR (Female) with CI: {round(oddsratio_female, 2)} {round(wald_CI_female[0], 2), round(wald_CI_female[1], 2)}")

OddsR (Female): 1.36
OddsR (Female) with CI: 1.36 (0.72, 2.86)


In [11]:
df_gender = df.copy()

In [12]:
gender = []

val_1 = 1
val_0 = 0


for sex in df_gender['gender']:
    if sex == 'FEMALE':
        gender.append(val_1)
    else:
        gender.append(val_0)
        
df_gender['sex'] = pd.Series(gender)


In [15]:
matrix_treat1 = df_gender.query("treatment == 1") \
    .groupby(['sex', 'response']) \
    .agg({'response_cat': 'count'}) \
    .groupby(level=['sex', 'response']) \
    .sum().values

table_treat1 = np.array(
    [
        [matrix_treat1[-1][0], matrix_treat1[-2][0]],
        [matrix_treat1[-3][0], matrix_treat1[-4][0]]
    ]
)

oddsratio_treat1, pvalue_treat1 = st.fisher_exact(table_treat1)

print(f"OddsR Female vs. Male (Treatment 1): {round(oddsratio_treat1, 2)}")

wald_CI_treat1 = confint_proportions_2indep(
    count1=matrix_treat1[-1][0], nobs1=matrix_treat1[-2][0],
    count2=matrix_treat1[-3][0], nobs2=matrix_treat1[-4][0],
    compare='or',
    correction=True
)

print(f"OddsR Female vs. Male (Treatment 1) with CI: {round(oddsratio_treat1, 2)} {round(wald_CI_treat1[0], 2), round(wald_CI_treat1[1], 2)}")

OddsR Female vs. Male (Treatment 1): 0.76
OddsR Female vs. Male (Treatment 1) with CI: 0.76 (0.39, 1.29)


In [16]:
matrix_treat2 = df_gender.query("treatment == 0") \
    .groupby(['sex', 'response']) \
    .agg({'response_cat': 'count'}) \
    .groupby(level=['sex', 'response']) \
    .sum().values

table_treat2 = np.array(
    [
        [matrix_treat2[-1][0], matrix_treat2[-2][0]],
        [matrix_treat2[-3][0], matrix_treat2[-4][0]]
    ]
)

oddsratio_treat2, pvalue_treat2 = st.fisher_exact(table_treat2)

print(f"OddsR Female vs. Male (Treatment 2): {round(oddsratio_treat2, 2)}")

wald_CI_treat2 = confint_proportions_2indep(
    count1=matrix_treat2[-1][0], nobs1=matrix_treat2[-2][0],
    count2=matrix_treat2[-3][0], nobs2=matrix_treat2[-4][0],
    compare='or',
    correction=True
)

print(f"OddsR Female vs. Male (Treatment 2) with CI: {round(oddsratio_treat2, 2)} {round(wald_CI_treat2[0], 2), round(wald_CI_treat2[1], 2)}")

OddsR Female vs. Male (Treatment 2): 1.12
OddsR Female vs. Male (Treatment 2) with CI: 1.12 (0.57, 2.31)
