In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import os
from lifelines import CoxPHFitter
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from utils import *

In [2]:
# Lift the column cap so wide frames are rendered without truncation.
pd.options.display.max_columns = None

In [2]:
# Read the source table from the repository's data folder (path is relative
# to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer="../data/modified_small.csv")

# Reshape with the project helper from `utils`.
# NOTE(review): presumably yields one row per participant-cycle, since later
# cells read `cycle` and `event_this_cycle` from the result — confirm in utils.
df_long = expand_to_long_format(df)

In [7]:
# Bucket the cycle number into three-cycle bands, with an open-ended last band.
# pd.cut uses right-closed intervals by default, so the bands are
# (0, 3], (3, 6], (6, 9], (9, 12], (12, inf); values outside them become NaN.
cycle_bin_edges = [0, 3, 6, 9, 12, np.inf]
cycle_bin_labels = ["1-3", "4-6", "7-9", "10-12", "13+"]

df_long["cycle_cat"] = pd.cut(
    df_long["cycle"], bins=cycle_bin_edges, labels=cycle_bin_labels
)

In [None]:
# Covariate configuration consumed by prepare_model_matrix in a later cell.

# Numeric covariates forwarded as `center_numeric`. None are active:
# "cycle" is disabled here, and the binned 'cycle_cat' is listed among the
# categorical covariates instead.
numerical_vars = []  # disabled candidate: "cycle"

# Categorical covariates, each mapped to the level forwarded as its baseline
# (`categorical_baselines`).
# NOTE(review): presumably the baseline is the reference level omitted from
# the dummy coding — confirm in utils.prepare_model_matrix.
categorical_vars = dict(
    age_group='30-35',
    bmi_group='normal',
    been_pregnant_before_binary=False,
    average_cycle_length_group='21-35',
    regular_cycle=True,
    university_education=False,
    regular_sleep=True,
    intercourse_frequency_group='low',
    dedication_group='low',
    cycle_cat='1-3',
)

In [10]:
# Build the predictor matrix X and the response y for the per-cycle model.
# The response column and two further columns are removed from the frame
# before it is handed to the project helper.
# NOTE(review): prepare_model_matrix comes from utils; its encoding and
# centering behaviour is not visible here — confirm there.
X = prepare_model_matrix(
    df=df_long.drop(
        columns=['event_this_cycle', 'outcome_pregnant', 'n_cycles_trying']
    ),
    categorical_baselines=categorical_vars,
    center_numeric=numerical_vars,
)
y = df_long["event_this_cycle"]

# Recode boolean columns as integers; other dtypes are left as they are.
bool_cols = X.select_dtypes(bool).columns
X = X.astype(dict.fromkeys(bool_cols, 'int'))

# Complete-case filter: keep rows with no missing predictor and no missing
# response.
mask = X.notna().all(axis=1) & y.notna()
X, y = X[mask], y[mask]

In [12]:
# Fit the logistic regression on the per-cycle rows
# (discrete-time proportional odds model).
X = sm.add_constant(X)  # adds the intercept column
model = sm.Logit(endog=y, exog=X)
result = model.fit()

# Coefficient table, extended with odds ratios and normal-approximation
# 95% intervals (coef +/- 1.96 * SE on the log-odds scale, then exponentiated).
summary = result.summary2().tables[1]
log_odds = summary["Coef."]
ci_half_width = 1.96 * summary["Std.Err."]
summary["Odds Ratio"] = np.exp(log_odds)
summary["CI Lower"] = np.exp(log_odds - ci_half_width)
summary["CI Upper"] = np.exp(log_odds + ci_half_width)

# The table is not displayed by this cell; uncomment to print it:
#print(summary[["Coef.", "Std.Err.", "P>|z|", "Odds Ratio", "CI Lower", "CI Upper"]])

Optimization terminated successfully.
         Current function value: 0.337184
         Iterations 9


In [4]:
#plot_odds_ratios(result, log_scale=True, show_cycle=True)

In [3]:
#plot_odds_ratios(result, log_scale=False)