# Attack on Cox Proportional Hazards Model

## Load libraries and define settings

In [1]:
# general dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
from pathlib import Path
import warnings

In [2]:
# ignore deprecation warning
warnings.filterwarnings("ignore", category=DeprecationWarning) 

# autoreload changes from local files
%load_ext autoreload
%autoreload 2

# pandas show full output
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 200)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load example dataset

In [3]:
df = pd.DataFrame({
    'patient': [1, 2, 3],
    'time': [5, 3, 4],
    'event': [1, 1, 0],
    'x1': [2, 1, 3],
    'x2': [1, 2, 1]
})
df


Unnamed: 0,patient,time,event,x1,x2
0,1,5,1,2,1
1,2,3,1,1,2
2,3,4,0,3,1


## Attack
- initalize model
- compute gradients using data
- reverse engineer original records that resulted in gradients

In [4]:
init_beta = [0.1, 0.2]

In [10]:
def compute_gradient(beta, event, time, covariates):
    gradient = np.zeros(len(beta))
    for i in range(len(event)):
        hazard = np.exp(np.dot(beta, covariates[i]))
        gradient += covariates[i] * (event[i] - hazard)
    return gradient

compute_gradient(beta=init_beta, event=df['event'], time=df['time'], covariates=df[['x1', 'x2']].values)

array([-6.57853448, -3.43798851])

In [5]:
def compute_gradient(df, event_column, time_column, covariate_columns, cox_params):
    df_ordered = df.sort_values(by=[time_column])
    gradient = np.zeros(len(cox_params))
    for i in range(len(df_ordered)):
        hazard = np.exp(np.dot(cox_params, df_ordered[covariate_columns].values[i]))
        gradient += df_ordered[covariate_columns].values[i] * (df_ordered[event_column].values[i] - hazard)
    return gradient

compute_gradient(df=df, event_column='event', time_column='time', covariate_columns=['x1', 'x2'], cox_params=init_beta)


array([-6.57853448, -3.43798851])

In [8]:
def compute_gradient(df, event_column, time_column, covariate_columns, cox_params):
    df_ordered = df.sort_values(by=[time_column])

    E = df_ordered[event_column].values
    X = df_ordered[covariate_columns].values


    gradient = np.zeros(len(cox_params))
    mask_at_risk = np.ones(len(df_ordered))
    for i in range(len(df_ordered)):
        # censored event don't compute gradient
        if E[i] == 0:
            continue
        
        # compute hazard risk group
        hazard_term = np.exp(np.dot(cox_params, X[mask_at_risk]))
        hazard = (hazard_term * X[mask_at_risk]) / hazard_term.sum()
        gradient = X[i] - hazard

        # remove from risk set
        mask_at_risk[i] = 0

    return gradient

compute_gradient(df=df, event_column='event', time_column='time', covariate_columns=['x1', 'x2'], cox_params=init_beta)
    

IndexError: arrays used as indices must be of integer (or boolean) type

In [None]:
def gradient_update(beta, gradient, learning_rate):
    return beta + learning_rate * gradient
