# TensorCox
- Example script showing how to apply TensorCox to data loaded from a CSV file.

## Load modules:

In [1]:
# import the necessary modules
import sys
import os
import torch
import tqdm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

from torch.utils.data import DataLoader
from sklearn.model_selection import KFold

# set root directory
# path to the project folder; set the TENSORCOX_ROOT environment variable to
# point somewhere else without editing the notebook (the default is unchanged)
dir_root = os.environ.get('TENSORCOX_ROOT', '/nfs/nobackup/gerstung/awj/projects/TensorCox_/')
os.chdir(dir_root)

# appends the path to the COX script
# os.path.join works whether or not dir_root ends with a path separator; the
# trailing '' keeps the closing separator on the resulting entry
sys.path.append(os.path.join(dir_root, 'TensorCox', ''))

# import COX model
from TensorCox import loglikelihood
from TensorCox import Fisher
from metrics import concordance
from metrics import RMSE
from dataloader import CSV_Dataset
from dataloader import ToTensor
from dataloader import custom_collate

# seed both torch and numpy from one value so the run is repeatable
SEED = 7
torch.manual_seed(SEED)
np.random.seed(SEED)

## Data:

In [2]:
# read the colon data set (path is relative to the working directory)
colon = pd.read_csv('data/colon.csv', sep=',')

# covariate columns used as model inputs
covariates = ['sex', 'age', 'obstruct', 'perfor', 'adhere', 'nodes', 'differ', 'extent', 'surg', 'node4', 'etype']

# survival array: a column of zeros followed by (time, status)
# NOTE(review): the zero column is presumably the start time expected by
# loglikelihood - confirm against the TensorCox module
time_status = colon[['time', 'status']].to_numpy()
surv = np.hstack((np.zeros((time_status.shape[0], 1)), time_status))

# standardise every covariate column to zero mean and unit (population) std
X_raw = colon[covariates].to_numpy()
X = (X_raw - X_raw.mean(axis=0)) / X_raw.std(axis=0)

# hand both arrays to torch
surv = torch.from_numpy(surv)
X = torch.from_numpy(X)

colon

Unnamed: 0.1,Unnamed: 0,sex,age,obstruct,perfor,adhere,nodes,status,differ,extent,surg,node4,time,etype
0,1,1,43,0,0,0,5,1,2,3,0,1,1521,2
1,2,1,43,0,0,0,5,1,2,3,0,1,968,1
2,3,1,63,0,0,0,1,0,2,3,0,0,3087,2
3,4,1,63,0,0,0,1,0,2,3,0,0,3087,1
4,5,0,71,0,0,1,7,1,2,2,0,1,963,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1771,1854,1,76,0,0,1,1,1,3,3,0,0,851,1
1772,1855,0,48,1,0,0,4,0,2,3,1,1,2072,2
1773,1856,0,48,1,0,0,4,0,2,3,1,1,2072,1
1774,1857,0,66,1,0,0,1,0,2,3,0,0,1820,2


## Optimizer:

In [3]:
# optimizer
# one coefficient per covariate column of X
parameters = X.shape[1]

# coefficients start as small Gaussian noise (mean 0, std 0.01), stored as a
# float64 column vector that tracks gradients
theta = torch.normal(
    mean=0,
    std=0.01,
    size=(parameters, 1),
    dtype=torch.float64,
    requires_grad=True,
)

eta = 0.00  # defined here but not used by the optimizer below
lr = 0.01   # Adam learning rate
optimizer = torch.optim.Adam([theta], lr=lr)

## Model fit:

In [11]:
# Fit theta with 1000 Adam steps. Every step evaluates the likelihood on the
# complete data set (X, surv); no mini-batch indices are drawn.
for _ in tqdm.tqdm(range(1000)):
    optimizer.zero_grad()
    linpred = torch.mm(X, theta)
    # the optimizer minimises, so the log-likelihood is negated
    logL = -loglikelihood(surv, linpred)
    logL.backward()
    optimizer.step()


100%|██████████| 1000/1000 [00:03<00:00, 261.55it/s]


In [12]:
# print every fitted coefficient on its own line
for coef in theta.detach().numpy()[:, 0]:
    print(coef)

-0.02436415253403657
0.01894718134236788
0.09089355211474821
0.018971089020813195
0.06309577340626127
0.1480329617609747
0.057251092182696674
0.21622182313266458
0.11753246137106388
0.2757621682045679
-0.12840411066624036


## Variance estimation

In [13]:
# Fisher information
# Accumulate the information matrix over random, non-overlapping splits of
# roughly 50 rows each - emulates batch sampling.
kf = KFold(n_splits=int(np.round(colon.shape[0]/50)), shuffle=True)

A = torch.zeros((X.shape[1], X.shape[1]), dtype=torch.float64)
with torch.no_grad():
    # theta is fixed during this step, so the linear predictor is computed once
    linpred = torch.mm(X, theta)
    for _, idx in kf.split(X):
        A += Fisher(surv[idx], X[idx], linpred[idx])

# square root of the diagonal of the inverse information matrix; taking the
# diagonal first avoids square-rooting (possibly negative) off-diagonal entries
print(torch.sqrt(torch.diagonal(torch.inverse(A))))

tensor([0.0347, 0.0354, 0.0341, 0.0321, 0.0335, 0.0407, 0.0364, 0.0412, 0.0337,
        0.0468, 0.0345], dtype=torch.float64)


In [14]:
# Concordance

In [15]:
# linear predictor of the fitted model on the full data set, as a numpy array
linpred_np = torch.mm(X, theta).detach().numpy()
concordance(surv.detach().numpy(), linpred_np)

0.6661688241786037

# R code: 

# Reference fit in R: exports the colon data to CSV and fits the same Cox
# model with survival::coxph for comparison with the TensorCox estimates.
rm(list=ls())
library(survival)
library(stargazer)

data("colon")
colon

# keep columns 4 to 16 and drop every row that contains a non-finite value
colon = colon[ , 4:16]
colon <- colon[is.finite(rowSums(colon)),]

# write.csv always writes comma-separated output; a `sep` argument is ignored
# with a warning, so none is passed
write.csv(colon, '/Users/awj/Desktop/colon.csv')

# centre to mean zero and scale to unit (sample) standard deviation
standardize <- function(x){
  (x - mean(x))/sd(x)
}

# all remaining columns except 7 and 12, which are left unscaled
covariate_cols <- c(1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13)
colon[ , covariate_cols] = apply(colon[ , covariate_cols], 2, standardize)

names(colon)
m1 = coxph(Surv(time, status) ~ sex + age  + obstruct + perfor + adhere  + nodes + differ + extent + surg + node4 + etype, data=colon)
summary(m1)