# TensorCox
- example script to apply for a CSV file.

## Load modules:

In [26]:
# import the necessary modules
import sys
import os
import torch
import tqdm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

from torch.utils.data import DataLoader
from sklearn.model_selection import KFold

# set root directory
# path to the folder
dir_root = '/nfs/nobackup/gerstung/awj/projects/TensorCox_/'
os.chdir(dir_root)

# appends the path to the COX script 
sys.path.append(dir_root + 'TensorCox/')

# import COX model
from TensorCox import loglikelihood
from TensorCox import Fisher
from metrics import concordance
from metrics import RMSE
from dataloader import CSV_Dataset
from dataloader import ToTensor
from dataloader import custom_collate

torch.manual_seed(7)
np.random.seed(7)

## Data:

In [20]:
lung = pd.read_csv('data/lung.csv', sep=';')
surv = np.asarray(lung[['start', 'time', 'status']])
X = np.asarray(lung[['age', 'sex', 'ph.ecog', 'ph.karno', 'pat.karno', 'meal.cal', 'wt.loss']])
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

surv = torch.from_numpy(surv)
X = torch.from_numpy(X)

lung

Unnamed: 0,start,time,status,age,sex,ph.ecog,ph.karno,pat.karno,meal.cal,wt.loss
0,0,455,1,68,1,0.0,90.0,90.0,1225.0,15.0
1,0,210,1,57,1,1.0,90.0,60.0,1150.0,11.0
2,0,1022,0,74,1,1.0,50.0,80.0,513.0,0.0
3,0,310,1,68,2,2.0,70.0,60.0,384.0,10.0
4,0,361,1,71,2,2.0,60.0,80.0,538.0,1.0
...,...,...,...,...,...,...,...,...,...,...
162,0,203,0,71,2,1.0,80.0,90.0,1025.0,0.0
163,0,191,0,39,1,0.0,90.0,90.0,2350.0,-5.0
164,0,105,0,75,2,2.0,60.0,70.0,1025.0,5.0
165,0,174,0,66,1,1.0,90.0,100.0,1075.0,1.0


## Opimizer:

In [31]:
# optimizer
parameters = 7
theta = torch.normal(0, 0.01, (parameters, 1), dtype=torch.float64, requires_grad=True)
eta = 0.00
lr = 0.001
optimizer = torch.optim.SGD([theta], lr=lr)

## Model fit:

In [34]:
for _ in tqdm.tqdm(range(1000000)):
    idx = np.random.choice(X.shape[0], 40, replace=True)
    optimizer.zero_grad()
    linpred = torch.mm(X, theta)
    logL = -loglikelihood(surv, linpred) 
    logL.backward()
    optimizer.step()


  8%|▊         | 79563/1000000 [00:50<09:39, 1588.66it/s]


KeyboardInterrupt: 

In [33]:
for ii in theta:
    print(ii[0].detach().numpy())

0.09624067296050627
-0.26614169506970686
0.5020921657948615
0.25712114853900303
-0.1860028903766889
0.010375181945712715
-0.18413280102918064


## Variance estimation

In [25]:
# Fisher information
# split into random data splits - emulate batch sampling
kf = KFold(n_splits=4, shuffle=True)
kf.get_n_splits(np.arange(X.shape[0]))

A = np.zeros((7,7))
A = torch.from_numpy(A)
for _, idx in kf.split(X):
        linpred = torch.mm(X, theta)
        A += Fisher(surv[idx], X[idx], linpred[idx])
print(torch.diagonal(torch.sqrt(torch.inverse(A))))

tensor([0.1093, 0.1036, 0.1754, 0.1514, 0.1231, 0.1123, 0.1082],
       dtype=torch.float64)
