# Basic Cox time-varying proportional hazards model

In [1]:
import sys
import logging
from pathlib import Path

import pandas as pd
from prefect import Flow

from nba_survival.model.tasks import (
    SurvivalData,
    SegmentData,
    InitializeLifelines,
    FitLifelinesModel,
    CollapseData,
    PredictLifelines,
    ConcordanceIndex,
    HyperparameterTuning
)
from nba_survival.model.tasks.meta import META

## Load the data

In [2]:
# Collect the model-data CSV files for the 2018-19 season. ``glob`` yields
# paths in whatever order the filesystem returns them, which can differ between
# machines, so sort them to keep the row order of ``df`` (and any seeded
# sampling done on it later) reproducible.
data_dir = Path("..", "nba-data", "2018-19", "model-data")
data_files = sorted(data_dir.glob("data_*.csv"))
if not data_files:
    # Fail with the directory that was searched instead of pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(f"No data_*.csv files found in {data_dir.resolve()}")

# The files are pipe-delimited. GAME_ID is read as a string (presumably to
# keep the identifier intact -- confirm against the raw files). The first
# column is consumed as the index and then discarded by ``reset_index``.
df = pd.concat(
    [
        pd.read_csv(fpath, sep="|", dtype={"GAME_ID": str}, index_col=0)
        for fpath in data_files
    ]
).reset_index(drop=True)

In [3]:
# Preview the first five rows, limited to the identifier column plus the
# dynamic and static columns listed in META.
preview_cols = [META["id"], *META["dynamic"], *META["static"]]
df[preview_cols].head(5)

Unnamed: 0,GAME_ID,SCOREMARGIN,HOME_LINEUP_PLUS_MINUS,VISITOR_LINEUP_PLUS_MINUS,HOME_NET_RATING,VISITOR_NET_RATING,HOME_W_PCT,VISITOR_W_PCT,LAST_GAME_WIN,HOME_GAMES_IN_LAST_3_DAYS,HOME_GAMES_IN_LAST_5_DAYS,HOME_GAMES_IN_LAST_7_DAYS,VISITOR_GAMES_IN_LAST_3_DAYS,VISITOR_GAMES_IN_LAST_5_DAYS,VISITOR_GAMES_IN_LAST_7_DAYS
0,21800001,0,0.1,-0.1,5.1,3.3,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0
1,21800001,0,0.1,-0.1,5.1,3.3,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0
2,21800001,0,0.1,-0.1,5.1,3.3,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0
3,21800001,0,0.1,-0.1,5.1,3.3,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0
4,21800001,0,0.1,-0.1,5.1,3.3,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0


## Build a basic model (default values)

In [4]:
# Instantiate one task object per pipeline step, all with default settings.
# The ``name`` passed to each constructor is the label that shows up for that
# task in the Prefect run log.
# Note: these variables hold the task objects themselves (they are called in
# the flow definition below), not the results the tasks produce.
format_data = SurvivalData(name="Convert input data to range form")
segdata = SegmentData(name="Create train and test data")
model = InitializeLifelines(name="Initialize Cox model")
trained = FitLifelinesModel(name="Fit Cox model")
collapsed = CollapseData(name="Collapse training data")
predict = PredictLifelines(name="Predict partial hazard")
concord = ConcordanceIndex(name="Calculate C-Index")

In [5]:
# Wire the tasks into a Prefect flow. Calling a task inside the ``with`` block
# adds it to ``flow`` and returns a handle to its future result; the work
# itself happens when the flow is run.
with Flow(name="My model pipeline") as flow:
    # Reformat the raw frame, then split it into "train" and "test" subsets.
    alldata = format_data(df)
    data = segdata(alldata)
    model_obj = model()
    # Fit on the training subset only.
    fitted = trained(model=model_obj, data=data["train"])
    # The held-out subset is collapsed before being used for prediction and
    # for scoring.
    testing = collapsed(data["test"])
    predt = predict(model=fitted, data=testing)
    # Score the predictions against the same collapsed test data.
    cind = concord(data=testing, predt=predt)

In [6]:
# Execute the pipeline. ``output`` keeps whatever ``flow.run()`` returns
# (in Prefect 0.x, the flow's final state) so results can be inspected later.
output = flow.run()

[2021-02-13 02:53:38+0000] INFO - prefect.FlowRunner | Beginning Flow run for 'My model pipeline'
[2021-02-13 02:53:38+0000] INFO - prefect.TaskRunner | Task 'Convert input data to range form': Starting task run...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cv[duration_col] += delay


[2021-02-13 02:54:03+0000] INFO - prefect.TaskRunner | Task 'Convert input data to range form': Finished task run for task with final state: 'Success'
[2021-02-13 02:54:03+0000] INFO - prefect.TaskRunner | Task 'Initialize Cox model': Starting task run...
[2021-02-13 02:54:03+0000] INFO - prefect.TaskRunner | Task 'Initialize Cox model': Finished task run for task with final state: 'Success'
[2021-02-13 02:54:03+0000] INFO - prefect.TaskRunner | Task 'Create train and test data': Starting task run...
[2021-02-13 02:54:03+0000] INFO - prefect.Create train and test data | Setting the seed to 42
[2021-02-13 02:54:03+0000] INFO - prefect.Create train and test data | Dataset ``train`` has 1044 rows
[2021-02-13 02:54:03+0000] INFO - prefect.Create train and test data | Dataset ``test`` has 185 rows
[2021-02-13 02:54:04+0000] INFO - prefect.TaskRunner | Task 'Create train and test data': Finished task run for task with final state: 'Success'
[2021-02-13 02:54:04+0000] INFO - prefect.TaskRunne

  problem_columns = (censors_only | deaths_only).difference(total).tolist()


Iteration 5: norm_delta = 0.00000, step_size = 1.00000, ll = -3605.24794, newton_decrement = 0.00000, seconds_since_start = 1.0Convergence completed after 5 iterations.


0,1
model,lifelines.CoxTimeVaryingFitter
event col,'WIN'
number of subjects,1044
number of periods,120373
number of events,617
partial log-likelihood,-3605.25
time fit was run,2021-02-13 02:54:04 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,z,p,-log2(p)
HOME_GAMES_IN_LAST_3_DAYS,-0.09,0.91,0.12,-0.32,0.13,0.72,1.14,-0.82,0.41,1.27
HOME_GAMES_IN_LAST_5_DAYS,-0.0,1.0,0.12,-0.24,0.24,0.78,1.27,-0.02,0.98,0.02
HOME_GAMES_IN_LAST_7_DAYS,-0.08,0.92,0.09,-0.26,0.1,0.77,1.1,-0.87,0.38,1.38
HOME_LINEUP_PLUS_MINUS,0.01,1.01,0.01,-0.01,0.04,0.99,1.04,0.93,0.35,1.51
HOME_NET_RATING,0.01,1.01,0.01,-0.02,0.04,0.98,1.04,0.75,0.45,1.14
HOME_W_PCT,-0.12,0.89,0.36,-0.82,0.58,0.44,1.78,-0.34,0.73,0.44
LAST_GAME_WIN,0.11,1.11,0.09,-0.06,0.27,0.94,1.32,1.23,0.22,2.2
SCOREMARGIN,0.07,1.08,0.0,0.07,0.08,1.07,1.08,22.06,<0.005,355.67
VISITOR_GAMES_IN_LAST_3_DAYS,0.01,1.01,0.12,-0.23,0.25,0.79,1.28,0.06,0.95,0.08
VISITOR_GAMES_IN_LAST_5_DAYS,0.04,1.04,0.12,-0.2,0.27,0.82,1.31,0.31,0.76,0.4

0,1
Partial AIC,7238.50
log-likelihood ratio test,636.69 on 14 df
-log2(p) of ll-ratio test,418.85


[2021-02-13 02:54:05+0000] INFO - prefect.TaskRunner | Task 'Fit Cox model': Finished task run for task with final state: 'Success'
[2021-02-13 02:54:05+0000] INFO - prefect.TaskRunner | Task 'Predict partial hazard': Starting task run...
[2021-02-13 02:54:05+0000] INFO - prefect.TaskRunner | Task 'Predict partial hazard': Finished task run for task with final state: 'Success'
[2021-02-13 02:54:05+0000] INFO - prefect.TaskRunner | Task 'Calculate C-Index': Starting task run...
[2021-02-13 02:54:05+0000] INFO - prefect.Calculate C-Index | Model has a C-index of 0.658
[2021-02-13 02:54:05+0000] INFO - prefect.TaskRunner | Task 'Calculate C-Index': Finished task run for task with final state: 'Success'
[2021-02-13 02:54:05+0000] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
