# Basic Cox time-varying proportional hazards model

In [1]:
import sys
import logging
from pathlib import Path

import pandas as pd
from prefect import Flow

from nbaspa.model.tasks import (
    SurvivalData,
    SegmentData,
    InitializeLifelines,
    FitLifelinesModel,
    CollapseData,
    Predict,
    ConcordanceIndex,
)
from nbaspa.model.tasks.meta import META

## Load the data

In [2]:
# Collect the per-file model data for the 2018-19 season.
# ``Path.glob`` yields matches in arbitrary, filesystem-dependent order, so
# the file list is sorted to keep the row order of ``df`` reproducible
# between runs and machines.
data_dir = Path("..", "nba-data", "2018-19", "model-data")
data_files = sorted(data_dir.glob("data_*.csv"))
if not data_files:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(f"No data_*.csv files found in {data_dir}")

# Files are pipe-delimited; GAME_ID is read as a string rather than a number.
# The first column of each file is used as the index and then discarded by
# ``reset_index(drop=True)`` so the combined frame gets a fresh 0..n-1 index.
df = pd.concat(
    pd.read_csv(fpath, sep="|", dtype={"GAME_ID": str}, index_col=0)
    for fpath in data_files
).reset_index(drop=True)

In [3]:
# Preview the first five rows, restricted to the identifier column plus the
# columns listed under META["dynamic"] and META["static"].
meta_columns = [META["id"], *META["dynamic"], *META["static"]]
df[meta_columns].head(5)

Unnamed: 0,GAME_ID,SCOREMARGIN,HOME_LINEUP_PLUS_MINUS,VISITOR_LINEUP_PLUS_MINUS,HOME_NET_RATING,VISITOR_NET_RATING,HOME_W_PCT,VISITOR_W_PCT,LAST_GAME_WIN,HOME_GAMES_IN_LAST_3_DAYS,HOME_GAMES_IN_LAST_5_DAYS,HOME_GAMES_IN_LAST_7_DAYS,VISITOR_GAMES_IN_LAST_3_DAYS,VISITOR_GAMES_IN_LAST_5_DAYS,VISITOR_GAMES_IN_LAST_7_DAYS
0,21800001,0,0.2,2.7,5.1,3.3,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0
1,21800001,0,0.2,2.7,5.1,3.3,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0
2,21800001,0,0.2,2.7,5.1,3.3,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0
3,21800001,0,0.2,2.7,5.1,3.3,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0
4,21800001,0,0.2,2.7,5.1,3.3,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0


## Build a basic model (default values)

In [4]:
# Instantiate one task object per pipeline step, all with default settings.
# The ``name`` given to each task is the label that appears in the Prefect
# run log when the flow is executed.
# -- data preparation
format_data = SurvivalData(name="Convert input data to range form")
segdata = SegmentData(name="Create train and test data")
# -- model construction and fitting
model = InitializeLifelines(name="Initialize Cox model")
trained = FitLifelinesModel(name="Fit Cox model")
# -- evaluation (applied to the test split in the flow definition)
collapsed = CollapseData(name="Collapse test data")
predict = Predict(name="Predict partial hazard")
concord = ConcordanceIndex(name="Calculate C-Index")

In [5]:
# Declare the pipeline. Calling the tasks inside the ``Flow`` context wires
# their inputs and outputs together; as the run log shows, the tasks only
# start executing once ``flow.run()`` is called.
with Flow(name="My model pipeline") as flow:
    alldata = format_data(df)
    # ``data`` is indexed with "train" / "test" below; each lookup shows up
    # as a 'GetItem' task in the run log.
    data = segdata(alldata)
    model_obj = model()
    # Fit on the training portion only.
    fitted = trained(model=model_obj, data=data["train"])
    # The test portion is collapsed before being used for prediction and
    # for the concordance calculation.
    testing = collapsed(data["test"])
    predt = predict(model=fitted, data=testing)
    cind = concord(data=testing, predt=predt)

In [6]:
# Execute the flow; the task-by-task progress is logged below.
# NOTE(review): ``output`` is presumably the flow's final Prefect state, from
# which individual task results can be looked up — confirm against the
# Prefect version in use.
output = flow.run()

[2021-02-18 08:22:38+0000] INFO - prefect.FlowRunner | Beginning Flow run for 'My model pipeline'
[2021-02-18 08:22:38+0000] INFO - prefect.TaskRunner | Task 'Convert input data to range form': Starting task run...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cv[duration_col] += delay


[2021-02-18 08:23:06+0000] INFO - prefect.TaskRunner | Task 'Convert input data to range form': Finished task run for task with final state: 'Success'
[2021-02-18 08:23:06+0000] INFO - prefect.TaskRunner | Task 'Initialize Cox model': Starting task run...
[2021-02-18 08:23:06+0000] INFO - prefect.TaskRunner | Task 'Initialize Cox model': Finished task run for task with final state: 'Success'
[2021-02-18 08:23:06+0000] INFO - prefect.TaskRunner | Task 'Create train and test data': Starting task run...
[2021-02-18 08:23:06+0000] INFO - prefect.Create train and test data | Setting the seed to 42
[2021-02-18 08:23:06+0000] INFO - prefect.Create train and test data | Dataset ``train`` has 1045 games with 123453 rows
[2021-02-18 08:23:06+0000] INFO - prefect.Create train and test data | Dataset ``test`` has 185 games with 21874 rows
[2021-02-18 08:23:06+0000] INFO - prefect.TaskRunner | Task 'Create train and test data': Finished task run for task with final state: 'Success'
[2021-02-18 08:2

  problem_columns = (censors_only | deaths_only).difference(total).tolist()


0,1
model,lifelines.CoxTimeVaryingFitter
event col,'WIN'
number of subjects,1045
number of periods,123453
number of events,611
partial log-likelihood,-3597.97
time fit was run,2021-02-18 08:23:07 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,z,p,-log2(p)
HOME_NET_RATING,0.03,1.03,0.01,-0.0,0.05,1.0,1.06,1.78,0.08,3.73
VISITOR_NET_RATING,-0.02,0.98,0.01,-0.05,0.0,0.95,1.0,-1.75,0.08,3.64
HOME_W_PCT,-0.21,0.81,0.37,-0.94,0.52,0.39,1.68,-0.57,0.57,0.81
VISITOR_W_PCT,0.4,1.49,0.38,-0.34,1.14,0.71,3.12,1.05,0.29,1.77
LAST_GAME_WIN,0.01,1.01,0.09,-0.16,0.19,0.85,1.21,0.16,0.87,0.19
HOME_GAMES_IN_LAST_3_DAYS,-0.06,0.94,0.12,-0.28,0.17,0.75,1.18,-0.5,0.62,0.7
HOME_GAMES_IN_LAST_5_DAYS,0.07,1.07,0.12,-0.18,0.31,0.84,1.36,0.53,0.60,0.75
HOME_GAMES_IN_LAST_7_DAYS,-0.11,0.89,0.09,-0.29,0.07,0.75,1.07,-1.23,0.22,2.2
VISITOR_GAMES_IN_LAST_3_DAYS,-0.08,0.93,0.13,-0.32,0.17,0.72,1.19,-0.6,0.55,0.86
VISITOR_GAMES_IN_LAST_5_DAYS,0.12,1.12,0.13,-0.13,0.36,0.88,1.44,0.94,0.35,1.52

0,1
Partial AIC,7223.95
log-likelihood ratio test,669.52 on 14 df
-log2(p) of ll-ratio test,442.10


[2021-02-18 08:23:07+0000] INFO - prefect.TaskRunner | Task 'Fit Cox model': Finished task run for task with final state: 'Success'
[2021-02-18 08:23:07+0000] INFO - prefect.TaskRunner | Task 'GetItem': Starting task run...
[2021-02-18 08:23:07+0000] INFO - prefect.TaskRunner | Task 'GetItem': Finished task run for task with final state: 'Success'
[2021-02-18 08:23:07+0000] INFO - prefect.TaskRunner | Task 'Collapse test data': Starting task run...
[2021-02-18 08:23:07+0000] INFO - prefect.TaskRunner | Task 'Collapse test data': Finished task run for task with final state: 'Success'
[2021-02-18 08:23:07+0000] INFO - prefect.TaskRunner | Task 'Predict partial hazard': Starting task run...
[2021-02-18 08:23:07+0000] INFO - prefect.TaskRunner | Task 'Predict partial hazard': Finished task run for task with final state: 'Success'
[2021-02-18 08:23:08+0000] INFO - prefect.TaskRunner | Task 'Calculate C-Index': Starting task run...
[2021-02-18 08:23:08+0000] INFO - prefect.Calculate C-Index 