In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from lifelines import KaplanMeierFitter

In [None]:
def model_data(path=r'C:\Users\grina\Desktop\VGTU\final_data.csv'):
    """Load the data set and apply the cleaning shared by every analysis cell.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. The default is the location that used to be
        hard-coded, so existing ``model_data()`` calls behave as before;
        pass another path to run the notebook on a different machine.

    Returns
    -------
    pandas.DataFrame
        The file's first column moved out of the index into a regular column,
        missing values replaced by 0, the 'name' and 'Parachute' columns
        removed, the two zone flags mapped from True/False to 1/0 and an
        'event' column equal to 1 on every row.
    """
    train_df = pd.read_csv(path, index_col=0).reset_index()
    # replace empty values with 0, then discard the columns that are never used
    train_df = train_df.fillna(value=0).drop(columns=['name', 'Parachute'])
    # map True/False to 1/0
    for flag in ('is_in_blue_zone', 'is_in_red_zone'):
        train_df[flag] = train_df[flag].replace({True: 1, False: 0})
    # every row is marked as an observed event
    train_df['event'] = 1
    return train_df

## Correlation matrix

In [None]:
%%time
df = model_data()
df = pd.concat([df, pd.get_dummies(df['playing_type'], prefix='playing_type')], axis=1)
df.drop(columns=['playing_type', 'death_time', 'event', 'index'], inplace=True)
#correlation matrix for features
fig, ax = plt.subplots(figsize=(28, 28))
corr = df.corr(method='spearman')
sns.heatmap(corr, annot=True, fmt=".1f", ax=ax)

## Distributions of features

In [None]:
%%time
df_dist = model_data()
#plot scatterplots for features (sharex not needed?)
fig, ax = plt.subplots(5, 15, figsize=(30,30))
for i, col in enumerate(df_dist.columns):
    sns.distplot(df_dist[col], ax=ax.flatten()[i], kde=False)

In [None]:
from scipy import stats
# distribution of death_time; the values are divided by 60 so the axis reads in minutes
death_minutes = (df_dist['death_time'] / 60).round(3)
sns.distplot(death_minutes, kde=False)
plt.xlabel('Laikas (min)')
plt.ylabel('Įvykių kiekis')

## Cox PH models

In [None]:
df_t = model_data()
# collinearity among the playing_type dummies?
df_t = pd.concat([df_t, pd.get_dummies(df_t['playing_type'], prefix='playing_type')], axis=1)
# drop the duo dummy (playing_type_2) to avoid multicollinearity
df_t = df_t.drop(columns=['playing_type_2'])
# Rows that are neither solo (playing_type_1) nor squad (playing_type_3) are the
# former duo rows. They are folded into the squad dummy, as the Kaplan-Meier and
# pysurvival cells of this notebook do; this cell used to set playing_type_1,
# which labelled two-player teams as solo players.
df_t.loc[(df_t['playing_type_1'] == 0) & (df_t['playing_type_3'] == 0), 'playing_type_3'] = 1

In [None]:
%%time
from lifelines import CoxPHFitter
mms = MinMaxScaler(feature_range=(0, 10), copy=False)
#using selected from corr matrix features
df_cox = model_data()

df_cox = pd.concat([df_cox, pd.get_dummies(df_cox['playing_type'], prefix='playing_type')], axis=1)
df_cox.rename(columns={"playing_type_1":"solo", "playing_type_2":"duo", "playing_type_3":"squad"}, inplace=True)
df_cox['death_time'] = df_cox['death_time'].div(60).round(4)
#del multikolinearumo pasalinam duo playing_type_2 atributa
df_cox.drop(columns=['duo'], inplace=True)
#df_cox.loc[(df_cox['solo'] == 0) & (df_cox['squad'] == 0), 'squad'] = 1
df_cox.drop(columns=['distance_sum', 'index', 'playing_type', 'assist', 'item_stack_count', 'damage', 'dist_on_freefall', 'rank'], inplace=True)

not_scaled = ['event', 'solo', 'squad', 'groggy', 'is_in_blue_zone', 'is_in_red_zone', 'death_time']
df_scaled = df_cox.drop(columns=not_scaled)
#scaling features
scaled_features = mms.fit_transform(df_scaled.values)
df_scaled2 = pd.DataFrame(scaled_features, index=df_scaled.index, columns=df_scaled.columns)
df_scaled2[not_scaled] = df_cox[not_scaled]
X_train2, X_test2, y_train2, y_test2 = train_test_split(df_scaled2, df_scaled2['death_time'], test_size=0.2, random_state=20)

cph = CoxPHFitter(penalizer=0.05, l1_ratio=0.3)
cph.fit(X_train2, duration_col='death_time', event_col='event', show_progress=True) #c-index = 0.91
cph.print_summary()

In [None]:
%%time
from lifelines import CoxPHFitter
mms = MinMaxScaler(feature_range=(0, 10), copy=False)
#using selected from corr matrix features
df_cox = model_data()

df_cox = pd.concat([df_cox, pd.get_dummies(df_cox['playing_type'], prefix='playing_type')], axis=1)
df_cox.rename(columns={"playing_type_1":"solo", "playing_type_2":"duo", "playing_type_3":"squad"}, inplace=True)
df_cox['death_time'] = df_cox['death_time'].div(60).round(4)
#del multikolinearumo pasalinam duo playing_type_2 atributa
df_cox.drop(columns=['duo'], inplace=True)
#df_cox.loc[(df_cox['solo'] == 0) & (df_cox['squad'] == 0), 'squad'] = 1
df_cox.drop(columns=['distance_sum', 'index', 'playing_type', 'assist', 'item_stack_count', 'damage', 'dist_on_freefall', 'rank'], inplace=True)

not_scaled = ['event', 'solo', 'squad', 'groggy', 'is_in_blue_zone', 'is_in_red_zone', 'death_time']
df_scaled = df_cox.drop(columns=not_scaled)
#scaling features
scaled_features = mms.fit_transform(df_scaled.values)
df_scaled2 = pd.DataFrame(scaled_features, index=df_scaled.index, columns=df_scaled.columns)
df_scaled2[not_scaled] = df_cox[not_scaled]
X_train2, X_test2, y_train2, y_test2 = train_test_split(df_scaled2, df_scaled2['death_time'], test_size=0.2, random_state=20)

cph = CoxPHFitter(penalizer=0.05)
cph.fit(X_train2, duration_col='death_time', event_col='event', show_progress=True) #c-index = 0.91
cph.print_summary()

In [None]:
fig, ax = plt.subplots(figsize=(10, 5))
# coefficient plot restricted to five selected covariates
top_covariates = ['dist_on_foot', 'dist_on_vehicle', 'dist_on_swim', 'groggy', 'dmg_per_distance']
ax = cph.plot(top_covariates)
ax.set_title('5 didžiausią įtaką rizikos santykiui turintys atributai')
ax.set_xlabel('log(Rizikos santykis), (CI 95%)', fontsize=12)

In [None]:
fig, ax = plt.subplots(figsize=(8, 13))
# coefficient plot for every covariate of the fitted model
coef_ax = cph.plot()
coef_ax.set_xlabel('log(Rizikos santykis), (CI 95%)', fontsize=12)

In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor
# variance inflation factor of every covariate in the training split
x_trn = X_train2.drop(columns=['death_time', 'event'])
variables = x_trn.columns
# the design matrix is extracted once; each covariate is addressed by its column position
design = x_trn[variables].values
vif = [variance_inflation_factor(design, position) for position in range(len(variables))]

In [None]:
from lifelines import CoxPHFitter
# Cox model fitted on the features as loaded (no Min-Max scaling in this cell)
df_cox1 = model_data()
type_dummies1 = pd.get_dummies(df_cox1['playing_type'], prefix='playing_type')
df_cox1 = pd.concat([df_cox1, type_dummies1], axis=1).rename(
    columns={"playing_type_1":"solo", "playing_type_2":"duo", "playing_type_3":"squad"})
df_cox1['death_time'] = (df_cox1['death_time'] / 60).round(4)
# 'duo' is dropped to avoid multicollinearity; the remaining names are the
# columns that are not used as covariates
df_cox1 = df_cox1.drop(columns=['duo', 'distance_sum', 'index', 'playing_type', 'assist',
                                'item_stack_count', 'damage', 'dist_on_freefall', 'rank'])

X_train3, X_test3, y_train3, y_test3 = train_test_split(
    df_cox1, df_cox1['death_time'], test_size=0.2, random_state=20)

cph1 = CoxPHFitter(penalizer=0.05)
cph1.fit(X_train3, duration_col='death_time', event_col='event', show_progress=True)  # c-index = 0.91
cph1.print_summary()

In [None]:
fig, ax = plt.subplots(figsize=(10,6))
# survival curves from cph1 while dist_on_vehicle is set to each listed value
vehicle_values = list(range(0, 14001, 2000))
ax = cph1.plot_covariate_groups('dist_on_vehicle', vehicle_values, cmap='coolwarm', ax=ax)
ax.set_xlabel('Laikas (min)', fontsize=12)
ax.set_title('Išlikimo tikimybių kreivės kintant atributui dist_on_vehicle', fontsize=13)
ax.set_ylabel('Išlikimo tikimybė S(t)', fontsize=12)

In [None]:
fig, ax = plt.subplots(figsize=(10,6))
# survival curves from cph1 while dist_on_foot is set to each listed value
foot_values = list(range(0, 7001, 1000))
ax = cph1.plot_covariate_groups('dist_on_foot', foot_values, cmap='coolwarm', ax=ax)
ax.set_xlabel('Laikas (min)', fontsize=12)
ax.set_title('Išlikimo tikimybių kreivės kintant atributui dist_on_foot', fontsize=13)
ax.set_ylabel('Išlikimo tikimybė S(t)', fontsize=12)

In [None]:
fig, ax = plt.subplots(figsize=(10,6))
# survival curves from cph1 for squad = 0 and squad = 1
squad_values = [0, 1]
ax = cph1.plot_covariate_groups('squad', squad_values, cmap='coolwarm', ax=ax)
ax.set_xlabel('Laikas (min)', fontsize=12)
ax.set_title('Išlikimo tikimybių kreivės kintant atributui squad', fontsize=13)
ax.set_ylabel('Išlikimo tikimybė S(t)', fontsize=12)

In [None]:
from lifelines.statistics import logrank_test
# log-rank test: rows with squad == 1 against all other rows
T, E = df_cox1['death_time'], df_cox1['event']
ix = df_cox1['squad'].eq(1)
res = logrank_test(T[ix], T[~ix], event_observed_A=E[ix], event_observed_B=E[~ix])
print(res.p_value)

In [None]:
fig, ax = plt.subplots(figsize=(10,6))
# survival curves from cph1 for groggy = 0 and groggy = 1
groggy_values = [0, 1]
ax = cph1.plot_covariate_groups('groggy', groggy_values, cmap='coolwarm', ax=ax)
ax.set_xlabel('Laikas (min)', fontsize=12)
ax.set_title('Išlikimo tikimybių kreivės kintant atributui groggy', fontsize=13)
ax.set_ylabel('Išlikimo tikimybė S(t)', fontsize=12)

In [None]:
fig, ax = plt.subplots(figsize=(10,6))
# survival curves from cph1 while dmg_per_distance is set to each listed value
dmg_values = [0, 0.07, 0.14, 0.21, 0.28, 0.35]
ax = cph1.plot_covariate_groups('dmg_per_distance', dmg_values, cmap='coolwarm', ax=ax)
ax.set_xlabel('Laikas (min)', fontsize=12)
ax.set_title('Išlikimo tikimybių kreivės kintant atributui dmg_per_distance', fontsize=13)
ax.set_ylabel('Išlikimo tikimybė S(t)', fontsize=12)

In [None]:
# group differences, for the data-analysis section
from sklearn.preprocessing import OneHotEncoder
df = model_data()
df = pd.concat([df, pd.get_dummies(df['playing_type'], prefix='playing_type')], axis=1)
# drop the duo dummy (playing_type_2) to avoid multicollinearity, then fold the
# rows that are neither playing_type_1 nor playing_type_3 into playing_type_3
df = df.drop(columns=['playing_type_2'])
neither_type_1_nor_3 = (df['playing_type_1'] == 0) & (df['playing_type_3'] == 0)
df.loc[neither_type_1_nor_3, 'playing_type_3'] = 1

fig, ax = plt.subplots(figsize=(12, 12))
kmf = KaplanMeierFitter()
ranges = [0,3,6,9,12,15]
# left-closed kill brackets [0,3), [3,6), ... labelled 1..5
df['kills_g'] = pd.cut(df['kills'], bins=ranges, right=False, labels=[1,2,3,4,5])
# one Kaplan-Meier curve per bracket, with a dashed marker at its median survival time
for name, grouped_df in df.groupby('kills_g'):
    kmf.fit(grouped_df["death_time"], grouped_df["event"], label=name)
    ax.axvline(x=kmf.median_survival_time_, color='black', ls='--', ymax=0.5)
    kmf.plot(ax=ax, ci_show=False)

ax.axhline(y=0.5, color='black', ls='--')

In [None]:
fig1, ax1 = plt.subplots(figsize=(10, 6))
legend = ['Žaidžia komandoje', 'Žaidžia vienas']
kmf1 = KaplanMeierFitter()
i = 0
# one Kaplan-Meier curve per playing_type_1 value; labels are taken in groupby order
for name, grouped_df in df.groupby('playing_type_1'):
    minutes = (grouped_df["death_time"] / 60).round(1)
    kmf1.fit(minutes, grouped_df["event"], label=legend[i])
    median_minutes = kmf1.median_survival_time_
    ax1.axvline(x=median_minutes, color='black', ls='--', ymax=0.47)
    print(median_minutes)
    kmf1.plot(ax=ax1, ci_show=False)
    i += 1
# dashed S(t) = 0.5 guide; xmax is a hand-picked fraction of the axis width
ax1.axhline(y=0.5, color='black', ls='--', xmax=(9.1/33))
ax1.annotate('S(t)={:.1f}'.format(0.5), xy=(0.51,0.51), xytext=(0.51,0.51), fontsize=12)
ax1.set_xticks(np.arange(0, 32, 5))
ax1.set_xlabel('Laikas (min)')
ax1.set_ylabel('Išlikimo tikimybė')
ax1.set_ylim(bottom=0)

In [None]:
# Kaplan-Meier curves split by the groggy column
fig2, ax2 = plt.subplots(figsize=(10, 6))
kmf2 = KaplanMeierFitter()
legend2 = ['Nebuvo sužeistas', 'Buvo sužeistas']
i = 0
for name, grouped_df in df.groupby('groggy'):
    minutes = (grouped_df["death_time"] / 60).round(1)
    kmf2.fit(minutes, grouped_df["event"], label=legend2[i])
    median_minutes = kmf2.median_survival_time_
    # dashed marker at the group's median survival time, with the value written on it
    ax2.axvline(x=median_minutes, color='black', ls='--', ymax=0.47)
    ax2.text(median_minutes, 0.25, str(median_minutes), ha='center', va='center',
             rotation='horizontal', backgroundcolor='white')
    kmf2.plot(ax=ax2, ci_show=False)
    i += 1
# dashed S(t) = 0.5 guide; xmax is a hand-picked fraction of the axis width
ax2.axhline(y=0.5, color='black', ls='--', xmax=(11.9/33))
ax2.set_xticks(np.arange(0, 32, 5))
ax2.annotate('S(t)={:.1f}'.format(0.5), xy=(0.51,0.51), xytext=(0.51,0.51), fontsize=12)
ax2.set_xlabel('Laikas (min)')
ax2.set_ylabel('Išlikimo tikimybė S(t)')
ax2.set_ylim(bottom=0)

In [None]:
fig3, ax3 = plt.subplots(figsize=(10, 6))
kmf3 = KaplanMeierFitter()
# Labels are keyed by the group value instead of by loop position. model_data()
# maps True to 1, so group 1 is "was in the blue zone" ('Buvo ...') and group 0
# is "was not" ('Nebuvo ...'). The positional list used before gave the
# 'Buvo ...' label to group 0, i.e. the two curves were labelled the wrong way round.
legend3 = {0: 'Nebuvo mėlynojoje zonoje', 1: 'Buvo mėlynojoje zonoje'}
for name, grouped_df in df.groupby('is_in_blue_zone'):
    kmf3.fit(grouped_df["death_time"].div(60).round(1), grouped_df["event"], label=legend3[name])
    # dashed marker at the group's median survival time, with the value written on it
    plt.axvline(x=kmf3.median_survival_time_, color='black', ls ='--', ymax=0.47)
    plt.text(kmf3.median_survival_time_, 0.25, str(kmf3.median_survival_time_), ha='center', va='center',rotation='horizontal', backgroundcolor='white')
    print(kmf3.median_survival_time_)
    kmf3.plot(ax=ax3, ci_show=False)
ax3.annotate('S(t)={:.1f}'.format(0.5), xy=(0.51,0.51), xytext=(0.51,0.51), fontsize=12)
plt.xticks(np.arange(0, 32, 5))
# S(t) = 0.5 guide, drawn up to the median of the last fitted group (33 is the assumed axis width)
plt.axhline(y=0.5, color='black', ls ='--', xmax=(kmf3.median_survival_time_/33))
plt.xlabel('Laikas (min)')
plt.ylabel('Išlikimo tikimybė')
ax3.set_ylim(bottom=0)

In [None]:
df = model_data()
fig3, ax3 = plt.subplots(figsize=(10, 6))
kmf3 = KaplanMeierFitter()
# Labels are keyed by the group value instead of by loop position. model_data()
# maps True to 1, so group 1 is "was in the red zone" ('Buvo ...') and group 0
# is "was not" ('Nebuvo ...'). The positional list used before gave the
# 'Buvo ...' label to group 0, i.e. the two curves were labelled the wrong way round.
legend3 = {0: 'Nebuvo raudonojoje zonoje', 1: 'Buvo raudonojoje zonoje'}
for name, grouped_df in df.groupby('is_in_red_zone'):
    kmf3.fit(grouped_df["death_time"].div(60).round(1), grouped_df["event"], label=legend3[name])
    # dashed marker at the group's median survival time
    plt.axvline(x=kmf3.median_survival_time_, color='black', ls ='--', ymax=0.47)
    print(kmf3.median_survival_time_)
    kmf3.plot(ax=ax3, ci_show=False)
ax3.annotate('S(t)={:.1f}'.format(0.5), xy=(0.51,0.51), xytext=(0.51,0.51), fontsize=12)
plt.xticks(np.arange(0, 32, 5))
# S(t) = 0.5 guide, drawn up to the median of the last fitted group (33 is the assumed axis width)
plt.axhline(y=0.5, color='black', ls ='--', xmax=(kmf3.median_survival_time_/33))
plt.xlabel('Laikas (min)')
plt.ylabel('Išlikimo tikimybė')
ax3.set_ylim(bottom=0)

In [None]:
# log-rank test of the hypothesis that the groups differ significantly from each other
from lifelines.statistics import logrank_test
# df_log was never assigned anywhere in this notebook, so on a fresh kernel this
# cell (and the two log-rank cells after it) failed with NameError. It is built
# here the same way as the frame used for the Kaplan-Meier plots: playing_type
# dummies with the former duo rows folded into playing_type_3.
# NOTE(review): confirm this matches the frame the reported p-values came from.
df_log = model_data()
df_log = pd.concat([df_log, pd.get_dummies(df_log['playing_type'], prefix='playing_type')], axis=1)
df_log = df_log.drop(columns=['playing_type_2'])
df_log.loc[(df_log['playing_type_1'] == 0) & (df_log['playing_type_3'] == 0), 'playing_type_3'] = 1

features2 = ['is_in_blue_zone', 'event', 'death_time']
df_log_rank = df_log[features2]
# rows with is_in_blue_zone == 1 against all other rows
ix = df_log_rank['is_in_blue_zone'] == 1
T, E = df_log_rank['death_time'], df_log_rank['event']
res = logrank_test(T[ix], T[~ix], event_observed_A=E[ix], event_observed_B=E[~ix])
print(res.p_value)
print(res.test_statistic)

In [None]:
# log-rank test: rows with groggy == 1 against all other rows
features3 = ['groggy', 'event', 'death_time']
df_log_rank = df_log.loc[:, features3]
T, E = df_log_rank['death_time'], df_log_rank['event']
ix = df_log_rank['groggy'].eq(1)
res = logrank_test(T[ix], T[~ix], event_observed_A=E[ix], event_observed_B=E[~ix])
print(res.p_value)
print(res.test_statistic)

In [None]:
# log-rank test: rows with playing_type_3 == 1 against all other rows
features4 = ['playing_type_3', 'event', 'death_time']
df_log_rank = df_log.loc[:, features4]
T, E = df_log_rank['death_time'], df_log_rank['event']
ix = df_log_rank['playing_type_3'].eq(1)
res = logrank_test(T[ix], T[~ix], event_observed_A=E[ix], event_observed_B=E[~ix])
print(res.p_value)
print(res.test_statistic)

# pysurvival cox reg

In [None]:
%%time
from sklearn.model_selection import train_test_split
from pysurvival.models.semi_parametric import CoxPHModel
from pysurvival.utils.metrics import concordance_index
from pysurvival.utils.display import integrated_brier_score
from pysurvival.utils.display import display_loss_values
df_cox_pysurv = model_data()
mms = MinMaxScaler(feature_range=(0, 10), copy=False)
df_cox_pysurv = pd.concat([df_cox_pysurv, pd.get_dummies(df_cox_pysurv['playing_type'], prefix='playing_type')], axis=1)
df_cox_pysurv.rename(columns={"playing_type_1":"solo", "playing_type_2":"duo", "playing_type_3":"squad"}, inplace=True)
#del multikolinearumo pasalinam duo playing_type_2 atributa
df_cox_pysurv.drop(columns=['duo'], inplace=True)
df_cox_pysurv.loc[(df_cox_pysurv['solo'] == 0) & (df_cox_pysurv['squad'] == 0), 'squad'] = 1
df_cox_pysurv['death_time'] = df_cox_pysurv['death_time'].div(60).round(0)
df_cox_pysurv.drop(columns=['distance_sum', 'index', 'playing_type', 'assist', 'item_stack_count', 'damage', 'dist_on_freefall', 'rank'], inplace=True)

index_train, index_test = train_test_split(range(df_cox_pysurv.shape[0]), test_size = 0.2, random_state=20)
data_train = df_cox_pysurv.loc[index_train].reset_index(drop = True)
data_test  = df_cox_pysurv.loc[index_test].reset_index(drop = True)

X_train, X_test = data_train.drop(columns=['death_time', 'event']), data_test.drop(columns=['death_time', 'event'])
T_train, T_test = data_train['death_time'].values, data_test['death_time'].values
E_train, E_test = data_train['event'].values, data_test['event'].values
coxph = CoxPHModel()
coxph.fit(X_train, T_train, E_train, lr=0.2, max_iter=70) 


In [None]:
%%time
from sklearn.model_selection import train_test_split
from pysurvival.models.semi_parametric import CoxPHModel
from pysurvival.utils.metrics import concordance_index
from pysurvival.utils.display import integrated_brier_score
from pysurvival.utils.display import display_loss_values

df_cox_pysurv = model_data()
mms = MinMaxScaler(feature_range=(0, 10), copy=False)
df_cox_pysurv = pd.concat([df_cox_pysurv, pd.get_dummies(df_cox_pysurv['playing_type'], prefix='playing_type')], axis=1)
df_cox_pysurv.rename(columns={"playing_type_1":"solo", "playing_type_2":"duo", "playing_type_3":"squad"}, inplace=True)
#del multikolinearumo pasalinam duo playing_type_2 atributa
df_cox_pysurv.drop(columns=['duo'], inplace=True)
df_cox_pysurv.loc[(df_cox_pysurv['solo'] == 0) & (df_cox_pysurv['squad'] == 0), 'squad'] = 1
df_cox_pysurv['death_time'] = df_cox_pysurv['death_time'].div(60).round(0)
df_cox_pysurv.drop(columns=['distance_sum', 'index', 'playing_type', 'assist', 'item_stack_count', 'damage', 'dist_on_freefall', 'rank'], inplace=True)

index_train, index_test = train_test_split(range(df_cox_pysurv.shape[0]), test_size = 0.2, random_state=20)
data_train = df_cox_pysurv.loc[index_train].reset_index(drop = True)
data_test  = df_cox_pysurv.loc[index_test].reset_index(drop = True)

X_train, X_test = data_train.drop(columns=['death_time', 'event']), data_test.drop(columns=['death_time', 'event'])
T_train, T_test = data_train['death_time'].values, data_test['death_time'].values
E_train, E_test = data_train['event'].values, data_test['event'].values
test = X_train.iloc[:, 1]

def fit_score_feature(X_train, T_train, E_train, X_test, T_test, E_test):
    """Score every feature on its own with a single-covariate Cox model.

    For each column position, one CoxPHModel instance is refitted on that
    column of ``X_train`` and its concordance index is computed on the same
    column of ``X_test``.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature frames; they are indexed by column position with ``.iloc``.
    T_train, T_test : durations passed through to the model and the metric.
    E_train, E_test : event indicators passed through likewise.

    Returns
    -------
    numpy.ndarray
        One concordance index per column of ``X_train``, in column order.
    """
    coxph = CoxPHModel()
    scores = np.empty(X_train.shape[1])
    for position in range(scores.size):
        # a one-element list keeps the selected column as a DataFrame
        train_column = X_train.iloc[:, [position]]
        test_column = X_test.iloc[:, [position]]
        coxph.fit(train_column, T_train, E_train, lr=0.001, max_iter=50)
        scores[position] = concordance_index(coxph, test_column, T_test, E_test)
    return scores

# per-feature scores, best first
scores = fit_score_feature(X_train, T_train, E_train, X_test, T_test, E_test)
pd.Series(scores, index=X_train.columns).sort_values(ascending=False)


In [None]:
from pysurvival.utils.metrics import brier_score
def brier_score_plot(model, X, T, E, figure_size):
    """Plot the Brier score over time and return its time-averaged value.

    Parameters
    ----------
    model : fitted model accepted by ``brier_score``.
    X, T, E : features, durations and event indicators, passed to ``brier_score``.
    figure_size : tuple
        Passed to ``plt.subplots(figsize=...)``.

    Returns
    -------
    float
        Trapezoidal integral of the Brier score curve divided by ``max(T)``.
    """
    times, brier_scores = brier_score(model, X, T, E)
    # Start both curves at the origin (t = 0, BS = 0). New lists are built so
    # the sequences returned by brier_score are not modified in place.
    times = [0, *times]
    brier_scores = [0, *brier_scores]
    ibs_value = np.trapz(brier_scores, times)/max(T)

    fig, ax = plt.subplots(figsize=figure_size)
    title = 'Brier įvertinimų vidurkis = {:.2f}'.format(ibs_value)
    ax.plot(times, brier_scores, color = 'blue', lw = 2)
    # dotted reference line at 0.25, drawn once (it used to be added twice)
    ax.axhline(y=0.25, ls = 'dotted', color = 'red')
    ax.set_xlim(0, max(T))
    ax.set_ylim(0)
    ax.set_xlabel('Laikas (min)', fontsize=13)
    ax.set_ylabel('Brier įvertinimas BS(t)', fontsize=13)
    ax.set_title(title, fontsize=18)
    plt.show()
    return ibs_value

In [None]:
# Draw the Brier score curve of the pysurvival Cox model for the test split;
# the value returned by brier_score_plot is kept in `ibs`.
ibs = brier_score_plot(coxph, X_test, T_test, E_test, figure_size=(15, 6))

In [None]:
# Concordance index of the pysurvival Cox model, computed on the test split
c_index = concordance_index(coxph, X_test, T_test, E_test)
print('C-index: {:.2f}'.format(c_index)) # 0.90 is the value noted by the author

In [None]:
# Risk predictions of the pysurvival Cox model for the rows of X_test
prediction = coxph.predict_risk(X_test)

In [None]:
# Repackage the event/duration arrays as NumPy structured arrays with the
# fields 'event' (bool) and 'death_time'.
y_tst = np.column_stack((E_test, T_test))
y_trn = np.column_stack((E_train, T_train))
ytst = pd.DataFrame(y_tst, columns=['event', 'death_time'])
ytrn = pd.DataFrame(y_trn, columns=['event', 'death_time'])
ytrn['event'] = ytrn['event'].astype('bool')
ytst['event'] = ytst['event'].astype('bool')
# the column dtypes become the field dtypes of the structured arrays
s = ytrn.dtypes
s1 = ytst.dtypes

y_train = np.array(list(map(tuple, ytrn.values)), dtype=list(zip(s.index, s)))
y_test = np.array(list(map(tuple, ytst.values)), dtype=list(zip(s1.index, s1)))

In [None]:
# smallest and largest duration in the training and test targets
train_min, train_max = np.min(y_train['death_time']), np.max(y_train['death_time'])
test_min, test_max = np.min(y_test['death_time']), np.max(y_test['death_time'])

In [None]:
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.metrics import (concordance_index_censored,concordance_index_ipcw,cumulative_dynamic_auc)
mms = MinMaxScaler(feature_range=(0, 10), copy=False)
df_cox_sksurv = model_data()
df_cox_sksurv = pd.concat([df_cox_sksurv, pd.get_dummies(df_cox_sksurv['playing_type'], prefix='playing_type')], axis=1)
df_cox_sksurv = df_cox_sksurv.rename(columns={"playing_type_1":"solo", "playing_type_2":"duo", "playing_type_3":"squad"})
# 'duo' is dropped to avoid multicollinearity; the remaining names are the
# columns that are not used as covariates
df_cox_sksurv = df_cox_sksurv.drop(columns=['duo'])
df_cox_sksurv = df_cox_sksurv.drop(columns=['distance_sum', 'index', 'playing_type', 'assist', 'item_stack_count', 'damage', 'dist_on_freefall', 'rank'])
# Durations are divided by 60 (unrounded, so their ordering is unchanged), as in
# the other model cells. The time-dependent AUC cell below evaluates
# times = 1..30 on an axis labelled in minutes, which only matches targets
# expressed in minutes.
df_cox_sksurv['death_time'] = df_cox_sksurv['death_time'].div(60)

# flags, duration and event keep their values; every other column is rescaled to [0, 10]
not_scaled = ['event', 'solo',  'squad', 'groggy', 'is_in_blue_zone',  'is_in_red_zone','death_time']
df_scaled = df_cox_sksurv.drop(columns=not_scaled)
scaled_features = mms.fit_transform(df_scaled.values)
df_scaled2 = pd.DataFrame(scaled_features, index=df_scaled.index, columns=df_scaled.columns)
df_scaled2[not_scaled] = df_cox_sksurv[not_scaled]

Xt = df_scaled2.drop(columns=['death_time', 'event'])
# .copy() gives an independent frame, so the dtype change below is not a
# chained assignment on a slice of df_scaled2 (SettingWithCopyWarning)
y = df_scaled2[['event', 'death_time']].copy()
y['event'] = y['event'].astype('bool')
s = y.dtypes
# structured array with the fields ('event', 'death_time')
yt = np.array([tuple(x) for x in y.values], dtype=list(zip(s.index, s)))

X_train, X_test, y_train, y_test = train_test_split(Xt, yt, test_size=0.2, random_state=20)

cox_sksurv = CoxPHSurvivalAnalysis()
cox_sksurv.fit(X_train, y_train)

In [None]:
def fit_and_score_features(X, y):
    """Fit one single-feature Cox model per column and score it on the same data.

    Parameters
    ----------
    X : numpy.ndarray
        Two-dimensional feature matrix; each column is used on its own.
    y : target passed unchanged to ``fit`` and ``score``.

    Returns
    -------
    numpy.ndarray
        ``m.score`` of each single-column fit, in column order.
    """
    m = CoxPHSurvivalAnalysis()
    scores = np.empty(X.shape[1])
    for j, column in enumerate(X.T):
        # back to a two-dimensional (n_samples, 1) matrix for the estimator
        Xj = column.reshape(-1, 1)
        m.fit(Xj, y)
        scores[j] = m.score(Xj, y)
    return scores

# per-feature scores on the training split, best first
scores = fit_and_score_features(X_train.values, y_train)
pd.Series(scores, index=X_train.columns).sort_values(ascending=False)

In [None]:
# time-dependent AUC of the scikit-survival Cox model at times 1..30
times = np.arange(1, 31, 1)
va_auc, va_mean_auc = cumulative_dynamic_auc(y_train, y_test, cox_sksurv.predict(X_test), times)
plt.plot(times, va_auc, marker="o")
plt.xlabel("Žaidimo laikas (min)")
plt.axhline(va_mean_auc, linestyle="--")
# The label shows the mean AUC that was just computed and sits slightly below
# its line; it used to be a hard-coded number (0.972) at a fixed y position, so
# it went stale whenever the data or the model changed.
plt.text(9.5, va_mean_auc - 0.003, 'AUC(0;30)={:.3f}'.format(va_mean_auc), fontsize=12)
plt.ylabel("AUC")
plt.grid(True)
va_mean_auc

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import median_absolute_error
from pysurvival import utils
from pysurvival.models.non_parametric import KaplanMeierModel
from pysurvival import utils
from pysurvival.utils import metrics

def act_to_est(model, X, T, E, figure_size, times = None,  metrics = ['rmse', 'mean', 'median']):
    """Plot the actual against the model-predicted number of events over time.

    The actual curve comes from a Kaplan-Meier model fitted on ``T`` and ``E``
    and is scaled by the number of rows; the predicted curve is ``model``'s
    density summed over all rows of ``X``.

    Parameters
    ----------
    model : fitted model exposing ``predict_density`` and ``time_buckets``.
    X : features handed to ``model.predict_density``.
    T : array with a ``shape`` attribute; durations.
    E : event indicators.
    figure_size : tuple
        Passed to ``plt.subplots(figsize=...)``.
    times : iterable, optional
        Time points to evaluate; defaults to the Kaplan-Meier model's ``times``.
    metrics : str, list of str or None
        Error measures to report: names containing 'rmse'/'root', 'median' or
        'mean'. ``None`` skips the error computation.

    Returns
    -------
    float, dict or None
        A single value when ``metrics`` is a string, a dict keyed by 'rmse',
        'median' and/or 'mean' when it is a list, otherwise ``None``.

    Raises
    ------
    NotImplementedError
        If no requested metric name is recognised.
    """
    kmf = KaplanMeierModel()
    kmf.fit(T, E)
    N = T.shape[0]
    if times is None:
        times = kmf.times
    actual = []
    predicted = []

    # model density summed over the rows of X; indexed below by time-bucket position
    model_pred =  np.sum(model.predict_density(X), 0)
    for t in times:
        # position of the model time bucket whose first bound is closest to t
        min_index = [abs(aj1-t) for (aj1, aj) in model.time_buckets]
        index = np.argmin(min_index)
        # NOTE(review): X is passed to the Kaplan-Meier model's predict_density
        # here - confirm that this method really takes (X, t) and not just t.
        actual.append(N*kmf.predict_density(X,t))
        predicted.append(model_pred[index])
    # No extra leading 0 is added to `predicted`: the loop appends exactly one
    # value per time point to both lists, and an extra element made `predicted`
    # one longer than `actual` and `times`, so the error metrics and the plot
    # below raised on mismatched lengths.

    results = None
    title = 'Realus ir prognozuojamas ivykių skaičius'
    if metrics is not None:
        rmse = np.sqrt(mean_squared_error(actual, predicted))
        med_ae = median_absolute_error(actual, predicted)
        mae = mean_absolute_error(actual, predicted)

        # a single metric was requested
        if isinstance(metrics, str) :
            if 'rmse' in metrics.lower() or 'root' in metrics.lower():
                results = rmse
                title += "\n Šaknis iš vidutinės kvadratinės paklaidos = {:.3f}".format(rmse)
            elif 'median' in metrics.lower() :
                results = med_ae
                title += "\n Absoliutinės paklaidos mediana = {:.3f}".format(med_ae)
            elif 'mean' in metrics.lower() :
                results = mae
                title += "\n Vidutinė absoliutinė paklaida = {:.3f}".format(mae)
            else:
                raise NotImplementedError('{} nėra tokio įvertinimo'.format(metrics))

        # a list of metrics was requested
        elif isinstance(metrics, list):
            results = {}
            requested = [m.lower() for m in metrics]
            if any('rmse' in m or 'root' in m for m in requested):
                results['rmse'] = rmse
                title += "\n Šaknis iš vidutinės kvadratinės paklaidos = {:.3f}".format(rmse)
            if any('median' in m for m in requested):
                results['median'] = med_ae
                title += "\n Absoliutinių paklaidų mediana = {:.3f}".format(med_ae)
            if any('mean' in m for m in requested):
                results['mean'] = mae
                title += "\n Vidutinė absoliutinė paklaida = {:.3f}".format(mae)
            # none of the requested names matched a known metric
            if not results:
                error = 'Nurodyti vertinimai nerasti'
                raise NotImplementedError(error)

    fig, ax = plt.subplots(figsize=figure_size)
    ax.plot(times, actual, color='red', label='Realus', alpha=0.8, lw = 3)
    ax.plot(times, predicted, color='blue', label='Prognozuojamas', alpha=0.8, lw = 3)
    plt.xlim(0, max(T))
    ax.set_ylim(0)
    plt.xlabel('Laikas (min)', fontsize=13)
    plt.ylabel('Įvykių skaičius', fontsize=13)
    plt.title(title, fontsize = 15)
    plt.legend(fontsize = 15)
    plt.show()

    return results

In [None]:
# Evaluate the pysurvival model on the unscaled test features it was trained on.
# By this point X_test has been overwritten by the scikit-survival cell with
# Min-Max scaled features, whereas data_test still holds the pysurvival test
# split, so the features are taken from data_test.
results_end = act_to_est(coxph, data_test.drop(columns=['death_time', 'event']), T_test, E_test, figure_size=(15, 6), metrics=['rmse', 'mean'])