In [None]:
### Please use this when running code in Google Colab.
# Mounts Google Drive at /content/drive; the CSV export later in this notebook
# writes under /content/drive/MyDrive, so that cell needs this mount.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime -- skip this cell (and adjust the output path) when running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
### Module ###
import pandas as pd
import numpy as np
# Seed NumPy's global RNG once, up front, so that the np.random draws made by
# the data-generation cells are repeatable when the notebook is run top to
# bottom on a fresh kernel.
np.random.seed(42)
import matplotlib.pyplot as plt

In [None]:
### Visualize mus transition ###
def visualize_mus_transition():
    """Plot the mean curve of each dummy item for both types.

    Three figures are drawn and shown, one per item:

    * Item_1 (Dummy_ALB): linear, ``slope * x + intercept``.
    * Item_2 (Dummy_BUN): cubic, ``coef * x**3``.
    * Item_3: constant 1, shared by both types.

    The coefficients are the same literals used by ``generate_means``; they
    are repeated here so this function can run before that cell is defined.

    All three figures share one x grid (the integers 0..168) and the same
    x-limits, so the panels are directly comparable.

    Returns:
        None. The figures are displayed via ``plt.show()``.
    """

    ### Coefficients
    # Item_1(Dummy_ALB): [slope, intercept]
    coef_1_type1 = [-0.023809523809524, 5]
    coef_1_type2 = [-0.0089285714285714, 5]

    # Item_2(Dummy_BUN): [cubic coefficient]
    coef_2_type1 = [2*1e-7]
    coef_2_type2 = [1*1e-6]

    # One sample per integer step 0, 1, ..., 168. linspace includes both
    # endpoints, so 168 + 1 points are needed for a step of exactly 1.
    x_max = 168
    x = np.linspace(0, x_max, x_max + 1)

    def _draw(title, curves, ylim):
        # Draw one figure: every (label, y) pair in `curves` against the shared x.
        fig, ax = plt.subplots(figsize=(10, 3))
        for label, y in curves:
            ax.plot(x, y, label=label)
        ax.set_xlim(0 - 1, x_max + 1)
        ax.set_ylim(*ylim)
        # Legend sits outside the axes, to the right, so it never hides a curve.
        ax.legend(bbox_to_anchor=(1, 1), loc="upper left")
        ax.grid(linestyle=":")
        ax.set_title(title)
        plt.show()

    # Item_1
    _draw(
        "Item_1",
        [
            ("type_1", coef_1_type1[0]*x + coef_1_type1[1]),
            ("type_2", coef_1_type2[0]*x + coef_1_type2[1]),
        ],
        ylim=(-0.5, 5.5),
    )

    # Item_2
    _draw(
        "Item_2",
        [
            ("type_1", coef_2_type1[0]*pow(x, 3)),
            ("type_2", coef_2_type2[0]*pow(x, 3)),
        ],
        ylim=(-0.5, 5.5),
    )

    # Item_3
    _draw(
        "Item_3",
        [("type_1&2", [1]*len(x))],
        ylim=(-0.5, 2),
    )

In [None]:
### Running ###
# Draw the three mean-curve figures (Item_1, Item_2, Item_3).
visualize_mus_transition()

In [None]:
### Dummy data generation ###
def generate_means(inv_time_point: int):
    """Compute the mean vector of each type at ``inv_time_point``.

    Every mean vector is ``[Item_1, Item_2, Item_3]`` where

    * Item_1 (Dummy_ALB) is linear: ``slope * inv_time_point + intercept``,
    * Item_2 (Dummy_BUN) is cubic: ``coef * inv_time_point**3``,
    * Item_3 (Dummy_others) is the constant 1.

    Args:
        inv_time_point: value at which the mean curves are evaluated.

    Returns:
        ``np.ndarray`` of shape (2, 3): row 0 is type 1, row 1 is type 2.
    """
    # Per type: (Item_1 slope, Item_1 intercept, Item_2 cubic coefficient)
    params_by_type = (
        (-0.023809523809524, 5, 2*1e-7),    # type 1
        (-0.0089285714285714, 5, 1*1e-6),   # type 2
    )

    # Item_3 has the same mean for both types
    item_3_mean = 1

    means = []
    for slope, intercept, cubic in params_by_type:
        item_1_mean = slope*inv_time_point + intercept
        item_2_mean = cubic*pow(inv_time_point, 3)
        means.append([item_1_mean, item_2_mean, item_3_mean])

    return np.array(means)

def generate_covs():
    """Return the covariance matrix of each type.

    Both types use the 3x3 identity matrix.

    Returns:
        ``np.ndarray`` of shape (2, 3, 3); index 0 is type 1, index 1 is type 2.
    """
    num_types = 2
    num_items = 3

    per_type_cov = [np.eye(num_items) for _ in range(num_types)]
    return np.array(per_type_cov)

def generate_dummy_from_gmm(type: int, inv_time_point: int):
    """Draw one sample from the Gaussian component of the given type.

    The component's mean comes from ``generate_means(inv_time_point)`` and its
    covariance from ``generate_covs()``; the draw uses NumPy's global RNG.

    Args:
        type: 1-based component number (1 or 2). The name shadows the builtin
            ``type`` inside this function; it is kept because callers pass it
            by keyword.
        inv_time_point: value forwarded to ``generate_means``.

    Returns:
        A list of three Python floats: ``[Item_1, Item_2, Item_3]``.

    Raises:
        ValueError: if ``type`` is not a valid 1-based component number.
    """

    mus = generate_means(inv_time_point=inv_time_point)
    covs = generate_covs()

    # Validate before converting to a 0-based index: without this check
    # type=0 becomes index -1, which silently selects the LAST component
    # instead of failing.
    if not 1 <= type <= len(mus):
        raise ValueError(
            f"type must be between 1 and {len(mus)}, got {type!r}"
        )

    z = type - 1
    mu, cov = mus[z], covs[z]
    x = np.random.multivariate_normal(mu, cov)

    # np.float64 >>> float
    x = [float(value) for value in x]

    return x

def aggregate_dummy_per_type(type: int, inv_time_point: int, num_sample=100):
    """Draw ``num_sample`` samples of one type and number them 1..num_sample.

    Args:
        type: component number forwarded to ``generate_dummy_from_gmm``.
        inv_time_point: value forwarded to ``generate_dummy_from_gmm``.
        num_sample: how many samples to draw.

    Returns:
        A list of rows; each row holds the sampled values followed by the
        1-based sample number as its last element.
    """
    # Samples are drawn in order 1, 2, ..., num_sample, one RNG draw per row.
    return [
        [
            *generate_dummy_from_gmm(type=type, inv_time_point=inv_time_point),
            sample_no,
        ]
        for sample_no in range(1, num_sample + 1)
    ]

def generate_dummy_one_time_point(time_point: int):
    """Build the dummy records of both types for a single time point.

    The samples are generated at ``168 - time_point``. Type-1 rows keep the
    Patient_ID produced by ``aggregate_dummy_per_type``; type-2 rows have 100
    added to theirs.

    Args:
        time_point: value stored in the ``Time_point`` column of every row.

    Returns:
        ``pd.DataFrame`` with columns Item_1, Item_2, Item_3, Patient_ID and
        Time_point; type-1 rows first, then type-2 rows, with a fresh 0-based
        index.
    """
    columns = ["Item_1", "Item_2", "Item_3", "Patient_ID"]
    inv_time_point = 168 - time_point

    # Type 1 is sampled before type 2 (this fixes the order of RNG draws).
    frames = []
    for type_no in (1, 2):
        rows = aggregate_dummy_per_type(type=type_no, inv_time_point=inv_time_point)
        frames.append(pd.DataFrame(rows, columns=columns))

    # Shift the type-2 IDs by 100 so they do not collide with the type-1 IDs.
    frames[1]["Patient_ID"] += 100

    dummies = pd.concat(frames, ignore_index=True)
    dummies["Time_point"] = time_point

    return dummies

def generate_dummy_main():
    """Generate the complete dummy dataset.

    Records are generated for time points 1..90 and then for time point 168,
    in that order, and stacked into one frame.

    Returns:
        ``pd.DataFrame`` holding the rows of every time point with a fresh
        0-based index.
    """
    # Time points 1..90, followed by the single extra time point 168
    # (the original code called this last frame "negative").
    time_points = [*range(1, 90 + 1), 168]

    # Collect the per-time-point frames and concatenate once. Growing a frame
    # with pd.concat inside the loop re-copies all earlier rows on every
    # iteration, and starting from an empty DataFrame() adds an empty operand
    # to the concat.
    frames = [
        generate_dummy_one_time_point(time_point=time_point)
        for time_point in time_points
    ]

    return pd.concat(frames, ignore_index=True)


In [None]:
### Save Generated data ###
# Generate the dummy dataset and write it to CSV (without the index column).
# NOTE(review): the output path is an absolute Google Drive location and
# assumes the Drive mount cell at the top of the notebook was run -- change it
# when running outside Colab.
df_dummy = generate_dummy_main()
# Alternative output as a pickle file (disabled):
#df_dummy.to_pickle("path/to/output/directory")
df_dummy.to_csv("/content/drive/MyDrive/res_death_destiny/data/dummy_time_series_EHRdata.csv", index=False)
# Bare last expression: the frame is rendered as this cell's output.
df_dummy

In [None]:
### Visualize distribution ###
def visualize_dummy_distribution(df_dummy: pd.DataFrame, time_point: int):
    """Show a 3-D scatter of the first three columns at one time point.

    Rows with ``Time_point == time_point`` are split by Patient_ID: IDs up to
    100 are drawn in red as "type_1", IDs above 100 in blue as "type_2". The
    first three columns of ``df_dummy`` (selected by position) are used as the
    x, y and z coordinates, and their column names label the axes.

    Args:
        df_dummy: frame with at least three leading value columns plus
            ``Patient_ID`` and ``Time_point`` columns.
        time_point: the ``Time_point`` value to plot.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """

    df_dummy_time_point = df_dummy.query("Time_point == @time_point")

    # (legend label, color, rows of that type)
    groups = (
        ("type_1", "r", df_dummy_time_point.query("Patient_ID <= 100")),
        ("type_2", "b", df_dummy_time_point.query("Patient_ID > 100")),
    )

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')

    for label, color, subset in groups:
        ax.scatter(
            subset.iloc[:, 0],
            subset.iloc[:, 1],
            subset.iloc[:, 2],
            color=color,
            label=label,
        )

    # Label the axes with the plotted columns and add a legend so that the
    # figure is readable on its own.
    x_name, y_name, z_name = (str(name) for name in df_dummy_time_point.columns[:3])
    ax.set_xlabel(x_name)
    ax.set_ylabel(y_name)
    ax.set_zlabel(z_name)
    ax.legend()
    ax.set_title("Time_point: "+str(time_point)+"(Day)")

    plt.show()

In [None]:
### Running ###
# Plot the `df_dummy` created by the "Save Generated data" cell, i.e. the same
# sample that was written to CSV. Calling generate_dummy_main() again here
# would advance the global NumPy RNG and overwrite `df_dummy` with a different
# random draw than the saved one (and repeat the whole generation).
for time_point in [168, 90, 45, 1]:
    visualize_dummy_distribution(df_dummy=df_dummy, time_point=time_point)

