### The goal of this notebook is to check whether the implemented GCP stroke model and the stroke phenotype models give reasonable, meaningful results. This notebook analyzes results from an OSC microsim simulation.

In [None]:
# pandas/numpy handle the tabular simulation output, matplotlib draws the figures.
import pandas as pd
# Passing None removes pandas' display limits, so frames are shown in full
# (every column and every row) instead of being truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import matplotlib.pyplot as plt
import numpy as np
import os

In [None]:
# Directory holding the simulation output. It is defined once so that both
# files are guaranteed to come from the same run. Set the
# MICROSIM_VALIDATION_DIR environment variable to read from another location
# without editing this cell; the default is the original hard-coded directory.
simulationDir = os.environ.get(
    "MICROSIM_VALIDATION_DIR",
    "/Users/deligkaris.1/OneDrive - The Ohio State University Wexner Medical Center/MICROSIM/SIMULATIONS/VALIDATION-OF-STROKE-GCP-AND-PHENOTYPE/POP-50K",
)

# Both files use their first column as the row index.
# low_memory=False makes pandas parse data.csv in one pass instead of in chunks.
data = pd.read_csv(os.path.join(simulationDir, "data.csv"), index_col=0, low_memory=False)
strokePhenotype = pd.read_csv(os.path.join(simulationDir, "strokePhenotypes.csv"), index_col=0)

In [None]:
# Preview the first rows of the simulation output.
data.head()

In [None]:
# A label-based slice includes its end point, so data.loc[:"age0"] holds every
# row up to and including "age0". Subtracting one leaves the number of rows
# that come before "age0" (presumably one row per simulated wave -- confirm).
maxWave = len(data.loc[:"age0"]) - 1
maxWave

In [None]:
# Preview the first rows of the stroke phenotype table.
strokePhenotype.head()

In [None]:
# Short display labels keyed by the full stroke subtype names.
strokePhenotypeLabels = {
    "StrokeSubtype.CARDIOEMBOLIC": "CE",
    "StrokeSubtype.SMALL_VESSEL": "SV",
    "StrokeSubtype.OTHER": "OTHER",
    "StrokeSubtype.LARGE_VESSEL": "LV",
}

In [None]:
# Add a column with the short subtype labels. replace() only swaps values that
# appear as keys in strokePhenotypeLabels and leaves every other value as is.
subtypeColumn = strokePhenotype["strokeSubtype"]
strokePhenotype["strokeSubtypeLabels"] = subtypeColumn.replace(strokePhenotypeLabels)

In [None]:
# Preview again to check the new strokeSubtypeLabels column.
strokePhenotype.head()

In [None]:
# Histogram of the nihss column, labelled so the figure can be read on its own.
plt.hist(strokePhenotype["nihss"])
plt.title("distribution of nihss")
plt.xlabel("nihss")
plt.ylabel("count")
plt.show()

In [None]:
# Histogram of the strokeType column, labelled so the figure can be read on its own.
plt.hist(strokePhenotype["strokeType"])
plt.title("distribution of stroke type")
plt.xlabel("strokeType")
plt.ylabel("count")
plt.show()

In [None]:
# Histogram of the short subtype labels, labelled so the figure can be read on its own.
plt.hist(strokePhenotype["strokeSubtypeLabels"])
plt.title("distribution of stroke subtype")
plt.xlabel("stroke subtype")
plt.ylabel("count")
plt.show()

In [None]:
# Share of each subtype label; normalize=True returns fractions instead of counts.
strokePhenotype["strokeSubtypeLabels"].value_counts(normalize=True)

In [None]:
#data["0"]

In [None]:
# New row "stroke0Wave" = "stroke0Age" minus the "age0" row cast to int,
# computed for every column at once.
# NOTE(review): presumably stroke0Age is NaN for people without a stroke, which
# would make stroke0Wave NaN for them too -- confirm.
firstStrokeAge = data.loc["stroke0Age"]
startingAge = data.loc["age0"].astype('int')
data.loc["stroke0Wave"] = firstStrokeAge - startingAge

In [None]:
def get_prestroke_gcp(x):
    """Return the entries of one column up to and including its stroke wave.

    Parameters
    ----------
    x : pandas.Series
        One column of ``data``; it must contain a "stroke0Wave" entry.

    Returns
    -------
    list
        ``[None]`` when "stroke0Wave" is missing (NaN/None). Otherwise a
        one-element list holding a numpy array with the first
        ``stroke0Wave + 1`` entries of ``x`` (positions 0..stroke0Wave).
        The result is wrapped in a list presumably so that
        ``DataFrame.apply`` stores one cell per column -- confirm.
    """
    strokeWave = x["stroke0Wave"]
    if pd.isna(strokeWave):
        return [None]
    # int() accepts numpy scalars as well as plain Python floats. The latter
    # show up when the column has object dtype and have no .astype method.
    return [np.array(x.iloc[:int(strokeWave) + 1])]

def get_poststroke_gcp(x, nWaves=None):
    """Return the entries of one column that come after its stroke wave.

    Parameters
    ----------
    x : pandas.Series
        One column of ``data``; it must contain a "stroke0Wave" entry.
    nWaves : int, optional
        Exclusive end position of the slice. When omitted, the notebook-level
        ``maxWave`` is used, which is what the function always did before.

    Returns
    -------
    list
        ``[None]`` when "stroke0Wave" is missing (NaN/None). Otherwise a
        one-element list holding a numpy array with the entries of ``x`` at
        positions ``stroke0Wave + 1`` up to (not including) ``nWaves``. The
        array is empty when the stroke wave is the last position before
        ``nWaves``.
    """
    strokeWave = x["stroke0Wave"]
    if pd.isna(strokeWave):
        return [None]
    if nWaves is None:
        # Looked up only when needed, exactly as the original expression did.
        nWaves = maxWave
    # int() accepts numpy scalars as well as plain Python floats. The latter
    # show up when the column has object dtype and have no .astype method.
    return [np.array(x.iloc[int(strokeWave) + 1:nWaves])]

In [None]:
# Run both helpers column by column (the default axis of DataFrame.apply).
gcpPreStroke0 = data.apply(get_prestroke_gcp)
gcpPostStroke0 = data.apply(get_poststroke_gcp)

# Each result has a single row; give it a descriptive label before appending.
gcpPreStroke0.index = ["gcpPreStroke0"]
gcpPostStroke0.index = ["gcpPostStroke0"]

# Append the two derived rows to the bottom of data.
derivedRows = [gcpPreStroke0, gcpPostStroke0]
data = pd.concat([data] + derivedRows)

# One boolean per column: True where the helper returned an array, not None.
indexWithStroke = gcpPreStroke0.loc["gcpPreStroke0"].notnull()

In [None]:
# Take the first column flagged in indexWithStroke as a worked example.
columnsWithStroke = indexWithStroke.index[indexWithStroke.values]
personWithStroke = columnsWithStroke[0]
personWithStroke

In [None]:
# Full column of the example person, including the derived rows added above.
data[personWithStroke]

In [None]:
# Array stored in the "gcpPreStroke0" row for the example person.
data.loc["gcpPreStroke0",personWithStroke]

In [None]:
# Array stored in the "gcpPostStroke0" row for the example person.
data.loc["gcpPostStroke0",personWithStroke]

In [None]:
# Rows from "age0" through "stroke0Wave" (label slices include both ends)
# for the example person.
data.loc["age0":"stroke0Wave",personWithStroke]

In [None]:
# For every wave of the example person, print the stored value, the matching
# age (age0 + wave) and the wave number.
# range(maxWave) visits the wave labels "0" .. "maxWave-1", the same rows the
# other cells select with data.loc["0":f"{maxWave-1}"]. The earlier
# range(maxWave-1) silently left out the last wave.
exampleAge0 = data.loc["age0",personWithStroke]
for i in range(maxWave):
    print(data.loc[f"{i}",personWithStroke],exampleAge0+i,i)

In [None]:
# Keep only the columns that hold an array; the None entries are dropped.
preStrokeRow = gcpPreStroke0.loc["gcpPreStroke0"]
gcpPreStroke0Series = preStrokeRow[preStrokeRow.notnull()]

postStrokeRow = gcpPostStroke0.loc["gcpPostStroke0"]
gcpPostStroke0Series = postStrokeRow[postStrokeRow.notnull()]

In [None]:
# One column per entry of gcpPreStroke0Series. Each array is reversed so that
# row 0 is its last element (the stroke wave), row 1 the one before it, and so
# on. Columns with fewer values are padded with NaN at the bottom when pandas
# aligns them into one frame.
reversedPreStroke = {
    person: pd.Series(values[::-1])
    for person, values in gcpPreStroke0Series.items()
}
gcpPreStroke0DF = pd.DataFrame(reversedPreStroke)
gcpPreStroke0DF.head()

In [None]:
# One column per entry of gcpPostStroke0Series, kept in the stored order, so
# row 0 is the first value after the stroke wave. Columns with fewer values
# are padded with NaN at the bottom when pandas aligns them into one frame.
postStrokeColumns = {
    person: pd.Series(values)
    for person, values in gcpPostStroke0Series.items()
}
gcpPostStroke0DF = pd.DataFrame(postStrokeColumns)
gcpPostStroke0DF.head()

In [None]:
# Reversed pre-stroke values of the example person (row 0 = stroke wave).
gcpPreStroke0DF[personWithStroke]

In [None]:
# Post-stroke values of the example person (row 0 = first wave after the stroke).
gcpPostStroke0DF[personWithStroke]

In [None]:
# Average across the columns (axis=1) of each row and store the result in a
# "mean" column of the same frame. mean() skips NaN by default, so the padding
# does not count.
for cohortDF in (gcpPreStroke0DF, gcpPostStroke0DF):
    cohortDF["mean"] = cohortDF.mean(axis=1)

In [None]:
# The pre-stroke means are stored with row 0 = stroke wave, so reverse them
# back into chronological order. The last element is then the stroke wave.
meanGcpPreStroke0 = gcpPreStroke0DF["mean"].iloc[::-1].tolist()
# The post-stroke means are already chronological and start one wave after
# the stroke wave.
meanGcpPostStroke0 = gcpPostStroke0DF["mean"].tolist()
meanGcp = meanGcpPreStroke0 + meanGcpPostStroke0

# Time axis relative to the stroke wave (0 = stroke wave). The nPre pre-stroke
# values get -(nPre-1) .. 0 and the nPost post-stroke values get 1 .. nPost.
# The earlier ranges had one element too many before the stroke and one too
# few after it, so the lengths agreed but every point was shifted by one wave
# (the stroke wave was plotted at -1).
nPre = len(meanGcpPreStroke0)
nPost = len(meanGcpPostStroke0)
time = list(range(-(nPre - 1), 1)) + list(range(1, nPost + 1))

In [None]:
# Sanity check: both lengths must be equal for the values and the time axis
# to be plotted against each other.
len(meanGcp), len(time)

In [None]:
#time

In [None]:
# Scatter of the cohort-average values against time relative to the stroke.
fig, ax = plt.subplots()
ax.scatter(time, meanGcp)
ax.set_title("mean gcp of the stroke cohort")
ax.set_xlabel("time (0=year of stroke)")
plt.show()

In [None]:
# Inspect the post-stroke means as a plain list.
meanGcpPostStroke0

In [None]:
# Mean over all columns for each row labelled "0" through maxWave-1
# (label slices include both ends).
data.loc["0":f"{maxWave-1}"].mean(axis=1)

In [None]:
# Per-row mean over the rows labelled "0" through maxWave-1, restricted to the
# columns that have a pre-stroke array, plotted against the row position.
hasStroke = gcpPreStroke0.loc["gcpPreStroke0"].notnull()
waveRows = data.loc["0":f"{maxWave-1}", hasStroke]
test = waveRows.mean(axis=1).tolist()
plt.scatter(range(len(test)), test)
plt.show()

In [None]:
# Scratch check of list concatenation: range(2015,2015) is empty, so the result
# is just the strings produced from range(2015,2017).
[f"{x}" for x in range(2015,2015)] + [f"{x}" for x in range(2015,2017)]