In [92]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
from scipy.stats import spearmanr
import os
import numpy as np

In [10]:
# Main data table; the first CSV column is used as the row index.
# NOTE(review): presumably columns are sample ids, since the table is
# transposed before being joined with the sample metadata — confirm.
df = pd.read_csv(
    "datasets/gtex10/mainTable.csv",
    index_col=0,
)

# Sample metadata, re-indexed by the SAMPID column so it can be joined
# against sample-indexed frames.
df_files = (
    pd.read_csv("datasets/gtex10/files.dat")
    .set_index("SAMPID")
)

In [29]:
def get_corr(df, labels=None):
    """Correlate group-averaged profiles with each other.

    Every row of ``df`` is assigned to a group by joining ``labels`` on the
    row index, rows are averaged within each group, and the pairwise
    correlation between the resulting group profiles is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per sample (index = sample id), one numeric column per
        feature.
    labels : pandas.Series, optional
        Named Series mapping sample id -> group. When omitted, the
        notebook-level ``df_files["SMTS"]`` is used, which is the behaviour
        callers relied on before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        Square group-by-group correlation matrix (``DataFrame.corr`` with
        its default method).

    Notes
    -----
    The join is a left join, so samples missing from ``labels`` get a NaN
    group and are dropped by ``groupby``.
    """
    if labels is None:
        # Fall back to the metadata loaded at the top of the notebook.
        labels = df_files["SMTS"]

    # After the join the group label is an ordinary column named after the
    # Series, so it can be used directly as the groupby key.
    group_means = df.join(labels).groupby(labels.name).mean()

    # corr() works column-wise, so put the groups on the columns first.
    return group_means.transpose().corr()

In [62]:
# One topic-distribution table per algorithm, in the order the rest of the
# notebook indexes them: topsbm, lda, tm, wgcna.
# Each table drops the "i_doc" column and is indexed by "doc".
df_topics = [
    pd.read_csv(topic_dist_path).drop(columns=["i_doc"]).set_index("doc")
    for topic_dist_path in (
        "datasets/gtex10/topsbm/topsbm_level_2_topic-dist.csv",
        "datasets/gtex10/lda/lda_level_2_topic-dist.csv",
        "datasets/gtex10/tm/tm_level_0_topic-dist.csv",
        "datasets/gtex10/wgcna/wgcna_level_0_topic-dist.csv",
    )
]



In [63]:
# Reference correlation matrix, computed from the main table (transposed so
# that its rows line up with the index get_corr joins on).
corr_data = get_corr(df.T)

# The same matrix for each topic-distribution table, in df_topics order.
corr_top = [get_corr(topic_dist) for topic_dist in df_topics]

In [64]:
# For every tissue, compare how it correlates with all tissues in the
# reference matrix against the same column in each topic-based matrix.
# spearmans[i][j] is the Spearman coefficient for tissue i and model j.
tissues = corr_data.columns
spearmans = []
for tissue in tissues:
    s = [
        spearmanr(
            corr_data[tissue].reindex(index=tissues),
            ctop[tissue].reindex(index=tissues),  # align row order with the reference
        )[0]  # element 0 is the coefficient; the p-value is discarded
        for ctop in corr_top
    ]
    spearmans.append(s)

In [109]:
# 2x2 grid: one panel per model, bars = per-tissue Spearman coefficient,
# dashed gray line = mean over tissues.
fig = make_subplots(2,2)
ls={"width":5, "dash":"dash", "color":"gray"}

# (legend name, subplot row, subplot col); list position matches the model's
# position inside each entry of `spearmans`.
panels = [("hSBM", 1, 1), ("LDA", 1, 2), ("TM", 2, 1), ("WGCNA", 2, 2)]
for model_idx, (model_name, panel_row, panel_col) in enumerate(panels):
    model_scores = [s[model_idx] for s in spearmans]
    fig.add_trace(
        go.Bar(y=model_scores, name=model_name),
        row=panel_row, col=panel_col,
    )
    # Horizontal line from the first to the last bar at the mean score.
    fig.add_trace(
        go.Scatter(
            x=[0, len(tissues)-1],
            y=np.repeat(np.mean(model_scores), 2),
            mode="lines",
            line=ls,
            name="mean",
        ),
        row=panel_row, col=panel_col,
    )

# Same axis styling on every panel; x ticks are labelled with tissue names.
fig.update_yaxes(range=[0,1.1], title="Spearman correlation", tickfont={"size":24})
fig.update_xaxes(
    tickmode="array",
    tickvals=list(range(len(tissues))),
    ticktext=tissues,
    tickangle=45,
    tickfont={"size":18},
)

fig.update_layout(width=1000, height=900)

fig.show()
fig.write_image("spearman.pdf")