In [None]:
using Plotly
using DataFrames
using CSV
using Statistics

In [None]:
# Expression table for the bladder sample. Later cells match `Column1` against gene
# symbols and treat every remaining column as numeric values.
# NOTE(review): presumably rows are genes and the other columns are cells/samples — confirm.
df = CSV.read(
    "The_single_cell_transcriptome_as_a_component_system/MouseCellAtlas/mca/mainTable_Bladder.csv",
    DataFrame,
);

In [None]:
# Gene annotation table; the cells below read its "Feature Type" and `Symbol` columns.
# NOTE(review): the file name suggests an MGI marker query export — confirm the source.
df_genes = CSV.read(
    "MGImarkerQuery_20200914_050053.txt",
    DataFrame,
);

In [None]:
# Show the distinct annotation categories present in the "Feature Type" column.
df_genes[!, "Feature Type"] |> unique

In [None]:
# Partition the annotated symbols by feature type using a single boolean mask.
feature_types = df_genes[!, "Feature Type"]
is_protein_coding = feature_types .== "protein coding gene"
pcgenes = df_genes[is_protein_coding, :Symbol]
# `lncgenes` collects every symbol that is NOT a protein coding gene, which is broader
# than the "lncRNA gene" feature type alone (compare against that value for a strict list).
lncgenes = df_genes[.!is_protein_coding, :Symbol];

In [None]:
# Split the expression table into protein-coding rows (`df`) and all other rows (`df_lnc`).
# Membership is tested against a Set so each lookup is a hash probe instead of a linear
# scan of `pcgenes`, and the mask is computed once and reused for both halves.
pcset = Set(pcgenes)
is_pc = [g in pcset for g in df.Column1]
# `df` is rebound to its protein-coding subset, so `df_lnc` has to be extracted first.
# Re-running this cell without reloading `df` therefore leaves `df_lnc` with no rows.
df_lnc = df[.!is_pc, :];
df = df[is_pc, :];

In [None]:
# Column totals of the non-protein-coding table (column 1 holds the identifiers).
M = [sum(col) for col in eachcol(df_lnc)[2:end]];
# For every row, add up value / column-total over the columns whose total is positive.
# The comprehension yields a concretely typed vector, and `init=0.0` keeps the sum
# defined even when no column has a positive total.
# NOTE(review): the first data row is skipped (`[2:end]`), although it still contributes
# to `M` — confirm this is intended.
f_lnc = [
    reduce(+, (x / m for (x, m) in zip(row[2:end], M) if m > 0); init=0.0)
    for row in eachrow(df_lnc)[2:end]
];

In [None]:
# Column totals of the protein-coding table (column 1 holds the identifiers).
M = [sum(col) for col in eachcol(df)[2:end]];
# For every row, add up value / column-total over the columns whose total is positive.
# The comprehension yields a concretely typed vector, and `init=0.0` keeps the sum
# defined even when no column has a positive total.
# NOTE(review): the first data row is skipped (`[2:end]`), although it still contributes
# to `M` — confirm this is intended.
f = [
    reduce(+, (x / m for (x, m) in zip(row[2:end], M) if m > 0); init=0.0)
    for row in eachrow(df)[2:end]
];

In [None]:
# Rescale each weight vector so it sums to one, then order it from largest to smallest.
# `sort!` acts on the freshly broadcast temporary, so the previous vectors are not mutated.
f = sort!(f ./ sum(f); rev=true);
f_lnc = sort!(f_lnc ./ sum(f_lnc); rev=true);

In [None]:
# Rank/frequency curves of the two gene groups on log-log axes.
trace = Plotly.scatter(y=f, name="protein coding", line_color="blue", line_width=10)
tracelnc = Plotly.scatter(y=f_lnc, name="lnc", line_color="purple", line_width=10)

layout = Layout(
    title = "Bladder MCA",
    xaxis_type = "log",
    xaxis_tickfont_size = 20,
    xaxis_title = "rank, i",
    # The size has to be set on the font object; `xaxis_titlefont = 25` handed plotly a
    # bare number where a font object is expected.
    # NOTE(review): recent plotly.js spells this `title.font.size` — confirm for your version.
    xaxis_titlefont_size = 25,
    yaxis_type = "log",
    yaxis_tickformat = "e",
    yaxis_title = "frequency, fi",
    yaxis_titlefont_size = 25,
    yaxis_tickfont_size = 20,
    # The legend font lives under `legend.font`; `legendfont` is not a layout attribute.
    legend_font_size = 30,
)

data = [trace, tracelnc]

p = Plotly.plot(data, layout)
# Plotly.savefig(p, "zipf_lnc_bladder.pdf")

In [None]:
# Per-row mean and squared coefficient of variation (variance / mean / mean) for both
# tables. Each table is sliced once and every row mean is computed once and reused;
# the resulting values are the same as evaluating `mean(row)` repeatedly.
# NOTE(review): the slice `[2:end, 2:end]` drops the identifier column and also the first
# data row — confirm that skipping the first row is intended.
expr_lnc = df_lnc[2:end, 2:end]
meanslnc = [mean(row) for row in eachrow(expr_lnc)];
cv2lnc = [var(row) / m / m for (row, m) in zip(eachrow(expr_lnc), meanslnc)];

expr_pc = df[2:end, 2:end]
means = [mean(row) for row in eachrow(expr_pc)];
cv2 = [var(row) / m / m for (row, m) in zip(eachrow(expr_pc), means)];

In [None]:
# CV² versus mean for the two gene groups, drawn as markers on log-log axes.
trace = Plotly.scatter(x=means, y=cv2, name="protein coding", line_color="gray", mode="markers", line_width=10)
tracelnc = Plotly.scatter(x=meanslnc, y=cv2lnc, name="lnc", line_color="purple", mode="markers", line_width=10)

layout = Layout(
    # The table loaded in this notebook is the bladder one (and the export below is named
    # "..._bladder.pdf"); the previous "Lung MCA" title was a copy-paste leftover.
    title = "Bladder MCA",
    xaxis_tickfont_size = 20,
    xaxis_type = "log",
    xaxis_title = "mean",
    # The size has to be set on the font object; `xaxis_titlefont = 25` handed plotly a
    # bare number where a font object is expected.
    # NOTE(review): recent plotly.js spells this `title.font.size` — confirm for your version.
    xaxis_titlefont_size = 25,
    yaxis_type = "log",
    yaxis_tickformat = "e",
    yaxis_title = "CV2",
    yaxis_titlefont_size = 25,
    yaxis_tickfont_size = 20,
    # The legend font lives under `legend.font`; `legendfont` is not a layout attribute.
    legend_font_size = 30,
)

data = [trace, tracelnc]

p = Plotly.plot(data, layout)
# Plotly.savefig(p, "cv2_lnc_bladder.pdf")

In [None]:
"""
    isnull(x)

Return whether `x` compares equal to zero (`x == 0`).
"""
isnull(x) = x == 0

"""
    isnotnull(x)

Return the logical negation of `isnull(x)`, i.e. whether `x` differs from zero.
"""
isnotnull(x) = !isnull(x)

"""
    get_occurrence(row)

Return the fraction of entries in `row` for which `isnotnull` holds (non-zero entries).

`row` may be any iterable with a `length`. An empty `row` yields `NaN` (0 / 0).
"""
function get_occurrence(row)
    # `count` tallies the matches directly, without materialising a copy of the row
    # and a filtered copy just to measure its length.
    return count(isnotnull, row) / length(row)
end

In [None]:
# Occurrence (fraction of non-zero entries) of every row, for each of the two tables.
# The slice `[2:end, 2:end]` leaves out the identifier column and the first data row.
Olnc = map(get_occurrence, eachrow(df_lnc[2:end, 2:end]));

O = map(get_occurrence, eachrow(df[2:end, 2:end]));

In [None]:
# Overlaid, probability-normalised histograms of the occurrence values of the two groups.
trace = Plotly.histogram(x=O, name="protein coding", histnorm="probability", marker_color="gray", opacity=0.8)
tracelnc = Plotly.histogram(x=Olnc, name="lnc", histnorm="probability", marker_color="purple", opacity=0.3)

layout = Layout(
    title = "Bladder MCA",
    xaxis_tickfont_size = 20,
    xaxis_title = "Occurrence",
    # The size has to be set on the font object; `xaxis_titlefont = 25` handed plotly a
    # bare number where a font object is expected.
    # NOTE(review): recent plotly.js spells this `title.font.size` — confirm for your version.
    xaxis_titlefont_size = 25,
    yaxis_type = "log",
    yaxis_tickformat = "e",
    yaxis_titlefont_size = 25,
    yaxis_tickfont_size = 20,
    # The legend font lives under `legend.font`; `legendfont` is not a layout attribute.
    legend_font_size = 30,
    barmode = "overlay",
)

data = [trace, tracelnc]

p = Plotly.plot(data, layout)
# Plotly.savefig(p, "U_lnc_bladder.pdf")