In [51]:
using Muon
using RData
using Revise
using ISCHIA
using DataFrames
using Statistics
using Combinatorics

In [52]:
# Load the MuData container (.h5mu) and the ligand-receptor network table (.rds).
mdata = readh5mu("../data/mudata.h5mu")
lr_network = load("../data/lr_network.rds")

# Last expression of the cell: displays the MuData summary.
mdata

MuData object 2185 ✕ 55545
└ Predictions
  AnnData object 2185 ✕ 34
└ Spatial
  AnnData object 2185 ✕ 36601
└ SCT
  AnnData object 2185 ✕ 18910

In [53]:
# Use the values of the `name` column of `var` as the variable names of the
# Spatial modality.
gene_names = mdata["Spatial"].var.name
mdata["Spatial"].var_names = gene_names

# Label every interaction as "<from>_<to>" and keep only the three columns used below.
lr_network[!, :LR_Pairs] = string.(lr_network.from, "_", lr_network.to)
lr_network = lr_network[:, [:from, :to, :LR_Pairs]]

# Keep only interactions whose `from` and `to` genes are both among `gene_names`.
# Membership is tested against a Set: testing against the gene vector itself would
# rescan the whole vector for every row of the network.
gene_set = Set(gene_names)
from_filter = in.(lr_network.from, Ref(gene_set))
to_filter = in.(lr_network.to, Ref(gene_set))
all_LR_network = lr_network[from_filter .& to_filter, :];

In [54]:
"""
    main(all_LR_network; startindex = 1, endindex = 500,
         COI = ["CC4"], exp_th = 1, corr_th = 0.2)

Run `enriched_LRs_refactored` on rows `startindex` to `endindex` of `all_LR_network`.

Reads two notebook globals: `mdata` (its "Spatial" modality is the analysed object)
and `gene_names` (used to keep only genes that are present in the dataset).

# Arguments
- `all_LR_network`: table with `from`, `to` and `LR_Pairs` columns.

# Keywords
- `startindex`, `endindex`: first and last row used; `endindex` is clamped to the
  number of rows, so a too-large value selects everything from `startindex` on.
- `COI`: forwarded as the second argument of `enriched_LRs_refactored`.
- `exp_th`, `corr_th`: forwarded as the last two arguments of
  `enriched_LRs_refactored` (presumably expression and correlation thresholds;
  confirm against the ISCHIA documentation).

# Returns
The value returned by `enriched_LRs_refactored`.
"""
function main(
    all_LR_network;
    startindex = 1,
    endindex = 500,
    COI = ["CC4"],
    exp_th = 1,
    corr_th = 0.2,
)
    # Only a slice of the interactions is analysed to keep the example fast.
    lr_subset = all_LR_network[startindex:min(endindex, end), :]

    # Genes taking part in the selected interactions that also occur in `gene_names`.
    lr_genes = unique(vcat(lr_subset.from, lr_subset.to))
    LR_list = intersect(lr_genes, collect(gene_names))
    LR_pairs = lr_subset.LR_Pairs

    spatial_object = mdata["Spatial"]
    # All distinct values of the "orig.ident" observation column.
    Condition = unique(spatial_object.obs[!, "orig.ident"])

    return enriched_LRs_refactored(
        spatial_object, COI, Condition, LR_list, LR_pairs, exp_th, corr_th
    )
end

main (generic function with 1 method)

In [55]:
# @time main(all_LR_network, endindex=8000)
# 6 sec for CC7

In [57]:
# Run the analysis on the first 8000 rows (at most) of the filtered network.
out = main(all_LR_network, endindex=8000)

Preparing L-R presence/absence matrix
Calculating L-R pairs correlation
Preparing for cooccurrence


[32mCalculate Co-occurrences  45%|█████████████              |  ETA: 0:00:00[39m[K

[32mCalculate Co-occurrences 100%|███████████████████████████| Time: 0:00:00[39m[K


Cooccurrence calculation starts...
Cooccurrence calculation ended

Summary of cooccurrence results:
Of 171991 species pair combinations, 171715 pairs (99.84%) were removed from the analysis because expected co-occurrence was < 1 and
276 pairs were analyzed

Cooccurrence Summary:

Species => 587
Non-random (%) => 12.0
Sites => 14
Negative => 0
Random => 196
Positive => 33
Unclassifiable => 47

Probability table of cooccurrence results:


Row,sp1,sp2,sp1_inc,sp2_inc,obs_cooccur,prob_cooccur,exp_cooccur,p_lt,p_gt,sp1_name,sp2_name
Unnamed: 0_level_1,Integer,Integer,Integer,Integer,Integer,Real,Real,Real,Real,String?,String?
1,71,121,3,5,3,0.077,1.1,1.0,0.02747,CALM2,RPSA
2,71,226,3,9,3,0.138,1.9,1.0,0.23077,CALM2,HLA-A
3,71,227,3,6,2,0.092,1.3,0.94505,0.38462,CALM2,HLA-E
4,71,229,3,7,3,0.107,1.5,1.0,0.09615,CALM2,HLA-C
5,71,230,3,11,3,0.168,2.4,1.0,0.4533,CALM2,HLA-B
6,71,233,3,9,3,0.138,1.9,1.0,0.23077,CALM2,HLA-DRA
7,71,238,3,7,2,0.107,1.5,0.90385,0.5,CALM2,CD24
8,71,267,3,6,3,0.092,1.3,1.0,0.05495,CALM2,NAMPT
9,71,306,3,5,2,0.077,1.1,0.97253,0.27473,CALM2,CCL21
10,71,433,3,8,2,0.122,1.7,0.84615,0.61538,CALM2,THBS1


1 / 4
2 / 4
3 / 4
4 / 4


Dict{String, Any} with 2 entries:
  "cooccurrence_table" => CooccurOutput(276×13 DataFrame…
  "enriched_LRs"       => 3×6 DataFrame…

In [58]:
# Display the table stored under the "enriched_LRs" key of the result dictionary.
out["enriched_LRs"]

Row,from,to,correlation,ligand_FC,Receptor_FC,pair
Unnamed: 0_level_1,String,String,Float64,Float64,Float64,String
1,C3,CXCR4,0.79157,1.3346,3.3869,C3_CXCR4
2,CCL19,CXCR4,0.757651,4.2714,3.3869,CCL19_CXCR4
3,RPS19,RPSA,0.647974,0.5695,0.4499,RPS19_RPSA


In [19]:
# Display the "enriched_LRs" table again (this cell carries an older execution count).
out["enriched_LRs"]

Row,from,to,correlation,ligand_FC,Receptor_FC,pair
Unnamed: 0_level_1,String,String,Float64,Float64,Float64,String
1,C3,CXCR4,0.79157,1.3346,3.3869,C3_CXCR4
2,CCL19,CXCR4,0.757651,4.2714,3.3869,CCL19_CXCR4
3,RPS19,RPSA,0.647974,0.5695,0.4499,RPS19_RPSA


In [None]:
# Benchmark `main`: 20 timed runs for each value of `endindex`, one row per run.
df = DataFrame(label=String[], time=Float64[])

# Untimed warm-up call so that no timed sample includes JIT compilation of `main`
# when this cell is the first one to call it in a session.
main(all_LR_network, endindex = 500)

for endi in [500, 1000, 1500, 2000, 3000, 5000, 8000]
    for _ in 1:20
        t = @elapsed main(all_LR_network, endindex = endi)
        # A tuple row keeps the element types; a `[label, t]` literal is a Vector{Any}.
        push!(df, ("0:$endi", t))
    end
end

In [7]:
# Average the elapsed times per label; the result column is named `time_mean`.
df_mean = combine(groupby(df, :label), :time => mean => :time_mean)
df_mean

Row,label,time_mean
Unnamed: 0_level_1,String,Float64
1,0:500,0.108225
2,0:1000,0.206401
3,0:1500,0.338295
4,0:2000,0.475303
5,0:3000,0.503264
6,0:5000,0.489117
7,0:8000,0.497635


In [8]:
# Mean runtimes of the R benchmark, keyed by label (presumably seconds, like the
# `@elapsed` timings they are compared with; confirm against the R run).
# They are looked up by label instead of being assigned positionally, so the result
# does not depend on the order in which `groupby` happened to emit the groups.
r_means = Dict(
    "0:500" => 65.32004,
    "0:1000" => 147.86019,
    "0:1500" => 253.04721,
    "0:2000" => 258.49103,
    "0:3000" => 263.81379,
    "0:5000" => 379.28183,
    "0:8000" => 535.44503,
)
df_mean[!, :r_mean] = [r_means[label] for label in df_mean.label]

# Ratio of the R mean to the Julia mean for each label.
df_mean[!, :speedup] = df_mean.r_mean ./ df_mean.time_mean
df_mean

Row,label,time_mean,r_mean,speedup
Unnamed: 0_level_1,String,Float64,Float64,Float64
1,0:500,0.108225,65.32,603.557
2,0:1000,0.206401,147.86,716.373
3,0:1500,0.338295,253.047,748.009
4,0:2000,0.475303,258.491,543.845
5,0:3000,0.503264,263.814,524.205
6,0:5000,0.489117,379.282,775.442
7,0:8000,0.497635,535.445,1075.98


In [9]:
# # Results from R benchmark
# r_scores = [65.32004, 147.86019, 253.04721, 258.49103, 263.81379, 379.28183, 535.44503]
# labels = df_mean[!, :label]
# for i in 1:length(labels)
#     push!(df_mean, [labels[i], r_scores[i], "R"])
# end
# df_mean

In [10]:
# @df df_mean groupedhist(:time_mean, group = :lang, bar_position = :dodge)

In [11]:
# groupedbar(df_mean[!, :label], df_mean[!, :time_mean], group = df_mean[!, :lang], xlabel = "Groups", ylabel = "Scores",
#         title = "Scores by group and category", bar_width = 0.67,
#         lw = 0, framestyle = :box)