In [1]:
using Muon
using RData
using Revise
using ISCHIA
using DataFrames
using Combinatorics

In [2]:
# Read the multimodal dataset from its .h5mu file.
mdata = readh5mu("../data/mudata.h5mu")
# Read the ligand-receptor network table from an R .rds file
# (presumably loaded as a DataFrame, since later cells index it by column).
lr_network = load("../data/lr_network.rds")

# Show a summary of the loaded object and its modalities.
mdata

MuData object 2185 ✕ 55545
└ Predictions
  AnnData object 2185 ✕ 34
└ Spatial
  AnnData object 2185 ✕ 36601
└ SCT
  AnnData object 2185 ✕ 18910

In [3]:
# Gene names of the "Spatial" modality, also installed as its variable names.
gene_names = mdata["Spatial"].var.name
mdata["Spatial"].var_names = gene_names

# Create LR_Pairs column ("<from>_<to>") and keep only the columns used downstream
lr_network[!, :LR_Pairs] = string.(lr_network.from, "_", lr_network.to);
lr_network = lr_network[:, [:from, :to, :LR_Pairs]]

# Keep only interactions whose `from` and `to` genes are both present in the data.
# Membership is tested against a Set so each lookup is a hash probe rather than a
# linear scan over every gene name.
gene_name_set = Set(gene_names)
from_filter = in.(lr_network.from, Ref(gene_name_set))
to_filter = in.(lr_network.to, Ref(gene_name_set))
all_LR_network = lr_network[from_filter .& to_filter, :];

In [4]:
"""
    main(all_LR_network; COI=["CC4"], startindex=1, endindex=500, exp_th=1, corr_th=0.2)

Run `enriched_LRs` on a contiguous slice of the ligand-receptor network.

Reads the globals `mdata` (its `"Spatial"` modality is analysed) and `gene_names`.
As a side effect, the variable names of the `"Spatial"` modality are reset to its
`var.name` column.

# Arguments
- `all_LR_network`: table with `from`, `to` and `LR_Pairs` columns.

# Keywords
- `COI`: passed to `enriched_LRs` as the cluster(s) of interest.
- `startindex`, `endindex`: rows `startindex:endindex` of `all_LR_network` are used;
  `endindex` is clamped to the number of rows.
- `exp_th`, `corr_th`: thresholds forwarded unchanged to `enriched_LRs`.

# Returns
Whatever `enriched_LRs` returns.
"""
function main(
    all_LR_network; COI=["CC4"], startindex=1, endindex=500, exp_th=1, corr_th=0.2
)
    # To reduce the computation time, only a contiguous slice of the LR interactions
    # is analysed (no shuffling is performed).
    all_LR_network = all_LR_network[startindex:min(endindex, end), :]

    # Genes taking part in the selected interactions that are also present in the data
    all_LR_genes = unique(vcat(all_LR_network.from, all_LR_network.to))
    all_LR_genes_comm = intersect(all_LR_genes, collect(gene_names))

    LR_pairs = all_LR_network[:, :LR_Pairs]

    spatial_object = mdata["Spatial"]
    spatial_object.var_names = spatial_object.var.name
    Condition = unique(spatial_object.obs[!, "orig.ident"])

    return enriched_LRs(
        spatial_object, COI, Condition, all_LR_genes_comm, LR_pairs, exp_th, corr_th
    )
end

main (generic function with 1 method)

In [None]:
# Analyse the complete LR network (no truncation) for the default cluster "CC4"
# and for "CC7", then pull out the enriched-LR table of each run.
result_4 = main(all_LR_network; endindex=nrow(all_LR_network));
result_7 = main(all_LR_network; COI=["CC7"], endindex=nrow(all_LR_network));
df_4, df_7 = result_4["enriched_LRs"], result_7["enriched_LRs"];

In [114]:
"""
    find_differentially_cooccurring_LR_pairs(group1_results, group2_results,
                                             group1_max_pval, group2_min_pval;
                                             min_observed_cooc=10)

Find LR (Ligand Receptor) pairs that are significantly co-occurring in one group and
not in the other group.

# Arguments
- `group1_results`: Results from the `enriched_LRs` function for Group 1.
- `group2_results`: Results from the `enriched_LRs` function for Group 2.
- `group1_max_pval`: Maximum p-value threshold for significance levels of co-occurring
  LR pairs in Group 1.
- `group2_min_pval`: Minimum p-value threshold for non-significance levels of
  co-occurring LR pairs in Group 2.

# Keywords
- `min_observed_cooc`: a pair is kept only if its observed co-occurrence count in
  Group 1 is strictly greater than this value.

# Returns
A `DataFrame` with columns `pair`, `group1_pval`, `group2_pval`, `pval_difference`
(`group2_pval - group1_pval`) and `observed_cooc` (Group 1 count), sorted by
`observed_cooc` in descending order. Only pairs present in both groups are considered.

# Example
```julia
result = find_differentially_cooccurring_LR_pairs(results_group1, results_group2, 0.05, 0.1)
```
"""
function find_differentially_cooccurring_LR_pairs(
    group1_results, group2_results, group1_max_pval, group2_min_pval;
    min_observed_cooc=10
)
    cooc_results_group1 = group1_results["cooccurrence_table"].results
    cooc_results_group2 = group2_results["cooccurrence_table"].results

    # Index Group 2 by pair name once instead of rescanning it for every Group 1 row.
    # `get!` only inserts missing keys, so a repeated pair keeps its first row.
    group2_pairs = cooc_results_group2.pair12
    group2_row_of = Dict{eltype(group2_pairs),Int}()
    for i in eachindex(group2_pairs)
        get!(group2_row_of, group2_pairs[i], i)
    end

    enriched_LR_pairs_group1 = DataFrame(
        pair=String[], group1_pval=Real[], group2_pval=Real[],
        pval_difference=Real[], observed_cooc=Int[]
    )

    for row in eachrow(cooc_results_group1)
        j = get(group2_row_of, row.pair12, nothing)
        j === nothing && continue  # pair not tested in Group 2

        group1_pval = row.p_gt
        group2_pval = cooc_results_group2.p_gt[j]
        group1_observed_cooc = row.obs_cooccur
        group2_observed_cooc = cooc_results_group2.obs_cooccur[j]
        group1_expected_cooc = row.exp_cooccur
        group2_expected_cooc = cooc_results_group2.exp_cooccur[j]

        if group1_pval < group1_max_pval && group2_pval > group2_min_pval &&
           group1_observed_cooc > min_observed_cooc &&
           group1_observed_cooc != group1_expected_cooc &&
           group2_observed_cooc != group2_expected_cooc &&
           group2_observed_cooc < group1_observed_cooc
            push!(
                enriched_LR_pairs_group1,
                (
                    pair=row.pair12,
                    group1_pval=group1_pval,
                    group2_pval=group2_pval,
                    pval_difference=group2_pval - group1_pval,
                    observed_cooc=group1_observed_cooc,
                ),
            )
        end
    end

    return sort(enriched_LR_pairs_group1, :observed_cooc, rev=true)
end

find_differentially_cooccurring_LR_pairs

In [116]:
# Pairs co-occurring in the "CC4" run (p_gt < 0.05) but not in the "CC7" run (p_gt > 0.1).
diff_df47 = find_differentially_cooccurring_LR_pairs(result_4, result_7, 0.05, 0.1)

Row,pair,group1_pval,group2_pval,pval_difference,observed_cooc
Unnamed: 0_level_1,String,Real,Real,Real,Int64


In [112]:
# Same comparison with the roles swapped: co-occurring in "CC7" but not in "CC4".
diff_df74 = find_differentially_cooccurring_LR_pairs(result_7, result_4, 0.05, 0.1)

Row,pair,group1_pval,group2_pval,pval_difference,observed_cooc
Unnamed: 0_level_1,String,Real,Real,Real,Int64
1,HLA-A_HLA-B,0.0,0.27473,0.27473,235
2,HLA-B_LGALS3,0.00122,0.90385,0.90263,231
3,HLA-C_HLA-B,1.0e-5,0.5,0.49999,230
4,HLA-B_CD24,0.00028,0.5,0.49972,218
5,HLA-E_HLA-B,5.0e-5,0.15385,0.1538,217
6,HLA-A_HLA-C,0.0,0.13287,0.13287,210
7,HLA-A_LGALS3,0.0,0.13287,0.13287,210
8,CD74_LGALS3,0.0,0.14307,0.14307,207
9,HLA-B_THBS1,0.0,0.38462,0.38462,206
10,CD74_HLA-C,0.0,0.14307,0.14307,204


In [102]:
# Spot-check a single pair in the CC7-vs-CC4 table.
diff_df74[diff_df74.pair .== "NAMPT_RPS19", :]

Row,pair,group1_pval,group2_pval,pval_difference,observed_cooc
Unnamed: 0_level_1,String,Real,Real,Real,Int64
1,NAMPT_RPS19,0.0,0.29604,0.29604,131


In [107]:
r_74_pairs = ["CD24_CDHR5", "LGALS3_CDHR5", "HLA-A_SAA1", "HLA-A_CDHR5", "HLA-DRA_CDHR5", "HLA-F_TIMP1", "HLA-A_SERPINE1", "HLA-E_SERPINE1", "SERPINE1_CD74", "SAA1_HLA-DRA", "SERPINE1_HLA-DRA", "CD24_SERPINE1", "RPS19_SERPINE1", "THBS1_HLA-F", "SERPINE1_THBS1", "SERPINE1_LGALS3", "CDHR5_RACK1", "HLA-A_TNFRSF6B", "CD24_TNFRSF6B", "HLA-DRA_TNFRSF6B", "TNFRSF6B_CD74", "HLA-A_COL18A1", "SERPINE1_RPSA", "SERPINE1_RACK1", "HLA-A_HMGB1", "COL18A1_HLA-DRA", "HLA-E_HMGB1", "HMGB1_CD74", "HLA-A_CD163", "SAA1_RPS19", "HMGB1_LGALS3", "CD24_COL18A1", "HLA-DRA_HMGB1", "LGALS3_CD163", "RPS19_COL18A1", "COL18A1_RACK1", "THBS1_TNFRSF6B", "TNFRSF6B_RACK1", "HLA-DRA_CD163", "THBS1_CDHR5", "HMGB1_RACK1", "THBS1_HMGB1", "RACK1_CD163", "HLA-A_RARRES2", "THBS1_CD163", "RARRES2_HLA-DRA", "COL18A1_THBS1", "RARRES2_RACK1", "RARRES2_THBS1", "C3_SERPINE1", "HLA-A_PLAT", "PLAT_HLA-DRA", "RPS19_PLAT", "PLAT_RACK1", "HLA-A_ANXA6", "HLA-DRA_ANXA6", "ANXA6_CD74", "LGALS3_ANXA6", "RPS19_ANXA6", "PLAT_THBS1", "RACK1_ANXA6", "THBS1_ANXA6", "CCL21_THBS1", "CCL21_CD74", "HLA-A_IL7R", "HLA-DRA_IL7R", "CCL21_A2M", "CD24_IL7R", "HLA-A_CXCR4", "A2M_CXCR4", "THBS1_IL7R", "HLA-DRA_CXCR4", "IL7R_RPSA", "IL7R_RACK1", "CCL21_NAMPT", "PSAP_CXCR4", "HSP90B1_CXCR4", "HLA-A_IL10RA", "HLA-E_CXCR4", "THBS1_CXCR4", "HLA-DRA_IL10RA", "LGALS3_IL10RA", "NAMPT_IL7R", "CXCR4_RACK1", "IL10RA_CD74", "RPS19_IL10RA", "NAMPT_CXCR4", "IL10RA_RACK1", "CCL21_SERPINE1", "HLA-A_LGALS3", "LGALS3_CD74", "HLA-A_CD24", "HLA-A_HLA-E", "CD24_LGALS3", "HLA-E_CD74", "CD24_CD74", "CD24_HLA-E", "HLA-DRA_LGALS3", "HLA-E_HLA-DRA", "CCL19_HLA-A", "CCL19_HLA-E", "CCL19_THBS1", "CCL19_TIMP1", "HLA-A_PSAP", "CD24_HLA-DRA", "PSAP_CD74", "HLA-A_HSP90B1", "HLA-A_THBS1", "HSP90B1_CD74", "HLA-A_CALR", "CCL19_A2M", "PSAP_HLA-DRA", "CALR_LGALS3", "CALR_HLA-E", "HLA-E_HSP90B1", "HLA-A_A2M", "RPS19_CD74", "HSP90B1_HLA-DRA", "RPS19_HLA-E", "RACK1_CD74", "A2M_HLA-DRA", "CALR_HLA-DRA", "HLA-E_THBS1", "A2M_LGALS3", "THBS1_HLA-DRA", "RPSA_CD74", "LGALS3_RACK1", 
"A2M_HLA-E", "CD24_THBS1", "HLA-E_RACK1", "HLA-A_MIF", "RPS19_CD24", "PSAP_THBS1", "HLA-DRA_RACK1", "RPS19_PSAP", "RPS19_HLA-DRA", "PSAP_A2M", "MIF_CD74", "LGALS3_RPSA", "HLA-E_RPSA", "MIF_LGALS3", "HLA-A_TIMP1", "A2M_HSP90B1", "CD24_RPSA", "RPS19_HSP90B1", "PSAP_RACK1", "CALR_THBS1", "HLA-DRA_MIF", "TIMP1_LGALS3", "TIMP1_CD74", "A2M_RACK1", "HLA-DRA_TIMP1", "HLA-E_TIMP1", "HSP90B1_THBS1", "RPS19_CALR", "CALR_RACK1", "A2M_THBS1", "CALR_RPSA", "CD24_TIMP1", "THBS1_RPSA", "THBS1_RACK1", "RPSA_RACK1", "TIMP1_RACK1", "PSAP_TIMP1", "HSP90B1_TIMP1", "HLA-A_CALM2", "THBS1_TIMP1", "THBS1_MIF", "CALM2_HLA-DRA", "MIF_RACK1", "CALM2_CD24", "CALM2_HLA-E", "RPS19_MIF", "HLA-A_APOE", "NAMPT_CD74", "HLA-A_NAMPT", "HLA-E_NAMPT", "APOE_CD74", "CD74_HLA-DQB1", "HLA-A_HLA-DQB1", "LGALS3_HLA-DQB1", "PSAP_NAMPT", "APOE_LGALS3", "APOE_CD24", "RPS19_NAMPT", "A2M_NAMPT", "APOE_HLA-DRA", "HLA-DRA_HLA-DQB1", "CALR_NAMPT", "NAMPT_RACK1", "CALM2_THBS1", "CALM2_RACK1", "NAMPT_THBS1", "NAMPT_TIMP1", "RPS19_HLA-DQB1", "GSTP1_CD74", "GSTP1_LGALS3", "HLA-A_GSTP1", "RPS19_APOE", "RACK1_HLA-DQB1", "HLA-A_RRBP1", "LGALS3_RRBP1", "HLA-A_CALM1", "CD24_GSTP1", "CD24_RRBP1", "HLA-DRA_RRBP1", "GSTP1_HLA-DRA", "HLA-DRA_HLA-F", "APOE_THBS1", "APOE_RACK1", "HLA-A_HLA-F", "HLA-E_HLA-F", "CALM1_LGALS3", "CD24_HLA-F", "HLA-F_LGALS3", "CALM1_HLA-DRA", "THBS1_HLA-DQB1", "PSAP_HLA-F", "RPS19_RRBP1", "HLA-DRA_PKD1", "PKD1_RACK1", "GSTP1_THBS1", "GSTP1_RACK1", "CALM1_RACK1", "A2M_HLA-F", "SAA1_LGALS3", "HLA-F_RACK1", "RPS19_HLA-F", "C3_THBS1", "CALM1_THBS1", "THBS1_RRBP1", "RACK1_RRBP1"]
# Compare the reference pair list `r_74_pairs` (presumably the R implementation's
# output — confirm) with the pairs found here.
j_74_pairs = diff_df74.pair

in_both = intersect(r_74_pairs, j_74_pairs)
not_in_j = setdiff(r_74_pairs, j_74_pairs)

# Flip "A_B" into "B_A" for the unmatched reference pairs, in case the two result
# sets only differ in the orientation of the pair name.
not_in_j_reversed = map(not_in_j) do pair_name
    parts = split(pair_name, "_")
    string(parts[2], "_", parts[1])
end;

In [108]:
# Reference pairs (matched directly or after flipping) that are still absent from
# `j_74_pairs`; an empty set means every reference pair was recovered.
setdiff(Set([in_both; not_in_j_reversed]), j_74_pairs)

Set{String}()

In [113]:
# Pairs found here that the reference list does not contain in either orientation.
setdiff(j_74_pairs, Set([in_both; not_in_j_reversed]))

54-element Vector{String}:
 "HLA-A_HLA-B"
 "HLA-B_LGALS3"
 "HLA-C_HLA-B"
 "HLA-B_CD24"
 "HLA-E_HLA-B"
 "HLA-A_HLA-C"
 "HLA-B_THBS1"
 "CD74_HLA-C"
 "HLA-B_PSAP"
 "HLA-C_LGALS3"
 ⋮
 "HLA-B_HMGB1"
 "HLA-B_CD163"
 "HLA-B_RARRES2"
 "HLA-C_RARRES2"
 "HLA-B_PLAT"
 "HLA-C_PLAT"
 "ANXA6_HLA-C"
 "HLA-C_IL10RA"
 "HLA-C_CCL19"

In [72]:
# Spot-check a single pair in the CC7-vs-CC4 table.
diff_df74[diff_df74.pair .== "RACK1_RRBP1", :]

Row,pair,group1_pval,group2_pval,pval_difference,observed_cooc
Unnamed: 0_level_1,String,Real,Real,Real,Int64
1,RACK1_RRBP1,0.0,0.30769,0.30769,100


In [6]:
# Inspect the dictionary returned by `main` for the default cluster "CC4".
result_4

Dict{String, Any} with 2 entries:
  "cooccurrence_table" => CooccurOutput([1m621×13 DataFrame[0m[0m…
  "enriched_LRs"       => [1m3×6 DataFrame[0m[0m…

In [7]:
# Scratch inputs for stepping through `find_differentially_cooccurring_LR_pairs`
# by hand: the two result sets and the p-value thresholds.
group1_results = result_4
group2_results = result_7
group1_max_pval = 0.05
group2_min_pval = 0.1;

In [15]:
# Co-occurrence tables for the manual walkthrough.
# NOTE: group 1 is taken from `result_7` and group 2 from `result_4` here, i.e. the
# opposite of the `group1_results` / `group2_results` assignment in the cell above.
cooc_results_group1 = result_7["cooccurrence_table"].results
cooc_results_group2 = result_4["cooccurrence_table"].results;

In [22]:
# Empty accumulator (no placeholder row) that the loop below appends matching pairs to.
enriched_LR_pairs_group1 = DataFrame(pair=String[], group1_pval=Real[], group2_pval=Real[], pval_difference=Real[], observed_cooc=Int[])

Row,pair,group1_pval,group2_pval,pval_difference,observed_cooc
Unnamed: 0_level_1,String,Real,Real,Real,Int64


In [23]:
# Manual walkthrough of the body of `find_differentially_cooccurring_LR_pairs`:
# append every pair that passes the filters to `enriched_LR_pairs_group1`.
for row in eachrow(cooc_results_group1)
    # One scan locates the first matching group 2 row (or `nothing`), replacing the
    # separate membership test plus three identical `filter` passes per row.
    idx = findfirst(==(row.pair12), cooc_results_group2.pair12)
    idx === nothing && continue

    group1_pval = row.p_gt
    group2_pval = cooc_results_group2.p_gt[idx]
    group1_observed_cooc = row.obs_cooccur
    group2_observed_cooc = cooc_results_group2.obs_cooccur[idx]
    group1_expected_cooc = row.exp_cooccur
    group2_expected_cooc = cooc_results_group2.exp_cooccur[idx]
    pval_difference = group2_pval - group1_pval

    if group1_pval < group1_max_pval && group2_pval > group2_min_pval &&
       group1_observed_cooc > 10 &&
       group1_observed_cooc != group1_expected_cooc &&
       group2_observed_cooc != group2_expected_cooc &&
       group2_observed_cooc < group1_observed_cooc
        push!(
            enriched_LR_pairs_group1,
            (
                pair=row.pair12,
                group1_pval=group1_pval,
                group2_pval=group2_pval,
                pval_difference=pval_difference,
                observed_cooc=group1_observed_cooc,
            ),
        )
    end
end

In [25]:
# enriched_LR_pairs_group1
# Order the collected pairs by their group 1 co-occurrence count, largest first.
enriched_LR_pairs_group1_sorted = sort(enriched_LR_pairs_group1, :observed_cooc, rev=true)

Row,pair,group1_pval,group2_pval,pval_difference,observed_cooc
Unnamed: 0_level_1,String,Real,Real,Real,Int64
1,HLA-A_HLA-B,0.0,0.27473,0.27473,235
2,HLA-B_LGALS3,0.00122,0.90385,0.90263,231
3,HLA-C_HLA-B,1.0e-5,0.5,0.49999,230
4,HLA-B_CD24,0.00028,0.5,0.49972,218
5,HLA-E_HLA-B,5.0e-5,0.15385,0.1538,217
6,HLA-A_HLA-C,0.0,0.13287,0.13287,210
7,HLA-A_LGALS3,0.0,0.13287,0.13287,210
8,CD74_LGALS3,0.0,0.14307,0.14307,207
9,HLA-B_THBS1,0.0,0.38462,0.38462,206
10,CD74_HLA-C,0.0,0.14307,0.14307,204


In [None]:

# The accumulator is created empty (no placeholder first row), so no leading row is
# trimmed: dropping row 1 would discard a real result. DataFrames.jl also has no row
# names, so the pair labels are kept in their own vector, aligned row-for-row with
# the sorted table, before the `pair` column is removed.
pair_labels = copy(enriched_LR_pairs_group1_sorted.pair)

select!(enriched_LR_pairs_group1_sorted, Not(:pair))