In [44]:
using Muon
using RData
using Revise
using ISCHIA
using DataFrames
using Combinatorics

In [45]:
# Read the multimodal dataset from the .h5mu file (the path is relative to the
# notebook's working directory).
mdata = readh5mu("../data/mudata.h5mu")
# Load the ligand-receptor network from the .rds file; the next cell uses it as
# a table with `from` and `to` columns.
lr_network = load("../data/lr_network.rds")

# Evaluate `mdata` last so the notebook displays a summary of the object.
mdata

MuData object 2185 ✕ 55545
└ Predictions
  AnnData object 2185 ✕ 34
└ Spatial
  AnnData object 2185 ✕ 36601
└ SCT
  AnnData object 2185 ✕ 18910

In [46]:
gene_names = mdata["SCT"].var.name
# A Set gives constant-time membership tests; broadcasting `in` against the raw
# name vector would rescan every gene name for each row of the network.
gene_set = Set(gene_names)

# Label every network edge as "<from>_<to>" and keep only the columns that are
# used afterwards.
lr_network[!, :LR_Pairs] = string.(lr_network.from, "_", lr_network.to);
lr_network = lr_network[:, [:from, :to, :LR_Pairs]]

# Keep only the edges whose `from` and `to` genes are both among the SCT gene
# names.
from_filter = in.(lr_network.from, Ref(gene_set))
to_filter = in.(lr_network.to, Ref(gene_set))
all_LR_network = lr_network[from_filter .& to_filter, :];
size(all_LR_network)

(7067, 3)

In [84]:
mdata["SCT"].var.name

18910-element Vector{String}:
 "AL627309.1"
 "AL627309.5"
 "LINC01409"
 "LINC01128"
 "LINC00115"
 "FAM41C"
 "LINC02593"
 "SAMD11"
 "NOC2L"
 "KLHL17"
 ⋮
 "POU3F4"
 "PABPC5"
 "NXF3"
 "AL023653.1"
 "AFF2"
 "TREX2"
 "F8A3"
 "AC136616.1"
 "AC007325.2"

In [47]:
# Distinct genes found on either side of the filtered network, in order of
# first appearance (all `from` entries first, then the `to` entries).
all_LR_genes = union(all_LR_network.from, all_LR_network.to)
# Restrict them to the genes that are also listed in `gene_names`.
all_LR_genes_comm = intersect(all_LR_genes, gene_names);

# Pair labels of the filtered network, plus an "<a>_<b>" label for every
# 2-element combination of the shared genes.
LR_pairs = copy(all_LR_network.LR_Pairs)
all_combos = map(combo -> join(combo, "_"), combinations(all_LR_genes_comm, 2));

In [48]:
spatial_object = mdata["Spatial"]
# Use the `name` column of `var` as the variable names of this modality.
spatial_object.var_names = spatial_object.var.name
# Distinct values of the "orig.ident" observation column.
Condition = unique(spatial_object.obs[!, "orig.ident"])
LR_list = all_LR_genes_comm
# `LR_pairs` from the previous cell is reused unchanged, so no reassignment is
# needed here.
# Thresholds handed to `find_enriched_LR_pairs` in the next cell.
exp_th = 1
corr_th = 0.2

0.2

In [None]:
# Run the same enrichment call once for the "CC4" label and once for "CC7";
# every other argument is shared between the two runs.
lr_cc4, lr_cc7 = map(("CC4", "CC7")) do label
    find_enriched_LR_pairs(
        spatial_object, [label], Condition, LR_list, LR_pairs, exp_th, corr_th
    )
end;

In [50]:
lr_cc7["enriched_LRs"]

Row,from,to,correlation,ligand_FC,Receptor_FC,pair
Unnamed: 0_level_1,String,String,Float64,Float64,Float64,String
1,LGALS3,ITGB4,0.832963,0.7695,0.8609,LGALS3_ITGB4
2,CDHR5,CDHR2,0.830117,0.7361,0.9662,CDHR5_CDHR2
3,CDHR2,CDHR5,0.830117,0.9662,0.7361,CDHR2_CDHR5
4,HLA-E,ITGA6,0.780889,1.1673,0.7086,HLA-E_ITGA6
5,HLA-E,PLXNB2,0.779063,1.1673,0.7971,HLA-E_PLXNB2
6,COL1A1,ITGA5,0.764293,1.9866,1.872,COL1A1_ITGA5
7,DSC2,LSR,0.7638,0.8149,0.7539,DSC2_LSR
8,DSC2,EPHA2,0.759718,0.8149,0.8393,DSC2_EPHA2
9,GRN,TNFRSF1A,0.748926,1.1756,1.0184,GRN_TNFRSF1A
10,DSG2,EPHA2,0.737155,0.7307,0.8393,DSG2_EPHA2


In [83]:
lr_cc7

Dict{String, Any} with 2 entries:
  "cooccurrence_table" => CooccurOutput([1m65793×13 DataFrame[0m[0m…
  "enriched_LRs"       => [1m540×6 DataFrame[0m[0m…

In [43]:
lr_cc7["enriched_LRs"]

Row,from,to,correlation,ligand_FC,Receptor_FC,pair
Unnamed: 0_level_1,String,String,Float64,Float64,Float64,String
1,LGALS3,ITGB4,0.832963,0.7695,0.8609,LGALS3_ITGB4
2,CDHR5,CDHR2,0.830117,0.7361,0.9662,CDHR5_CDHR2
3,CDHR2,CDHR5,0.830117,0.9662,0.7361,CDHR2_CDHR5
4,HLA-E,ITGA6,0.780889,1.1673,0.7086,HLA-E_ITGA6
5,HLA-E,PLXNB2,0.779063,1.1673,0.7971,HLA-E_PLXNB2
6,COL1A1,ITGA5,0.764293,1.9866,1.872,COL1A1_ITGA5
7,DSC2,LSR,0.7638,0.8149,0.7539,DSC2_LSR
8,DSC2,EPHA2,0.759718,0.8149,0.8393,DSC2_EPHA2
9,GRN,TNFRSF1A,0.748926,1.1756,1.0184,GRN_TNFRSF1A
10,DSG2,EPHA2,0.737155,0.7307,0.8393,DSG2_EPHA2


In [79]:
"""
    find_duplicate_rows(df, column_name)

Return one representative row for every value of `column_name` that occurs more
than once in `df`.

For each duplicated value only its FIRST row is reported, and the reported rows
keep their original relative order.

Values are matched with `isequal` (the same notion `unique` uses), so `missing`
and `NaN` entries are compared with themselves instead of raising an error or
being skipped.

# Arguments
- `df`: the table to inspect; it is not modified.
- `column_name`: the column whose repeated values are searched for.

# Returns
A new table with the columns of `df`. When no value is repeated the result has
zero rows but still carries those columns, so column access on the result stays
valid.
"""
function find_duplicate_rows(df, column_name)
    column = df[!, column_name]

    # Count how often each value occurs in a single pass, instead of
    # re-filtering the whole table once per unique value.
    occurrences = Dict{eltype(column),Int}()
    for value in column
        occurrences[value] = get(occurrences, value, 0) + 1
    end

    # Record the index of the first row of every value seen more than once.
    reported = Set{eltype(column)}()
    first_rows = Int[]
    for (index, value) in pairs(column)
        if occurrences[value] > 1 && !(value in reported)
            push!(reported, value)
            push!(first_rows, index)
        end
    end

    return df[first_rows, :]
end

find_duplicate_rows (generic function with 1 method)

In [80]:
# Read the table straight from `lr_cc7` so this cell does not depend on the
# `df_7` alias, which is only assigned in a cell further down the notebook.
duplicates = find_duplicate_rows(lr_cc7["enriched_LRs"], "correlation")

Row,from,to,correlation,ligand_FC,Receptor_FC,pair
Unnamed: 0_level_1,String,String,Float64,Float64,Float64,String
1,CDHR5,CDHR2,0.830117,0.7361,0.9662,CDHR5_CDHR2
2,DSC2,DSG2,0.722542,0.8149,0.7307,DSC2_DSG2
3,CDH1,PTPRF,0.605276,0.8122,0.7244,CDH1_PTPRF
4,F11R,ITGB2,0.55938,0.942,2.1694,F11R_ITGB2
5,ITGB2,ICAM1,0.463724,2.1694,1.9715,ITGB2_ICAM1
6,PTPRC,CD2,0.394723,1.6397,1.6859,PTPRC_CD2
7,JAM3,ITGB2,0.317787,2.1433,2.1694,JAM3_ITGB2
8,PVR,CDH1,0.276865,0.7666,0.8122,PVR_CDH1
9,ICAM2,ITGB2,0.248302,1.8473,2.1694,ICAM2_ITGB2
10,F11R,ITGAL,0.203744,0.942,1.2148,F11R_ITGAL


In [82]:
# Keep the first row for each distinct `correlation` value. The table is read
# straight from `lr_cc7` so this cell does not depend on the `df_7` alias,
# which is only assigned in a cell further down the notebook.
unique(lr_cc7["enriched_LRs"], :correlation)

Row,from,to,correlation,ligand_FC,Receptor_FC,pair
Unnamed: 0_level_1,String,String,Float64,Float64,Float64,String
1,LGALS3,ITGB4,0.832963,0.7695,0.8609,LGALS3_ITGB4
2,CDHR5,CDHR2,0.830117,0.7361,0.9662,CDHR5_CDHR2
3,HLA-E,ITGA6,0.780889,1.1673,0.7086,HLA-E_ITGA6
4,HLA-E,PLXNB2,0.779063,1.1673,0.7971,HLA-E_PLXNB2
5,COL1A1,ITGA5,0.764293,1.9866,1.872,COL1A1_ITGA5
6,DSC2,LSR,0.7638,0.8149,0.7539,DSC2_LSR
7,DSC2,EPHA2,0.759718,0.8149,0.8393,DSC2_EPHA2
8,GRN,TNFRSF1A,0.748926,1.1756,1.0184,GRN_TNFRSF1A
9,DSG2,EPHA2,0.737155,0.7307,0.8393,DSG2_EPHA2
10,DSG2,LRP10,0.735606,0.7307,0.9101,DSG2_LRP10


In [51]:
# Short alias for the "enriched_LRs" table of the "CC7" run.
df_7 = lr_cc7["enriched_LRs"]
# Access the `to` column of that table.
df_7.to

Row,from,to,correlation,ligand_FC,Receptor_FC,pair
Unnamed: 0_level_1,String,String,Float64,Float64,Float64,String
1,LGALS3,ITGB4,0.832963,0.7695,0.8609,LGALS3_ITGB4
2,CDHR5,CDHR2,0.830117,0.7361,0.9662,CDHR5_CDHR2
3,CDHR2,CDHR5,0.830117,0.9662,0.7361,CDHR2_CDHR5
4,HLA-E,ITGA6,0.780889,1.1673,0.7086,HLA-E_ITGA6
5,HLA-E,PLXNB2,0.779063,1.1673,0.7971,HLA-E_PLXNB2
6,COL1A1,ITGA5,0.764293,1.9866,1.872,COL1A1_ITGA5
7,DSC2,LSR,0.7638,0.8149,0.7539,DSC2_LSR
8,DSC2,EPHA2,0.759718,0.8149,0.8393,DSC2_EPHA2
9,GRN,TNFRSF1A,0.748926,1.1756,1.0184,GRN_TNFRSF1A
10,DSG2,EPHA2,0.737155,0.7307,0.8393,DSG2_EPHA2


In [None]:
# NOTE(review): this is a property access, not a call, and the `df_7` table as
# displayed in this notebook has no `save` column. It looks like an unfinished
# attempt to write the table to disk — confirm the intent or remove the cell.
df_7.save

In [None]:
# NOTE(review): the `df_7` table as displayed in this notebook has the columns
# from, to, correlation, ligand_FC, Receptor_FC and pair, but no `x1`; this
# filter would fail as written — confirm which column was meant.
filter(r -> r.x1 != 0, df_7)

In [15]:
duplicates = find_duplicate_rows(lr_cc7["enriched_LRs"], "correlation")

Row,from,to,correlation,ligand_FC,Receptor_FC,pair
Unnamed: 0_level_1,String,String,Float64,Float64,Float64,String
1,CDHR5,CDHR2,0.830117,0.7361,0.9662,CDHR5_CDHR2
2,CDHR2,CDHR5,0.830117,0.9662,0.7361,CDHR2_CDHR5
3,DSC2,DSG2,0.722542,0.8149,0.7307,DSC2_DSG2
4,DSG2,DSC2,0.722542,0.7307,0.8149,DSG2_DSC2
5,CDH1,PTPRF,0.605276,0.8122,0.7244,CDH1_PTPRF
6,PTPRF,CDH1,0.605276,0.7244,0.8122,PTPRF_CDH1
7,F11R,ITGB2,0.55938,0.942,2.1694,F11R_ITGB2
8,ITGB2,F11R,0.55938,2.1694,0.942,ITGB2_F11R
9,ITGB2,ICAM1,0.463724,2.1694,1.9715,ITGB2_ICAM1
10,ICAM1,ITGB2,0.463724,1.9715,2.1694,ICAM1_ITGB2


In [17]:
r_pairs = ["LGALS3_ITGB4", "CDHR5_CDHR2", "HLA-E_ITGA6", "HLA-E_PLXNB2", "COL1A1_ITGA5", "DSC2_LSR", "DSC2_EPHA2", "GRN_TNFRSF1A", "DSG2_EPHA2", "DSG2_LRP10", "LAMC2_ITGA2", "DSG2_DSC2", "RPS19_RPSA", "TNC_ITGA5", "MIF_CD74", "DSG2_LSR", "CALM1_TNFRSF1A", "CDH1_DDR1", "ITGB1_ITGA6", "PLAU_PLAUR", "GRN_SORT1", "LAMC2_ITGB4", "TNC_ITGA2", "CALR_ITGA3", "LGALS3_NPTN", "ITGB1_CDH17", "PSAP_SORT1", "PLXNB2_MST1R", "CALR_ITGAV", "HLA-A_ERBB3", "LGALS3_TFRC", "MMP9_ITGA5", "LAMC2_ITGA6", "EFNA2_EPHA2", "ADAM9_ITGA6", "CCL2_ACKR1", "EFNA1_EPHA2", "ITGB1_ITGA5", "RTN4_EPHA2", "ITGB1_CD151", "CEACAM1_INSR", "ADAM9_ITGAV", "CDH1_PTPRF", "LGALS3_CLEC7A", "EFNB2_EPHA2", "HSP90B1_LDLR", "ANXA1_CD44", "APP_ITGA6", "EFNB1_EPHA2", "MDK_LRP1", "CALR_ITGA5", "HSPG2_FLT1", "ITGB1_RACK1", "DSG2_ITPR3", "ITGB1_PLAUR", "VEGFA_ITGAV", "ADAM9_ITGA3", "ITGB1_ITGAV", "APOE_SDC1", "ITGB1_ITGA3", "ITGB1_EPHA2", "ITGB1_ENG", "ITGB2_F11R", "COL1A1_CD36", "PSAP_LRP1", "CLSTN1_IGF2R", "PTPRC_CD44", "CALR_LRP1", "LAMC2_ITGA3", "PLXNB2_MET", "CDH1_MET", "MDK_SDC4", "PECAM1_ITGA5", "APP_SDC1", "CALR_CD44", "DSC2_INSR", "ITGB1_FAT1", "LAMC3_ITGA6", "CLSTN1_TFRC", "RTN4_CD44", "ADAM9_ITGB5", "HLA-E_PLXNA3", "APP_TNFRSF14", "CLSTN1_LRP1", "CDH1_RACK1", "MIF_TNFRSF14", "ITGB2_PLAUR", "LAMC2_CD151", "TNC_SDC1", "PLAU_CD44", "COL1A1_CD44", "ICAM1_ITGAX", "ANXA1_ACKR3", "VEGFA_FLT1", "TNC_ITGB6", "CALM1_INSR", "MDK_SDC1", "LGALS3_GPR35", "ITGB1_IGF2R", "CDH1_SLC1A5", "ITGB1_ITGA1", "HSPG2_ITGA2", "ITGB1_PTK2B", "ARF1_INSR", "CDH1_NCSTN", "GSTP1_CD44", "ANXA1_MET", "ANXA1_F2R", "PTPRC_CXCR4", "ITGB1_SDC1", "TNC_ITGAX", "LAMB1_ITGA6", "DSG2_FZD5", "ITGB1_CD46", "APOE_SDC4", "CDH1_ITGB7", "ITGB1_CD47", "CALR_NCSTN", "CALR_LGR4", "LAMB1_ITGA1", "CEACAM1_PTPN6", "APP_F2R", "HSPG2_LRP1", "ICAM1_IL2RG", "CD99_EPHA2", "COL1A1_SDC1", "HLA-DRA_CD4", "LIPH_LRP5", "TNC_ITGAV", "HSP90B1_LRP1", "NECTIN2_TNFRSF12A", "CALR_LRP5", "DSC2_FZD5", "ITGB2_ICAM1", "ITGB1_NCSTN", "TNFSF13_TNFRSF1A", "COL4A1_ITGA1", "PTPRF_RACK1", 
"CXCL3_ACKR3", "LGALS3_FCGR2A", "CLSTN1_CD47", "PECAM1_ITGAV", "RPS19_CD4", "PTPRF_MET", "EFNB2_SDC1", "ICAM1_CSF2RB", "A2M_LRP1", "APOE_LRP1", "FN1_RPSA", "ITGB1_SLC1A5", "CALM1_VIPR1", "PTPRF_INSR", "COL1A1_ITGAV", "HSPG2_KDR", "CDH1_KDR", "BMP2_ENG", "VEGFA_PTK2B", "MMP9_CD44", "ITGB1_ITGA8", "CXCL9_CXCR4", "COL1A1_ITGA2", "CDH1_EGFR", "GUCA2A_GUCY2C", "APOE_LDLR", "PIK3CB_FCGR3A", "VWF_ITGB5", "CCL5_CDH1", "CXCL9_BDKRB2", "ARF1_CD44", "DSC2_DLG1", "LAMC3_ITGAV", "TNFSF10_PIK3CB", "ITGB1_ITGA2", "APOE_SDC3", "ARF1_CD4", "CXCL9_LPAR2", "LRPAP1_SORT1", "TGFB1_ACVRL1", "TNFSF13_TNFRSF14", "SAA1_F2RL1", "NID1_NOTCH2", "CALR_IL7R", "ANXA1_C3AR1", "HSP90B1_TNFRSF1B", "LGALS3_ITGA2", "LAMB1_ITGA3", "BMP2_BMPR2", "COL1A1_ITGA1", "ANXA1_BDKRB2", "PTPRF_ERBB2", "APP_EPHA2", "NID1_NOTCH3", "HLA-DMA_CD4", "CALM1_RPSA", "LRPAP1_LDLR", "APP_TNFRSF21", "LAMC3_ITGB4", "DSG2_EGFR", "HSP90B1_EGFR", "APP_F2RL1", "GSTP1_LRP5", "COL1A1_ITGA3", "DSG2_CD44", "DSG2_INSR", "LGALS3_PTPRK", "CD99_INSR", "EFNB2_INSR", "CDH5_TGFBR2", "SERPINE1_PLAUR", "DSG2_PDGFRA", "APP_BDKRB2", "ITGA4_CXCR4", "PTPRF_PTPRA", "GRN_EGFR", "LAMB1_ITGB4", "F11R_TGFBR1", "PTPRC_CD2", "ARF1_LRP5", "EFNB2_FZD5", "CALR_EGFR", "APP_ACKR3", "HBEGF_CD44", "GSTP1_IL7R", "SERPING1_LRP1", "ITGB1_CSF2RB", "CCL5_SDC1", "LAMB1_SDC1", "TGFB1_ITGAV", "CLSTN1_M6PR", "AGT_F2R", "APOE_LRP5", "LRPAP1_LRP1", "ITGB2_ITGAX", "CEACAM1_EGFR", "CXCL10_CXCR4", "HLA-F_CD8A", "SAA1_F2R", "APP_TGFBR2", "ITGB1_DAB2", "ANXA1_F2RL1", "ICAM1_IL2RA", "PSEN1_NCSTN", "ARF1_PLD2", "LAMC3_ITGA3", "TGFB1_VDR", "ITGB1_ITGAX", "CXCL9_C3AR1", "COPA_NOTCH1", "ADM_PTGER4", "PLXNB2_PTPRK", "COL18A1_ITGA5", "JAG1_CD46", "PCDH1_DERL1", "PTPRC_MET", "APOE_SORL1", "SERPINA1_LRP1", "AGRN_LRP1", "CDH1_ERBB2", "WNT5A_SCARB2", "ADAM15_ITGA5", "HMGB2_ITPR3", "SEMA4D_PLXNB2", "NRG1_LSR", "MDK_SDC3", "CCL5_SDC4", "APP_CD36", "ITGB1_ITGA4", "PECAM1_ACKR3", "ANXA1_FPR3", "EFNB1_FZD5", "EFNB1_ERBB2", "VEGFA_PDGFRA", "MDK_PTPRB", "TGFB1_ENG", "COPA_SORT1", 
"TIMP1_LRP1", "PSAP_ERBB2", "CDH1_PKD1", "DSG2_SEC63", "NID1_ITGAV", "NENF_SEC63", "COL18A1_FLT1", "EFNB1_INSR", "PECAM1_SDC4", "CALM1_TNFRSF1B", "APP_NCSTN", "CALR_FAS", "PLAU_IGF2R", "DSG2_CXCR4", "CCL5_CXCR4", "SAA1_PTAFR", "NAMPT_ANXA6", "PIK3CB_ITGB4", "ITGB1_DAG1", "DSG2_DLG1", "LTB_TNFRSF1A", "CXCL10_BDKRB2", "SERPINA1_SDC2", "ADAM17_TNFRSF1A", "ARF1_MYLK", "ITGB2_ITGAL", "COL18A1_ITGB5", "A2M_TNFRSF14", "SAA1_C3AR1", "TGFB1_ITGB1", "HBEGF_ERBB3", "APP_HRH1", "DSC2_SEC63", "APOE_SDC2", "PIK3CB_FCGR2A", "ITGB1_CD36", "ITGB1_JAM3", "COL18A1_ITGA3", "GRN_TNFRSF1B", "LGALS3_TGFBR2", "APP_GPC1", "LAMC3_CD44", "SAA1_SELENOS", "ITGB1_PDGFRB", "FN1_CD44", "ITGB2_EGFR", "LRPAP1_SORL1", "APP_LRP1", "PECAM1_SDC1", "GMFB_ITPR3", "CXCL16_C3AR1", "LAMB1_ITGAV", "PIK3CB_PDGFRA", "TGFB1_F11R", "GSTP1_MYLK", "ITGA4_RPSA", "PDGFA_PDGFRA", "COL18A1_ITGAV", "CXCL16_LPAR2", "ANXA1_EGFR", "APP_C5AR1", "LAMB1_PTPRF", "SECTM1_CD7", "PSEN1_DAG1", "COPA_EGFR", "ITGB2_JAM3", "GAS6_AXL", "LAMB1_ITGA2", "MANF_SEC63", "LAMB2_RPSA", "COPA_SPHK1", "SERPINE2_LRP1", "CALR_MYLK", "VEGFB_ITGB1", "NAMPT_CUL5", "PSEN1_NOTCH1", "CXCL3_BDKRB2", "CCL5_BDKRB2", "COPA_NCSTN", "ITGA4_CD44", "TGFB1_ITGB5", "HMGB1_TLR2", "CD99_FZD5", "CXCL16_ACKR3", "LAMB2_ITGA6", "HMGB2_CD44", "WNT5A_TFRC", "GRN_CFTR", "LAMB1_SDC2", "HMGB2_DSG2", "AREG_ERBB3", "TGFB1_TGFBR2", "ANXA1_PTPRF", "CCL28_C3AR1", "TGFB1_ITGB6", "HSP90B1_ERBB2", "ANXA1_CD4", "GSTP1_FAS", "ADAM17_ITGA5", "CALM1_EGFR", "AGT_F2RL1", "PTPRC_IFNAR1", "VEGFA_NRP1", "CXCL10_C3AR1", "ADM_RAMP3", "PLAT_LRP1", "CXCL16_CXCR4", "NAMPT_IGF1R", "ARF1_FAS", "CXCL14_CXCR4", "PIK3CB_ERBB3", "F11R_JAM3", "GPNMB_EGFR", "CDH5_ACVRL1", "ANXA1_FAS", "CD320_TGFBR2", "DSG2_DAB2", "CALM1_MYLK", "CDH1_ITGAE", "CXCL16_BDKRB2", "HSPG2_DAG1", "VEGFA_AXL", "ICAM2_CD44", "CXCL3_C3AR1", "PIK3CB_TGFBR2", "WNT5A_LDLR", "ANXA1_CXCR4", "PSEN1_TGFBR1", "PTPRC_INSR", "VWF_ITGAV", "VEGFB_FAT1", "VEGFA_EPHB2", "EFNA1_TGFBR1", "PVR_CDH1", "AIMP1_RACK1", "COL1A1_DDR1", "PTPRC_ERBB2", 
"CXCL12_S1PR1", "CALM2_MYLK", "COPA_PDGFRB", "PSEN1_NOTCH2", "APP_C3AR1", "COL18A1_KDR", "APP_CXCR4", "HMGB1_TLR4", "CDH1_PTPRM", "DSC2_RYK", "COPA_CD8A", "FBN1_CLSTN1", "ADM_CALCRL", "AGT_ATP6AP2", "PTPRF_EGFR", "CSF1_CSF1R", "VEGFA_PIK3CB", "SAA1_BDKRB2", "JAG1_LRP1", "CCL5_C3AR1", "PIK3CB_IL2RG", "CDH1_CFTR", "PIK3CB_AXL", "APP_EDNRB", "ADM_VIPR1", "CCL28_C5AR1", "HDGF_FAS", "AREG_EGFR", "NAMPT_INSR", "VEGFA_PTPRB", "ITGB2_ICAM2", "LAMB1_FAS", "CXCL9_ACKR3", "YARS_RPSA", "CLSTN1_PRNP", "CXCL16_C5AR1", "TNFSF10_TNFRSF10B", "ADAM17_TNFRSF1B", "EFNA2_TGFBR1", "APP_LPAR2", "EFNB2_EPHB2", "FN1_EGFR", "PECAM1_CXCR4", "PTPRF_TGFBR2", "ADM_GIPR", "PLAU_LRP1", "EFNB1_SDC2", "TNFSF13_SDC2", "PDGFA_PDGFRB", "TGFB1_SDC2", "ITGA4_TFRC", "SAA1_ACKR3", "EFNA1_EPHB2", "NRG1_ERBB3", "CALM3_MYLK", "ITGB1_ITGA9", "ANXA1_C5AR1", "THBS1_LRP1", "SAA1_CXCR4", "OCLN_TGFBR2", "AGT_ACKR3", "HMGB1_AGTRAP", "BMP2_NEO1", "APP_IGF1R", "PTPRC_EGFR", "CDH5_P2RX4", "CCL28_CXCR4", "ITGB1_IGF1R", "F11R_PTPRB", "SERPINE1_LRP1", "F11R_TGFBR2", "APP_SORL1", "ITGAL_F11R", "AREG_ERBB2", "LAMB2_ITGB4"]
j_pairs = lr_cc7["enriched_LRs"].pair;

In [21]:
# Pairs present only in `r_pairs`, and pairs present in both lists.
not_in_j = setdiff(r_pairs, j_pairs)
in_both = intersect(r_pairs, j_pairs)
# Swap the first two "_"-separated parts of each missing pair ("A_B" -> "B_A").
not_in_j_reversed = map(not_in_j) do pair
    parts = split(pair, "_")
    string(parts[2], "_", parts[1])
end;

In [22]:
# Pairs found only in `j_pairs`: drop everything listed in `r_pairs`, then
# everything listed in the `pair` column of `duplicates`.
j_unique = setdiff(j_pairs, r_pairs, duplicates.pair)

43-element Vector{String}:
 "COL4A1_ITGB1"
 "LGALS3_ITGB1"
 "HSP90B1_ITGB1"
 "ADAM9_CDH1"
 "HLA-DRA_PTPRC"
 "AREG_ICAM1"
 "HSPG2_ITGB1"
 "ADAM9_ITGB1"
 "PSEN1_CDH1"
 "RTN4_CD99"
 ⋮
 "NPNT_ITGB1"
 "ANXA1_ITGA4"
 "ADAM17_ITGB1"
 "FBN1_ITGB1"
 "LAMB2_ITGB1"
 "ICAM3_ITGB2"
 "PTPRC_ITGAL"
 "EFNB1_CD99"
 "THBS1_ITGB1"

In [26]:
in.(df_7.pair, Ref(j_unique))

540-element BitVector:
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 ⋮
 0
 0
 0
 0
 0
 0
 0
 0
 0

In [29]:
df_7 = lr_cc7["enriched_LRs"]
# Show the rows of the table whose `pair` label is one of `j_unique`.
filter(:pair => in(j_unique), df_7)

Row,from,to,correlation,ligand_FC,Receptor_FC,pair
Unnamed: 0_level_1,String,String,Float64,Float64,Float64,String
1,COL4A1,ITGB1,0.681179,2.4032,1.1237,COL4A1_ITGB1
2,LGALS3,ITGB1,0.677309,0.7695,1.1237,LGALS3_ITGB1
3,HSP90B1,ITGB1,0.64627,0.9562,1.1237,HSP90B1_ITGB1
4,ADAM9,CDH1,0.63985,0.9508,0.8122,ADAM9_CDH1
5,HLA-DRA,PTPRC,0.634468,1.2791,1.6397,HLA-DRA_PTPRC
6,AREG,ICAM1,0.624456,1.4834,1.9715,AREG_ICAM1
7,HSPG2,ITGB1,0.62099,1.3576,1.1237,HSPG2_ITGB1
8,ADAM9,ITGB1,0.60568,0.9508,1.1237,ADAM9_ITGB1
9,PSEN1,CDH1,0.573752,0.7368,0.8122,PSEN1_CDH1
10,RTN4,CD99,0.56588,0.8991,0.9915,RTN4_CD99
