In [1]:
import pandas as pd
mirna_prior = pd.read_csv('data/transmir.csv')
mirna_exp = pd.read_csv('data/holland_miRNA_exp.csv')
print(mirna_prior.head())
print(mirna_exp.head())

  source             target  mor
0    SRF     hsa-miR-210-3p    1
1   MYOG  hsa-miR-133a-1-3p    1
2   MYOG  hsa-miR-133a-2-3p    1
3   MYOG    hsa-miR-133b-3p    1
4   MYOG     hsa-miR-206-3p    1
  Unnamed: 0  hsa-miR-335-5p  hsa-miR-338-3p  hsa-miR-324-3p  hsa-miR-18a-3p  \
0  gene:1004        0.141611        0.250352        0.704130       -0.383189   
1  gene:1007       -1.321767       -0.046397       -3.864417        0.099498   
2  gene:1016       -3.050029        1.332095       -0.609784        0.323857   
3  gene:1079       -1.887631       -1.375533        0.270846        1.155418   
4   gene:118        0.379413       -1.922098        0.492464        0.077306   

   hsa-miR-299-5p  hsa-miR-483-5p  hsa-miR-885-5p  hsa-miR-193b-5p  \
0        0.351972        0.516197        0.752274         0.138512   
1       -1.459357        2.375362        0.492656         0.833343   
2       -1.153341        0.147729       -1.217415        -0.994623   
3        1.905942        2.169821        

In [3]:
# Get the set of miRNA targets from mirna_prior
mirna_targets = set(mirna_prior["target"].to_list())

# Get the set of miRNA column names from mirna_exp (excluding the first column which is Unnamed: 0)
mirna_columns = set(mirna_exp.columns[1:].to_list())

# Find the intersection
common_mirnas = mirna_targets.intersection(mirna_columns)

# Count and print the number of common miRNAs
print(f"Number of miRNAs found in both datasets: {len(common_mirnas)}")

# Optionally, display some of the common miRNAs
print("Sample of common miRNAs:")
for mirna in list(common_mirnas)[:10]:  # Show first 10
    print(mirna)


Number of miRNAs found in both datasets: 298
Sample of common miRNAs:
hsa-miR-143-3p
hsa-miR-142-3p
hsa-miR-487a-5p
hsa-miR-197-3p
hsa-miR-519d-3p
hsa-let-7i-5p
hsa-miR-552-3p
hsa-miR-195-5p
hsa-miR-15a-5p
hsa-miR-150-5p


In [4]:
tf_gene_table = pd.read_csv('data/collectri.csv')
tf_gene_table.head()

Unnamed: 0,source,target,mor
0,MYC,TERT,1
1,SPI1,BGLAP,1
2,SMAD3,JUN,1
3,SMAD4,JUN,1
4,STAT5A,IL2,1


In [5]:
# Create a matrix representation of the tf_gene_table
tf_matrix = pd.pivot_table(
    tf_gene_table, 
    values='mor', 
    index='source', 
    columns='target', 
    fill_value=0
)

# Display basic info about the matrix
print(f"Matrix shape: {tf_matrix.shape}")
print(f"Number of source TFs: {len(tf_matrix.index)}")
print(f"Number of target genes: {len(tf_matrix.columns)}")
print(f"Number of non-zero interactions: {(tf_matrix != 0).sum().sum()}")

# Display a small sample of the matrix
tf_matrix.iloc[:5, :5]

Matrix shape: (1186, 6651)
Number of source TFs: 1186
Number of target genes: 6651
Number of non-zero interactions: 43056


target,A2M,A2ML1,A4GALT,AACS,AANAT
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABL1,0.0,0.0,0.0,0.0,0.0
ADNP,0.0,0.0,0.0,0.0,0.0
ADNP2,0.0,0.0,0.0,0.0,0.0
AEBP1,0.0,0.0,0.0,0.0,0.0
AEBP2,0.0,0.0,0.0,0.0,0.0


In [7]:
tf_matrix.to_csv('data/tf_matrix.csv')

In [8]:
tf_mir_table = pd.read_csv('data/transmir.csv')
tf_gene_table

Unnamed: 0,source,target,mor
0,MYC,TERT,1
1,SPI1,BGLAP,1
2,SMAD3,JUN,1
3,SMAD4,JUN,1
4,STAT5A,IL2,1
...,...,...,...
43051,AP1,GRIN1,1
43052,AP1,SQSTM1,1
43053,NFKB,SQSTM1,1
43054,NFKB,ORM1,1


In [10]:
# Create a matrix representation of the tf_gene_table
tf_matrix = pd.pivot_table(
    tf_mir_table, 
    values='mor', 
    index='source', 
    columns='target', 
    fill_value=0
)

# Display basic info about the matrix
print(f"Matrix shape: {tf_matrix.shape}")
print(f"Number of source TFs: {len(tf_matrix.index)}")
print(f"Number of target genes: {len(tf_matrix.columns)}")
print(f"Number of non-zero interactions: {(tf_matrix != 0).sum().sum()}")

# Display a small sample of the matrix
tf_matrix.iloc[:5, :5]

Matrix shape: (415, 978)
Number of source TFs: 415
Number of target genes: 978
Number of non-zero interactions: 5532


target,hsa-let-7-3p,hsa-let-7-5p,hsa-let-7a-1-3p,hsa-let-7a-1-5p,hsa-let-7a-2-3p
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AGO2,0.0,0.0,0.0,0.0,0.0
AHR,0.0,0.0,0.0,0.0,0.0
AKT1,0.0,0.0,0.0,0.0,0.0
AKT2,0.0,0.0,0.0,0.0,0.0
AKT3,0.0,0.0,0.0,0.0,0.0


In [11]:
tf_matrix.to_csv('data/tf_mir_matrix.csv')

In [12]:
mir_gene_table = pd.read_csv('data/miRTarBase_SE_WR.csv')
mir_gene_table.head()

Unnamed: 0,miRTarBase ID,miRNA,Species (miRNA),Target Gene,Target Gene (Entrez ID),Species (Target Gene),Experiments,Support Type,References (PMID)
0,MIRT003105,hsa-miR-122-5p,hsa,SLC7A1,6541,hsa,Luciferase reporter assay//Western blot,Functional MTI,17179747
1,MIRT003112,hsa-miR-122-5p,hsa,ADAM17,6868,hsa,Luciferase reporter assay//qRT-PCR,Functional MTI,19296470
2,MIRT003111,hsa-miR-122-5p,hsa,NUMBL,9253,hsa,Luciferase reporter assay//qRT-PCR,Functional MTI,19296470
3,MIRT003107,hsa-miR-122-5p,hsa,FOXJ3,22887,hsa,Luciferase reporter assay//qRT-PCR,Functional MTI,19296470
4,MIRT003106,hsa-miR-122-5p,hsa,XPO6,23214,hsa,Luciferase reporter assay//qRT-PCR,Functional MTI,19296470


In [16]:
# Create a matrix representation of the mir_gene_table
# First make a copy and set all 'mor' values to -1 as requested
mir_gene_matrix_data = mir_gene_table.copy()
mir_gene_matrix_data['mor'] = -1

# Create the matrix using pivot_table
mir_matrix = pd.pivot_table(
    mir_gene_matrix_data, 
    values='mor', 
    index='miRNA', 
    columns='Target Gene', 
    fill_value=0
)

# Display basic info about the matrix
print(f"Matrix shape: {mir_matrix.shape}")
print(f"Number of miRNAs: {len(mir_matrix.index)}")
print(f"Number of target genes: {len(mir_matrix.columns)}")
print(f"Number of non-zero interactions: {(mir_matrix != 0).sum().sum()}")

# Display a small sample of the matrix
mir_matrix.iloc[:5, :5]

# Save the matrix to a CSV file
mir_matrix.to_csv('data/mir_gene_matrix.csv')

Matrix shape: (1020, 3259)
Number of miRNAs: 1020
Number of target genes: 3259
Number of non-zero interactions: 9152


In [19]:
expr_mirna = pd.read_csv("data/holland_miRNA_exp.csv", index_col=0).T
expr_mirna

Unnamed: 0,gene:1004,gene:1007,gene:1016,gene:1079,gene:118,gene:1244,gene:1257,gene:1271,gene:1272,gene:1288,...,gene:531,gene:577,gene:643,gene:649,gene:712,gene:87,gene:885,gene:888,gene:90,gene:941
hsa-miR-335-5p,0.141611,-1.321767,-3.050029,-1.887631,0.379413,-0.784947,-0.002571,-0.003384,0.568045,0.277100,...,0.602332,0.379413,0.591079,3.358881,0.690460,1.543854,0.575003,-0.031701,-1.425735,-0.514254
hsa-miR-338-3p,0.250352,-0.046397,1.332095,-1.375533,-1.922098,0.430628,0.217960,0.096280,0.077799,-1.177185,...,0.163142,-1.922098,-0.447427,-0.969679,-4.163376,0.913810,0.576817,-0.235505,-0.999196,-0.234419
hsa-miR-324-3p,0.704130,-3.864417,-0.609784,0.270846,0.492464,-0.784821,-1.025889,0.609225,0.480692,0.482811,...,-1.477494,0.492464,0.311113,0.044173,-4.710498,0.294456,-1.170910,-0.541314,-1.688923,-0.932626
hsa-miR-18a-3p,-0.383189,0.099498,0.323857,1.155418,0.077306,-0.437243,-0.981845,-0.531475,-0.457030,0.993041,...,-0.993624,0.077306,1.453736,-0.523398,-2.480339,1.156752,-0.855128,-0.454534,0.945017,-0.574665
hsa-miR-299-5p,0.351972,-1.459357,-1.153341,1.905942,-0.272614,-0.369684,-0.475882,-0.394260,-0.076574,-0.194762,...,0.618973,-0.272614,0.496954,0.387894,0.130125,-0.563968,0.038531,0.432909,-1.210118,-0.524134
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
hsa-miR-30b-3p,0.207897,-2.247840,0.773578,1.109289,0.190310,-0.302317,-0.918972,0.323626,0.244774,0.069337,...,-0.803412,0.190310,0.608900,1.013613,-2.056555,1.692031,-0.712749,0.236853,0.578046,-0.975968
hsa-let-7i-3p,-0.691545,-4.851146,0.379675,-0.806143,-0.431869,0.010440,-0.151079,0.167507,-0.958132,0.773006,...,0.014639,-0.431869,0.827850,0.229385,-1.728703,1.165394,-0.299366,0.954776,2.395499,-0.443521
hsa-miR-15b-3p,0.113958,-1.166048,1.329745,1.627123,0.957060,-0.018490,-1.221672,0.184130,0.290876,0.930726,...,-0.549807,0.957060,1.865294,0.504939,1.892154,1.212598,-1.033257,0.624943,1.460024,-0.042215
hsa-miR-629-3p,-0.666214,-2.457469,-0.124792,0.499321,0.043335,0.352945,-0.404762,-0.293690,-0.262890,1.379049,...,-0.201843,0.043335,0.495900,0.081782,-0.717982,-0.031156,-0.622976,0.555345,0.686573,0.352398
