In [None]:
# For Google Colab: uncomment the lines below to install the dependencies
#%pip install git+https://github.com/jmillanacosta/pyaop.git
#%pip install pandas

# Data Tables Generation

This notebook demonstrates how to generate various data tables from an `AOPNetwork`.

- **AOP Table**: Key Event relationships and AOP associations
- **Component Table**: Key Event components and biological processes  
- **Gene Table**: Gene-protein associations with expression data
- **Compound Table**: Chemical stressor information
- **Gene Expression Table**: Detailed gene expression data

First, we build an `AOPNetwork` and populate it with gene, compound, component, organ, and gene expression associations:

In [None]:
import os

import pandas as pd

from pyaop.aop.builder import AOPNetworkBuilder

# Query inputs, named up front so they are easy to find and change
MIE_URI = "https://identifiers.org/aop.events/1502"
EXPRESSION_CONFIDENCE_LEVEL = 80  # passed as `confidence_level` to the gene expression query

# Start from the MIE identifier, then run each association query in turn
builder = AOPNetworkBuilder()
builder.query_by_identifier(query_type="mie", values=MIE_URI)
builder.query_genes_for_ke(include_proteins=False)
builder.query_compounds_for_network()
builder.query_components_for_network()
builder.query_organs_for_kes()
builder.query_gene_expression(confidence_level=EXPRESSION_CONFIDENCE_LEVEL)

# Keep a handle on the populated network; every table below is generated from it
network = builder.network
print(f"Network summary: {network.get_summary()}")

Network summary: {'total_key_events': 12, 'mie_count': 1, 'ao_count': 3, 'ke_count': 8, 'ker_count': 17, 'gene_associations': 26, 'gene_expression_associations': 864, 'compound_associations': 10, 'component_associations': 7, 'organ_associations': 6, 'total_aops': 3}


Then generate the tables:

## 1. AOP Table - Key Event Relationships

The AOP table shows the relationships between Key Events and their associated AOPs:

In [None]:
# Pull the AOP table entries out of the network and wrap them in a DataFrame
aop_table = network.aop_table()
aop_df = pd.DataFrame(aop_table)

# Report the number of entries and the column names
print(f"Generated {len(aop_table)} AOP table (KERs) entries")
print("\nAOP Table columns:", aop_df.columns.tolist())
print("\nFirst few entries:")

# Keep the preview as the last expression so the notebook renders it
aop_df.head()

Generated 17 AOP table (KERs) entries

AOP Table columns: ['source_id', 'source_label', 'source_type', 'ker_label', 'curie', 'target_id', 'target_label', 'target_type', 'aop_list', 'aop_titles', 'is_connected']

First few entries:


Unnamed: 0,source_id,source_label,source_type,ker_label,curie,target_id,target_label,target_type,aop_list,aop_titles,is_connected
0,https://identifiers.org/aop.events/1515,Spermatocyte depletion,ke,1734,aop.relationships:1734,https://identifiers.org/aop.events/1506,Testicular atrophy,ao,AOP:212,Histone deacetylase inhibition leading to test...,True
1,https://identifiers.org/aop.events/1505,"Cell cycle, disrupted",ke,1712,aop.relationships:1712,https://identifiers.org/aop.events/1262,Apoptosis,ke,AOP:212,Histone deacetylase inhibition leading to test...,True
2,https://identifiers.org/aop.events/1502,Histone deacetylase inhibition,mie,2010,aop.relationships:2010,https://identifiers.org/aop.events/1515,Spermatocyte depletion,ke,"AOP:212,AOP:274,AOP:275",Histone deacetylase inhibition leading to test...,True
3,https://identifiers.org/aop.events/1502,Histone deacetylase inhibition,mie,1716,aop.relationships:1716,https://identifiers.org/aop.events/1262,Apoptosis,ke,"AOP:212,AOP:274,AOP:275",Histone deacetylase inhibition leading to test...,True
4,https://identifiers.org/aop.events/1502,Histone deacetylase inhibition,mie,1715,aop.relationships:1715,https://identifiers.org/aop.events/1505,"Cell cycle, disrupted",ke,"AOP:212,AOP:274,AOP:275",Histone deacetylase inhibition leading to test...,True


## 2. Component Table - Key Event Components

The component table shows the biological processes and objects associated with each Key Event:

In [None]:
# Fetch the component table entries from the network and wrap them in a DataFrame
component_table = network.component_table()
component_df = pd.DataFrame(component_table)

print(f"Generated {len(component_table)} component table entries")

# Bare last expression: the notebook renders the whole frame
component_df

Generated 8 component table entries


Unnamed: 0,ke_id,ke_number,ke_uri,ke_name,action_processes,organs,action_process_count,organ_count
0,aop.events_1262,1262,https://identifiers.org/aop.events/1262,Apoptosis,"[{'action': 'increased', 'process_id': 'proces...",[{'organ_id': 'http://purl.obolibrary.org/obo/...,1,1
1,aop.events_1503,1503,https://identifiers.org/aop.events/1503,"Histone acetylation, increase","[{'action': 'increased', 'process_id': 'proces...",[{'organ_id': 'http://purl.obolibrary.org/obo/...,1,1
2,aop.events_1502,1502,https://identifiers.org/aop.events/1502,Histone deacetylase inhibition,"[{'action': 'decreased', 'process_id': 'proces...",[{'organ_id': 'http://purl.obolibrary.org/obo/...,1,1
3,aop.events_1560,1560,https://identifiers.org/aop.events/1560,"Cell differentiation, altered","[{'action': 'abnormal', 'process_id': 'process...",[],1,0
4,aop.events_1239,1239,https://identifiers.org/aop.events/1239,"Altered, Gene Expression","[{'action': 'abnormal', 'process_id': 'process...",[],1,0
5,aop.events_1506,1506,https://identifiers.org/aop.events/1506,Testicular atrophy,"[{'action': 'increased', 'process_id': 'proces...","[{'organ_id': 'object_FMA_7210', 'organ_name':...",1,1
6,aop.events_1515,1515,https://identifiers.org/aop.events/1515,Spermatocyte depletion,[],"[{'organ_id': 'object_FMA_7210', 'organ_name':...",0,1
7,aop.events_1505,1505,https://identifiers.org/aop.events/1505,"Cell cycle, disrupted","[{'action': 'disrupted', 'process_id': 'proces...",[{'organ_id': 'http://purl.obolibrary.org/obo/...,1,1


## 3. Gene Table - Gene-Protein Associations

The gene table shows genes, their protein products, and expression data (the protein columns are empty here because the genes were queried with `include_proteins=False`):

In [None]:
# Fetch the gene table entries from the network and wrap them in a DataFrame
gene_table = network.gene_table()
gene_df = pd.DataFrame(gene_table)

print(f"Generated {len(gene_table)} gene table entries")

# Bare last expression: the notebook renders the frame
gene_df

Generated 19 gene table entries


Unnamed: 0,gene,protein,protein_id,gene_id,protein_node_id,expression_organs,expression_levels,expression_confidence,expression_ids
0,ENSG00000048052,,,gene_ENSG00000048052,,testis; testis; testis; testis; testis; testis...,89.578460000000006858; 89.578460000000006858; ...,high confidence level; high confidence level; ...,http://bgee.org/#EXPRESSION_103607213; http://...
1,ENSG00000049130,,,gene_ENSG00000049130,,testis; testis; testis; testis; testis; testis...,55.153919999999999391; 55.153919999999999391; ...,high confidence level; high confidence level; ...,http://bgee.org/#EXPRESSION_154523557; http://...
2,ENSG00000094631,,,gene_ENSG00000094631,,testis; testis; testis; testis; testis; testis...,97.346350000000001046; 97.346350000000001046; ...,high confidence level; high confidence level; ...,http://bgee.org/#EXPRESSION_40168603; http://b...
3,ENSG00000109320,,,gene_ENSG00000109320,,testis; testis; testis; testis; testis; testis...,83.498869999999996594; 83.498869999999996594; ...,high confidence level; high confidence level; ...,http://bgee.org/#EXPRESSION_162691126; http://...
4,ENSG00000110427,,,gene_ENSG00000110427,,testis; testis; testis; testis; testis; testis...,51.020060000000000855; 51.020060000000000855; ...,high confidence level; high confidence level; ...,http://bgee.org/#EXPRESSION_167787429; http://...
5,ENSG00000116478,,,gene_ENSG00000116478,,testis; testis; testis; testis; testis; testis...,94.644689999999997099; 94.644689999999997099; ...,high confidence level; high confidence level; ...,http://bgee.org/#EXPRESSION_35376778; http://b...
6,ENSG00000118971,,,gene_ENSG00000118971,,testis; testis; testis; testis; testis; testis...,73.78740999999999417; 73.78740999999999417; 99...,high confidence level; high confidence level; ...,http://bgee.org/#EXPRESSION_26089081; http://b...
7,ENSG00000124762,,,gene_ENSG00000124762,,testis; testis; testis; testis; testis; testis...,96.325220000000001619; 96.325220000000001619; ...,high confidence level; high confidence level; ...,http://bgee.org/#EXPRESSION_147593673; http://...
8,ENSG00000141510,,,gene_ENSG00000141510,,testis; testis; testis; testis; testis; testis...,76.709209999999998786; 76.709209999999998786; ...,high confidence level; high confidence level; ...,http://bgee.org/#EXPRESSION_47923384; http://b...
9,ENSG00000147099,,,gene_ENSG00000147099,,testis; testis; testis; testis; testis; testis...,67.42464999999999975; 67.42464999999999975; 72...,high confidence level; high confidence level; ...,http://bgee.org/#EXPRESSION_219681793; http://...


## 4. Gene Expression Table - Detailed Expression Data

The gene expression table holds the detailed expression records, including organ, expression level, confidence, and developmental stage:

In [None]:
# Fetch the gene expression entries from the network and wrap them in a DataFrame
gene_expr_table = network.gene_expression_table()
gene_expr_df = pd.DataFrame(gene_expr_table)

print(f"Generated {len(gene_expr_table)} gene expression entries")

# Bare last expression: the notebook renders the frame
gene_expr_df

Generated 19 gene expression entries


Unnamed: 0,gene_id,gene_label,organ,organ_id,expression_level,confidence,developmental_stage,expr_id
0,ENSG00000048052,ENSG00000048052,testis,http://purl.obolibrary.org/obo/UBERON_0000473,89.57846,high confidence level,80 year-old and over stage (human),http://bgee.org/#EXPRESSION_103607213
1,ENSG00000049130,ENSG00000049130,testis,http://purl.obolibrary.org/obo/UBERON_0000473,55.15392,high confidence level,80 year-old and over stage (human),http://bgee.org/#EXPRESSION_154523557
2,ENSG00000094631,ENSG00000094631,testis,http://purl.obolibrary.org/obo/UBERON_0000473,97.34635,high confidence level,80 year-old and over stage (human),http://bgee.org/#EXPRESSION_40168603
3,ENSG00000109320,ENSG00000109320,testis,http://purl.obolibrary.org/obo/UBERON_0000473,83.49886999999998,high confidence level,80 year-old and over stage (human),http://bgee.org/#EXPRESSION_162691126
4,ENSG00000110427,ENSG00000110427,testis,http://purl.obolibrary.org/obo/UBERON_0000473,51.02006,high confidence level,80 year-old and over stage (human),http://bgee.org/#EXPRESSION_167787429
5,ENSG00000116478,ENSG00000116478,testis,http://purl.obolibrary.org/obo/UBERON_0000473,94.64469,high confidence level,third decade stage (human),http://bgee.org/#EXPRESSION_35376778
6,ENSG00000118971,ENSG00000118971,testis,http://purl.obolibrary.org/obo/UBERON_0000473,73.78740999999998,high confidence level,80 year-old and over stage (human),http://bgee.org/#EXPRESSION_26089081
7,ENSG00000124762,ENSG00000124762,testis,http://purl.obolibrary.org/obo/UBERON_0000473,96.32522,high confidence level,80 year-old and over stage (human),http://bgee.org/#EXPRESSION_147593673
8,ENSG00000141510,ENSG00000141510,testis,http://purl.obolibrary.org/obo/UBERON_0000473,76.70921,high confidence level,80 year-old and over stage (human),http://bgee.org/#EXPRESSION_47923384
9,ENSG00000147099,ENSG00000147099,testis,http://purl.obolibrary.org/obo/UBERON_0000473,67.42465,high confidence level,80 year-old and over stage (human),http://bgee.org/#EXPRESSION_219681793


## 5. Compound Table - Chemical Stressors

The compound table shows chemical compounds that act as stressors in AOPs:

In [None]:
# Fetch the compound table entries from the network and wrap them in a DataFrame
compound_table = network.compound_table()
compound_df = pd.DataFrame(compound_table)

print(f"Generated {len(compound_table)} compound table entries")

# Bare last expression: the notebook renders the frame
compound_df

Generated 8 compound table entries


Unnamed: 0,compound_name,chemical_label,pubchem_id,pubchem_compound,cas_id,chemical_uri,smiles,node_id
0,Apicidin,Apicidin,467801,https://identifiers.org/pubchem.compound/467801,467801,https://identifiers.org/cas/183506-66-3,,chemical_467801
1,Apicidin,Apicidin,6918328,https://identifiers.org/pubchem.compound/6918328,6918328,https://identifiers.org/cas/183506-66-3,,chemical_6918328
2,Butyrate,Butyrate,104775,https://identifiers.org/pubchem.compound/104775,104775,https://identifiers.org/cas/461-55-2,,chemical_104775
3,MS-275,MS-275,4261,https://identifiers.org/pubchem.compound/4261,4261,https://identifiers.org/cas/209783-80-2,,chemical_4261
4,Methoxyacetic acid,Methoxyacetic acid,12251,https://identifiers.org/pubchem.compound/12251,12251,https://identifiers.org/cas/625-45-6,,chemical_12251
5,Suberoylanilide hydroxamic acid,Suberoylanilide hydroxamic acid,5311,https://identifiers.org/pubchem.compound/5311,5311,https://identifiers.org/cas/149647-78-9,,chemical_5311
6,Trichostatin A,Trichostatin A,444732,https://identifiers.org/pubchem.compound/444732,444732,https://identifiers.org/cas/58880-19-6,,chemical_444732
7,Valproic acid,Valproic acid,3121,https://identifiers.org/pubchem.compound/3121,3121,https://identifiers.org/cas/99-66-1,,chemical_3121


## Table Export and Usage

These tables can easily be exported for further analysis; the example below writes each one to a CSV file:

In [None]:
# Example: export every table to a CSV file inside one output directory

# Create the output directory (exist_ok=True makes re-running this cell safe)
output_dir = "aop_tables"
os.makedirs(output_dir, exist_ok=True)

# Pair each DataFrame built in the sections above with its target file name
tables_to_export = [
    (aop_df, "aop_relationships.csv"),
    (component_df, "ke_components.csv"),
    (gene_df, "genes_proteins.csv"),
    (gene_expr_df, "gene_expressions.csv"),
    (compound_df, "compounds.csv"),
]

for table_data, filename in tables_to_export:
    filepath = os.path.join(output_dir, filename)
    # index=False keeps the DataFrame's row index out of the CSV file
    table_data.to_csv(filepath, index=False)
    print(f"Exported {filename}: {len(table_data)} rows, {len(table_data.columns)} columns")

print(f"\nAll tables exported to {output_dir}/ directory")

Exported aop_relationships.csv: 17 rows, 11 columns
Exported ke_components.csv: 8 rows, 8 columns
Exported genes_proteins.csv: 19 rows, 9 columns
Exported gene_expressions.csv: 19 rows, 8 columns
Exported compounds.csv: 8 rows, 8 columns

All tables exported to aop_tables/ directory
