In [1]:
from pyaop.aop.builder import AOPNetworkBuilder

# Build a network with various associations
builder = AOPNetworkBuilder()
builder.query_by_identifier(
    query_type="mie", values="https://identifiers.org/aop.events/1502"
)
builder.query_genes_for_ke(include_proteins=False)
builder.query_compounds_for_network()
builder.query_components_for_network()
builder.query_gene_expression(confidence_level=80)

(<pyaop.aop.core_model.AOPNetwork at 0x7abc6c765690>,
 '\n            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n            PREFIX orth: <http://purl.org/net/orth#>\n            PREFIX lscr: <http://purl.org/lscr#>\n            PREFIX genex: <http://purl.org/genex#>\n            PREFIX obo: <http://purl.obolibrary.org/obo/>\n            PREFIX dcterms: <http://purl.org/dc/terms/>\n\n            SELECT ?gene_idI ?gene_id ?anatomical_entity_id ?anatomical_entity_name ?developmental_stage_id ?developmental_stage_name ?expression_level ?confidence_level_id ?confidence_level_name ?expr\n            WHERE {\n              VALUES ?gene_id { "ENSG00000048052" "ENSG00000049130" "ENSG00000094631" "ENSG00000109320" "ENSG00000110427" "ENSG00000116478" "ENSG00000118971" "ENSG00000124762" "ENSG00000141510" "ENSG00000147099" "ENSG00000163517" "ENSG00000167815" "ENSG00000171791" "ENSG00000172465" "ENSG00000184260" "ENSG

In [2]:
# Get the populated network
network = builder.network
print(f"Network summary: {network.get_summary()}")

Network summary: {'total_key_events': 12, 'mie_count': 1, 'ao_count': 3, 'ke_count': 8, 'ker_count': 17, 'gene_associations': 26, 'gene_expression_associations': 0, 'compound_associations': 10, 'component_associations': 7, 'organ_associations': 0, 'total_aops': 3}


# Data Tables Generation

This notebook demonstrates how to generate various data tables from an `AOPNetwork`.

## Overview of Available Tables

The pyAOP library provides several table builders that extract structured data from the `AOPNetwork`:

- **AOP Table**: Key Event relationships and AOP associations
- **Component Table**: Key Event components and biological processes
- **Gene Table**: Gene-protein associations with expression data
- **Compound Table**: Chemical stressor information
- **Gene Expression Table**: Detailed gene expression data

In [3]:
# Import the table builders that work with AOPNetwork data model
from pyaop.exports.data_tables.aop import AOPTableBuilder
from pyaop.exports.data_tables.component import ComponentTableBuilder
from pyaop.exports.data_tables.gene import GeneTableBuilder, GeneExpressionTableBuilder
from pyaop.exports.data_tables.compound import CompoundTableBuilder

import pandas as pd

## 1. AOP Table - Key Event Relationships

The AOP table shows the relationships between Key Events and their associated AOPs:

In [4]:
# Generate AOP table from the network data model
aop_builder = AOPTableBuilder(network)
aop_table = aop_builder.build_aop_table()

print(f"Generated {len(aop_table)} AOP table (KERs) entries")

# Convert to DataFrame for better display
aop_df = pd.DataFrame(aop_table)
print("\nAOP Table columns:", list(aop_df.columns))
print("\nFirst few entries:")
aop_df.head()

Generated 17 AOP table (KERs) entries

AOP Table columns: ['source_id', 'source_label', 'source_type', 'ker_label', 'curie', 'target_id', 'target_label', 'target_type', 'aop_list', 'aop_titles', 'is_connected']

First few entries:


Unnamed: 0,source_id,source_label,source_type,ker_label,curie,target_id,target_label,target_type,aop_list,aop_titles,is_connected
0,https://identifiers.org/aop.events/1515,Spermatocyte depletion,ke,1734,aop.relationships:1734,https://identifiers.org/aop.events/1506,Testicular atrophy,ao,AOP:212,Histone deacetylase inhibition leading to test...,True
1,https://identifiers.org/aop.events/1505,"Cell cycle, disrupted",ke,1712,aop.relationships:1712,https://identifiers.org/aop.events/1262,Apoptosis,ke,AOP:212,Histone deacetylase inhibition leading to test...,True
2,https://identifiers.org/aop.events/1502,Histone deacetylase inhibition,mie,2010,aop.relationships:2010,https://identifiers.org/aop.events/1515,Spermatocyte depletion,ke,"AOP:212,AOP:274,AOP:275",Histone deacetylase inhibition leading to test...,True
3,https://identifiers.org/aop.events/1502,Histone deacetylase inhibition,mie,1716,aop.relationships:1716,https://identifiers.org/aop.events/1262,Apoptosis,ke,"AOP:212,AOP:274,AOP:275",Histone deacetylase inhibition leading to test...,True
4,https://identifiers.org/aop.events/1502,Histone deacetylase inhibition,mie,1715,aop.relationships:1715,https://identifiers.org/aop.events/1505,"Cell cycle, disrupted",ke,"AOP:212,AOP:274,AOP:275",Histone deacetylase inhibition leading to test...,True


## 2. Component Table - Key Event Components

The component table shows the biological processes and objects associated with each Key Event:

In [5]:
# Generate component table from the network associations
component_builder = ComponentTableBuilder(network)
component_table = component_builder.build_component_table()

print(f"Generated {len(component_table)} component table entries")
component_df = pd.DataFrame(component_table)

component_df

Generated 7 component table entries


Unnamed: 0,ke_id,ke_number,ke_uri,ke_name,action_processes,organs,action_process_count,organ_count
0,aop.events_1506,1506,https://identifiers.org/aop.events/1506,Testicular atrophy,"[{'action': 'increased', 'process_id': 'proces...",[],1,0
1,aop.events_1505,1505,https://identifiers.org/aop.events/1505,"Cell cycle, disrupted","[{'action': 'disrupted', 'process_id': 'proces...",[],1,0
2,aop.events_1560,1560,https://identifiers.org/aop.events/1560,"Cell differentiation, altered","[{'action': 'abnormal', 'process_id': 'process...",[],1,0
3,aop.events_1262,1262,https://identifiers.org/aop.events/1262,Apoptosis,"[{'action': 'increased', 'process_id': 'proces...",[],1,0
4,aop.events_1502,1502,https://identifiers.org/aop.events/1502,Histone deacetylase inhibition,"[{'action': 'decreased', 'process_id': 'proces...",[],1,0
5,aop.events_1503,1503,https://identifiers.org/aop.events/1503,"Histone acetylation, increase","[{'action': 'increased', 'process_id': 'proces...",[],1,0
6,aop.events_1239,1239,https://identifiers.org/aop.events/1239,"Altered, Gene Expression","[{'action': 'abnormal', 'process_id': 'process...",[],1,0


## 3. Gene Table - Gene-Protein Associations

The gene table shows genes, their protein products, and expression data:

In [6]:
# Generate gene table from gene associations and expression data
gene_builder = GeneTableBuilder(network)
gene_table = gene_builder.build_gene_table()

print(f"Generated {len(gene_table)} gene table entries")

# Convert to DataFrame for analysis
gene_df = pd.DataFrame(gene_table)

gene_df

Generated 19 gene table entries


Unnamed: 0,gene,protein,protein_id,gene_id,protein_node_id,expression_organs,expression_levels,expression_confidence,expression_ids
0,ENSG00000048052,,,gene_ENSG00000048052,,,,,
1,ENSG00000049130,,,gene_ENSG00000049130,,,,,
2,ENSG00000094631,,,gene_ENSG00000094631,,,,,
3,ENSG00000109320,,,gene_ENSG00000109320,,,,,
4,ENSG00000110427,,,gene_ENSG00000110427,,,,,
5,ENSG00000116478,,,gene_ENSG00000116478,,,,,
6,ENSG00000118971,,,gene_ENSG00000118971,,,,,
7,ENSG00000124762,,,gene_ENSG00000124762,,,,,
8,ENSG00000141510,,,gene_ENSG00000141510,,,,,
9,ENSG00000147099,,,gene_ENSG00000147099,,,,,


## 4. Gene Expression Table - Detailed Expression Data

A dedicated table for gene expression information:

In [7]:
# Generate detailed gene expression table
gene_expr_builder = GeneExpressionTableBuilder(network)
gene_expr_table = gene_expr_builder.build_gene_expression_table()

print(f"Generated {len(gene_expr_table)} gene expression entries")

# Convert to DataFrame
gene_expr_df = pd.DataFrame(gene_expr_table)
gene_expr_df

Generated 0 gene expression entries


## 5. Compound Table - Chemical Stressors

The compound table shows chemical compounds that act as stressors in AOPs:

In [8]:
# Generate compound table from compound associations
compound_builder = CompoundTableBuilder(network)
compound_table = compound_builder.build_compound_table()

print(f"Generated {len(compound_table)} compound table entries")

# Convert to DataFrame
compound_df = pd.DataFrame(compound_table)
compound_df

Generated 8 compound table entries


Unnamed: 0,compound_name,chemical_label,pubchem_id,pubchem_compound,cas_id,chemical_uri,smiles,node_id
0,Apicidin,Apicidin,467801,https://identifiers.org/pubchem.compound/467801,467801,https://identifiers.org/cas/183506-66-3,,chemical_467801
1,Apicidin,Apicidin,6918328,https://identifiers.org/pubchem.compound/6918328,6918328,https://identifiers.org/cas/183506-66-3,,chemical_6918328
2,Butyrate,Butyrate,104775,https://identifiers.org/pubchem.compound/104775,104775,https://identifiers.org/cas/461-55-2,,chemical_104775
3,MS-275,MS-275,4261,https://identifiers.org/pubchem.compound/4261,4261,https://identifiers.org/cas/209783-80-2,,chemical_4261
4,Methoxyacetic acid,Methoxyacetic acid,12251,https://identifiers.org/pubchem.compound/12251,12251,https://identifiers.org/cas/625-45-6,,chemical_12251
5,Suberoylanilide hydroxamic acid,Suberoylanilide hydroxamic acid,5311,https://identifiers.org/pubchem.compound/5311,5311,https://identifiers.org/cas/149647-78-9,,chemical_5311
6,Trichostatin A,Trichostatin A,444732,https://identifiers.org/pubchem.compound/444732,444732,https://identifiers.org/cas/58880-19-6,,chemical_444732
7,Valproic acid,Valproic acid,3121,https://identifiers.org/pubchem.compound/3121,3121,https://identifiers.org/cas/99-66-1,,chemical_3121


## Table Export and Usage

These tables can be easily exported to various formats for further analysis:

In [14]:
# Example: Export tables to CSV files
import os

# Create output directory
output_dir = "aop_tables"
os.makedirs(output_dir, exist_ok=True)

# Export each table
tables_to_export = [
    (aop_df, "aop_relationships.csv"),
    (component_df, "ke_components.csv"), 
    (gene_df, "genes_proteins.csv"),
    (gene_expr_df, "gene_expressions.csv"),
    (compound_df, "compounds.csv")
]

for table_data, filename in tables_to_export:
    filepath = os.path.join(output_dir, filename)
    table_data.to_csv(filepath, index=False)
    print(
        f"Exported {filename}: {len(table_data)} rows, {len(table_data.columns)} columns"
    )

print(f"\nAll tables exported to {output_dir}/ directory")

Exported aop_relationships.csv: 17 rows, 11 columns
Exported ke_components.csv: 7 rows, 8 columns
Exported genes_proteins.csv: 19 rows, 9 columns
Exported gene_expressions.csv: 0 rows, 0 columns
Exported compounds.csv: 8 rows, 8 columns

All tables exported to aop_tables/ directory
