Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add graph construction from a dataframe #264

Merged
merged 3 commits into from
Feb 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* [Logging] - [#242](https://github.com/a-r-j/graphein/pull/242) Adds control of protein graph construction logging. Resolves [#238](https://github.com/a-r-j/graphein/issues/238)

#### Protein
* [Feature] - [#264](https://github.com/a-r-j/graphein/pull/264) Adds entrypoint to `graphein.protein.graphs.construct_graph` for passing in a BioPandas dataframe directly.
* [Feature] - [#229](https://github.com/a-r-j/graphein/pull/229) Adds support for filtering KNN edges based on self-loops and chain membership. Contribution by @anton-bushuiev.
* [Feature] - [#234](https://github.com/a-r-j/graphein/pull/234) Adds support for aggregating node features over residues (`graphein.protein.features.sequence.utils.aggregate_feature_over_residues`).
* [Bugfix] - [#234](https://github.com/a-r-j/graphein/pull/234) Fixes use of nullcontext in silent graph construction.
Expand Down
36 changes: 22 additions & 14 deletions graphein/protein/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,7 @@ def construct_graph(
pdb_path: Optional[str] = None,
uniprot_id: Optional[str] = None,
pdb_code: Optional[str] = None,
df: Optional[pd.DataFrame] = None,
chain_selection: str = "all",
model_index: int = 1,
df_processing_funcs: Optional[List[Callable]] = None,
Expand All @@ -632,7 +633,8 @@ def construct_graph(
verbose: bool = True,
) -> nx.Graph:
"""
Constructs protein structure graph from a ``pdb_code`` or ``pdb_path``.
Constructs protein structure graph from a ``pdb_code``, ``pdb_path``,
``uniprot_id`` or a BioPandas DataFrame containing ``ATOM`` data.

Users can provide a :class:`~graphein.protein.config.ProteinGraphConfig`
object to specify construction parameters.
Expand All @@ -655,6 +657,9 @@ def construct_graph(
:param uniprot_id: UniProt accession ID to build graph from AlphaFold2DB.
Default is ``None``.
:type uniprot_id: str, optional
:param df: Pandas dataframe containing ATOM data to build graph from.
Default is ``None``.
:type df: pd.DataFrame, optional
:param chain_selection: String of polypeptide chains to include in graph.
E.g. ``"ABDF"`` or ``"all"``. Default is ``"all"``.
:type chain_selection: str
Expand Down Expand Up @@ -683,9 +688,14 @@ def construct_graph(
:rtype: nx.Graph
"""

if pdb_code is None and pdb_path is None and uniprot_id is None:
if (
pdb_code is None
and pdb_path is None
and uniprot_id is None
and df is None
):
raise ValueError(
"Either a PDB ID, UniProt ID or a path to a local PDB file"
"Either a PDB ID, UniProt ID, a dataframe or a path to a local PDB file"
" must be specified to construct a graph"
)

Expand All @@ -698,10 +708,6 @@ def construct_graph(
with context as progress:
if verbose:
task1 = progress.add_task("Reading PDB file...", total=1)
# Get name from pdb_file is no pdb_code is provided
# if pdb_path and (pdb_code is None and uniprot_id is None):
# pdb_code = get_protein_name_from_filename(pdb_path)
# pdb_code = pdb_code if len(pdb_code) == 4 else None
progress.advance(task1)

# If config params are provided, overwrite them
Expand Down Expand Up @@ -730,13 +736,15 @@ def construct_graph(
if config.edge_metadata_functions is None
else config.edge_metadata_functions
)

raw_df = read_pdb_to_dataframe(
pdb_path,
pdb_code,
uniprot_id,
model_index=model_index,
)
if df is None:
raw_df = read_pdb_to_dataframe(
pdb_path,
pdb_code,
uniprot_id,
model_index=model_index,
)
else:
raw_df = df

if verbose:
task2 = progress.add_task("Processing PDB dataframe...", total=1)
Expand Down