Skip to content

Commit

Permalink
Neat 284 extractor sequence (#483)
Browse files Browse the repository at this point in the history
* added timeseries extractor

* added seqeunce extractor

* Linting and static code checks

---------

Co-authored-by: nikokaoja <nikokaoja@users.noreply.github.com>
  • Loading branch information
nikokaoja and nikokaoja committed Jun 6, 2024
1 parent 8db8d2d commit 750e553
Show file tree
Hide file tree
Showing 7 changed files with 9,171 additions and 2 deletions.
9 changes: 8 additions & 1 deletion cognite/neat/graph/extractors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from ._classic_cdf._assets import AssetsExtractor
from ._classic_cdf._relationships import RelationshipsExtractor
from ._classic_cdf._sequences import SequencesExtractor
from ._classic_cdf._timeseries import TimeSeriesExtractor
from ._mock_graph_generator import MockGraphGenerator

__all__ = ["AssetsExtractor", "MockGraphGenerator", "RelationshipsExtractor", "TimeSeriesExtractor"]
__all__ = [
"AssetsExtractor",
"MockGraphGenerator",
"RelationshipsExtractor",
"TimeSeriesExtractor",
"SequencesExtractor",
]
92 changes: 92 additions & 0 deletions cognite/neat/graph/extractors/_classic_cdf/_sequences.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from collections.abc import Iterable
from datetime import datetime
from pathlib import Path
from typing import cast

import pytz
from cognite.client import CogniteClient
from cognite.client.data_classes import Sequence, SequenceList
from pydantic import AnyHttpUrl, ValidationError
from rdflib import RDF, Literal, Namespace, URIRef

from cognite.neat.constants import DEFAULT_NAMESPACE
from cognite.neat.graph.extractors._base import BaseExtractor
from cognite.neat.graph.models import Triple
from cognite.neat.utils.utils import string_to_ideal_type


class SequencesExtractor(BaseExtractor):
def __init__(
self,
sequence: Iterable[Sequence],
namespace: Namespace | None = None,
):
self.namespace = namespace or DEFAULT_NAMESPACE
self.sequence = sequence

@classmethod
def from_dataset(
cls,
client: CogniteClient,
data_set_external_id: str,
namespace: Namespace | None = None,
):
return cls(cast(Iterable[Sequence], client.sequences(data_set_external_ids=data_set_external_id)), namespace)

@classmethod
def from_file(cls, file_path: str, namespace: Namespace | None = None):
return cls(SequenceList.load(Path(file_path).read_text()), namespace)

def extract(self) -> Iterable[Triple]:
"""Extract sequences as triples."""
for sequence in self.sequence:
yield from self._sequence2triples(sequence, self.namespace)

@classmethod
def _sequence2triples(cls, sequence: Sequence, namespace: Namespace) -> list[Triple]:
id_ = namespace[str(sequence.id)]

# Set rdf type
triples: list[Triple] = [(id_, RDF.type, namespace.Sequence)]

# Create attributes

if sequence.external_id is not None:
triples.append((id_, namespace.external_id, Literal(sequence.external_id)))

if sequence.name is not None:
triples.append((id_, namespace.name, Literal(sequence.name)))

if sequence.metadata:
for key, value in sequence.metadata.items():
if value:
type_aware_value = string_to_ideal_type(value)
try:
triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
except ValidationError:
triples.append((id_, namespace[key], Literal(type_aware_value)))

if sequence.description is not None:
triples.append((id_, namespace.description, Literal(sequence.description)))

if sequence.created_time is not None:
triples.append(
(id_, namespace.created_time, Literal(datetime.fromtimestamp(sequence.created_time / 1000, pytz.utc)))
)

if sequence.last_updated_time is not None:
triples.append(
(
id_,
namespace.last_updated_time,
Literal(datetime.fromtimestamp(sequence.last_updated_time / 1000, pytz.utc)),
)
)

if sequence.data_set_id is not None:
triples.append((id_, namespace.data_set_id, namespace[str(sequence.data_set_id)]))

if sequence.asset_id is not None:
triples.append((id_, namespace.asset, namespace[str(sequence.asset_id)]))

return triples
2 changes: 1 addition & 1 deletion cognite/neat/graph/extractors/_classic_cdf/_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def from_file(cls, file_path: str, namespace: Namespace | None = None):
return cls(TimeSeriesList.load(Path(file_path).read_text()), namespace)

def extract(self) -> Iterable[Triple]:
"""Extract timeseries an asset with the given asset_id."""
"""Extract timeseries as triples."""
for timeseries in self.timeseries:
yield from self._timeseries2triples(timeseries, self.namespace)

Expand Down
1 change: 1 addition & 0 deletions tests/tests_unit/graph/test_extractors/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
ASSET_EXTRACTOR_DATA = Path(__file__).parent / "asset_extractor_data"
RELATIONSHIP_EXTRACTOR_DATA = Path(__file__).parent / "relationship_extractor_data"
TIMESERIES_EXTRACTOR_DATA = Path(__file__).parent / "timeseries_extractor_data"
SEQUENCES_EXTRACTOR_DATA = Path(__file__).parent / "sequences_extractor_data"
Loading

0 comments on commit 750e553

Please sign in to comment.