Skip to content

Commit

Permalink
parse bore layers (#263)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 12, 2022
1 parent 81dc921 commit 3cbeb99
Show file tree
Hide file tree
Showing 12 changed files with 422 additions and 28 deletions.
2 changes: 2 additions & 0 deletions pygef/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pygef.gef.utils import depth_to_nap, join_gef, nap_to_depth
from pygef.broxml import CPTData, QualityClass, Location, BoreData
from pygef.shim import read_cpt
from pygef.shim import read_bore


__all__ = [
Expand All @@ -14,6 +15,7 @@
"CPTData",
"BoreData",
"read_cpt",
"read_bore",
"QualityClass",
"Location",
]
15 changes: 14 additions & 1 deletion pygef/bore.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import date
from pygef.broxml import Location
import polars as pl


@dataclass
class BoreData:
    """
    Container for one parsed borehole research (BRO BHR-GT xml).

    The field names match the keys of the resolver schema used by
    `pygef.broxml.parse_bore.read_bore`; every field is passed to the
    constructor as a keyword argument.

    The former placeholder field `todo` has been dropped: it is not part of
    the resolver schema, so keeping it as a required field would make the
    class impossible to construct from a parsed document.
    """

    # researchReportDate, parsed to a `date`
    research_report_date: date
    # text of boreholeSampleDescription/descriptionProcedure
    description_procedure: str
    # deliveredLocation, resolved from its gml:Point
    delivered_location: Location
    # deliveredVerticalPosition/offset; None is allowed by the annotation
    delivered_vertical_position_offset: float | None
    # deliveredVerticalPosition/verticalDatum, lower-cased by the resolver
    delivered_vertical_position_datum: str
    # deliveredVerticalPosition/localVerticalReferencePoint, lower-cased
    delivered_vertical_position_reference_point: str
    # boring/rockReached
    bore_rock_reached: bool
    # boring/finalDepthBoring
    final_bore_depth: float
    # boring/finalDepthSampling; None is allowed by the annotation
    final_sample_depth: float | None
    # NOTE(review): currently resolved from the `boring` element itself,
    # confirm which child element this flag is meant to come from
    bore_hole_completed: bool
    # one row per described layer of the descriptiveBoreholeLog
    data: pl.DataFrame
2 changes: 2 additions & 0 deletions pygef/broxml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
from pygef.cpt import CPTData, QualityClass, Location
from pygef.bore import BoreData
from pygef.broxml.parse_cpt import read_cpt
from pygef.broxml.parse_bore import read_bore


__all__ = [
"CPTData",
"BoreData",
"read_cpt",
"read_bore",
"QualityClass",
"Location",
"_BroXmlBore",
Expand Down
136 changes: 136 additions & 0 deletions pygef/broxml/parse_bore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from __future__ import annotations

import io
import re
from pathlib import Path

from pygef.broxml import resolvers
from pygef.broxml.xml_parser import read_xml
from pygef.bore import BoreData
from lxml import etree


# Maps each keyword argument of the constructor to:
# xpath: query passed to Element.find
# resolver: Optional: a function that converts the value to the proper datatype
# Fn(str) -> Any
# el-attr: Optional: attribute of the element taken before it is sent to the resolver
# Version 1 is not supported; this schema is a WIP
# Resolver schema for version 1 documents: the same keys as BORE_ATTRIBS_V2,
# but the first path segment carries the `isbhrgt:` prefix and there is no
# "data" entry. `read_bore` in this module never passes this schema on.
BORE_ATTRIBS_V1 = {
"research_report_date": {
"xpath": "./researchReportDate/brocom:date",
"resolver": resolvers.parse_date,
"el-attr": "text",
},
# no resolver configured for this entry
"description_procedure": {
"xpath": "./isbhrgt:boreholeSampleDescription/bhrgtcom:descriptionProcedure",
"el-attr": "text",
},
# no "el-attr": the resolver is declared to take the element itself
"delivered_location": {
"xpath": "./isbhrgt:deliveredLocation/bhrgtcom:location/gml:Point",
"resolver": resolvers.parse_gml_location,
},
"delivered_vertical_position_offset": {
"xpath": "./isbhrgt:deliveredVerticalPosition/bhrgtcom:offset",
"resolver": resolvers.parse_float,
"el-attr": "text",
},
"delivered_vertical_position_datum": {
"xpath": "./isbhrgt:deliveredVerticalPosition/bhrgtcom:verticalDatum",
"resolver": resolvers.lower_text,
"el-attr": "text",
},
"delivered_vertical_position_reference_point": {
"xpath": "./isbhrgt:deliveredVerticalPosition/bhrgtcom:localVerticalReferencePoint",
"resolver": resolvers.lower_text,
"el-attr": "text",
},
"bore_rock_reached": {
"xpath": "./isbhrgt:boring/bhrgtcom:rockReached",
"resolver": resolvers.parse_bool,
"el-attr": "text",
},
"final_bore_depth": {
"xpath": "./isbhrgt:boring/bhrgtcom:finalDepthBoring",
"resolver": resolvers.parse_float,
"el-attr": "text",
},
"final_sample_depth": {
"xpath": "./isbhrgt:boring/bhrgtcom:finalDepthSampling",
"resolver": resolvers.parse_float,
"el-attr": "text",
},
# NOTE(review): this path selects the `boring` element itself rather than
# one of its children, so parse_bool receives that element's own text.
# Presumably a dedicated child element was intended; verify against the
# schema.
"bore_hole_completed": {
"xpath": "./isbhrgt:boring",
"resolver": resolvers.parse_bool,
"el-attr": "text",
},
}

# Resolver schema for bhrgtcom 2.x documents; this is the schema `read_bore`
# hands to `read_xml`. Each key is a keyword argument of `BoreData`.
BORE_ATTRIBS_V2 = {
"research_report_date": {
"xpath": "./researchReportDate/brocom:date",
"resolver": resolvers.parse_date,
"el-attr": "text",
},
# no resolver configured for this entry
"description_procedure": {
"xpath": "./boreholeSampleDescription/bhrgtcom:descriptionProcedure",
"el-attr": "text",
},
# no "el-attr": the resolver is declared to take the element itself
"delivered_location": {
"xpath": "./deliveredLocation/bhrgtcom:location/gml:Point",
"resolver": resolvers.parse_gml_location,
},
"delivered_vertical_position_offset": {
"xpath": "./deliveredVerticalPosition/bhrgtcom:offset",
"resolver": resolvers.parse_float,
"el-attr": "text",
},
"delivered_vertical_position_datum": {
"xpath": "./deliveredVerticalPosition/bhrgtcom:verticalDatum",
"resolver": resolvers.lower_text,
"el-attr": "text",
},
"delivered_vertical_position_reference_point": {
"xpath": "./deliveredVerticalPosition/bhrgtcom:localVerticalReferencePoint",
"resolver": resolvers.lower_text,
"el-attr": "text",
},
"bore_rock_reached": {
"xpath": "./boring/bhrgtcom:rockReached",
"resolver": resolvers.parse_bool,
"el-attr": "text",
},
"final_bore_depth": {
"xpath": "./boring/bhrgtcom:finalDepthBoring",
"resolver": resolvers.parse_float,
"el-attr": "text",
},
"final_sample_depth": {
"xpath": "./boring/bhrgtcom:finalDepthSampling",
"resolver": resolvers.parse_float,
"el-attr": "text",
},
# NOTE(review): this path selects the `boring` element itself rather than
# one of its children, so parse_bool receives that element's own text.
# Presumably a dedicated child element was intended; verify against the
# schema.
"bore_hole_completed": {
"xpath": "./boring",
"resolver": resolvers.parse_bool,
"el-attr": "text",
},
# the whole descriptiveBoreholeLog element goes to the resolver, which
# builds a DataFrame from its bhrgtcom:layer children
"data": {
"xpath": "./boreholeSampleDescription/bhrgtcom:descriptiveBoreholeLog",
"resolver": resolvers.process_bore_result,
},
}


def read_bore(file: io.BytesIO | Path | str) -> list[BoreData]:
    """
    Parse a borehole research xml document into `BoreData` objects.

    Parameters
    ----------
    file
        Path to, or in-memory buffer of, the xml document.

    Returns
    -------
    The result of `read_xml` for the "sourceDocument" payload, resolved with
    `BORE_ATTRIBS_V2`.

    Raises
    ------
    KeyError
        If the document root does not declare the `bhrgtcom` namespace prefix.
    ValueError
        If no version can be read from the `bhrgtcom` namespace, or if that
        version is not 2.x.
    """
    root = etree.parse(file).getroot()

    # the schema version is the `<digit>.<digit>` tail of the namespace uri
    matched = re.search(r"xsd/.*/(\d\.\d)", root.nsmap["bhrgtcom"])
    if matched is None:
        raise ValueError("could not find the bhrgtcom version")

    # The previous check `3.0 >= version < 2.0` is a chained comparison that
    # reduces to `version < 2.0`, so 3.x documents slipped through.
    version = float(matched.group(1))
    if not 2.0 <= version < 3.0:
        raise ValueError("only bhrgtcom/2.x is supported ")

    return read_xml(root, BoreData, BORE_ATTRIBS_V2, "sourceDocument")
12 changes: 9 additions & 3 deletions pygef/broxml/parse_cpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import io
from pathlib import Path
from lxml import etree

from pygef.broxml import resolvers
from pygef.broxml.xml_parser import read_xml
Expand All @@ -15,11 +16,15 @@
# el-attr: Optional: attribute of an element taken before it is sent to the resolver
CPT_ATTRIBS = {
"bro_id": {"xpath": "brocom:broId"},
"research_report_date": {"xpath": "./researchReportDate/brocom:date"},
"research_report_date": {
"xpath": "./researchReportDate/brocom:date",
"resolver": resolvers.parse_date,
"el-attr": "text",
},
"cpt_standard": {"xpath": "cptStandard"},
"standardized_location": {
"xpath": "./standardizedLocation/brocom:location",
"resolver": resolvers.parse_brocom_location,
"resolver": resolvers.parse_gml_location,
},
"dissipationtest_performed": {
"xpath": "./conePenetrometerSurvey/cptcommon:dissipationTestPerformed",
Expand Down Expand Up @@ -180,4 +185,5 @@


def read_cpt(file: io.BytesIO | Path | str) -> list[CPTData]:
    """
    Parse a cpt xml document into `CPTData` objects.

    Parameters
    ----------
    file
        Path to, or in-memory buffer of, the xml document.

    Returns
    -------
    The result of `read_xml` for the "dispatchDocument" payload, resolved
    with `CPT_ATTRIBS`.
    """
    # `read_xml` takes the parsed root element and the name of the payload
    # element. The superseded three-argument call that passed the raw file is
    # dropped: it returned early and left the lines below unreachable.
    root = etree.parse(file).getroot()
    return read_xml(root, CPTData, CPT_ATTRIBS, "dispatchDocument")
84 changes: 81 additions & 3 deletions pygef/broxml/resolvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from warnings import warn
from typing import Any
from lxml import etree
from datetime import date, datetime

from pygef.broxml import QualityClass, Location
import polars as pl
Expand All @@ -20,18 +21,95 @@ def parse_int(val: str, **kwargs: dict[Any, Any]) -> int:
return int(val)


def parse_date(val: str, **kwargs: dict[Any, Any]) -> date:
    """
    Resolver that converts a `YYYY-MM-DD` string into a `date`.

    Extra keyword arguments are accepted and ignored. A string that does not
    match the format raises `ValueError` (from `datetime.strptime`).
    """
    moment = datetime.strptime(val, "%Y-%m-%d")
    return moment.date()


def parse_bool(val: str, **kwargs: dict[Any, Any]) -> bool:
    """
    Resolver that converts a Dutch yes/no string into a `bool`.

    The comparison is case-insensitive: "ja" gives True, "nee" and "geen"
    give False. Any other value falls back to its truthiness, so an empty
    string gives False and every other string gives True.

    Extra keyword arguments are accepted and ignored.
    """
    val = val.lower()
    if val == "ja":
        return True
    # one check for both negatives; the older "nee"-only test is subsumed
    if val in ("nee", "geen"):
        return False
    return bool(val)


def process_bore_result(el: etree.Element, **kwargs: dict[Any, Any]) -> pl.DataFrame:
    """
    Parse the described layers of a borehole log into a `DataFrame`

    Parameters
    ----------
    el
        element whose `bhrgtcom:layer` children are read
    kwargs
        namespaces.

    Returns
    -------
    One row per layer with the columns `upper_boundary`, `lower_boundary`,
    `geotechnical_soil_name`, `color`, `dispersed_inhomogenity`,
    `organic_matter_content_class` and `sand_median_class`.
    """
    ns = kwargs["namespaces"]
    soil = "bhrgtcom:soil/bhrgtcom:"

    # insertion order of this dict is the column order of the frame
    columns = {
        "upper_boundary": [],
        "lower_boundary": [],
        "geotechnical_soil_name": [],
        "color": [],
        "dispersed_inhomogenity": [],
        "organic_matter_content_class": [],
        "sand_median_class": [],
    }

    for layer in el.iterfind("bhrgtcom:layer", namespaces=ns):
        top = layer.find("bhrgtcom:upperBoundary", namespaces=ns)
        columns["upper_boundary"].append(float(top.text))

        bottom = layer.find("bhrgtcom:lowerBoundary", namespaces=ns)
        columns["lower_boundary"].append(float(bottom.text))

        soil_name = layer.find(soil + "geotechnicalSoilName", namespaces=ns)
        columns["geotechnical_soil_name"].append(soil_name.text)

        colour = layer.find(soil + "colour", namespaces=ns)
        columns["color"].append(colour.text)

        inhomogeneity = layer.find(soil + "dispersedInhomogeneity", namespaces=ns)
        columns["dispersed_inhomogenity"].append(parse_bool(inhomogeneity.text))

        organic = layer.find(soil + "organicMatterContentClass", namespaces=ns)
        columns["organic_matter_content_class"].append(organic.text)

        # the only field that may be absent: a missing element becomes None
        try:
            sand_median = layer.find(soil + "sandMedianClass", namespaces=ns).text
        except AttributeError:
            sand_median = None
        columns["sand_median_class"].append(sand_median)

    return pl.DataFrame(columns)


def process_cpt_result(el: etree.Element, **kwargs: dict[Any, Any]) -> pl.DataFrame:
"""
Parse the cpt data into a `DataFrame`
Parameters
----------
el
conePenetrometerSurvey
kwargs
namespaces.
"""
namespaces = kwargs["namespaces"]
"""Resolver for conePenetrometerSurvey/cptcommon:conePenetrationTest/cptcommon:cptResult."""

prefix = "./cptcommon:conePenetrationTest/cptcommon:cptResult"

Expand Down Expand Up @@ -73,7 +151,7 @@ def process_cpt_result(el: etree.Element, **kwargs: dict[Any, Any]) -> pl.DataFr
)


def parse_brocom_location(el: etree.Element, **kwargs: dict[Any, Any]) -> Location:
def parse_gml_location(el: etree.Element, **kwargs: dict[Any, Any]) -> Location:
"""Resolver for standardizedLocation/brocom:location"""
srs_name = el.attrib["srsName"]
pos = next(el.iterfind("./gml:pos", namespaces=kwargs["namespaces"])).text
Expand Down
16 changes: 6 additions & 10 deletions pygef/broxml/xml_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

import io
from pathlib import Path
from typing import TypeVar, cast, Any, Callable
from lxml import etree

Expand All @@ -12,27 +10,25 @@


def read_xml(
file: io.BytesIO | Path | str,
root: etree.Element,
constructor: Callable[..., T],
resolver_schema: dict[str, Any],
payload_root: str,
) -> list[T]:
tree = etree.parse(file)

root = tree.getroot()
namespaces = root.nsmap
dd = root.find("dispatchDocument", namespaces)
dd = root.find(payload_root, namespaces)

out: list[T] = []

cpts = dd.findall("./*")
for cpt in cpts:
payloads = dd.findall("./*")
for payload in payloads:

# kwargs of attribute: value
resolved = dict()

for (atrib, d) in resolver_schema.items():
d = cast(dict[str, Any], d)
el = cpt.find(d["xpath"], cpt.nsmap)
el = payload.find(d["xpath"], payload.nsmap)

if el is not None:
if "resolver" in d:
Expand Down
3 changes: 2 additions & 1 deletion pygef/cpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Any

import polars as pl
from datetime import date
from enum import Enum
import copy
import pprint
Expand All @@ -26,7 +27,7 @@ class Location:
class CPTData:
# dispatch_document cpt
bro_id: str | None
research_report_date: str | None
research_report_date: date
cpt_standard: str | None
standardized_location: Location | None
# conepenetrometersurvey
Expand Down

0 comments on commit 3cbeb99

Please sign in to comment.