# Petrinex Volumetrics - Load and Display

Load Alberta volumetric data from Petrinex into Spark DataFrames.

**Features:** Unity Catalog compatible • Direct repo import • Auto ZIP extraction


## Setup


In [None]:
# Standard-library modules needed to adjust the import path.
import os
import sys

# Ask the Databricks notebook context for this notebook's path.
notebook_path = (
    dbutils.notebook.entry_point.getDbutils()
    .notebook()
    .getContext()
    .notebookPath()
    .get()
)
# Put the notebook's parent folder first on sys.path — presumably so the
# `petrinex` package sitting next to this notebook is importable below.
sys.path.insert(0, os.path.dirname(notebook_path))

from petrinex import PetrinexVolumetricsClient
from pyspark.sql import functions as F
from datetime import datetime, timedelta

# One client instance is shared by the cells that follow.
client = PetrinexVolumetricsClient(
    spark=spark,
    jurisdiction="AB",
    file_format="CSV",
)
print("✓ Ready")


## List Files (Optional)


In [None]:
# Cutoff is 30 days before the current local time, formatted as YYYY-MM-DD.
cutoff = (datetime.now() - timedelta(days=30)).strftime("%Y-%m-%d")
files = client.list_updated_after(cutoff)
print(f"Found {len(files)} files updated after {cutoff}")

# Preview at most the first ten entries. A plain loop replaces the former
# list comprehension, which existed only for its print side effect and left
# a list of None values as the cell's displayed result.
for file_info in files[:10]:
    print(f"{file_info.production_month} | {file_info.updated_ts}")


## Load Data


In [None]:
# Only files updated after this date are loaded.
updated_since = "2026-01-01"  # Change date as needed

# Options handed to the client as `pandas_read_kwargs` — presumably forwarded
# to the pandas CSV reader (all columns as strings, Latin-1 decoding).
pandas_options = {"dtype": str, "encoding": "latin1"}

df = client.read_updated_after_as_spark_df_via_pandas(
    updated_since,
    pandas_read_kwargs=pandas_options,
)

# Mark the DataFrame for caching before the count below and the display
# cells that reuse it.
df.cache()
print(f"✓ Loaded {df.count():,} rows × {len(df.columns)} columns")


## Display


In [None]:
# Print the loaded DataFrame's column names and types.
df.printSchema()


In [None]:
# Show a 100-row sample instead of rendering the full DataFrame.
preview_rows = df.limit(100)
display(preview_rows)


In [None]:
# Row count per production month, sorted by month.
records_per_month = (
    df.groupBy("production_month")
    .agg(F.count("*").alias("records"))
    .orderBy("production_month")
)
display(records_per_month)


## Save to Delta (Optional)


In [None]:
# Optional: persist the loaded data as a Delta table partitioned by year/month.
# NOTE(review): the substring offsets (1..4 and 6..7) assume production_month
# is formatted like "YYYY-MM" — confirm against the loaded data before running.
# NOTE(review): mode("overwrite") replaces the existing contents of the
# target table; verify that is intended before uncommenting.
# Uncomment to save:
# df.withColumn("year", F.substring("production_month", 1, 4)) \
#   .withColumn("month", F.substring("production_month", 6, 2)) \
#   .write.format("delta").mode("overwrite") \
#   .partitionBy("year", "month") \
#   .saveAsTable("main.petrinex.volumetrics_raw")
