In [1]:
import base64
import hashlib
import subprocess
import pandas as pd
import numpy as np

# CSV file that main() loads into a DataFrame.
INPUT_FILE = "data.csv"
# Text file that main() overwrites with the first 16 hex digits of the hash.
ANSWER_FILE = "exrc-04-answer.txt"


def decrypt_with_gpg(encrypted_bytes: bytes) -> str:
    """Decrypt a message by piping it through the local ``gpg`` executable.

    Args:
        encrypted_bytes: Encrypted message, passed to gpg on standard input.

    Returns:
        gpg's standard output decoded as UTF-8, with leading and trailing
        whitespace removed.

    Raises:
        subprocess.CalledProcessError: If gpg exits with a non-zero status.
    """
    # capture_output=True pipes both stdout and stderr, so gpg's diagnostics
    # do not leak into the caller's console.
    completed = subprocess.run(
        ["gpg", "--decrypt"],
        input=encrypted_bytes,
        capture_output=True,
        check=True,
    )
    plaintext = completed.stdout.decode("utf-8")
    return plaintext.strip()


def parse_value(value: str):
    """Convert a raw cell value to ``float``, decrypting it first if needed.

    A string starting with ``PGP_B64:`` is treated as base64-encoded data:
    the remainder is base64-decoded and handed to ``decrypt_with_gpg``.
    Any other value is converted as it is.

    Args:
        value: Raw cell content. Non-string values are accepted too and are
            passed straight to ``float``.

    Returns:
        The parsed float, or ``np.nan`` when the (decrypted) value cannot be
        converted.
    """
    marker = "PGP_B64:"

    text = value
    if isinstance(value, str) and value.startswith(marker):
        ciphertext = base64.b64decode(value[len(marker):])
        text = decrypt_with_gpg(ciphertext)

    try:
        number = float(text)
    except (ValueError, TypeError):
        # Unparseable text (ValueError) or a non-numeric object such as None
        # (TypeError) is reported as a missing value.
        return np.nan
    return number


def _build_canonical_table(frame):
    """Return the sorted Time/max/min/range table built from the raw rows.

    Args:
        frame: DataFrame with ``Time``, ``ParameterName`` and
            ``ParameterValue`` columns.

    Returns:
        DataFrame sorted by ``Time`` that contains the columns ``Time``,
        ``max``, ``min`` and ``range``; only times for which both values
        parsed successfully are kept.
    """
    wanted = ["TA_PT1H_MAX", "TA_PT1H_MIN"]

    # Filter and de-duplicate BEFORE parsing: parse_value may spawn a gpg
    # process per value, so rows that are discarded anyway must not be
    # decrypted. Neither step looks at the parsed value, so the result is
    # the same as parsing first.
    selected = frame[frame["ParameterName"].isin(wanted)]
    selected = selected.drop_duplicates(subset=["Time", "ParameterName"], keep="first")

    # assign() returns a new frame, so no column is written into a slice of
    # the caller's DataFrame. Values that fail to parse become NaN.
    selected = selected.assign(
        NumericValue=selected["ParameterValue"].apply(parse_value)
    )

    # Long -> wide: one row per Time, one column per parameter.
    wide = selected.pivot(index="Time", columns="ParameterName", values="NumericValue")

    # reindex guarantees both columns exist even when a parameter never occurs
    # in the data; such rows are then removed by dropna instead of raising
    # KeyError.
    wide = wide.reindex(columns=wanted).dropna(subset=wanted)

    table = wide.reset_index().rename(
        columns={"TA_PT1H_MAX": "max", "TA_PT1H_MIN": "min"}
    )
    table["range"] = table["max"] - table["min"]

    # Sort by Time ascending (string sort when Time holds strings).
    return table.sort_values("Time")


def _render_payload(table):
    """Serialise the canonical table to CSV text with ``\\n`` line endings."""
    rows = zip(table["Time"], table["max"], table["min"], table["range"])

    # Built by hand (not DataFrame.to_csv) so the bytes that get hashed do not
    # depend on the platform's newline convention. Numbers use exactly three
    # decimals.
    lines = ["Time,max,min,range"]
    lines.extend(
        f"{time},{high:.3f},{low:.3f},{span:.3f}" for time, high, low, span in rows
    )
    return "\n".join(lines) + "\n"


def main():
    """Compute the hash of the canonical max/min/range table and store it.

    Reads ``INPUT_FILE``, builds the canonical CSV payload for the
    ``TA_PT1H_MAX`` / ``TA_PT1H_MIN`` parameters, prints the SHA-256 digest
    of that payload together with its first 16 hex digits, and writes the
    short digest followed by a newline to ``ANSWER_FILE``.
    """
    frame = pd.read_csv(INPUT_FILE)

    payload = _render_payload(_build_canonical_table(frame))

    # Compute SHA-256 hash of the UTF-8 encoded payload.
    sha256 = hashlib.sha256(payload.encode("utf-8")).hexdigest()
    short_hash = sha256[:16]

    print("Full SHA256:", sha256)
    print("First 16 hex:", short_hash)

    # newline="\n" disables newline translation so the answer file is
    # identical on every platform.
    with open(ANSWER_FILE, "w", encoding="utf-8", newline="\n") as f:
        f.write(short_hash + "\n")


# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


Full SHA256: f8429164e5149ae431ea501831dbcd91ef74b3fc01d0ee0504f8394d8ac2fe9c
First 16 hex: f8429164e5149ae4


AI prompts used

How to make the parameter names the column headers in Python? How to transform a DataFrame from long format to wide format? How to remove rows where the value of a particular field is missing? How to open an existing file in write mode in Python?
How to compute a SHA-256 hash from a UTF-8 encoded string? How to decode base64 by invoking GPG in Python?

External Links

https://www.geeksforgeeks.org/python/python-pandas-pivot/ https://nostarch.com/python-crash-course-3rd-edition

https://www.geeksforgeeks.org/python/hashlib-module-in-python/ https://www.digitalocean.com/community/tutorials/python-read-file-open-write-delete-copy

https://www.qodo.ai/blog/pandas-pivot-tables-a-comprehensive-guide-for-data-science/  https://docs.python.org/3/library/hashlib.html

https://www.geeksforgeeks.org/python/encoding-and-decoding-base64-strings-in-python/  https://www.geeksforgeeks.org/python/python-subprocess-module/

https://www.tecmint.com/gpg-encrypt-decrypt-files/

Reflections on the Process

The main challenge I faced in this exercise was handling the CSV structure correctly. I first attempted to access specific parameter columns directly, which resulted in errors because the dataset was structured in a long format rather than a wide format. By using pivot(), I was able to convert the parameter names into columns, making it easy to extract and process the required data.

I learned that even minor inconsistencies in newline handling or formatting could change the SHA-256 hash result. I therefore built the canonical CSV string manually using explicit \n line separators to ensure cross-platform consistency.

I decided to refactor it using pandas to make the code more maintainable. With pandas, I could transform the data in a much cleaner, more declarative way using built-in operations like filtering, deduplication, pivoting, and sorting. This approach is much clearer than the manual logic.

The process was smooth overall. The external resources further helped me understand the necessary pandas functions better. The AI prompts were helpful in debugging the problem, especially when I had to deal with a large dataset.