# Silver: Process data
Similar to the classical [ETL](https://en.wikipedia.org/wiki/Extract,_transform,_load) process, the silver layer performs transformations to prepare data for serving.

In [1]:
import os
from datetime import datetime
from io import BytesIO

import boto3
import duckdb
import pandas as pd

# MinIO Configuration
# -------------------------------------------------------------------------------------------------
# Endpoint and credentials are read from the environment so that a real deployment never
# needs secrets inside the notebook. The fallback values are the demo settings this
# notebook originally hardcoded, so it keeps working unchanged when nothing is exported.
MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT", "http://minio:9000")
# Danger-Zone (https://www.youtube.com/watch?v=siwpn14IE7E)
# Typically a kind of vault would be used, e.g. https://azure.microsoft.com/en-us/products/key-vault, ...
MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "admin")
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "password")
# Bucket and object key of the silver-layer output written at the end of this cell.
BUCKET_NAME = "weather-data"
SILVER_FILE_NAME = "silver/weather_cleaned.parquet"

# Create the S3 client, pointed at the MinIO endpoint with the configured credentials
minio_client_options = {
    "endpoint_url": MINIO_ENDPOINT,
    "aws_access_key_id": MINIO_ACCESS_KEY,
    "aws_secret_access_key": MINIO_SECRET_KEY,
}
s3 = boto3.client("s3", **minio_client_options)

# List every parquet object stored under the bronze/ prefix.
# A single list_objects_v2 call returns at most 1000 keys, so the paginator is used to
# walk all result pages; otherwise larger bronze layers would be silently truncated.
paginator = s3.get_paginator("list_objects_v2")
files = [
    obj["Key"]
    for page in paginator.paginate(Bucket=BUCKET_NAME, Prefix="bronze/")
    for obj in page.get("Contents", [])  # "Contents" is absent when a page has no objects
    if obj["Key"].endswith(".parquet")
]

# Download each listed parquet object into a pandas DataFrame and combine them.
# The in-memory DuckDB connection is opened here and used for the SQL transformation below.
conn = duckdb.connect(database=":memory:")
df_list = []

for file in files:
    obj = s3.get_object(Bucket=BUCKET_NAME, Key=file)
    # The object body is read fully into memory and parsed from an in-memory buffer.
    df = pd.read_parquet(BytesIO(obj["Body"].read()))
    df_list.append(df)

# pd.concat raises an opaque "No objects to concatenate" on an empty list; fail with a
# message that names the actual problem instead (same exception type as before).
if not df_list:
    raise ValueError(
        f"No parquet files were found to load from bucket '{BUCKET_NAME}'; nothing to transform."
    )

# ignore_index gives the combined frame one continuous row index instead of repeating
# each file's own row labels.
df_combined = pd.concat(df_list, ignore_index=True)

# Register the combined frame in DuckDB and derive a temperature category per row
conn.register("weather_data", df_combined)

# The CASE branches are evaluated top-down and the first match wins:
#   temperature < 0   -> 'Freezing'
#   0  <= temp <= 10  -> 'Cold'
#   10 <  temp <= 20  -> 'Mild'
#   otherwise         -> 'Warm'
# A missing (NULL) temperature matches none of the comparisons, so without the explicit
# IS NULL branch it would fall through to ELSE and be labelled 'Warm'. It is kept as NULL.
df_transformed = conn.execute("""
    SELECT
        timestamp,
        temperature,
        humidity,
        wind_speed,
        CASE
            WHEN temperature IS NULL THEN NULL
            WHEN temperature < 0 THEN 'Freezing'
            WHEN temperature <= 10 THEN 'Cold'
            WHEN temperature <= 20 THEN 'Mild'
            ELSE 'Warm'
        END AS temperature_category
    FROM weather_data
""").fetchdf()

# Serialize the transformed frame to parquet in memory, then upload the bytes to MinIO
parquet_buffer = BytesIO()
df_transformed.to_parquet(parquet_buffer, engine="pyarrow")
parquet_bytes = parquet_buffer.getvalue()
s3.put_object(
    Bucket=BUCKET_NAME,
    Key=SILVER_FILE_NAME,
    Body=parquet_bytes,
)

print(f"Transformed data stored in MinIO at {SILVER_FILE_NAME}")

Transformed data stored in MinIO at silver/weather_cleaned.parquet
