In [1]:
import os
import dotenv
import time

import polars as pl
from deltalake import DeltaTable, write_deltalake
from polars import DataFrame, LazyFrame
from pyarrow.dataset import dataset
from s3fs import S3FileSystem

In [4]:
def load_s3_envvars(vars: list[str]):
    """Load the ``.env`` file and verify that the required variables are set.

    Args:
        vars (list[str]): Names of the environment variables that must be
            present and non-empty once the ``.env`` file has been loaded.

    Raises:
        ValueError: If one or more of the variables is unset or empty. The
            message lists every offending name, comma-separated.

    Returns:
        None
    """
    dotenv.load_dotenv()

    # Gather every missing name before raising so that a single run reports
    # all of them, rather than stopping at the first and hiding the rest.
    # An empty value is treated the same as an unset variable.
    missing = [name for name in vars if not os.getenv(name)]
    if missing:
        raise ValueError(
            f"Required environment variables are not set correctly: {', '.join(missing)}"
        )

    return None

In [2]:
def read_s3_parquet(s3_loc) -> LazyFrame | None:
    """Open a parquet dataset stored on S3 as a polars ``LazyFrame``.

    Credentials are read from the ``AWS_ACCESS_KEY_ID`` and
    ``AWS_SECRET_ACCESS_KEY`` environment variables.

    Args:
        s3_loc: Dataset location, handed to ``pyarrow.dataset.dataset`` as
            its ``source``.

    Returns:
        The result of ``pl.scan_pyarrow_dataset`` on success. If any step
        raises, the error is printed (not re-raised) and ``None`` is returned.
    """
    try:
        # Authenticated handle on the S3 filesystem.
        credentials = {
            "key": os.getenv("AWS_ACCESS_KEY_ID"),
            "secret": os.getenv("AWS_SECRET_ACCESS_KEY"),
        }
        s3 = S3FileSystem(**credentials)

        # The location may reference several parquet files at once.
        parquet_ds = dataset(source=s3_loc, filesystem=s3, format="parquet")

        # Wrap the pyarrow dataset for use from polars.
        return pl.scan_pyarrow_dataset(parquet_ds)
    except Exception as err:
        print(f"Error reading parquet file from AMZ S3: {err}")
        return None

In [5]:
def main():
    """Validate the S3 configuration, scan the parquet file and print a preview.

    Prints the first 10 rows of the dataset followed by the elapsed time.
    When the dataset cannot be opened (``read_s3_parquet`` returns ``None``),
    a notice is printed and the function returns without attempting the
    preview.

    Raises:
        ValueError: Propagated from ``load_s3_envvars`` when a required
            environment variable is missing.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    REQUIRED_S3_KEYS = [
        "AWS_DEFAULT_REGION",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "LOCAL_FILE_PATH",
        "S3_BUCKET",
    ]
    s3_loc = "s3://sonlebucket/delivery_orders_march.parquet"

    # check .env file
    load_s3_envvars(vars=REQUIRED_S3_KEYS)

    # read parquet from s3
    ldf = read_s3_parquet(s3_loc=s3_loc)
    if ldf is None:
        # read_s3_parquet has already printed the underlying error; calling a
        # method on None here would only add a confusing AttributeError.
        print(f"Could not open {s3_loc}; nothing to preview.")
        return

    # head(...).collect() replaces LazyFrame.fetch, which is deprecated in
    # recent polars releases, and works on older releases as well.
    print(ldf.head(10).collect())

    print(f"Polars execution time: {time.perf_counter() - start_time} seconds")
    
# Entry point: call main() when this code runs as the top-level module
# (true for a script run and for a cell executed in a notebook kernel).
if __name__ == "__main__":
    main()