In [0]:
import logging
import os
from datetime import datetime

# Create logs directory if it doesn't exist
os.makedirs('logs', exist_ok=True)

# Set up log file name with current date and time.
# The timestamp has minute resolution, so two runs started within the same
# minute resolve to the same file name.
now = datetime.now()
log_filename = f"logs/run_{now.strftime('%Y%m%d_%H%M')}.log"

# Configure logging: every record goes both to the cell output (stream
# handler) and to the per-run log file.
# force=True is required in a notebook: basicConfig() is otherwise a silent
# no-op whenever the root logger already has handlers (a pre-configured kernel,
# or simply re-running this cell), which would leave the new log file without
# a handler. With force=True the existing root handlers are removed and closed
# before the ones below are installed.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(levelname)s | %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(log_filename)
    ],
    force=True,
)

# Log start of run together with the run's fixed context (runtime description,
# input file locations, and where this log is being written).
logging.info("Run started.")
logging.info("Cluster/runtime info: AWS Serverless interactive cluster (terminated by inactivity)")
logging.info("menu file path: /Volumes/workspace/default/pandas/menu_items.csv")
logging.info("orders file path: /Volumes/workspace/default/pandas/order_details.csv")
logging.info(f"Log file: {log_filename}")


In [0]:
import hashlib
import json
import os
import random
import subprocess
import sys

import numpy as np

# Fix random seeds
# NOTE: PYTHONHASHSEED is read by the interpreter only at start-up, so setting
# it here does not change hashing in the already-running kernel; it is
# inherited by child processes started after this point.
os.environ['PYTHONHASHSEED'] = '0'
random.seed(0)
np.random.seed(0)
logging.info("Random seeds set to 0")

# Capture environment
# pip is invoked through the kernel's own interpreter (sys.executable) so the
# package list describes the environment this notebook is actually running in,
# rather than whichever `pip` happens to be first on the shell PATH.
# A failure is logged as a warning instead of being reported as success; the
# run continues either way because the snapshot is informational only.
try:
    pip_freeze = subprocess.run(
        [sys.executable, "-m", "pip", "freeze"],
        capture_output=True,
        text=True,
        check=True,
    )
except (OSError, subprocess.CalledProcessError) as exc:
    logging.warning(f"pip freeze failed; requirements.txt not written: {exc}")
else:
    with open("requirements.txt", "w") as req_file:
        req_file.write(pip_freeze.stdout)
    logging.info("Saved environment packages to requirements.txt")

# Compute SHA-256 hashes for input CSVs
def compute_sha256(file_path):
    """Return the hexadecimal SHA-256 digest of the file at ``file_path``.

    The file is opened in binary mode and fed to the hash in 4096-byte reads,
    so the whole content is never held in memory at once. Errors raised by
    ``open`` (for example a missing file) propagate to the caller.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            chunk = stream.read(4096)
            if not chunk:
                # An empty read marks end of file.
                break
            digest.update(chunk)
    return digest.hexdigest()

# Input CSVs whose contents are fingerprinted for this run.
data_files = [
    "/Volumes/workspace/default/pandas/menu_items.csv",
    "/Volumes/workspace/default/pandas/order_details.csv",
]

# Map each path that exists to its SHA-256 digest; a missing path is reported
# with a warning and left out of the mapping.
hashes = {}
for csv_path in data_files:
    if not os.path.exists(csv_path):
        logging.warning(f"File not found: {csv_path}. Skipping hash computation.")
        continue
    hashes[csv_path] = compute_sha256(csv_path)

# Persist the digests next to the notebook and echo them into the run log.
with open("data_hashes.json", "w") as out_file:
    json.dump(hashes, out_file, indent=2)

logging.info(f"Data hashes saved to data_hashes.json: {hashes}")

In [0]:
import pandas as pd

# Load the two input tables from their CSV files.
menu = pd.read_csv('/Volumes/workspace/default/pandas/menu_items.csv')
orders = pd.read_csv('/Volumes/workspace/default/pandas/order_details.csv')

# Report each table's shape first, then show the first rows of each table,
# keeping the menu-before-orders order for both steps.
for caption, frame in (('menu_items shape:', menu), ('order_details shape:', orders)):
    print(caption, frame.shape)
for frame in (menu, orders):
    display(frame.head())