# In [None]:
import pyarrow.parquet as pq
import io
import boto3
import pandas as pd

def lambda_handler(event, context):
    """Clean a Parquet object referenced by an S3 event and store the result.

    Reads the object named by the first record of the S3 event, drops every
    row that contains a null value, and writes the cleaned table back to the
    same bucket with ``raw/`` in the key replaced by ``processed/``.

    Args:
        event: S3 event notification payload; only ``Records[0]`` is read.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` and ``body``. ``statusCode`` is 200 after a
        successful upload, or 400 when the key contains no ``raw/`` segment,
        in which case nothing is read or written.

    Raises:
        KeyError, IndexError: if ``event`` lacks the expected S3 record fields.
    """
    # Function-scope import so the handler does not depend on file-level edits.
    from urllib.parse import unquote_plus

    # Extract bucket and object key from the event
    s3_record = event['Records'][0]['s3']
    bucket = s3_record['bucket']['name']
    # S3 event notifications deliver the key URL-encoded (a space arrives as
    # "+"), so decode it before using it in API calls.
    key = unquote_plus(s3_record['object']['key'])

    new_key = key.replace("raw/", "processed/")
    if new_key == key:
        # No "raw/" segment: the destination would equal the source, so the
        # upload would overwrite the input object (and may re-trigger this
        # function if the bucket trigger is not prefix-filtered).
        return {
            'statusCode': 400,
            'body': f"Skipped {key}: key does not contain 'raw/'"
        }

    # Create the client only once the event is known to be processable.
    s3 = boto3.client('s3')

    # Read Parquet file from S3
    response = s3.get_object(Bucket=bucket, Key=key)
    buffer = io.BytesIO(response['Body'].read())
    df = pd.read_parquet(buffer, engine='pyarrow')

    # Apply transformation (example: drop null rows)
    df_cleaned = df.dropna()

    # Save cleaned DataFrame as Parquet again
    out_buffer = io.BytesIO()
    df_cleaned.to_parquet(out_buffer, index=False, engine='pyarrow')

    # Write back to S3 under the processed/ prefix
    s3.put_object(Bucket=bucket, Key=new_key, Body=out_buffer.getvalue())

    return {
        'statusCode': 200,
        'body': f"Transformed file uploaded to: {new_key}"
    }
