In [None]:
import json
from urllib.parse import unquote_plus

import boto3
import pandas as pd

In [None]:
def lambda_handler(event, context):
    """Convert a JSON object uploaded to S3 into a CSV in the target bucket.

    Reads the bucket name and object key from the first record of the S3
    event, downloads that object, loads its ``"results"`` entries into a
    DataFrame, keeps a fixed set of columns and uploads the result as CSV to
    ``transformed-json-data-to-csv-bucket`` under the same key with a
    ``.csv`` extension.

    Args:
        event: S3 event notification; only ``event['Records'][0]`` is read.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded status message.

    Raises:
        KeyError: if the event lacks the expected S3 fields or the JSON
            document has no ``"results"`` key.
    """
    s3_client = boto3.client('s3')

    s3_info = event['Records'][0]['s3']
    source_bucket = s3_info['bucket']['name']
    # S3 event notifications deliver the object key URL-encoded (a space
    # arrives as '+'), so decode it before using it in any S3 API call.
    object_key = unquote_plus(s3_info['object']['key'])

    target_bucket = 'transformed-json-data-to-csv-bucket'
    selected_columns = [
        'bathrooms', 'bedrooms', 'city', 'homeStatus', 'homeType',
        'livingArea', 'price', 'rentZestimate', 'zipcode',
    ]

    # Keep the file name but swap the extension. Only strip a real '.json'
    # suffix so keys with another (or no) extension are not truncated.
    json_suffix = '.json'
    if object_key.lower().endswith(json_suffix):
        target_file_name = object_key[:-len(json_suffix)]
    else:
        target_file_name = object_key
    target_key = f"{target_file_name}.csv"

    # Block until the uploaded object is visible before reading it.
    waiter = s3_client.get_waiter('object_exists')
    waiter.wait(Bucket=source_bucket, Key=object_key)

    response = s3_client.get_object(Bucket=source_bucket, Key=object_key)

    # response['Body'] is a stream: read it fully, decode the bytes to text
    # and parse the JSON document.
    data = json.loads(response['Body'].read().decode('utf-8'))

    # NOTE(review): assumes data["results"] is a list of flat records
    # (dicts) -- confirm against the producer of these files.
    df = pd.DataFrame(data["results"])

    # reindex() instead of df[...] so an empty "results" list, or a column
    # absent from every record, produces an empty column rather than a
    # KeyError that aborts the whole conversion.
    df = df.reindex(columns=selected_columns)

    csv_data = df.to_csv(index=False)

    s3_client.put_object(
        Bucket=target_bucket,
        Key=target_key,
        Body=csv_data.encode('utf-8'),
    )

    return {
        'statusCode': 200,
        'body': json.dumps('CSV conversion and S3 upload completed succrssfully')
    }