# In [None]:
import json
import boto3
import os
import gzip
import io
import logging
from datetime import datetime, time, timedelta
import pytz
from botocore.exceptions import ClientError
from google.cloud import storage
import urllib.parse

# Set the root logger's level to INFO so the logging.info(...) calls in this
# module are not filtered out by the default WARNING threshold.
logging.getLogger().setLevel(logging.INFO)

def get_secret(secret_name="aws-gcs-ascent-write-access", region_name="ap-south-1"):
    """Retrieve the GCS auth key stored in AWS Secrets Manager.

    Args:
        secret_name: Id of the secret to read. Defaults to the secret this
            module has always used.
        region_name: AWS region of the Secrets Manager endpoint.

    Returns:
        The value stored under the ``json_key`` field of the secret's JSON
        payload (``None`` if that field is absent), or ``None`` when the
        response carries no ``SecretString``.

    Raises:
        ClientError: If Secrets Manager rejects the request; the error is
            logged and re-raised unchanged.
        json.JSONDecodeError: If ``SecretString`` is not valid JSON.
    """
    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name,
    )

    try:
        response = client.get_secret_value(SecretId=secret_name)
    except ClientError as e:
        # Use logging (like the rest of this module) instead of print, and a
        # bare raise so the original traceback is preserved.
        logging.error("Error retrieving secret: %s", e)
        raise

    secret = response.get('SecretString')
    if not secret:
        logging.warning("SecretString is empty or not in a valid format.")
        return None

    # The secret is stored as a JSON string; the GCS key lives under "json_key".
    return json.loads(secret).get("json_key")

def assume_role(role_arn):
    """Assume the given AWS cross-account role through STS.

    Args:
        role_arn: ARN of the role to assume.

    Returns:
        The ``Credentials`` entry of the STS ``assume_role`` response
        (the temporary credentials for the assumed role).
    """
    sts = boto3.client('sts')
    response = sts.assume_role(
        RoleArn=role_arn,
        RoleSessionName="LambdaS3AccessSession",
    )

    # Hand back only the temporary credentials portion of the response
    return response['Credentials']


def compress_file(file_content):
    """Gzip ``file_content`` in memory, to save on egress data transfer cost.

    Args:
        file_content: Bytes to compress.

    Returns:
        An ``io.BytesIO`` holding the gzip stream, rewound to position 0.
    """
    buffer = io.BytesIO()
    # gzip.open accepts an existing file object and wraps it in a GzipFile
    with gzip.open(buffer, 'wb') as gz_stream:
        gz_stream.write(file_content)

    # Rewind so the caller can read the compressed bytes from the start
    buffer.seek(0)
    return buffer


def _parse_s3_record(event):
    """Return ``(bucket_name, decoded_object_key)`` from the first S3 record.

    Raises:
        ValueError: If the event does not contain the expected S3 fields.
    """
    try:
        s3_info = event["Records"][0]["s3"]
        bucket = s3_info["bucket"]["name"]
        raw_key = s3_info["object"]["key"]
    except (KeyError, IndexError, TypeError) as exc:
        raise ValueError(f"malformed S3 event ({exc!r})") from exc

    # S3 notifications URL-encode the key and encode spaces as '+', so
    # unquote_plus (not unquote) is needed to recover the real object key.
    return bucket, urllib.parse.unquote_plus(raw_key)


def _split_object_key(object_key):
    """Return ``(event_key, file_name, table_name)`` parsed from the object key.

    ``event_key`` is the last two path segments, ``file_name`` the last one and
    ``table_name`` the fourth segment from the end.

    Raises:
        ValueError: If the key has fewer than four '/'-separated segments or
            an empty file/table segment.
    """
    segments = object_key.split('/')
    if len(segments) < 4 or not segments[-1] or not segments[-4]:
        raise ValueError(
            f"object key '{object_key}' needs at least four path segments "
            "with a non-empty table name and file name"
        )
    return segments[-2:], segments[-1], segments[-4]


def _upload_to_gcs(gcs_bucket_name, target_path, payload):
    """Upload the gzip ``payload`` bytes to ``target_path`` in the GCS bucket."""
    access_key = get_secret()
    if not access_key:
        raise ValueError("GCS service account key could not be retrieved")

    # The secret value is itself a JSON document (the service account key)
    gcs_client = storage.Client.from_service_account_info(json.loads(access_key))
    blob = gcs_client.get_bucket(gcs_bucket_name).blob(target_path)

    # Give the GCS client its own stream positioned at byte 0
    blob.upload_from_file(io.BytesIO(payload), content_type='application/gzip')


def lambda_handler(event, context):
    """Compress the S3 object named in the trigger event and copy it to S3 and GCS.

    The first record of the S3 event identifies the source object. The object
    is downloaded with credentials from an assumed cross-account role, gzipped,
    written to ``prod/dataplatform_raw_payload/<table_name>/dt=<IST date>/<file_name>``
    in S3, and then uploaded to the same path in a GCS bucket named after the
    event bucket. GCS credentials come from AWS Secrets Manager.

    Args:
        event: S3 notification event; only ``Records[0]`` is processed.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` and a JSON-encoded ``body``:
        400 if the event or object key cannot be parsed, 500 if reading,
        compressing or the S3 upload fails, 200 otherwise. A failed GCS upload
        is logged but still yields 200 (best-effort, as before).

    Raises:
        Exception: Anything raised while assuming the role or creating the
            S3 client propagates to the Lambda runtime.
    """
    # Capturing the event details; reject events we cannot interpret instead
    # of failing with an unhandled KeyError/IndexError.
    try:
        event_bucket, event_object_key = _parse_s3_record(event)
        event_key, file_name, table_name = _split_object_key(event_object_key)
    except ValueError as exc:
        logging.error("Rejecting event: %s", exc)
        return {
            'statusCode': 400,
            'body': json.dumps(f"Invalid S3 event: {exc}")
        }

    logging.info("Event Bucket: %s", event_bucket)
    logging.info("Event Object Key: %s", event_object_key)
    logging.info("Event Key: %s", event_key)
    logging.info("Event file Name: %s", file_name)
    logging.info("Event table Name: %s", table_name)

    # Role ARN from the S3 account
    role_arn = 'arn:aws:iam::<AWS_Account_Id>:role/as-s3-role-ahwspl-skull-etl-rw'

    # Assume the role to get temporary credentials for cross AWS account access
    # Required for AWS cross account access s3<>Lambda trigger
    credentials = assume_role(role_arn)

    s3_client = boto3.client(
        's3',
        aws_access_key_id=credentials['AccessKeyId'],
        aws_secret_access_key=credentials['SecretAccessKey'],
        aws_session_token=credentials['SessionToken']
    )

    # NOTE(review): the original code passed an undefined name `bucket_name`
    # to upload_fileobj, which raised NameError on every invocation. The
    # destination now comes from the TARGET_S3_BUCKET environment variable and
    # falls back to the event bucket (the same name the GCS upload uses).
    # Confirm the intended bucket, and make sure the S3 trigger does not fire
    # on the target prefix if the event bucket is used.
    bucket_name = os.environ.get('TARGET_S3_BUCKET', event_bucket)

    try:
        logging.info("Processing file: %s", file_name)

        # Get the file content and file size
        file_obj = s3_client.get_object(Bucket=event_bucket, Key=event_object_key)
        file_content = file_obj['Body'].read()
        logging.info("Original file size: %s bytes", len(file_content))

        # Keep the compressed payload as bytes so each upload below gets its
        # own fresh stream; a shared BytesIO is left exhausted by the first
        # upload and the second one would not see the data.
        compressed_bytes = compress_file(file_content).getvalue()
        logging.info("Compressed file size: %s bytes", len(compressed_bytes))

        # Define target path, partitioned by the current date in IST
        utc_time = datetime.now(pytz.utc)
        ist_time = utc_time.astimezone(pytz.timezone('Asia/Kolkata'))

        # Log both UTC and IST times as YYYY-MM-DD HH:MM plus zone name/offset
        logging.info("UTC Time: %s", utc_time.strftime('%Y-%m-%d %H:%M %Z%z'))
        logging.info("IST Time: %s", ist_time.strftime('%Y-%m-%d %H:%M %Z%z'))

        target_path = 'prod/dataplatform_raw_payload/' + table_name + '/dt=' + ist_time.strftime(
            '%Y-%m-%d') + '/' + file_name

        # Upload the compressed file to the target S3 path
        s3_client.upload_fileobj(
            io.BytesIO(compressed_bytes),
            bucket_name,
            target_path,
            ExtraArgs={'ContentType': 'application/gzip'}
        )
        logging.info("Compressed file uploaded to: %s", target_path)
    except Exception as e:
        # Top-level boundary: log with traceback and report failure
        logging.exception("Error accessing S3: %s", e)
        return {
            'statusCode': 500,
            'body': json.dumps(f"Error accessing S3: {str(e)}")
        }

    # lambda to GCS Access (best-effort: failures are logged, not propagated)
    try:
        _upload_to_gcs(event_bucket, target_path, compressed_bytes)
        logging.info("Compressed file uploaded to: gs://%s/%s", event_bucket, target_path)
    except Exception:
        # NOTE(review): the handler still returns 200 when the GCS upload
        # fails, matching the previous behavior — confirm this is intended.
        logging.exception("GCS upload failed for gs://%s/%s", event_bucket, target_path)

    return {
        'statusCode': 200,
        'body': json.dumps('Files compressed and uploaded successfully.')
    }
