In [None]:
import boto3
import pandas as pd
import io
import os

In [None]:
def read_s3_csv(account_id, bucket_name, file_key, region_name='us-east-1'):
    """
    Read a CSV file from S3 through S3 Access Grants and return its first 5 rows.

    Temporary credentials for ``s3://{bucket_name}/{file_key}`` are requested
    with ``s3control.get_data_access`` (``READ`` permission) and are then used
    to download the object. The file is parsed without a header row, so the
    columns are labelled 0..N-1.

    Parameters:
    -----------
    account_id : str
        AWS account ID passed to ``get_data_access`` as ``AccountId``
    bucket_name : str
        Name of the S3 bucket
    file_key : str
        Path to the CSV file in S3 (e.g., 'folder/file.csv')
    region_name : str
        AWS region used for both the S3 Control client and the S3 client
        (default: 'us-east-1')

    Returns:
    --------
    pandas.DataFrame
        First 5 rows of the CSV file. The rows are returned rather than
        printed, so a notebook cell ending with this call renders them.

    Raises:
    -------
    Exception
        Any error raised while requesting credentials, downloading or parsing
        the object is reported on stdout and then re-raised unchanged.
    """
    try:
        # Get temporary credentials for accessing S3 data using user profile role.
        # The client is pinned to region_name so the argument is actually honoured.
        s3control_client = boto3.client('s3control', region_name=region_name)
        response = s3control_client.get_data_access(
            AccountId=account_id,
            Target=f's3://{bucket_name}/{file_key}',
            Permission='READ'
        )
        credentials = response['Credentials']

        # Create an S3 client with the temporary credentials, in the same region
        s3_client = boto3.client(
            's3',
            region_name=region_name,
            aws_access_key_id=credentials['AccessKeyId'],
            aws_secret_access_key=credentials['SecretAccessKey'],
            aws_session_token=credentials['SessionToken']
        )

        # Download the object and parse it as a header-less CSV
        obj = s3_client.get_object(Bucket=bucket_name, Key=file_key)
        data = obj['Body'].read().decode('utf-8')
        df = pd.read_csv(io.StringIO(data), header=None)

    except Exception as e:
        print(f"Error reading CSV from S3: {str(e)}")
        raise

    print(f"Top 5 rows from s3://{bucket_name}/{file_key}")
    return df.head()

In [None]:
# Ask the AWS SDK to use regional STS endpoints; this must be set before
# any STS client is created in the cells below.
os.environ.update(AWS_STS_REGIONAL_ENDPOINTS="regional")

In [None]:
# User A reads a dataset they have been granted access to.

# Region configured for the default boto3 session
boto3_session = boto3.Session()
region = boto3_session.region_name

# Look up the AWS account ID at runtime through STS
sts_client = boto3.client('sts', region_name=region)
account_id = sts_client.get_caller_identity()['Account']

# Set bucket_name to the GrantsBucketName value from the CloudFormation output
bucket_name = "<your-GrantsBucketName-from-cloudformation-output>"
file_key = "UserA/abalone.csv"  # object under User A's prefix

# Two 80-character separator lines
print("=" * 80, "=" * 80, sep="\n")

read_s3_csv(
    account_id=account_id,
    bucket_name=bucket_name,
    file_key=file_key,
    region_name=region,
)


<div style="padding: 15px; background-color: #d1ecf1; border-left: 5px solid #0c5460; color: #0c5460;">
<strong>ℹ️ Note:</strong> The following cell will raise an error. This is expected, because UserA does not have permission to access UserB's dataset.
</div>


In [None]:
# User A attempts to read User B's dataset, which they are not authorized to access.
# Reuses account_id, bucket_name and region from the previous cell.

file_key = "UserB/abalone.csv"  # object under User B's prefix

# Two 80-character separator lines
print("=" * 80, "=" * 80, sep="\n")

read_s3_csv(
    account_id=account_id,
    bucket_name=bucket_name,
    file_key=file_key,
    region_name=region,
)

