In [1]:
# distinct_requirements: pip install faker
import os
import random
from faker import Faker


In [2]:

# Shared Faker instance. generate_claim_text_file reads it as a module-level
# global to produce the claimant name, incident date and street name.
# NOTE(review): no seed is set in the visible cells (neither for Faker nor for
# `random`), so every run produces different claim documents.
fake = Faker()

def generate_claim_text_file(file_index, output_dir="generated_claims"):
    """Write one synthetic auto-insurance claim form as a plain-text file.

    The claimant name, incident date and street name come from the
    module-level ``fake`` (Faker) instance; the policy number, claim amount
    and damage scenario come from the ``random`` module.

    Args:
        file_index: Value embedded in the file name (``claim<file_index>.txt``).
        output_dir: Directory that receives the file. It is created
            (including parents) when it does not exist yet.

    Returns:
        The path of the file that was written.
    """
    # 1. Generate Synthetic Data
    name = fake.name()
    policy_number = f"POL-{random.randint(100000, 999999)}"
    incident_date = fake.date_this_year().strftime("%Y-%m-%d")

    # Claim amount between $500 and $15,000, rendered with a thousands
    # separator and two decimals (e.g. "$1,234.50").
    amount_val = round(random.uniform(500.00, 15000.00), 2)
    claim_amount = f"${amount_val:,.2f}"

    # Each scenario is a full clause plus the damaged parts. Storing the whole
    # clause (rather than just a participle) keeps every sentence grammatical:
    # "when I was backed into a pole" is wrong, "when I backed into a pole" is not.
    damage_scenarios = [
        ("I was rear-ended", "bumper and trunk"),
        ("I was side-swiped", "driver side door and mirror"),
        ("I was hit by hail", "hood and roof"),
        ("I backed into a pole", "rear taillight and fender"),
    ]
    scenario, part = random.choice(damage_scenarios)
    description = (
        f"I was driving on {fake.street_name()} when {scenario}. "
        f"The {part} has significant damage. "
        "There were no injuries, but the car is not drivable."
    )

    # 2. Format as a Plain Text Document
    # This structure mimics a filled-out web form or email.
    # The backslash after the opening quotes suppresses the leading blank line,
    # and the closing quotes sit at column 0 so the file ends with exactly one
    # newline instead of trailing indentation.
    document_content = f"""\
INSURANCE CLAIM SUBMISSION FORM
===============================

CLAIMANT DETAILS
----------------
Full Name: {name}
Policy Number: {policy_number}

INCIDENT INFORMATION
--------------------
Date of Incident: {incident_date}
Estimated Repair Cost (Claim Amount): {claim_amount}

DESCRIPTION OF INCIDENT
-----------------------
{description}

-----------------------
End of Document
"""

    # 3. Save to file
    # exist_ok=True avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)
    filename = os.path.join(output_dir, f"claim{file_index}.txt")

    with open(filename, "w", encoding="utf-8") as f:
        f.write(document_content)

    print(f"Generated: {filename}")
    return filename

# Entry point: write claim1.txt through claim5.txt into the default output folder.
if __name__ == "__main__":
    print("Generating synthetic text claims...")
    for i in range(1, 6):
        # file_index becomes the numeric suffix of the generated file name.
        generate_claim_text_file(file_index=i)
    print("\nDone! Upload the 'generated_claims' folder contents to your S3 bucket.")

Generating synthetic text claims...
Generated: generated_claims/claim1.txt
Generated: generated_claims/claim2.txt
Generated: generated_claims/claim3.txt
Generated: generated_claims/claim4.txt
Generated: generated_claims/claim5.txt

Done! Upload the 'generated_claims' folder contents to your S3 bucket.


In [3]:
import boto3
from pathlib import Path

# Initialize S3 client (region pinned to us-east-1; no credentials are passed
# explicitly here, so they must come from the environment boto3 runs in).
s3_client = boto3.client('s3', region_name='us-east-1')

# Configuration
# local_folder matches the default output_dir of generate_claim_text_file, so
# the upload picks up whatever that function wrote with its default argument.
local_folder = "generated_claims"
# Destination bucket for the uploaded claim files.
s3_bucket = "cert-genai-dev"
# Key prefix; it is concatenated directly with the file name when the key is
# built, so the trailing slash is required to get "bonus_1.1/<file>".
s3_prefix = "bonus_1.1/"

# Upload all text files from the generated_claims folder
def upload_claims_to_s3(folder=None, bucket=None, prefix=None, client=None):
    """Upload every ``*.txt`` file found directly inside a folder to S3.

    All arguments are optional. When one is omitted, the corresponding
    module-level setting (``local_folder``, ``s3_bucket``, ``s3_prefix``,
    ``s3_client``) is used, so a bare ``upload_claims_to_s3()`` call keeps
    working as before.

    A failed upload does not abort the run: the error is printed, the
    remaining files are still attempted, and the final summary reports the
    failures instead of claiming success.

    Args:
        folder: Directory scanned (non-recursively) for ``.txt`` files.
        bucket: Name of the destination bucket.
        prefix: String prepended verbatim to each file name to form the key.
        client: Object exposing
            ``upload_file(Filename=..., Bucket=..., Key=...)``.

    Returns:
        List of the S3 keys that were uploaded successfully, in upload order.
        The list is empty when the folder is missing or holds no ``.txt``
        files.
    """
    # Resolve defaults lazily so explicit arguments never touch the globals.
    folder = local_folder if folder is None else folder
    bucket = s3_bucket if bucket is None else bucket
    prefix = s3_prefix if prefix is None else prefix
    client = s3_client if client is None else client

    source_dir = Path(folder)

    if not source_dir.exists():
        print(f"Error: Folder '{folder}' does not exist. Please generate claims first.")
        return []

    # Sort for a deterministic upload order (glob order is unspecified) and
    # skip anything that merely has a .txt name but is not a regular file.
    txt_files = sorted(p for p in source_dir.glob("*.txt") if p.is_file())

    if not txt_files:
        print(f"No .txt files found in '{folder}' folder.")
        return []

    print(f"Found {len(txt_files)} file(s) to upload to s3://{bucket}/{prefix}")
    print("-" * 60)

    uploaded_keys = []
    failed_names = []

    for file_path in txt_files:
        # Construct S3 key
        s3_key = f"{prefix}{file_path.name}"

        try:
            client.upload_file(
                Filename=str(file_path),
                Bucket=bucket,
                Key=s3_key
            )
        except Exception as e:
            # Deliberately broad: one bad file must not stop the batch.
            failed_names.append(file_path.name)
            print(f"✗ Failed to upload {file_path.name}: {e}")
        else:
            uploaded_keys.append(s3_key)
            print(f"✓ Uploaded: {file_path.name} -> s3://{bucket}/{s3_key}")

    print("-" * 60)
    if failed_names:
        # Do not report success when some files never reached the bucket.
        print(
            f"Upload finished with {len(failed_names)} failure(s): "
            f"{', '.join(failed_names)}"
        )
    else:
        print(f"Upload complete! Files are now in s3://{bucket}/{prefix}")

    return uploaded_keys

# Run the upload. The guard is true when this code executes as the main module
# (as it did in this notebook, per the captured output) and false on import.
if __name__ == "__main__":
    upload_claims_to_s3()


Found 5 file(s) to upload to s3://cert-genai-dev/bonus_1.1/
------------------------------------------------------------
✓ Uploaded: claim1.txt -> s3://cert-genai-dev/bonus_1.1/claim1.txt
✓ Uploaded: claim1.txt -> s3://cert-genai-dev/bonus_1.1/claim1.txt
✓ Uploaded: claim2.txt -> s3://cert-genai-dev/bonus_1.1/claim2.txt
✓ Uploaded: claim2.txt -> s3://cert-genai-dev/bonus_1.1/claim2.txt
✓ Uploaded: claim3.txt -> s3://cert-genai-dev/bonus_1.1/claim3.txt
✓ Uploaded: claim3.txt -> s3://cert-genai-dev/bonus_1.1/claim3.txt
✓ Uploaded: claim4.txt -> s3://cert-genai-dev/bonus_1.1/claim4.txt
✓ Uploaded: claim4.txt -> s3://cert-genai-dev/bonus_1.1/claim4.txt
✓ Uploaded: claim5.txt -> s3://cert-genai-dev/bonus_1.1/claim5.txt
------------------------------------------------------------
Upload complete! Files are now in s3://cert-genai-dev/bonus_1.1/
✓ Uploaded: claim5.txt -> s3://cert-genai-dev/bonus_1.1/claim5.txt
------------------------------------------------------------
Upload complete! Files