In [1]:
import json
import os

import boto3
import pandas as pd
from fhir.resources.patient import Patient
from snowflake.connector import connect

# AWS S3 Configuration
# The bucket and object key may be overridden through environment variables;
# the fallbacks are the original placeholder values.
s3 = boto3.client('s3')
bucket_name = os.environ.get('S3_BUCKET_NAME', 'your-s3-bucket')
raw_data_key = os.environ.get('S3_RAW_DATA_KEY', 'raw/healthcare_data.csv')

# Snowflake Configuration
# Connection settings are read from the environment so that real credentials
# never have to be written into this file. The fallbacks are placeholders, not
# working credentials: connect() will fail until they are overridden.
snowflake_connection = connect(
    user=os.environ.get('SNOWFLAKE_USER', 'your_user'),
    password=os.environ.get('SNOWFLAKE_PASSWORD', 'your_password'),
    account=os.environ.get('SNOWFLAKE_ACCOUNT', 'your_account'),
    warehouse=os.environ.get('SNOWFLAKE_WAREHOUSE', 'your_warehouse'),
    database=os.environ.get('SNOWFLAKE_DATABASE', 'your_database'),
    schema=os.environ.get('SNOWFLAKE_SCHEMA', 'your_schema'),
)

# Step 1: Extract Data from S3
def extract_data_from_s3():
    """Fetch the raw CSV object from S3 and parse it into a DataFrame.

    The object is located by the module-level ``bucket_name`` and
    ``raw_data_key`` and fetched through the module-level ``s3`` client.

    Returns:
        pandas.DataFrame: The parsed contents of the CSV object.
    """
    response = s3.get_object(Key=raw_data_key, Bucket=bucket_name)
    # The response body is handed to pandas directly as a file-like object.
    frame = pd.read_csv(response['Body'])
    record_count = len(frame)
    print(f"Data extracted from S3 with {record_count} records.")
    return frame

# Step 2: Transform Data (FHIR Standardization)
def transform_data_to_fhir(raw_data):
    """Convert raw patient rows into a DataFrame of FHIR Patient dictionaries.

    Each row of ``raw_data`` is expected to provide the columns
    ``patient_id``, ``last_name``, ``first_name``, ``gender`` and ``dob``.

    Args:
        raw_data: DataFrame with one patient per row.

    Returns:
        pandas.DataFrame: One row per patient, built from ``Patient.dict()``.
    """

    def build_patient(record):
        # NOTE(review): construct() presumably bypasses model validation in
        # pydantic-based fhir.resources -- confirm against the installed version.
        human_name = {"family": record['last_name'], "given": [record['first_name']]}
        return Patient.construct(
            id=record['patient_id'],
            name=[human_name],
            gender=record['gender'],
            birthDate=record['dob'],
        )

    resources = [build_patient(record).dict() for _, record in raw_data.iterrows()]
    fhir_df = pd.DataFrame(resources)
    print("Data transformed to FHIR standard.")
    return fhir_df

# Step 3: Load Data into Snowflake
def _as_sql_text(value):
    """Return ``value`` as text for a STRING column, or None for a missing value."""
    if isinstance(value, (dict, list, tuple)):
        # Structured FHIR elements (e.g. the ``name`` list) are stored as JSON.
        return json.dumps(value, default=str)
    if pd.isna(value):
        # None/NaN is bound as SQL NULL rather than the text 'None'/'nan'.
        return None
    return str(value)


def load_data_to_snowflake(fhir_df, connection=None):
    """Recreate the ``healthcare_data`` table and insert the FHIR rows.

    Values are sent as bound parameters instead of being interpolated into
    the SQL text, so quotes in the data (for example the name "O'Brien", or
    the text form of the ``name`` list) can neither break the statement nor
    inject SQL.

    Args:
        fhir_df: DataFrame providing the columns ``id``, ``name``, ``gender``
            and ``birthDate``. A DataFrame with no rows only recreates the
            table.
        connection: Optional DB-API connection to use. Defaults to the
            module-level ``snowflake_connection``.

    Raises:
        KeyError: If ``fhir_df`` has rows but lacks one of the columns above.
    """
    conn = snowflake_connection if connection is None else connection
    columns = ("id", "name", "gender", "birthDate")
    rows = [
        tuple(_as_sql_text(record[column]) for column in columns)
        for record in fhir_df.to_dict("records")
    ]

    cursor = conn.cursor()
    try:
        cursor.execute("CREATE OR REPLACE TABLE healthcare_data (id STRING, name STRING, gender STRING, birthDate STRING)")
        if rows:
            # Assumes the connector's default ``pyformat`` paramstyle (%s
            # placeholders) is in effect -- this file does not change it.
            cursor.executemany(
                "INSERT INTO healthcare_data (id, name, gender, birthDate) "
                "VALUES (%s, %s, %s, %s)",
                rows,
            )
    finally:
        # Release the cursor even when a statement fails.
        cursor.close()
    print("Data loaded into Snowflake.")

# Main Pipeline Execution
if __name__ == "__main__":
    try:
        # Step 1: Extract
        raw_data = extract_data_from_s3()

        # Step 2: Transform
        fhir_transformed_data = transform_data_to_fhir(raw_data)

        # Step 3: Load
        load_data_to_snowflake(fhir_transformed_data)

        print("ETL Pipeline completed successfully.")
    finally:
        # The module-level connection is opened when the file is loaded;
        # close it whether or not the pipeline succeeded so the session is
        # not left open.
        snowflake_connection.close()


InterfaceError: 250003 (08001): 404 Not Found: post https://your_account.snowflakecomputing.com:443/session/v1/login-request?request_id=cb79d36c-507f-4212-a5a1-ed20eb30bf50&databaseName=your_database&schemaName=your_schema&warehouse=your_warehouse&request_guid=f1a9f621-812e-42fd-9bca-c41d40e5d0cb