In [1]:
import pandas as pd
import os
from pathlib import Path
import boto3
from io import BytesIO
import logging
from botocore.exceptions import ClientError

In [2]:

def get_aws_credentials():
    """Build a boto3 session from AWS settings read from environment variables.

    Reads ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY`` and
    ``AWS_SESSION_TOKEN`` from the environment, plus ``AWS_REGION``
    (falling back to ``'us-east-1'`` when it is unset), and hands them to
    ``boto3.Session``. Any variable that is not set is passed as ``None``.

    Returns:
        The ``boto3.Session`` built from those values.

    Raises:
        ClientError: re-raised after being logged, if it is raised while
            the session is being built.
    """
    try:
        # Try to obtain the credentials from environment variables first.
        # The session token is forwarded together with the key pair:
        # temporary (STS) credentials are only valid with their token, and
        # passing the key pair explicitly without it would drop the token.
        session = boto3.Session(
            aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
            aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
            aws_session_token=os.getenv('AWS_SESSION_TOKEN'),
            region_name=os.getenv('AWS_REGION', 'us-east-1')
        )
        logging.info("Credenciales de AWS obtenidas de variables de ambiente.")

        return session

    except ClientError as e:
        # NOTE(review): building a Session does not appear to call AWS, so
        # this handler is probably never reached -- confirm before relying
        # on it for credential validation.
        logging.error(f"Error al obtener credenciales de AWS: {str(e)}")
        raise
    


In [3]:
# Build the boto3 session once; the S3 client used by the loading cells is
# created from it.
aws_session = get_aws_credentials()

In [12]:

# Create the S3 client, then download the policies parquet object from the
# `silver/` prefix of the data-lake bucket and parse it into a DataFrame.
s3_client = aws_session.client('s3')
response = s3_client.get_object(
    Bucket='wd-insurance-datalake',
    Key='silver/erp_policies.parquet',
)
df_policies = pd.read_parquet(
    BytesIO(response['Body'].read())
)
df_policies.head()


Unnamed: 0,policy_id,client_id,vehicle_id,coverage,status,premium
0,18922b87,1bc34e64,774ef45b,Premium,Activa,2894.73
1,f3e3b9d7,61e074a6,93b83dd3,Básica,Vencida,1856.94
2,01696bee,f9b5f135,458762d7,Intermedia,Cancelada,1800.8
3,f76f510d,a74cd795,6b570cd4,Intermedia,Vencida,1603.19
4,833fe1f1,8c124a9b,8c5ef353,Intermedia,Vencida,1591.14


In [13]:
# Download the vehicles parquet object from the `silver/` prefix of the
# data-lake bucket and parse it into a DataFrame.
response = s3_client.get_object(
    Bucket='wd-insurance-datalake',
    Key='silver/erp_vehicles.parquet',
)
df_vehicles = pd.read_parquet(
    BytesIO(response['Body'].read())
)
df_vehicles.head()

Unnamed: 0,vehicle_id,client_id,brand,model,year,plate
0,a901d392,8c8c4da2,Chevrolet,Cruze,2001,949XYZ
1,f6cd9749,67e657fb,Toyota,Cruze,2020,517XYZ
2,54c57cb5,83fefb39,Honda,F-150,2000,864XYZ
3,0c8c5866,04a7617e,Chevrolet,Cruze,2012,673XYZ
4,57850382,41adac87,Chevrolet,Civic,2012,988XYZ


In [14]:
# Download the claims parquet object from the `silver/` prefix of the
# data-lake bucket and parse it into a DataFrame.
response = s3_client.get_object(
    Bucket='wd-insurance-datalake',
    Key='silver/erp_claims.parquet',
)
df_claims = pd.read_parquet(
    BytesIO(response['Body'].read())
)
df_claims.head()

Unnamed: 0,claim_id,policy_id,claim_date,claim_type,amount
0,89f4d61f,d6efbe83,2023-02-02,Daños Por Clima,4095.24
1,bbfb9056,85fc804e,2020-08-11,Robo,14055.83
2,0d07fc85,bbb1c117,2024-10-17,Robo,6884.64
3,71c75072,a8b4eff8,2020-05-27,Otros,7920.82
4,50539161,011793b7,2020-09-27,Daños Por Clima,3415.67


In [16]:
# Download the payments parquet object from the `silver/` prefix of the
# data-lake bucket and parse it into a DataFrame.
response = s3_client.get_object(
    Bucket='wd-insurance-datalake',
    Key='silver/erp_payments.parquet',
)
df_payments = pd.read_parquet(
    BytesIO(response['Body'].read())
)
df_payments.head()

Unnamed: 0,payment_id,policy_id,amount,payment_date
0,639638f4,31f816c6,2131.07,2022-07-09
1,d08218cf,4bd0562c,2029.21,2024-09-17
2,2b7e5c22,65e15ba6,375.82,2022-06-11
3,1a60f7bd,5103529d,2656.2,2021-01-25
4,08948c86,059257b6,987.97,2020-08-27


In [21]:
### ANALYZE CLIENTS
# Load the ERP and CRM client tables from the `silver/` prefix of the
# data-lake bucket.
response = s3_client.get_object(Bucket='wd-insurance-datalake', Key='silver/erp_clients.parquet')
df_clients = pd.read_parquet(BytesIO(response['Body'].read()))

response = s3_client.get_object(Bucket='wd-insurance-datalake', Key='silver/crm_clients.parquet')
df_crm = pd.read_parquet(BytesIO(response['Body'].read()))

# Only the last expression of a cell is rendered, so a bare
# `df_clients.head()` placed between the two loads was silently discarded;
# the ERP preview is shown by the cell that follows this one.
df_crm.columns




Index(['client_id', 'name', 'email', 'phone', 'address', 'iban_account_number',
       'company_name', 'client_type', 'risk_level', 'marketing_opt_in'],
      dtype='object')

In [18]:
# Preview the first rows of the ERP clients table.
# NOTE(review): the rendered preview includes name, email, phone and address
# columns -- confirm the data is synthetic (or clear the output) before
# sharing or committing this notebook.
df_clients.head()

Unnamed: 0,client_id,name,email,phone,address
0,13f38393,Johnny Henderson,Campbellkristen@Example.Com,001-212-876-82136115,"47113 Hansen Squares\nPort Ashleeshire, Hi 17392"
1,f450be60,Keith White,Sally27@Example.Org,5387984382,"490 Sarah Mountains Apt. 804\nPattersonland, A..."
2,85b37a7d,Nicole Tate,Usmith@Example.Com,+1-458-941-514704422,"Psc 2384, Box 8489\nApo Aa 73955"
3,a3f72bdd,Debra Robinson,Markliu@Example.Net,,"120 Peter Pines\nNew Christopher, Nm 35080"
4,d88cbf82,Hannah Davis,Kathleen55@Example.Com,+1-476-968-439376457,"528 Sampson Walk\nPort Sarah, Mo 15043"
