In [None]:
import os
import boto3
from pathlib import Path
import json

# **Upload Files**

**1. Generar credenciales**

En una terminal de PowerShell, usando AWS CLI, se ejecuta:

In [None]:
# PowerShell command to run in a terminal (it is not executed by this kernel).
# Kept as a raw string so the backslash in the Windows path stays literal;
# in a normal string "\a" is the BEL escape and would corrupt the path shown.
r"""
aws sts get-session-token --duration-seconds 14400 --output json | Out-File -FilePath "$env:USERPROFILE\aws-temp-creds.json" -Encoding utf8
"""

**2. Cargar credenciales**

Ejecutar la celda para cargar credenciales:

In [None]:
# Credentials: JSON file expected in the user's home directory.
path = Path.home() / "aws-temp-creds.json"

# "utf-8-sig" reads the file correctly whether or not it starts with a BOM.
with path.open(encoding="utf-8-sig") as f:
    creds = json.load(f)["Credentials"]

# Copy each credential field into the environment variable of the same role.
for env_name, field in (
    ("AWS_ACCESS_KEY_ID", "AccessKeyId"),
    ("AWS_SECRET_ACCESS_KEY", "SecretAccessKey"),
    ("AWS_SESSION_TOKEN", "SessionToken"),
):
    os.environ[env_name] = creds[field]
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"

# Print the caller identity returned by STS for the loaded credentials.
sts = boto3.client("sts")
print(sts.get_caller_identity())

{'UserId': 'AIDAVNFKRAMYBQN25WWQO', 'Account': '371872301872', 'Arn': 'arn:aws:iam::371872301872:user/ezequiel.coggiola', 'ResponseMetadata': {'RequestId': '55387ba0-0add-4981-9734-03c04bc183e5', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '55387ba0-0add-4981-9734-03c04bc183e5', 'x-amz-sts-extended-request-id': 'MTp1cy1lYXN0LTE6UzoxNzY0ODkxMzA5NjEyOlI6elBnZ2d1eDU=', 'content-type': 'text/xml', 'content-length': '414', 'date': 'Thu, 04 Dec 2025 23:35:09 GMT'}, 'RetryAttempts': 0}}


**3. Conexión a S3**

Ejecutar celda para iniciar conexión a S3:

In [None]:
# S3 connection

# Resolve the region from AWS_REGION first, then AWS_DEFAULT_REGION (the
# variable the credentials cell sets), and only then the hard-coded fallback,
# so a region configured through either variable is honoured here.
region = (
    os.environ.get("AWS_REGION")
    or os.environ.get("AWS_DEFAULT_REGION")
    or "us-east-1"
)

boto_sess = boto3.Session(region_name=region)

# Client used by the cells below to upload and list objects.
s3 = boto_sess.client("s3")

**4. Variables**

Definir variables usadas para desplegar infraestructura y ejecutar:

In [None]:
# User name chosen when the Terraform stack was deployed.
# NOTE(review): not referenced by any code visible in this notebook — confirm it is still needed.
usuario = "ezequiel"

# Dataset name; upload_files() writes it into the "dataset" object tag.
dataset_name = "ecommerce"

# Destination bucket, derived from the dataset name.
bucket_name = f"{dataset_name}--2025"

# Local folder containing the files to upload.
local_folder = "../dataset-ecommerce/month=11"

**5. Carga de archivos**

Ejecutar para cargar archivos en S3:

In [None]:
def upload_files(local_folder, bucket_name, layer="raw", file_types=None,
                 s3_client=None, dataset=None):
    """
    Upload the data files found directly inside ``local_folder`` to S3,
    placing each one under a per-dataset prefix derived from its file name.

    Example keys for ``layer="raw"``:
        raw/customers/customers.csv
        raw/orders/orders.csv

    Args:
        local_folder: Directory to scan. Only regular files directly inside
            it are considered; sub-directories are skipped.
        bucket_name: Destination bucket.
        layer: First segment of every object key; also stored in the
            ``layer`` tag.
        file_types: One extension or an iterable of extensions to upload
            (case-insensitive, leading dot optional). A bare string such as
            ``"csv"`` is treated as a single extension. Defaults to
            ``["csv", "parquet"]``.
        s3_client: Object exposing
            ``upload_file(path, bucket, key, ExtraArgs=...)``. Defaults to
            the notebook-level ``s3`` client.
        dataset: Value for the ``dataset`` tag. Defaults to the
            notebook-level ``dataset_name``.

    Returns:
        None. One status line is printed per attempted upload; a failed
        upload is reported and does not stop the remaining files.
    """
    # Function-scope import so the cell does not depend on the imports cell.
    from urllib.parse import quote, urlencode

    if file_types is None:
        file_types = ["csv", "parquet"]
    elif isinstance(file_types, str):
        # Without this, `ext in "csv"` is a substring test and would also
        # accept extensions such as "c", "cs" or "sv".
        file_types = [file_types]
    allowed_exts = {str(item).lower().lstrip(".") for item in file_types}
    allowed_exts.discard("")  # a file without extension never qualifies

    # Fall back to the notebook globals only when no override is given.
    client = s3 if s3_client is None else s3_client
    dataset_tag = dataset_name if dataset is None else dataset

    # The tag set is sent as URL query parameters, so values must be
    # percent-encoded (the owner value contains a space). It is identical
    # for every file, hence built once outside the loop.
    tags = urlencode(
        {
            "layer": layer,
            "owner": "Ezequiel Coggiola",
            "team": "Datera",
            "env": "dev",
            "dataset": dataset_tag,
        },
        quote_via=quote,
    )

    print(f"📂 Procesando carga para capa: '{layer}'...")

    # Sorted so the upload order (and the printed log) is reproducible.
    for filename in sorted(os.listdir(local_folder)):
        local_path = os.path.join(local_folder, filename)

        # Only regular files with an accepted extension are uploaded.
        if not os.path.isfile(local_path):
            continue

        # splitext yields "" for names without a real extension
        # (e.g. "README" or ".csv"), so those are filtered out here.
        ext = os.path.splitext(filename)[1].lstrip(".").lower()
        if ext not in allowed_exts:
            continue

        # Dataset folder = file name up to its first dot.
        dataset_folder = filename.split(".")[0]
        if not dataset_folder:
            # Dot-leading names would produce an empty key segment ("raw//...").
            continue

        # S3 key, e.g. raw/customers/customers.csv
        s3_key = f"{layer}/{dataset_folder}/{filename}"

        try:
            client.upload_file(
                local_path,
                bucket_name,
                s3_key,
                ExtraArgs={"Tagging": tags},
            )
            print(f"   ✅ Subido: {filename} -> {s3_key}")
        except Exception as e:
            # Deliberate best-effort: report the failure and continue.
            print(f"   ❌ Error subiendo {filename}: {str(e)}")

# Example usage:
# upload_files("./mis_datos", bucket_name, layer="raw")

# Extensions are passed as a list so the filter is an exact match on "csv"
# no matter how the function treats a bare string.
upload_files(local_folder, bucket_name, layer="raw", file_types=["csv"])

# List the objects stored under the "raw/" layer to verify the upload.
# upload_files() writes keys shaped like raw/<dataset>/<file>, so the prefix
# is the layer itself. NOTE(review): the previous prefix "raw/2025/" does not
# match that layout — confirm no other process relies on it.
# A paginator is used because a single list_objects_v2 call returns at most
# 1000 keys.
paginator = s3.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket_name, Prefix="raw/"):
    for obj in page.get("Contents", []):
        print(obj["Key"])

**¡Listo!**