In [None]:
import hashlib
from google.cloud import storage
import requests
import pandas as pd
import os
import glob
from datetime import datetime
import json

# Google Cloud project passed to storage.Client when uploading.
PROJECT_ID = "nbaproject-469714"
# Destination bucket used by every process_* function.
BUCKET_NAME = "nba_bucket_datasets"
# Local folder whose *.csv files are uploaded by main().
LOCAL_CSV_FOLDER = r"D:\Up\New\Estudios\SoyHenry\Data Analytics\ProyectoFinal\csv"
# Text file that log_message appends to.
LOG_FILE = "upload_log.txt"
# JSON file mapping blob name -> MD5 hex digest of the last uploaded content.
METADATA_FILE = "bucket_file_metadata.json"

# Remote sources handled by main(); "type" selects the handler and "url" is
# passed to it.
AUTOMATIC_SOURCES = [
    {"type": "api", "url": "https://api-nba.com/stats.csv"},
    {"type": "gsheet", "url": "https://docs.google.com/spreadsheets/d/xxxx/export?format=csv"}
]


def _load_metadata(path):
    """Return the blob-name -> hash mapping stored as JSON at *path*.

    The metadata file is only a cache of what was uploaded before, so a
    missing, unreadable, corrupt or wrongly-shaped file must not abort the
    script: in all those cases an empty dict is returned (which simply makes
    every file look new on this run).
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # First run: nothing has been uploaded yet.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Advertencia: no se pudo leer {path} ({e}); se usarán metadatos vacíos")
        return {}
    if not isinstance(data, dict):
        print(f"Advertencia: {path} no contiene un objeto JSON; se usarán metadatos vacíos")
        return {}
    return data


# Load metadata from previous runs.
bucket_metadata = _load_metadata(METADATA_FILE)

def log_message(message):
    """Print *message* and append it to LOG_FILE with a timestamp prefix.

    Each log line has the form ``YYYY-MM-DD HH:MM:SS - <message>``, using
    the local time at the moment of the call.

    Args:
        message: Text to show on stdout and record in the log file.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(message)
    # The messages logged by this script contain emoji; the platform default
    # encoding (e.g. cp1252 on Windows) cannot encode them, so the log file
    # is always written as UTF-8.
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(f"{timestamp} - {message}\n")

def file_hash(path):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and consumed in 4096-byte pieces, so
    its whole content never has to be held in memory at once.
    """
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        while True:
            piece = stream.read(4096)
            if piece == b"":
                # read() returns empty bytes at end of file.
                break
            digest.update(piece)
    return digest.hexdigest()

def upload_to_bucket(bucket_name, destination_blob_name, source_file):
    """Upload *source_file* to the bucket unless its content is unchanged.

    "Unchanged" is decided by comparing the file's MD5 digest with the one
    stored for *destination_blob_name* in the local ``bucket_metadata``
    cache; the bucket itself is not queried. After a successful upload the
    cache is updated in memory and rewritten to METADATA_FILE.

    Args:
        bucket_name: Name of the destination bucket.
        destination_blob_name: Object name inside the bucket; also the key
            used in the metadata cache.
        source_file: Path of the local file to upload.

    Returns:
        True if the file was uploaded, False if it was skipped because its
        hash matches the cached one.
    """
    local_hash = file_hash(source_file)

    # Check the metadata cache first so an unchanged file is skipped without
    # creating a storage client at all.
    previous_hash = bucket_metadata.get(destination_blob_name)
    if previous_hash == local_hash:
        log_message(f"⚠️ {destination_blob_name} ya existe y no cambió, se omitirá")
        return False

    client = storage.Client(project=PROJECT_ID)
    blob = client.bucket(bucket_name).blob(destination_blob_name)
    blob.upload_from_filename(source_file)
    log_message(f"✅ {source_file} subido a gs://{bucket_name}/{destination_blob_name}")

    # Record the new hash only after the upload succeeded, then persist the
    # whole cache.
    bucket_metadata[destination_blob_name] = local_hash
    with open(METADATA_FILE, "w", encoding="utf-8") as f:
        json.dump(bucket_metadata, f)

    return True

def process_csv_file(path):
    """Upload the local CSV at *path* to BUCKET_NAME under its own file name.

    Returns whatever upload_to_bucket returns for that file.
    """
    _, blob_name = os.path.split(path)
    return upload_to_bucket(BUCKET_NAME, blob_name, path)

def process_api(url, timeout=30):
    """Download *url* to a temporary CSV file and upload it to BUCKET_NAME.

    Any exception raised while downloading or uploading is logged and turned
    into a False return value. The temporary file is removed in every case.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds passed to ``requests.get`` as its timeout, so an
            unresponsive server cannot block the script forever.

    Returns:
        The result of upload_to_bucket, or False if an error occurred.
    """
    # NOTE: this fixed name is also used as the destination blob name, so
    # every API source ends up in the same bucket object.
    local_file = "temp_api_data.csv"
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        with open(local_file, "wb") as f:
            f.write(r.content)
        return upload_to_bucket(BUCKET_NAME, local_file, local_file)
    except Exception as e:
        log_message(f"⚠️ Error al descargar API {url}: {e}")
        return False
    finally:
        # Clean up the temporary download whether or not the upload worked.
        if os.path.exists(local_file):
            os.remove(local_file)

def process_gsheet(sheet_url):
    """Fetch a Google Sheet CSV export and upload it to BUCKET_NAME.

    The sheet is read with pandas, written without the index column to a
    temporary file, and that file is uploaded under the temporary file's
    name. Any exception is logged and reported as False; the temporary file
    is deleted in every case.

    Returns:
        The result of upload_to_bucket, or False if an error occurred.
    """
    tmp_path = "temp_gsheet_data.csv"
    uploaded = False
    try:
        sheet = pd.read_csv(sheet_url)
        sheet.to_csv(tmp_path, index=False)
        uploaded = upload_to_bucket(BUCKET_NAME, tmp_path, tmp_path)
    except Exception as exc:
        log_message(f"⚠️ Error al descargar Google Sheet {sheet_url}: {exc}")
    finally:
        # Remove the temporary export regardless of the outcome.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return uploaded

def main():
    """Upload every local CSV and then every configured automatic source.

    Local files are the ``*.csv`` entries directly inside LOCAL_CSV_FOLDER.
    Automatic sources come from AUTOMATIC_SOURCES; entries whose "type" is
    neither "api" nor "gsheet" are ignored.
    """
    # Local CSV files
    pattern = os.path.join(LOCAL_CSV_FOLDER, "*.csv")
    for local_path in glob.glob(pattern):
        process_csv_file(local_path)

    # APIs and Google Sheets
    handlers = {"api": process_api, "gsheet": process_gsheet}
    for entry in AUTOMATIC_SOURCES:
        handler = handlers.get(entry["type"])
        if handler is not None:
            handler(entry["url"])

# Run the upload pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
