<a href="https://colab.research.google.com/github/jaimehdzgt/superstore_project/blob/main/SuperStore_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ==== CONFIG ====
# Where the project lives on GitHub and where it is cloned on the Colab VM.
REPO_NAME = "superstore_project"
REPO_URL = "https://github.com/jaimehdzgt/superstore_project.git"
BRANCH = "main"
REPO_DIR = "/content/" + REPO_NAME

# True  -> delete any previous clone and clone again from scratch.
# False -> only update the existing clone in place (no duplicate folders).
CLEAN_RUN = True

# Google Drive folder that holds the data, and the glob pattern of the Excel
# file inside it. The wildcard tolerates the stray spaces before ".xlsx".
DRIVE_FOLDER = "/content/drive/MyDrive/SuperStore"
EXCEL_PATTERN = "Sample - Superstore*.xlsx"

# Echo the effective settings so the cell output records what was used.
print(f"Config OK: {REPO_URL} branch: {BRANCH} clean: {CLEAN_RUN}")


import os, sys, glob, subprocess, shutil
from pathlib import Path

def run(cmd, check=True):
    """Echo a command, execute it, and return its exit code.

    Args:
        cmd: The command to execute. A string is run through the shell; a
            list/tuple of arguments is executed directly without a shell,
            which avoids quoting problems when an argument contains spaces.
        check: When true, a non-zero exit code raises instead of returning.

    Returns:
        The exit code of the command (always 0 when ``check`` is true).

    Raises:
        RuntimeError: If ``check`` is true and the command exits non-zero.
    """
    use_shell = isinstance(cmd, str)
    shown = cmd if use_shell else " ".join(str(part) for part in cmd)
    print(">", shown)
    rc = subprocess.call(cmd, shell=use_shell)
    if check and rc != 0:
        # Same "Falló: <cmd>" prefix as before, plus the exit status so the
        # failure can be diagnosed from the traceback alone.
        raise RuntimeError(f"Falló: {shown} (exit code {rc})")
    # Returning the code lets callers that pass check=False detect failures.
    return rc

# 1) Limpiar si se pidió CLEAN_RUN
if CLEAN_RUN and Path(REPO_DIR).exists():
    print(f"Eliminando clon previo: {REPO_DIR}")
    shutil.rmtree(REPO_DIR)

# 2) Clonar o actualizar sin duplicar
if not Path(REPO_DIR).exists():
    run(f"git clone -b {BRANCH} {REPO_URL} {REPO_DIR}")
else:
    # Actualiza el repo existente sin crear carpetas extra
    run(f"git -C {REPO_DIR} fetch origin {BRANCH}")
    run(f"git -C {REPO_DIR} reset --hard origin/{BRANCH}")
    run(f"git -C {REPO_DIR} clean -fdx")  # borra archivos sin trackear dentro del repo (no tu Drive)

# 3) Entrar a notebooks
%cd {REPO_DIR}/notebooks
!ls -la

# 4) Habilitar imports (src/) sólo una vez
if ".." not in sys.path:
    sys.path.append("..")

# 5) Instalar dependencias (idempotente; pip ignora lo ya instalado)
!pip install -q -r ../requirements.txt
print("Entorno listo.")


# Mount Google Drive only if it is not mounted yet
from google.colab import drive, files
import os
if not os.path.ismount("/content/drive"):
    drive.mount("/content/drive")

from pathlib import Path
import pandas as pd

# Look for the Excel file by pattern inside the configured Drive folder
base = Path(DRIVE_FOLDER)
assert base.exists(), f"No existe la carpeta de Drive: {base}"

# Glob results come back in filesystem-dependent order; sorting makes the
# "first candidate" choice below reproducible when several files match.
candidatos = sorted(base.glob(EXCEL_PATTERN))
print("Candidatos encontrados:", candidatos)

if not candidatos:
    raise FileNotFoundError(
        f"No se encontró ningún Excel con patrón '{EXCEL_PATTERN}' en {base}.\n"
        "Revisa el nombre del archivo o renómbralo en Drive."
    )

# Take the first match in sorted order (change the index if you have several)
excel_path = str(candidatos[0])
print("excel_path =", excel_path)

# Read check: load only the first rows (uses the openpyxl engine)
df_head = pd.read_excel(excel_path, engine="openpyxl", nrows=5)
df_head


Config OK: https://github.com/jaimehdzgt/superstore_project.git branch: main clean: True
> git clone -b main https://github.com/jaimehdzgt/superstore_project.git /content/superstore_project
/content/superstore_project/notebooks
total 28
drwxr-xr-x 2 root root 4096 Sep 23 18:23 .
drwxr-xr-x 6 root root 4096 Sep 23 18:23 ..
-rw-r--r-- 1 root root 9377 Sep 23 18:23 01_EDA_Superstore.ipynb
-rw-r--r-- 1 root root 5482 Sep 23 18:23 02_Modeling_Superstore.ipynb
Entorno listo.
Candidatos encontrados: [PosixPath('/content/drive/MyDrive/SuperStore/Sample - Superstore  .xlsx')]
excel_path = /content/drive/MyDrive/SuperStore/Sample - Superstore  .xlsx


Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,2025-11-08,2025-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,2025-11-08,2025-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,3,CA-2016-138688,2025-06-12,2025-06-16,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2015-108966,2023-10-11,2023-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,5,US-2015-108966,2023-10-11,2023-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164
