# Data Cleaning & Preparation

<img src="../../reports/figures/logo_cmsr92.png" alt="CMSR92" height="60" style="display:block;margin:auto;">

**Preparación profesional de datos para análisis y modelado.**

> Las visualizaciones y los cuadros de insights usan colores neutros y profesionales, manteniendo el logo CMSR92 solo en la cabecera.

In [1]:
import os, pandas as pd
from pathlib import Path
import sys

# Resolver raíz del proyecto de forma robusta

def find_root(start=None):
    """Locate the project root by walking upward from ``start``.

    ``start`` defaults to the current working directory when it is falsy.
    A directory qualifies as the root when it contains a ``src`` entry
    together with either ``requirements.txt`` or ``.git``. At most six
    directories are inspected (the starting one plus five ancestors).

    Returns the first qualifying directory as a resolved ``Path``; if none
    qualifies, the current working directory is returned instead.
    """
    candidate = Path(start or Path.cwd()).resolve()
    levels_left = 6
    while levels_left > 0:
        # Either marker file/dir is acceptable, but ``src`` is always required.
        has_marker = any(
            (candidate / marker).exists()
            for marker in ('requirements.txt', '.git')
        )
        if has_marker and (candidate / 'src').exists():
            return candidate
        candidate = candidate.parent
        levels_left -= 1
    # Nothing matched within the search window: fall back to the cwd.
    return Path.cwd()

# Resolve the project root starting from the current working directory and
# make sure it is on sys.path so the project-local `src` package can be imported.
ROOT = find_root()
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

# Project-local imports: kept below the sys.path adjustment on purpose,
# since they may only resolve once ROOT is on the import path.
from src.data_processing import load_and_clean
from src.feature_engineering import build_features_pipeline

# Raw input CSV, located relative to the resolved project root.
RAW_PATH = ROOT / 'data' / 'raw' / 'movie_theatre_sales.csv'
# Diagnostics: show where the cell runs from, which root was picked, and
# whether the raw CSV is actually present at the expected location.
print('cwd:', os.getcwd())
print('ROOT:', ROOT)
print('CSV existe?', RAW_PATH.exists(), '->', RAW_PATH)

# Run cleaning, then feature building on the cleaned frame. Each helper is
# unpacked as a (data, path) pair.
# NOTE(review): the printed labels suggest the helpers also save their output
# to the returned paths -- confirm in src.data_processing / src.feature_engineering.
try:
    df_clean, path_clean = load_and_clean(str(RAW_PATH))
    print('Guardado limpio:', path_clean, '| filas:', len(df_clean))
    feat, path_feat = build_features_pipeline(df_clean)
    print('Guardado features:', path_feat, '| columnas:', len(feat.columns))
except Exception as e:
    # Best-effort: any failure is reported as a one-line notice instead of
    # raising, so the cell does not abort. On failure `df_clean` and/or `feat`
    # may be left unassigned by this cell.
    print('AVISO:', e)

cwd: C:\Users\Carlos\OneDrive\Documents\GitHub\cine\notebooks
ROOT: C:\Users\Carlos\OneDrive\Documents\GitHub\cine
CSV existe? True -> C:\Users\Carlos\OneDrive\Documents\GitHub\cine\data\raw\movie_theatre_sales.csv
Guardado limpio: data\processed\cleaned_data.csv | filas: 745
Guardado features: data\processed\model_features.csv | columnas: 17
