# Exploratory Data Analysis

<img src="../../reports/figures/logo_cmsr92.png" alt="CMSR92" height="60" style="display:block;margin:auto;">

**Análisis exploratorio profesional de los datos del cine.**

> Este notebook utiliza visualizaciones con colores profesionales y destaca insights clave para la toma de decisiones ejecutivas. El logo CMSR92 se muestra solo en la cabecera.

In [1]:
# Importaciones básicas
import os, pandas as pd, numpy as np
import seaborn as sns, matplotlib.pyplot as plt
import plotly.express as px
from pathlib import Path

# Módulos del proyecto
import sys

# Resolve the project root: start from the current working directory and,
# when the notebook is being run from inside `notebooks/`, step up one level.
ROOT = Path.cwd()
if ROOT.name == 'notebooks':
    ROOT = ROOT.parent
# Register the root on sys.path (once) so the `src` package can be imported.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

from src.data_processing import load_data, basic_clean
from src.visualization import plot_price_sensitivity

# Load the raw data from a path anchored at the project root
RAW_PATH = ROOT.joinpath('data', 'raw', 'movie_theatre_sales.csv')
try:
    _df_raw = load_data(str(RAW_PATH))
    print('Registros crudos:', len(_df_raw), '| Ruta:', RAW_PATH)
except Exception as err:
    # Best effort: report the problem and fall back to an empty frame, so
    # later cells can detect the missing data through `_df_raw.empty`.
    print('AVISO:', err, '| Ruta probada:', RAW_PATH)
    _df_raw = pd.DataFrame()

# Quick check of the execution context
print('cwd:', os.getcwd())
print('ROOT:', ROOT)
print('CSV existe?', RAW_PATH.exists(), '->', RAW_PATH)

# Cell result: the empty frame itself when nothing was loaded, else a preview
_df_raw if _df_raw.empty else _df_raw.head()

Registros crudos: 1440 | Ruta: C:\Users\Carlos\OneDrive\Documents\GitHub\cine\data\raw\movie_theatre_sales.csv
cwd: C:\Users\Carlos\OneDrive\Documents\GitHub\cine\notebooks
ROOT: C:\Users\Carlos\OneDrive\Documents\GitHub\cine
CSV existe? True -> C:\Users\Carlos\OneDrive\Documents\GitHub\cine\data\raw\movie_theatre_sales.csv


Unnamed: 0,Ticket_ID,Age,Ticket_Price,Movie_Genre,Seat_Type,Number_of_Person,Purchase_Again
0,N4369,55,12.27,Comedy,Standard,7,No
1,B8091,35,19.02,Drama,Standard,Alone,Yes
2,V6341,55,22.52,Horror,VIP,3,No
3,B3243,53,23.01,Drama,Standard,6,Yes
4,I3814,30,21.81,Comedy,VIP,4,Yes


In [2]:
# Minimal cleaning for the EDA
if _df_raw.empty:
    # Nothing was loaded in the previous cell; tell the user where the CSV goes.
    print('Sin datos para EDA. Coloca el CSV en data/raw/movie_theatre_sales.csv')
else:
    df = basic_clean(_df_raw)
    from IPython.display import display

    # Summary statistics for every column (numeric and categorical alike)
    display(df.describe(include='all'))

    fig = plot_price_sensitivity(df, 'Ticket_Price')
    # Show the figure through display() so it also works with
    # non-interactive backends, then close it.
    display(fig)
    plt.close(fig)

Unnamed: 0,Ticket_ID,Age,Ticket_Price,Movie_Genre,Seat_Type,Number_of_Person,Purchase_Again
count,745,745.0,745.0,745,745,745.0,745.0
unique,745,,,5,3,,
top,W6360,,,Comedy,Vip,,
freq,1,,,155,259,,
mean,,38.985235,17.310255,,,4.637584,0.479195
std,,12.288908,4.374432,,,1.703944,0.499903
min,,18.0,10.01,,,2.0,0.0
25%,,28.0,13.62,,,3.0,0.0
50%,,40.0,17.0,,,5.0,0.0
75%,,49.0,21.31,,,6.0,1.0


<Figure size 800x400 with 1 Axes>