## 1. Setup

In [None]:
# Imports
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path

# Time series specific
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import warnings
warnings.filterwarnings('ignore')

# Custom modules
from data_pipeline import load_curated_data
from eda import plays_over_time

# Display and plotting configuration for the rest of the notebook.
pd.set_option('display.max_columns', None)  # show every column of wide frames
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

# The status message was stored as mojibake (UTF-8 bytes decoded with the
# wrong codec); the intended characters are restored here.
print("✓ Módulos importados correctamente")

## 2. Cargar y Preparar Datos

In [None]:
# Load the play history: prefer the synthetic demo dataset and fall back to
# the curated dataset when the demo file does not exist.
data_path = Path('../data/demo/synthetic_spotify_data.parquet')
if not data_path.exists():
    data_path = Path('../data/curated/spotify_data.parquet')

df = pd.read_parquet(data_path)
print(f"✓ Datos cargados: {df.shape}")
print(f"  Período: {df['ts'].min()} a {df['ts'].max()}")

# Build the daily time series: number of plays per calendar day.
plays_per_day = df.groupby(df['ts'].dt.date).size()
plays_per_day.index = pd.to_datetime(plays_per_day.index)
daily_plays = (
    plays_per_day
    .rename('plays')
    .sort_index()
    # groupby only yields days that have at least one play. Re-sample onto a
    # complete daily calendar and count the missing days as 0 plays so the
    # index is regular: the weekly (period=7) decomposition and the ACF/PACF
    # lags below assume consecutive rows are exactly one day apart.
    .asfreq('D', fill_value=0)
    .rename_axis('date')
    .to_frame()
)

print("\n📊 Serie temporal:")
print(f"  Días totales: {len(daily_plays)}")
print(f"  Promedio diario: {daily_plays['plays'].mean():.1f} reproducciones")
print(f"  Desv. estándar: {daily_plays['plays'].std():.1f}")

## 3. Visualización de Tendencia

Análisis visual de la serie temporal completa.

In [None]:
# Plot the daily series together with a 7-day moving average.
fig = go.Figure()

# Raw daily play counts.
fig.add_trace(go.Scatter(
    x=daily_plays.index,
    y=daily_plays['plays'],
    mode='lines',
    name='Reproducciones Diarias',
    line=dict(color='#1DB954', width=1.5)
))

# Centered 7-day moving average; stored as a column on daily_plays so it
# stays available to later cells. The first and last 3 rows are NaN because
# the centered window is incomplete there.
daily_plays['MA_7'] = daily_plays['plays'].rolling(window=7, center=True).mean()
fig.add_trace(go.Scatter(
    x=daily_plays.index,
    y=daily_plays['MA_7'],
    mode='lines',
    name='Media Móvil (7 días)',
    line=dict(color='red', width=2, dash='dash')
))

fig.update_layout(
    title='Serie Temporal: Reproducciones Diarias',
    xaxis_title='Fecha',
    yaxis_title='Número de Reproducciones',
    hovermode='x unified',
    height=500
)

fig.show()
print("📈 Gráfico de tendencia generado")

## 4. Test de Estacionariedad (ADF)

Verificar si la serie es estacionaria usando el test Augmented Dickey-Fuller.

**Interpretación:**
- p-value <= 0.05 → Serie es estacionaria
- p-value > 0.05 → Serie NO es estacionaria (tiene tendencia/estacionalidad)

In [None]:
# ADF test
def adf_test(series, name='', alpha=0.05):
    """Run the Augmented Dickey-Fuller test on ``series`` and print a report.

    The report contains the test statistic, the p-value, the critical values
    and a verdict line. Nothing is returned.

    Args:
        series: pandas Series to test; missing values are dropped before the
            test is run.
        name: Label shown in the report header.
        alpha: Significance level. A p-value at or below ``alpha`` is
            reported as stationary (H0 rejected). Defaults to 0.05, the
            threshold that was previously hard-coded.
    """
    result = adfuller(series.dropna())
    # Positions used from the adfuller result tuple: 0 = test statistic,
    # 1 = p-value, 4 = mapping of critical values.
    adf_statistic, p_value, critical_values = result[0], result[1], result[4]

    print(f'=== ADF Test: {name} ===')
    print(f'ADF Statistic: {adf_statistic:.4f}')
    print(f'p-value: {p_value:.4f}')
    print('Critical Values:')
    for level, threshold in critical_values.items():
        print(f'  {level}: {threshold:.3f}')

    if p_value <= alpha:
        print("✓ Serie ES estacionaria (rechazamos H0)")
    else:
        print("⚠️ Serie NO ES estacionaria (no rechazamos H0)")
    print()

adf_test(daily_plays['plays'], 'Reproducciones Diarias')

## 5. Descomposición Estacional

Descomponer la serie en componentes: **Tendencia + Estacionalidad + Residual**

In [None]:
# Seasonal decomposition (switch to the multiplicative model if the variance
# grows with the level of the series).
decomposition = seasonal_decompose(
    daily_plays['plays'],
    model='additive',  # or 'multiplicative'
    period=7,  # weekly seasonality
)

# One panel per series: the original plus the three decomposition components.
fig, axes = plt.subplots(4, 1, figsize=(14, 10))

panels = [
    (daily_plays['plays'], 'Original', {'color': '#1DB954'}),
    (decomposition.trend, 'Tendencia', {'color': 'orange'}),
    (decomposition.seasonal, 'Estacionalidad', {'color': 'blue'}),
    (decomposition.resid, 'Residual', {'color': 'red', 'alpha': 0.7}),
]
for ax, (component, label, line_kwargs) in zip(axes, panels):
    ax.plot(component.index, component, **line_kwargs)
    ax.set_ylabel(label)

# Title on the top panel, x-axis label on the bottom one only.
axes[0].set_title('Descomposición de Serie Temporal', fontsize=14, fontweight='bold')
axes[-1].set_xlabel('Fecha')

plt.tight_layout()
plt.show()

print("📊 Descomposición estacional completada")

## 6. ACF y PACF

Analizar autocorrelaci√≥n para identificar:
- **ACF**: Orden del componente MA (Moving Average)
- **PACF**: Orden del componente AR (Autoregressive)

In [None]:
# ACF and PACF of the daily play counts.
plays_series = daily_plays['plays'].dropna()

# Up to 40 lags, capped by sample size: statsmodels refuses to compute partial
# autocorrelations for lags at or beyond 50% of the number of observations,
# so a fixed lags=40 fails on short series (fewer than ~82 days).
max_lags = max(1, min(40, len(plays_series) // 2 - 1))

fig, axes = plt.subplots(2, 1, figsize=(12, 8))

# ACF
plot_acf(plays_series, lags=max_lags, ax=axes[0])
axes[0].set_title('Autocorrelation Function (ACF)', fontsize=12, fontweight='bold')

# PACF
plot_pacf(plays_series, lags=max_lags, ax=axes[1])
axes[1].set_title('Partial Autocorrelation Function (PACF)', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.show()

print("📊 ACF y PACF graficados")

## 7. Análisis de Patrones Semanales

Identificar patrones por día de la semana.

In [None]:
# Calendar order shared by the boxplot and the stats table, so both read
# Monday through Sunday.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Add day-of-week features derived from the date index.
daily_plays['day_of_week'] = daily_plays.index.dayofweek
daily_plays['day_name'] = daily_plays.index.day_name()

# Boxplot of daily plays per weekday.
fig = px.box(
    daily_plays.reset_index(),
    x='day_name',
    y='plays',
    title='Distribución de Reproducciones por Día de la Semana',
    labels={'day_name': 'Día', 'plays': 'Reproducciones'},
    category_orders={'day_name': weekday_order}
)
fig.update_traces(marker_color='#1DB954')
fig.show()

# Per-weekday summary statistics. groupby sorts the day names alphabetically,
# so the rows are put back into calendar order; weekdays that never occur in
# the data are skipped rather than shown as empty rows.
weekday_stats = daily_plays.groupby('day_name')['plays'].agg(['mean', 'std', 'min', 'max']).round(1)
present_days = [day for day in weekday_order if day in weekday_stats.index]
print("\n📊 Estadísticas por día:")
print(weekday_stats.loc[present_days])

## 8. Detección de Anomalías

Identificar días con comportamiento atípico usando Z-score.

In [None]:
# A day is flagged as anomalous when its play count lies more than this many
# standard deviations away from the mean, in either direction.
z_threshold = 3

# Z-score of each day's play count against the whole series.
plays_mean = daily_plays['plays'].mean()
plays_std = daily_plays['plays'].std()
daily_plays['z_score'] = (daily_plays['plays'] - plays_mean) / plays_std
daily_plays['is_anomaly'] = daily_plays['z_score'].abs() > z_threshold

# Detected anomalies
anomalies = daily_plays[daily_plays['is_anomaly']]
print(f"🔍 Anomalías detectadas: {len(anomalies)}")
if not anomalies.empty:
    print("\nDías anómalos:")
    print(anomalies[['plays', 'z_score']].sort_values('z_score', ascending=False))

# Visualize: the full series as a line, anomalous days as red markers.
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=daily_plays.index,
    y=daily_plays['plays'],
    mode='lines',
    name='Serie Normal',
    line=dict(color='#1DB954')
))

if not anomalies.empty:
    fig.add_trace(go.Scatter(
        x=anomalies.index,
        y=anomalies['plays'],
        mode='markers',
        name='Anomalías',
        marker=dict(color='red', size=10, symbol='x')
    ))

fig.update_layout(
    # The rule uses the absolute z-score, so the title states |Z-score|.
    title=f'Detección de Anomalías (|Z-score| > {z_threshold})',
    xaxis_title='Fecha',
    yaxis_title='Reproducciones',
    height=500
)

fig.show()

## 9. Conclusiones

### Hallazgos Clave

1. **Tendencia**: [Describir si hay tendencia creciente/decreciente/estable]

2. **Estacionalidad**: Patrón semanal evidente
   - Días con más actividad: [weekdays/weekends]
   - Variación día a día: [high/moderate/low]

3. **Estacionariedad**: 
   - ADF test indica: [estacionaria/no estacionaria]
   - Requiere diferenciación: [sí/no]

4. **Anomalías**: [número de anomalías detectadas en la sección 8] días atípicos detectados

### Recomendaciones para Forecasting

- **Modelo sugerido**: SARIMA o Prophet
- **Parámetros iniciales**: (p, d, q) basados en ACF/PACF
- **Estacionalidad**: Período semanal (7 días)
- **Features adicionales**: Día de semana, eventos especiales

**Próximo paso:** Implementar modelo de forecasting en análisis avanzado