## 1. Setup e Importación

In [None]:
# Imports
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path

# Custom modules
from recommendations import TrackRecommender, create_recommender_from_data
from playlist_generator import PlaylistGenerator, create_playlist_generator
from models import (
    recommendation_score, 
    evaluate_recommendation_quality,
    plot_recommendation_evaluation,
    analyze_playlist_diversity,
    plot_playlist_summary
)

# Notebook-wide settings: seed NumPy's global RNG with 42, lift pandas'
# displayed-column limit, and select seaborn's "whitegrid" style.
np.random.seed(42)
pd.options.display.max_columns = None
sns.set_style(style="whitegrid")

print("‚úì M√≥dulos importados correctamente")

## 2. Cargar Datos

In [None]:
# Load the listening history. When the real feature file is absent, fall back
# to the synthetic demo dataset; when neither exists, fail with a message that
# names both locations instead of announcing a fallback that cannot be read.
data_path = Path('../data/features/listening_history_with_features.parquet')

if not data_path.exists():
    demo_path = Path('../data/demo/synthetic_spotify_data.parquet')
    if not demo_path.exists():
        raise FileNotFoundError(
            f"No dataset found at {data_path} or {demo_path}"
        )
    print("‚ö†Ô∏è Usando datos sint√©ticos de demo")
    data_path = demo_path

df = pd.read_parquet(data_path)
print(f"‚úì Datos cargados: {df.shape}")
print("\nColumnas disponibles:")
print(df.columns.tolist())

# Basic dataset overview
print("\nüìä Informaci√≥n del dataset:")
print(f"  Total reproducciones: {len(df):,}")
print(f"  Tracks √∫nicos: {df['spotify_track_uri'].nunique():,}")
# The artist column is optional, so only report it when present.
if 'master_metadata_album_artist_name' in df.columns:
    print(f"  Artistas √∫nicos: {df['master_metadata_album_artist_name'].nunique():,}")
print(f"  Per√≠odo: {df['ts'].min()} a {df['ts'].max()}")

## 3. Sistema de Recomendación

### 3.1 Inicializar Recomendador

In [None]:
# Build the recommender from the loaded listening history.
recommender = create_recommender_from_data(df)

track_feature_preview = recommender.track_features.head()
print("\nüìà Features de tracks construidos:")
print(track_feature_preview)

# Dump the user profile, replacing list/dict entries whose text form is
# longer than 100 characters with a short placeholder.
print("\nüë§ Perfil de usuario:")
for field, content in recommender.user_profile.items():
    is_bulky = isinstance(content, (list, dict)) and len(str(content)) > 100
    shown = "[datos extensos...]" if is_bulky else content
    print(f"  {field}: {shown}")

### 3.2 Tracks Similares

Encontrar tracks similares a uno específico.

In [None]:
# Seed the similarity lookup with the most frequent track URI
# (value_counts() orders by descending frequency, so position 0 is the top one).
track_play_counts = df['spotify_track_uri'].value_counts()
popular_track = track_play_counts.index[0]

print(f"üéµ Buscando tracks similares a: {popular_track}")

# Ask the recommender for the 10 nearest tracks.
similar_tracks = recommender.get_similar_tracks(popular_track, n=10)

# Only show the columns the result actually contains.
wanted_cols = (
    'spotify_track_uri', 'track_name', 'artist', 'similarity_score',
    'play_count', 'completion_rate',
)
shown_cols = [col for col in wanted_cols if col in similar_tracks.columns]

print("\n‚ú® Top 10 tracks similares:")
print(similar_tracks[shown_cols])

### 3.3 Recomendaciones Contextuales

Recomendar tracks basados en hora del día y día de la semana.

In [None]:
# Contextual recommendations: one set for the current moment and one for a
# fixed 8am slot, printed side by side for comparison.
from datetime import datetime

# Read the clock once so hour and weekday describe the same instant; two
# separate now() calls can straddle an hour or midnight boundary.
now = datetime.now()
current_hour = now.hour
current_day = now.weekday()

print(f"üïê Hora actual: {current_hour}h, D√≠a: {current_day} (0=Lunes)")

# Recommendations for the current hour and weekday
recs_now = recommender.recommend_for_context(
    hour=current_hour,
    day_of_week=current_day,
    n=15
)

context_cols = ['track_name', 'artist', 'final_score', 'popularity', 'completion_rate']

print("\n‚ú® Recomendaciones para AHORA:")
display_cols = [c for c in context_cols if c in recs_now.columns]
print(recs_now[display_cols].head(10))

# Compare against recommendations for the morning
recs_morning = recommender.recommend_for_context(hour=8, n=10)

print("\n‚òÄÔ∏è Recomendaciones para la MA√ëANA (8am):")
# Filter against this result's own columns: reusing the list computed for
# recs_now would raise KeyError if the two frames do not share every column.
display_cols = [c for c in context_cols if c in recs_morning.columns]
print(recs_morning[display_cols].head(10))

### 3.4 Recomendaciones Basadas en Favoritos

In [None]:
# Request 20 recommendations derived from the user's favorites.
recs_favorites = recommender.recommend_based_on_favorites(n=20)

print("üíñ Recomendaciones basadas en tus favoritos:")
# Keep only the preferred columns that exist in the result, then show 15 rows.
favorite_cols = [
    col
    for col in ('track_name', 'artist', 'recommendation_score',
                'similarity_score', 'popularity')
    if col in recs_favorites.columns
]
print(recs_favorites[favorite_cols].head(15))

### 3.5 Tracks Resistentes a Skips

In [None]:
# Request 20 tracks from the recommender's skip-resistant strategy.
recs_skip_resistant = recommender.recommend_skip_resistant(n=20)

print("üéØ Tracks que nunca querr√°s skipear:")

# Preferred display columns, narrowed to those present in the result.
preferred_cols = (
    'track_name', 'artist', 'skip_resistant_score',
    'completion_rate', 'skip_rate', 'play_count',
)
present_cols = [col for col in preferred_cols if col in recs_skip_resistant.columns]
top_rows = recs_skip_resistant[present_cols].head(15)
print(top_rows)

### 3.6 Recomendaciones Híbridas

Combina múltiples estrategias para mejores resultados.

In [None]:
# Hybrid strategy: 25 recommendations for the hour/weekday captured earlier
# in the notebook (current_hour, current_day).
hybrid_context = {'hour': current_hour, 'day_of_week': current_day}
recs_hybrid = recommender.get_recommendations(strategy='hybrid', n=25, **hybrid_context)

print("üåü Recomendaciones H√çBRIDAS (mejor balance):")

# Show the first 20 rows, limited to the preferred columns that exist.
hybrid_cols = [
    col
    for col in ('track_name', 'artist', 'hybrid_score', 'popularity', 'completion_rate')
    if col in recs_hybrid.columns
]
print(recs_hybrid[hybrid_cols].head(20))

### 3.7 Evaluación de Recomendaciones

In [None]:
# Score the hybrid recommendations with the recommender's own evaluator.
eval_metrics = recommender.evaluate_recommendations(recs_hybrid)

print("üìä M√©tricas de calidad:")
for metric_name, metric_value in eval_metrics.items():
    # Floats are rendered as percentages with two decimals; anything else as-is.
    if isinstance(metric_value, float):
        rendered = f"{metric_value:.2%}"
    else:
        rendered = f"{metric_value}"
    print(f"  {metric_name}: {rendered}")

# Plot only when at least one metric came back.
if len(eval_metrics):
    fig = plot_recommendation_evaluation(eval_metrics)
    fig.show()

## 4. Generación Automática de Playlists

### 4.1 Inicializar Generador

In [None]:
# Create the playlist generator from the listening-history DataFrame.
# NOTE(review): generate_all=True presumably builds every playlist up front —
# confirm against create_playlist_generator in playlist_generator.
generator = create_playlist_generator(df, generate_all=True)

### 4.2 Listar Playlists Generadas

In [None]:
# List every generated playlist; each entry unpacks to (name, track count).
playlists = generator.list_playlists()
total_playlists = len(playlists)

print(f"\nüéµ {total_playlists} playlists generadas:\n")
for playlist_name, track_count in playlists:
    print(f"  üìÄ {playlist_name}: {track_count} tracks")

### 4.3 Explorar Playlists Específicas

In [None]:
# Inspect the "Morning Energy" playlist when the generator produced it.
if 'Morning Energy' in generator.playlists:
    morning_playlist = generator.get_playlist('Morning Energy')

    print("‚òÄÔ∏è MORNING ENERGY PLAYLIST:")
    morning_cols = [
        col
        for col in ('track_name', 'artist', 'typical_hour', 'play_count', 'completion_rate')
        if col in morning_playlist.columns
    ]
    print(morning_playlist[morning_cols].head(15))

    # Diversity report: float values with two decimals, everything else verbatim.
    diversity = analyze_playlist_diversity(morning_playlist)
    print("\nüìä An√°lisis de diversidad:")
    for stat_name, stat_value in diversity.items():
        shown = f"{stat_value:.2f}" if isinstance(stat_value, float) else stat_value
        print(f"  {stat_name}: {shown}")

In [None]:
# Inspect the "Never Skip Hits" playlist when the generator produced it.
if 'Never Skip Hits' in generator.playlists:
    never_skip = generator.get_playlist('Never Skip Hits')

    print("üéØ NEVER SKIP HITS PLAYLIST:")
    preferred_cols = ('track_name', 'artist', 'skip_rate', 'play_count', 'completion_rate')
    never_skip_cols = [col for col in preferred_cols if col in never_skip.columns]
    print(never_skip[never_skip_cols].head(15))

    # Draw the summary figure only for a non-empty playlist.
    if len(never_skip):
        fig = plot_playlist_summary(never_skip, title='Never Skip Hits - Resumen')
        fig.show()

In [None]:
# Inspect the "All-Time Favorites" playlist when the generator produced it.
if 'All-Time Favorites' in generator.playlists:
    favorites = generator.get_playlist('All-Time Favorites')

    print("üíé ALL-TIME FAVORITES PLAYLIST:")
    favorites_cols = [
        col
        for col in ('track_name', 'artist', 'play_count', 'typical_hour')
        if col in favorites.columns
    ]
    print(favorites[favorites_cols].head(20))

    # Ten most frequent artists in the playlist, when artist data is available.
    if 'artist' in favorites.columns:
        print("\nüé§ Top artistas en favoritos:")
        artist_counts = favorites['artist'].value_counts()
        top_artists = artist_counts.iloc[:10]
        print(top_artists)

### 4.4 Comparar Playlists

In [None]:
# Compare the time-of-day playlists on size and diversity metrics.
temporal_playlists = ['Morning Energy', 'Afternoon Vibes', 'Evening Chill', 'Late Night']

# Output column label -> key looked up in the diversity report (0 when absent).
metric_labels = {
    'N Tracks': 'n_tracks',
    'N Artists': 'n_artists',
    'Artist Diversity': 'artist_diversity',
    'Avg Skip Rate': 'avg_skip_rate',
}

comparison_data = []
for name in temporal_playlists:
    # Skip playlists the generator did not produce.
    if name not in generator.playlists:
        continue
    playlist = generator.get_playlist(name)
    diversity = analyze_playlist_diversity(playlist)

    row = {'Playlist': name}
    for label, report_key in metric_labels.items():
        row[label] = diversity.get(report_key, 0)
    comparison_data.append(row)

if comparison_data:
    comparison_df = pd.DataFrame(comparison_data)
    print("\nüìä Comparaci√≥n de playlists temporales:")
    print(comparison_df)

    # Bar chart of artist diversity per playlist; the y axis uses a percent tick format.
    fig = px.bar(
        comparison_df,
        x='Playlist',
        y='Artist Diversity',
        title='Diversidad de Artistas por Playlist',
        color='Artist Diversity',
        color_continuous_scale='Viridis',
    )
    fig.update_layout(yaxis_tickformat='.0%')
    fig.show()

### 4.5 Análisis de Clusters

In [None]:
# Collect the playlists whose name contains "Cluster Mix".
cluster_playlists = [
    playlist_name
    for playlist_name in generator.playlists.keys()
    if 'Cluster Mix' in playlist_name
]

if cluster_playlists:
    print(f"\nüß© {len(cluster_playlists)} playlists de clustering generadas:")

    separator = '=' * 60
    # Only the first three cluster playlists are detailed.
    for name in cluster_playlists[:3]:
        playlist = generator.get_playlist(name)

        print(f"\n{separator}")
        print(f"{name}")
        print(f"{separator}")

        # Cluster-level averages, each reported only when its column exists.
        if 'typical_hour' in playlist.columns:
            mean_hour = playlist['typical_hour'].mean()
            print(f"  Hora t√≠pica: {mean_hour:.1f}h")
        if 'avg_duration' in playlist.columns:
            # NOTE(review): dividing by 60000 suggests avg_duration is in
            # milliseconds — confirm against the feature builder.
            mean_minutes = playlist['avg_duration'].mean() / 60000
            print(f"  Duraci√≥n promedio: {mean_minutes:.1f} min")
        if 'skip_rate' in playlist.columns:
            mean_skip_rate = playlist['skip_rate'].mean()
            print(f"  Skip rate promedio: {mean_skip_rate:.1%}")

        # Sample of tracks: first five rows, without the index column.
        sample_cols = [
            col for col in ('track_name', 'artist', 'play_count')
            if col in playlist.columns
        ]
        print("\n  Top 5 tracks:")
        print(playlist[sample_cols].head(5).to_string(index=False))

## 5. Exportar Resultados

### 5.1 Exportar Recomendaciones

In [None]:
# Persist the hybrid recommendations as Parquet under ../data/features,
# creating the directory (and any missing parents) first.
output_dir = Path('..', 'data', 'features')
output_dir.mkdir(parents=True, exist_ok=True)

recs_output = output_dir.joinpath('recommendations_hybrid.parquet')
recs_hybrid.to_parquet(recs_output, index=False)
print(f"‚úì Recomendaciones guardadas en {recs_output}")

### 5.2 Exportar Playlists

In [None]:
# Export a hand-picked set of playlists as CSV files under ../data/playlists.
playlists_dir = Path('../data/playlists')
playlists_dir.mkdir(parents=True, exist_ok=True)

playlists_to_export = ['Morning Energy', 'Never Skip Hits', 'All-Time Favorites']

# Count what is really written: a requested playlist that the generator did
# not produce is skipped and must not be reported as exported.
exported_count = 0
for name in playlists_to_export:
    if name not in generator.playlists:
        continue
    # File name is the playlist name lower-cased with spaces as underscores.
    output_file = playlists_dir / f"{name.replace(' ', '_').lower()}.csv"
    generator.export_playlist(name, str(output_file))
    exported_count += 1

print(f"\n‚úì {exported_count} playlists exportadas a {playlists_dir}")

## 6. Insights y Conclusiones

### 6.1 Resumen del Sistema de Recomendación

In [None]:
# Textual summary of the recommender: catalog size, user profile highlights,
# and the available recommendation strategies.
print("\n" + "="*70)
print("RESUMEN DEL SISTEMA DE RECOMENDACI√ìN")
print("="*70)

track_features = recommender.track_features
user_profile = recommender.user_profile

print("\nüìä Estad√≠sticas del cat√°logo:")
print(f"  ‚Ä¢ Total de tracks analizados: {len(track_features):,}")
# Guarded like the other optional columns in this cell. The label now matches
# the filter, which keeps tracks with three or more plays (>= 3).
if 'total_plays' in track_features.columns:
    enough_data_count = int((track_features['total_plays'] >= 3).sum())
    print(f"  ‚Ä¢ Tracks con suficiente data (>=3 plays): {enough_data_count:,}")

if 'artist' in track_features.columns:
    print(f"  ‚Ä¢ Artistas √∫nicos: {track_features['artist'].nunique():,}")

# Each profile entry is optional and printed only when present.
print("\nüéØ Caracter√≠sticas del usuario:")
if 'skip_tolerance' in user_profile:
    print(f"  ‚Ä¢ Skip tolerance: {user_profile['skip_tolerance']}")
if 'peak_hours' in user_profile:
    print(f"  ‚Ä¢ Horas pico de escucha: {user_profile['peak_hours']}")
if 'avg_skip_rate' in user_profile:
    print(f"  ‚Ä¢ Skip rate promedio: {user_profile['avg_skip_rate']:.1%}")

print("\n‚ú® Estrategias de recomendaci√≥n disponibles:")
print("  ‚Ä¢ Similar: Basada en tracks similares a favoritos")
print("  ‚Ä¢ Context: Basada en hora del d√≠a y contexto temporal")
print("  ‚Ä¢ Skip-resistant: Tracks con baja probabilidad de skip")
print("  ‚Ä¢ Hybrid: Combinaci√≥n de todas las estrategias")

### 6.2 Resumen de Playlists Generadas

In [None]:
# Summary of the generated playlists, grouped by a hand-maintained category map.
banner = "=" * 70
print("\n" + banner)
print("RESUMEN DE PLAYLISTS GENERADAS")
print(banner)

# Category -> playlist names that belong to it
categories = {
    'Temporales': ['Morning Energy', 'Afternoon Vibes', 'Evening Chill', 'Late Night',
                   'Weekday Focus', 'Weekend Mood'],
    'Por Comportamiento': ['Never Skip Hits', 'Deep Focus', 'Quick Hits',
                          'All-Time Favorites', 'Shuffle Favorites'],
    'Por Mood': ['High Energy', 'Relaxation', 'Anytime Classics'],
    'Descubrimiento': ['Rediscover']
}

for category, playlist_names in categories.items():
    # Keep only the playlists of this category that were actually generated.
    found = [candidate for candidate in playlist_names if candidate in generator.playlists]
    if not found:
        continue
    print(f"\nüìÅ {category}: {len(found)} playlists")
    for name in found:
        count = len(generator.get_playlist(name))
        print(f"  ‚Ä¢ {name}: {count} tracks")

print(f"\n‚úì Total: {len(generator.playlists)} playlists √∫nicas generadas")

## 7. Próximos Pasos

### Mejoras Potenciales:

1. **🎨 Features Avanzados**
   - Integrar datos de audio (tempo, energy, valence)
   - Análisis de letras (sentiment analysis)
   - Features de género musical

2. **🤖 Modelos Avanzados**
   - Collaborative filtering con matriz de usuario-track
   - Deep learning (embeddings con neural networks)
   - Reinforcement learning para optimización en tiempo real

3. **📊 Evaluación**
   - A/B testing de recomendaciones
   - Métricas de engagement (tiempo de escucha, completion rate)
   - Feedback loop con ratings de usuario

4. **🚀 Producción**
   - API REST para servir recomendaciones
   - Cache de recomendaciones pre-calculadas
   - Sistema de actualización incremental
   - Monitoreo de performance y drift

5. **💡 Features de Negocio**
   - Recomendaciones por mood/actividad
   - Playlists colaborativas
   - Descubrimiento de artistas emergentes
   - Personalización por contexto (ubicación, clima, etc.)

---

**🎵 Este sistema demuestra:**
- Content-based recommendation engine completo
- Generación automática de playlists temáticas
- Clustering y análisis de patrones de escucha
- Evaluación de calidad de recomendaciones
- Production-ready code con evaluación y métricas