In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import altair as alt

In [None]:
# Read the cleaned Spotify dataset straight from its zip archive and
# preview the first rows as the cell output.
df = pd.read_csv(
    filepath_or_buffer='spotify_data_cleaned.zip',
    compression='zip',
)
df.head(n=5)

In [None]:
# Box plot of `popularity` for each value of `key`, drawn as a wide
# (aspect 2) seaborn categorical plot.
sns.catplot(
    data=df,
    kind='box',
    x='key',
    y='popularity',
    aspect=2,
    height=5,
)

In [4]:
# 10% random sample of the rows. The seed is fixed so that a
# Restart & Run All yields the same sample every time.
muestra = df.sample(frac=0.1, random_state=42)

In [None]:
# Spearman rank correlation between the numeric columns of `df`.
corr_matrix = df.corr(numeric_only=True, method='spearman')

# Annotated heatmap on an explicit 8x6 figure/axes pair, with the colour
# scale pinned to the full [-1, 1] correlation range.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    corr_matrix,
    vmin=-1,
    vmax=1,
    cmap='coolwarm',
    annot=True,
    ax=heatmap_ax,
)
heatmap_ax.set_title('Mapa de calor de la Correlación de Spearman')
plt.show()

In [None]:
# Loudness vs. average energy: a histogram over `loudness` (30 bins requested)
# whose bar height is the mean `energy` of the rows in each bin, coloured by the
# sign of the loudness, with a box plot of the loudness values overlaid.

# `assign` returns a new frame, so the helper column stays out of `df`
# (a plain `df_aux = df` would only alias it and mutate `df` as well).
# The comparison is vectorized; missing values compare False and therefore
# fall in the 'Mayor o igual a cero' group.
df_aux = df.assign(
    loudness_color=np.where(
        df['loudness'] < 0, 'Menor que cero', 'Mayor o igual a cero'
    )
)

# One colour per loudness group (hex codes without stray whitespace).
color_map = {'Menor que cero': '#add8e6', 'Mayor o igual a cero': '#2874A6'}
fig = px.histogram(df_aux, x='loudness', y="energy", histfunc='avg',
                title='Histograma del Volumen con la Energía Promedio',
                color='loudness_color',
                labels={'loudness': 'Volumen', 'energy': 'Energía'},
                color_discrete_map=color_map,
                nbins=30)

# Overlay the box plot (with mean marker) of the loudness values directly on the
# histogram figure; no intermediate figure is needed to build the trace.
fig.add_trace(go.Box(x=df_aux['loudness'], boxmean=True, marker_color="#148F77"))

# These axis titles take precedence over the `labels` mapping given above.
fig.update_layout(xaxis_title='Loudness', yaxis_title='Energy', showlegend=False)

fig.show()




In [None]:
# Number of (non-null) track names per artist, as a two-column frame:
# `artist_name` and `song_count`.
songs_per_artist = (
    df.groupby('artist_name', as_index=False)['track_name']
    .count()
    .rename(columns={'track_name': 'song_count'})
)

# Keep the 50 artists with the highest song count.
ranked_by_count = songs_per_artist.sort_values(by='song_count', ascending=False)
top_50_artists = ranked_by_count.head(50)

# Treemap with one tile per artist; tile area and colour both encode the count.
fig = px.treemap(
    top_50_artists,
    title='Top 50 Artistas con Más Canciones',
    path=['artist_name'],
    values='song_count',
    color='song_count',
    color_continuous_scale='RdYlGn',
)

# Custom hover text: artist label plus its song count.
fig.update_traces(
    hovertemplate='Artista: %{label}<br>Número de Canciones: %{value}'
)

fig.show()

In [None]:
# Top 50 artists by mean popularity, shown as a treemap whose hover text lists
# the artist's genre(s).

# Normalise genre capitalisation. The vectorized `.str` accessor leaves missing
# values as NaN instead of raising, unlike calling `x.capitalize()` per row.
df['genre'] = df['genre'].str.capitalize()

# Per artist: mean popularity and the artist's unique genres joined into one
# comma-separated string (sorted so the text is deterministic). Missing genres
# are skipped.
artist_info = (
    df.groupby('artist_name')
    .agg(
        average_popularity=('popularity', 'mean'),
        genre=('genre', lambda genres: ', '.join(sorted(genres.dropna().unique()))),
    )
    .reset_index()
)

# Keep the 50 artists with the highest mean popularity.
top_50_artists = artist_info.sort_values(by='average_popularity', ascending=False).head(50)

# `custom_data` exposes the genre string to the hover template as customdata[0].
# No `hover_data` is passed: the explicit hovertemplate below replaces whatever
# hover text Plotly Express would generate from it.
fig = px.treemap(top_50_artists,
                path=['artist_name'],
                values='average_popularity',
                color='average_popularity',
                color_continuous_scale='RdYlGn',
                title='Top 50 Artistas con la media más alta de popularidad',
                custom_data=['genre'])

fig.update_traces(hovertemplate='Artista: %{label}<br>Popularidad Media: %{value:.2f}<br>Género(s): %{customdata[0]}')

fig.show()