# Data Analysis - Most played

## Cargando dataset y preparación

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import altair as alt

# Jupyter configuration: never truncate the column list when displaying wide
# frames, and use seaborn's "darkgrid" theme for matplotlib/seaborn figures.
pd.set_option('display.max_columns', None)
sns.set_style("darkgrid")

# Load the dataframe already cleaned by the Features analysis.
# 'release_date_year' is parsed as a datetime column. 'Unnamed: 0' is dropped
# (presumably the index column written by an earlier to_csv export — confirm).
data_ana_corona = (
    pd.read_csv("data_ana_coronaperiod_withgenre.csv", sep = ',', parse_dates=['release_date_year'])
    .drop(columns=['Unnamed: 0'])
)

print('Tamaño inicial: ',data_ana_corona.shape)

# Preview two random rows. The seed is fixed so the displayed rows are the
# same on every "Restart Kernel -> Run All" instead of changing per run.
data_ana_corona.sample(2, random_state=42)

Tamaño inicial:  (1581, 39)


Unnamed: 0,Track Name,Artist,Streams,date,spotify_id,year,Streamstotal,album,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo,time_signature,release_date_year,genre1,genre2,genre3,genre4,genre5,genre6,genre7,genre8,genre9,genre10,genres_together,artist_id,artist_followers,artist_popularity,streamstotal_weights,genre_depurated,genre_depurated_final
602,Falling,Trevor Daniel,46467,2020-03-13,4TnjEaWOeW0eKTKIEvJyCa,2020,608831,Falling,2018-10-05,0.218351,0.887755,0.123,0.785,0.431,0.0,0.0887,0.731648,0.019108,0.236,0.431874,4,2018-01-01,alternative r&b,melodic rap,pop rap,,,,,,,,"alternative r&b,melodic rap,pop rap",7uaIm6Pw7xplS8Dy06V6pT,463345,0.818182,0.000543,alternative r&b,hip hop
1039,La Plata (feat. Lalo Ebratt),Juanes,101315,2019-03-13,7Eso5mURNFprb5PmhOXDlf,2019,4453364,La Plata,2019-01-11,0.310953,0.632653,0.201,0.762,0.74,0.0,0.275,0.86415,0.293573,0.961,0.704445,4,2019-01-01,colombian pop,latin,latin pop,mexican pop,rock en espanol,tropical,,,,,"colombian pop,latin,latin pop,mexican pop,rock...",0UWZUmn7sybxMCqrw9tGa7,2945416,0.79798,0.004776,colombian pop,pop


# ¿Qué ha sido lo más escuchado durante la pandemia?

Las 20 canciones más escuchadas por sus reproducciones

In [14]:
# Keep only the rows whose 'year' is 2020. This frame is reused by the
# following cells, so its name must stay `datamost_2020`.
datamost_2020 = data_ana_corona[data_ana_corona['year']== 2020]

# Sum 'Streamstotal' per (track, artist) pair, keep the 20 largest totals and
# turn the resulting Series back into a flat dataframe for plotting/export.
# NOTE(review): if the frame holds several rows per track and 'Streamstotal'
# is already a per-track total, this sum would count it more than once —
# confirm against the dataset produced by the Features analysis.
datamost_2020_track = (
    datamost_2020
    .groupby(['Track Name', 'Artist'])['Streamstotal']
    .sum()
    .sort_values(ascending=False)
    .head(20)
    .to_frame()
    .reset_index()
)

# Bar chart: one bar per track, coloured by artist. sort=None keeps the bars
# in dataframe order (descending streams) instead of Altair's default sort.
graph_most_2020_track = alt.Chart(datamost_2020_track).mark_bar().encode(
    x = alt.X('Track Name', sort=None),
    y = 'Streamstotal',
    color = alt.Color("Artist", sort=None),
).properties(width=600, height=400).interactive()

# Export for Datawrapper. Create the output folder first so the cell also
# works on a fresh checkout where "graphs_article/" does not exist yet
# (to_csv does not create missing directories).
os.makedirs("graphs_article", exist_ok=True)
datamost_2020_track.to_csv("graphs_article/graph_mostplayed_songs.csv", sep = ',')

graph_most_2020_track

Los 20 artistas más escuchados por sus reproducciones

In [15]:
# Sum 'Streamstotal' per artist over the 2020 rows, keep the 20 largest
# totals and convert the Series into a flat dataframe for plotting/export.
datamost_2020_artist = (
    datamost_2020
    .groupby(['Artist'])['Streamstotal']
    .sum()
    .sort_values(ascending=False)
    .head(20)
    .to_frame()
    .reset_index()
)

# Bar chart: one bar per artist, coloured by artist. sort=None keeps the bars
# in dataframe order (descending streams) instead of Altair's default sort.
graph_most_2020_artist = alt.Chart(datamost_2020_artist).mark_bar().encode(
    x = alt.X('Artist', sort=None),
    y = 'Streamstotal',
    color = alt.Color("Artist", sort=None),
).properties(width=600, height=400).interactive()

# Export for Datawrapper. Create the output folder first so the cell also
# works on a fresh checkout where "graphs_article/" does not exist yet
# (to_csv does not create missing directories).
os.makedirs("graphs_article", exist_ok=True)
datamost_2020_artist.to_csv("graphs_article/graph_mostplayed_artists.csv", sep = ',')

graph_most_2020_artist

Los 20 artistas más escuchados por su género y reproducciones

In [16]:
# Sum 'Streamstotal' per (genre, artist) pair over the 2020 rows and keep the
# 200 largest totals, then flatten the result into a dataframe.
# NOTE(review): the section heading talks about 20 artists but this keeps the
# top 200 pairs — confirm which one is intended.
datamost_2020_genre = (
    datamost_2020
    .groupby(['genre_depurated_final', 'Artist'])['Streamstotal']
    .sum()
    .sort_values(ascending=False)
    .head(200)
    .to_frame()
    .reset_index()
)

# Bar chart: one bar per genre, with segments coloured by artist.
# The tooltip shows the artist behind each segment; sort=None keeps the
# dataframe order instead of Altair's default sort.
graph_most_2020_genre = alt.Chart(datamost_2020_genre).mark_bar().encode(
    x = alt.X('genre_depurated_final', sort=None),
    y = 'Streamstotal',
    color = alt.Color('Artist', sort=None),
    tooltip='Artist'
).properties(width=600, height=400).interactive()

# Export for Datawrapper. Create the output folder first so the cell also
# works on a fresh checkout where "graphs_article/" does not exist yet
# (to_csv does not create missing directories).
os.makedirs("graphs_article", exist_ok=True)
datamost_2020_genre.to_csv("graphs_article/graph_mostplayed_artistandgenre.csv", sep = ',')

graph_most_2020_genre

Los géneros más escuchados

In [20]:
# Total 'Streamstotal' per genre for the 2020 rows, largest first
# (capped at 200 entries), displayed as the cell output.
datamost_2020_genre2 = (
    datamost_2020
    .groupby(['genre_depurated_final'])['Streamstotal']
    .sum()
    .sort_values(ascending=False)
    .head(200)
)
datamost_2020_genre2

genre_depurated_final
reggaeton    365812646
pop          307839144
hip hop      273840329
cantautor     78533447
dance         72708573
other         15210773
rock           6428703
Name: Streamstotal, dtype: int64

In [23]:
# Total 'Streamstotal' per genre for the 2020 rows, largest first (capped at
# 200 entries). Recomputed here so this export cell does not depend on the
# previous display cell having been run.
datamost_2020_genre2 = (
    datamost_2020
    .groupby(['genre_depurated_final'])['Streamstotal']
    .sum()
    .sort_values(ascending=False)
    .head(200)
)

# Export for Datawrapper. The Series index (the genre name) is written as the
# first CSV column. Create the output folder first so the cell also works on
# a fresh checkout where "graphs_article/" does not exist yet
# (to_csv does not create missing directories).
os.makedirs("graphs_article", exist_ok=True)
datamost_2020_genre2.to_csv("graphs_article/graph_mostplayed_genres.csv", sep = ',')
