# Data Analysis

## Cargando datasets

In [202]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

pd.set_option('display.max_columns', None)
sns.set_style("darkgrid")

# Cargamos el dataframe final de extracción, donde están todas las canciones, con duplicados (los hemos dejado
# por si utilizamos en algún momento la columna position)
data_ana = pd.read_csv("data_global.csv", sep = ',', parse_dates=['release_year'])
print(data_ana.shape)

# Quitamos las columnas que no usaremos casi seguro
data_ana= data_ana.drop(columns=['Unnamed: 0', 'Position'])
print(data_ana.shape)

# Quitamos los duplicados por canciones, no nos sirven de momento para el análisis principal.
data_ana = data_ana.drop_duplicates(subset='spotify_id').copy()
print(data_ana.shape)

#Revisamos los tipos de datos:
print(data_ana.info())

(48066, 38)
(48066, 36)
(1581, 36)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1581 entries, 0 to 48064
Data columns (total 36 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Track Name         1581 non-null   object        
 1   Artist             1581 non-null   object        
 2   Streams            1581 non-null   int64         
 3   date               1581 non-null   object        
 4   spotify_id         1581 non-null   object        
 5   year               1581 non-null   int64         
 6   Streamstotal       1581 non-null   int64         
 7   album              1581 non-null   object        
 8   release_date       1581 non-null   object        
 9   length             1581 non-null   float64       
 10  popularity         1581 non-null   float64       
 11  acousticness       1581 non-null   float64       
 12  danceability       1581 non-null   float64       
 13  energy             1581 non

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


## Calcular columna de pesos respecto a los streams totales para Ponderación por años 

In [207]:
# Vamos a añadir los pesos de cada canción en una nueva columna
# Versión optimizada corta y sin warnings

data_ana_2020 = data_ana.loc[data_ana['year'] == 2020]
data_ana_2019 = data_ana.loc[data_ana['year'] == 2019]
data_ana_2018 = data_ana.loc[data_ana['year'] == 2018]
data_ana_2017 = data_ana.loc[data_ana['year'] == 2017]

data_ana.loc[data_ana.year == 2020, 'streamstotal_weights'] = data_ana_2020['Streamstotal']/data_ana_2020['Streamstotal'].sum()
data_ana.loc[data_ana.year == 2019, 'streamstotal_weights'] = data_ana_2019['Streamstotal']/data_ana_2019['Streamstotal'].sum()
data_ana.loc[data_ana.year == 2018, 'streamstotal_weights'] = data_ana_2018['Streamstotal']/data_ana_2018['Streamstotal'].sum()
data_ana.loc[data_ana.year == 2017, 'streamstotal_weights'] = data_ana_2017['Streamstotal']/data_ana_2017['Streamstotal'].sum()

# Comprobaciones
print(data_ana.shape)

# Deberían sumar 1 los pesos de cada año
print(data_ana.loc[data_ana['year'] == 2020, 'streamstotal_weights'].sum())
print(data_ana.loc[data_ana['year'] == 2019, 'streamstotal_weights'].sum())
print(data_ana.loc[data_ana['year'] == 2018, 'streamstotal_weights'].sum())
print(data_ana.loc[data_ana['year'] == 2017, 'streamstotal_weights'].sum())

#Ver cómo sale
data_ana[data_ana['Track Name'] == 'Punto G']

# Genres analysis

In [163]:
data_ana[data_ana['genre2'] =='latin'].head(50)

Unnamed: 0,Track Name,Artist,Streams,date,spotify_id,year,Streamstotal,album,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo,time_signature,genre1,genre2,genre3,genre4,genre5,genre6,genre7,genre8,genre9,genre10,genres_together,artist_id,artist_followers,artist_popularity,streamstotal_weights
10327,Coronao Now (Remix),El Alfa,82448,2020-03-14,047WmwIeerHyIUstFAEz5A,2020,3060737,Coronao Now (Remix),2020-02-12,0.451159,0.71,0.255,0.883,0.853,0.000723,0.412,0.86241,0.209612,0.695,0.371194,4,dembow,latin,rap dominicano,reggaeton,tropical,,,,,,"dembow,latin,rap dominicano,reggaeton,tropical",2oQX8QiMXOyuqbcZEFsZfm,1086173,0.846939,0.002726
10375,4K,El Alfa,48582,2020-03-14,4xI3U4sDj1TPmO9Iz5TkYp,2020,4491002,4K,2020-03-12,0.326357,0.79,0.357,0.925,0.9,0.00595,0.124,0.875936,0.17487,0.804,0.384744,4,dembow,latin,rap dominicano,reggaeton,tropical,,,,,,"dembow,latin,rap dominicano,reggaeton,tropical",2oQX8QiMXOyuqbcZEFsZfm,1086173,0.846939,0.003999
10436,Coronao Now,El Alfa,46740,2020-03-14,7FbKipScVxkjQxQXwTd0gL,2020,400264,Coronao Now,2019-11-07,0.368363,0.7,0.146,0.941,0.729,0.00451,0.18,0.770913,0.322525,0.718,0.371348,4,dembow,latin,rap dominicano,reggaeton,tropical,,,,,,"dembow,latin,rap dominicano,reggaeton,tropical",2oQX8QiMXOyuqbcZEFsZfm,1086173,0.846939,0.000356
11058,Te Quemaste,Manuel Turizo,72713,2020-03-14,4fj7e6Tc48Bmomwyz2Z2XC,2020,2841727,ADN,2019-08-23,0.320177,0.74,0.611,0.729,0.773,0.00106,0.287,0.937492,0.156051,0.718,0.759045,4,colombian pop,latin,latin pop,reggaeton,reggaeton colombiano,tropical,,,,,"colombian pop,latin,latin pop,reggaeton,reggae...",0tmwSHipWxN12fsoLcFU3B,5881079,0.867347,0.002531
11119,Quiéreme Mientras Se Pueda,Manuel Turizo,43423,2020-05-05,34LI7rwi9H8w2S5KTHnv1M,2020,373548,Quiéreme Mientras Se Pueda,2020-05-01,0.301855,0.84,0.447,0.793,0.782,0.0,0.124,0.889593,0.050666,0.81,0.543953,4,colombian pop,latin,latin pop,reggaeton,reggaeton colombiano,tropical,,,,,"colombian pop,latin,latin pop,reggaeton,reggae...",0tmwSHipWxN12fsoLcFU3B,5881079,0.867347,0.000333
11128,Esclavo de Tus Besos,Manuel Turizo,173182,2019-03-14,3g4UyIcQwutiG0TfW32GnX,2019,9188408,Esclavo de Tus Besos,2019-02-15,0.372006,0.69,0.113,0.805,0.671,0.0,0.278,0.844583,0.127099,0.522,0.190638,4,colombian pop,latin,latin pop,reggaeton,reggaeton colombiano,tropical,,,,,"colombian pop,latin,latin pop,reggaeton,reggae...",0tmwSHipWxN12fsoLcFU3B,5881079,0.867347,0.009858
11189,Sola,Manuel Turizo,49703,2019-03-14,675w3ACw5xZR3ODhUJlEVN,2019,2606780,Sola,2018-11-09,0.309802,0.58,0.483,0.704,0.706,0.0,0.0965,0.847308,0.127099,0.856,0.203907,4,colombian pop,latin,latin pop,reggaeton,reggaeton colombiano,tropical,,,,,"colombian pop,latin,latin pop,reggaeton,reggae...",0tmwSHipWxN12fsoLcFU3B,5881079,0.867347,0.002797
11250,Esperándote,Manuel Turizo,92181,2018-03-14,1nfyE13arjOuO54HH0zmqj,2018,6070852,Esperándote,2017-12-07,0.322031,0.65,0.425,0.697,0.648,0.0,0.191,0.785259,0.151708,0.313,0.826339,4,colombian pop,latin,latin pop,reggaeton,reggaeton colombiano,tropical,,,,,"colombian pop,latin,latin pop,reggaeton,reggae...",0tmwSHipWxN12fsoLcFU3B,5881079,0.867347,0.008635
11311,Una Lady Como Tú,Manuel Turizo,31895,2018-03-14,7MHN1aCFtLXjownGhvEQlF,2018,1658028,Una Lady Como Tú,2017-04-10,0.355913,0.72,0.549,0.788,0.477,0.0,0.349,0.777873,0.029241,0.86,0.21739,4,colombian pop,latin,latin pop,reggaeton,reggaeton colombiano,tropical,,,,,"colombian pop,latin,latin pop,reggaeton,reggae...",0tmwSHipWxN12fsoLcFU3B,5881079,0.867347,0.002358
11372,Una Lady Como Tú (feat. Nicky Jam) - Remix,Manuel Turizo,26139,2018-03-14,7MMRoGvm7uhoUVhGvHJ3yW,2018,82768,Una Lady Como Tú (feat. Nicky Jam) [Remix],2017-08-04,0.395542,0.59,0.392,0.518,0.532,0.0,0.355,0.7761,0.078025,0.741,0.217557,4,colombian pop,latin,latin pop,reggaeton,reggaeton colombiano,tropical,,,,,"colombian pop,latin,latin pop,reggaeton,reggae...",0tmwSHipWxN12fsoLcFU3B,5881079,0.867347,0.000118


In [164]:
data_ana.Artist.value_counts()

Bad Bunny           44
J Balvin            31
Ozuna               28
C. Tangana          25
Ed Sheeran          22
                    ..
La Nueva Escuela     1
MÉLOVIN              1
Rosana               1
Vetusta Morla        1
KvndySwing           1
Name: Artist, Length: 474, dtype: int64

In [165]:
number_genres = data_ana.genre1.value_counts()
number_genres.head(60)

latin                        379
dance pop                    199
colombian pop                 83
latin pop                     68
pop                           58
trap espanol                  56
spanish pop                   54
cantautor                     48
latin arena pop               34
big room                      31
r&b en espanol                28
canadian contemporary r&b     22
argentine hip hop             21
latin viral pop               20
electropop                    20
canadian hip hop              19
flamenco urbano               18
eurovision                    17
rap espanol                   17
dominican pop                 16
modern rock                   15
dfw rap                       13
boy band                      12
conscious hip hop             12
champeta                      11
canadian pop                  11
panamanian pop                11
garage rock                   11
trap latino                   10
emo rap                        9
dembow    

In [350]:
# Para depurar el genre, los que el genre1 sea latin, se les asignará el valor en genre2, que es más específico
# y los que el genre1 no sea latin se les asignara ese genre1
data_ana_nolatinfirst = data_ana[data_ana['genre1'] != 'latin']
data_ana_latinfirst = data_ana[data_ana['genre1'] == 'latin']

data_ana.loc[data_ana.genre1 == 'latin', 'genre_final'] = data_ana_latinfirst['genre2']
data_ana.loc[data_ana.genre2 == 'latin pop', 'genre_final'] = data_ana_latinfirst['genre3']
data_ana.loc[data_ana.genre1 != 'latin', 'genre_final'] = data_ana_nolatinfirst['genre1']



In [351]:
number_genres_new = data_ana.genre_final.value_counts()
number_genres_new.head(60)

dance pop                    199
reggaeton                    148
latin hip hop                 89
colombian pop                 83
latin arena pop               75
latin pop                     68
spanish pop                   59
pop                           58
trap espanol                  56
cantautor                     48
puerto rican pop              47
r&b en espanol                41
big room                      31
latin viral pop               22
canadian contemporary r&b     22
argentine hip hop             21
electropop                    20
canadian hip hop              19
flamenco urbano               18
reggaeton flow                18
rap espanol                   17
eurovision                    17
dominican pop                 16
modern rock                   15
dfw rap                       13
boy band                      12
conscious hip hop             12
garage rock                   11
champeta                      11
canadian pop                  11
panamanian

In [352]:
#Tras un análisis por géneros, es complicado, porque el primer género listado no distingue bien
# Si encuentra latin, pasa a la siguiente columna y busca por los otros términos y así sicesivamente

list_genres_pop = 'k-pop, pop, boy band, eurovision'
list_genres_dancepop = 'dance, tropical, edm, electropop, big room'
list_genres_rock = 'rock, indie'
reggaeton = 'dembow, reggaeton, champeta'
list_genres_rap = 'hip hop, rap, funk, trap, r&b'
cantautor = 'cantautor, flamenco'

'''
list_genres_pop = ['k-pop', 'pop', 'boy band', 'eurovision']
list_genres_dancepop = ['dance', 'tropical', 'edm', 'electropop', 'big room']
list_genres_rock = ['rock', 'indie']
reggaeton = ['dembow', 'reggaeton', 'champeta']
list_genres_rap = ['hip hop', 'rap', 'funk', 'trap', 'r&b']
cantautor = ['cantautor','flamenco']'''


"\nlist_genres_pop = ['k-pop', 'pop', 'boy band', 'eurovision']\nlist_genres_dancepop = ['dance', 'tropical', 'edm', 'electropop', 'big room']\nlist_genres_rock = ['rock', 'indie']\nreggaeton = ['dembow', 'reggaeton', 'champeta']\nlist_genres_rap = ['hip hop', 'rap', 'funk', 'trap', 'r&b']\ncantautor = ['cantautor','flamenco']"

In [353]:
data_ana['genre_depurated'] = pd.np.where(data_ana.genre_final.str.contains("dance"), 'dance, tropical, edm, electropop, big room',
                              pd.np.where(data_ana.genre_final.str.contains("tropical"), 'dance, tropical, edm, electropop, big room',            
                              pd.np.where(data_ana.genre_final.str.contains("edm"), 'dance, tropical, edm, electropop, big room',
                              pd.np.where(data_ana.genre_final.str.contains("electropop"), 'dance, tropical, edm, electropop, big room',
                              pd.np.where(data_ana.genre_final.str.contains("big room"), 'dance, tropical, edm, electropop, big room',
                                          
                              pd.np.where(data_ana.genre_final.str.contains("pop"), 'pop, k-pop, boy band, eurovision, carnaval',
                              pd.np.where(data_ana.genre_final.str.contains("k-pop"), 'pop, k-pop, boy band, eurovision, carnaval',
                              pd.np.where(data_ana.genre_final.str.contains("carnaval"), 'pop, k-pop, boy band, eurovision, carnaval',                      
                              pd.np.where(data_ana.genre_final.str.contains("eurovision"), 'pop, k-pop, boy band, eurovision, carnaval',
                              pd.np.where(data_ana.genre_final.str.contains("boy band"), 'pop, k-pop, boy band, eurovision, carnaval',
                        
                              pd.np.where(data_ana.genre_final.str.contains("rock"), 'rock, indie',            
                              pd.np.where(data_ana.genre_final.str.contains("indie"), 'rock, indie',
                                          
                              pd.np.where(data_ana.genre_final.str.contains("bachata"), 'dembow, reggaeton, champeta, cubaton',                               
                              pd.np.where(data_ana.genre_final.str.contains("dembow"), 'dembow, reggaeton, champeta, cubaton',            
                              pd.np.where(data_ana.genre_final.str.contains("reggaeton"), 'dembow, reggaeton, champeta, cubaton',            
                              pd.np.where(data_ana.genre_final.str.contains("champeta"), 'dembow, reggaeton, champeta, cubaton',
                              pd.np.where(data_ana.genre_final.str.contains("cubaton"), 'dembow, reggaeton, champeta, cubaton',
                                          
                              pd.np.where(data_ana.genre_final.str.contains("hip hop"), "rap, hip hop, funk, trap, r&b",            
                              pd.np.where(data_ana.genre_final.str.contains("rap"), "rap, hip hop, funk, trap, r&b",            
                              pd.np.where(data_ana.genre_final.str.contains("funk"), "rap, hip hop, funk, trap, r&b",            
                              pd.np.where(data_ana.genre_final.str.contains("trap"), "rap, hip hop, funk, trap, r&b",            
                              pd.np.where(data_ana.genre_final.str.contains("r&b"), "rap, hip hop, funk, trap, r&b",
                                          
                              pd.np.where(data_ana.genre_final.str.contains("cantautor"), 'cantautor, flamenco',            
                              pd.np.where(data_ana.genre_final.str.contains("flamenco"), 'cantautor, flamenco',            
                                    
                                          "other"))))))))))))))))))))))))
data_ana[data_ana['genre_final'] == 'dembow']

  data_ana['genre_depurated'] = pd.np.where(data_ana.genre_final.str.contains("dance"), 'dance, tropical, edm, electropop, big room',
  pd.np.where(data_ana.genre_final.str.contains("tropical"), 'dance, tropical, edm, electropop, big room',
  pd.np.where(data_ana.genre_final.str.contains("edm"), 'dance, tropical, edm, electropop, big room',
  pd.np.where(data_ana.genre_final.str.contains("electropop"), 'dance, tropical, edm, electropop, big room',
  pd.np.where(data_ana.genre_final.str.contains("big room"), 'dance, tropical, edm, electropop, big room',
  pd.np.where(data_ana.genre_final.str.contains("pop"), 'pop, k-pop, boy band, eurovision, carnaval',
  pd.np.where(data_ana.genre_final.str.contains("k-pop"), 'pop, k-pop, boy band, eurovision, carnaval',
  pd.np.where(data_ana.genre_final.str.contains("carnaval"), 'pop, k-pop, boy band, eurovision, carnaval',
  pd.np.where(data_ana.genre_final.str.contains("eurovision"), 'pop, k-pop, boy band, eurovision, carnaval',
  pd.np.where(data_

Unnamed: 0,Track Name,Artist,Streams,date,spotify_id,year,Streamstotal,album,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo,time_signature,genre1,genre2,genre3,genre4,genre5,genre6,genre7,genre8,genre9,genre10,genres_together,artist_id,artist_followers,artist_popularity,streamstotal_weights,genre_final,genre_depurated
10327,Coronao Now (Remix),El Alfa,82448,2020-03-14,047WmwIeerHyIUstFAEz5A,2020,3060737,Coronao Now (Remix),2020-02-12,0.451159,0.71,0.255,0.883,0.853,0.000723,0.412,0.86241,0.209612,0.695,0.371194,4,dembow,latin,rap dominicano,reggaeton,tropical,,,,,,"dembow,latin,rap dominicano,reggaeton,tropical",2oQX8QiMXOyuqbcZEFsZfm,1086173,0.846939,0.002726,dembow,"dembow, reggaeton, champeta, cubaton"
10375,4K,El Alfa,48582,2020-03-14,4xI3U4sDj1TPmO9Iz5TkYp,2020,4491002,4K,2020-03-12,0.326357,0.79,0.357,0.925,0.9,0.00595,0.124,0.875936,0.17487,0.804,0.384744,4,dembow,latin,rap dominicano,reggaeton,tropical,,,,,,"dembow,latin,rap dominicano,reggaeton,tropical",2oQX8QiMXOyuqbcZEFsZfm,1086173,0.846939,0.003999,dembow,"dembow, reggaeton, champeta, cubaton"
10436,Coronao Now,El Alfa,46740,2020-03-14,7FbKipScVxkjQxQXwTd0gL,2020,400264,Coronao Now,2019-11-07,0.368363,0.7,0.146,0.941,0.729,0.00451,0.18,0.770913,0.322525,0.718,0.371348,4,dembow,latin,rap dominicano,reggaeton,tropical,,,,,,"dembow,latin,rap dominicano,reggaeton,tropical",2oQX8QiMXOyuqbcZEFsZfm,1086173,0.846939,0.000356,dembow,"dembow, reggaeton, champeta, cubaton"
11375,Rueda,Chimbala,70216,2020-03-14,4NDHYPVJ5zLnR8yYZsMDsu,2020,3871511,Rueda,2019-07-19,0.349507,0.68,0.165,0.888,0.916,4e-06,0.0519,0.947012,0.099884,0.96,0.451617,4,dembow,dominican pop,latin,rap dominicano,,,,,,,"dembow,dominican pop,latin,rap dominicano",4VVEpEhC8NcR7AqNEds42U,205320,0.704082,0.003448,dembow,"dembow, reggaeton, champeta, cubaton"
11436,Rueda - Remix,Chimbala,72331,2020-03-20,3EWxpqOSP0KWgUmRiklqGy,2020,5466191,Rueda (Remix),2020-03-19,0.354069,0.72,0.256,0.864,0.925,9e-06,0.0648,0.95742,0.072669,0.961,0.451744,4,dembow,dominican pop,latin,rap dominicano,,,,,,,"dembow,dominican pop,latin,rap dominicano",4VVEpEhC8NcR7AqNEds42U,205320,0.704082,0.004868,dembow,"dembow, reggaeton, champeta, cubaton"
16382,La Rubia - Remix 2,La Nueva Escuela,61243,2020-03-14,7KKTnv81z6DmfPIGsZWjOW,2020,3253091,La Rubia,2019-04-19,0.350359,0.68,0.319,0.758,0.717,6e-06,0.378,0.881418,0.156051,0.753,0.316969,4,dembow,dominican pop,,,,,,,,,"dembow,dominican pop",7mcJN2bWqCv7jvykgHp4KN,64412,0.581633,0.002897,dembow,"dembow, reggaeton, champeta, cubaton"
26980,Ellos,Ceky Viciny,34777,2020-03-17,7bDboJVS7UlQBZj8lsvz2g,2020,1262764,Ellos,2019-07-05,0.25861,0.45,0.124,0.754,0.939,0.00281,0.293,0.95499,0.022438,0.853,0.397939,4,dembow,dominican pop,rap dominicano,,,,,,,,"dembow,dominican pop,rap dominicano",5UopXhshFFqRIMfeZrBclq,62120,0.602041,0.001125,dembow,"dembow, reggaeton, champeta, cubaton"
27016,Ellos,Ceky Viciny,31018,2019-04-18,1tDUwlIyFFcmfMypYoQ4Ns,2019,2039392,Ellos,2019-03-11,0.25861,0.0,0.124,0.754,0.939,0.00281,0.293,0.95499,0.022438,0.853,0.397939,4,dembow,dominican pop,rap dominicano,,,,,,,,"dembow,dominican pop,rap dominicano",5UopXhshFFqRIMfeZrBclq,62120,0.602041,0.002188,dembow,"dembow, reggaeton, champeta, cubaton"
35549,Mujeres,Mozart La Para,46228,2019-03-14,6xLvYhLOW2oETJoZv8ChU8,2019,1836540,Mujeres,2018-06-08,0.290882,0.6,0.204,0.835,0.767,7.9e-05,0.261,0.832009,0.065721,0.71,0.250723,4,dembow,dominican pop,rap dominicano,,,,,,,,"dembow,dominican pop,rap dominicano",0odliLZMTk45CEVzF3Zocl,254471,0.704082,0.00197,dembow,"dembow, reggaeton, champeta, cubaton"


In [356]:
number_genres2 = data_ana.genre_depurated.value_counts()
number_genres2.head(60)

pop, k-pop, boy band, eurovision, carnaval    549
rap, hip hop, funk, trap, r&b                 371
dance, tropical, edm, electropop, big room    288
dembow, reggaeton, champeta, cubaton          207
other                                          69
cantautor, flamenco                            66
rock, indie                                    40
Name: genre_depurated, dtype: int64

In [357]:
#Total reproducciones 
data_group = data_ana.groupby(['genre_depurated']).agg(['sum', 'count'])
data_group['year']
#Falta hacerlo por ponderación con reproducciones

Unnamed: 0_level_0,sum,count
genre_depurated,Unnamed: 1_level_1,Unnamed: 2_level_1
"cantautor, flamenco",133244,66
"dance, tropical, edm, electropop, big room",581235,288
"dembow, reggaeton, champeta, cubaton",417920,207
other,139260,69
"pop, k-pop, boy band, eurovision, carnaval",1108139,549
"rap, hip hop, funk, trap, r&b",748926,371
"rock, indie",80717,40


In [358]:
data_group = data_ana.groupby(['genre_depurated', 'year']).agg(['sum', 'count'])
data_group['Streamstotal']

Unnamed: 0_level_0,Unnamed: 1_level_0,sum,count
genre_depurated,year,Unnamed: 2_level_1,Unnamed: 3_level_1
"cantautor, flamenco",2017,4965927,10
"cantautor, flamenco",2018,5919745,14
"cantautor, flamenco",2019,12525477,18
"cantautor, flamenco",2020,50389798,24
"dance, tropical, edm, electropop, big room",2017,132809971,96
"dance, tropical, edm, electropop, big room",2018,136905217,87
"dance, tropical, edm, electropop, big room",2019,139193857,63
"dance, tropical, edm, electropop, big room",2020,83195815,42
"dembow, reggaeton, champeta, cubaton",2017,54618153,35
"dembow, reggaeton, champeta, cubaton",2018,101564360,32


In [359]:
data_group2 = data_ana.groupby(['genre1', 'year']).agg(['sum', 'count'])
data_group2['Streamstotal'].sort_values(by='count', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,sum,count
genre1,year,Unnamed: 2_level_1,Unnamed: 3_level_1
latin,2020,464786598,120
latin,2019,363379181,99
latin,2018,287024324,83
latin,2017,112415135,77
dance pop,2017,104103680,75
...,...,...,...
danish pop,2019,2436163,1
dancehall,2017,5700710,1
czech pop,2018,221520,1
cumbia,2018,1539931,1


In [360]:
data_ana[data_ana['genre_depurated'] == 'dembow, reggaeton, champeta, cubaton']

Unnamed: 0,Track Name,Artist,Streams,date,spotify_id,year,Streamstotal,album,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo,time_signature,genre1,genre2,genre3,genre4,genre5,genre6,genre7,genre8,genre9,genre10,genres_together,artist_id,artist_followers,artist_popularity,streamstotal_weights,genre_final,genre_depurated
0,Tusa,KAROL G,446086,2020-03-14,7k4t7uLgtOxPwTpFmtJNTY,2020,18848892,Tusa,2019-11-07,0.324972,0.91,0.29500,0.803,0.715,0.000134,0.0574,0.911425,0.397800,0.574,0.258160,4,latin,latin pop,reggaeton,reggaeton colombiano,,,,,,,"latin,latin pop,reggaeton,reggaeton colombiano",790FomKkXshlbRYZFtlgla,11337789,0.887755,0.016786,reggaeton,"dembow, reggaeton, champeta, cubaton"
61,Follow,KAROL G,152745,2020-04-02,2w1rq80FIrbxeJy9WYURxw,2020,7027186,Follow,2020-04-01,0.330442,0.78,0.66700,0.735,0.787,0.000006,0.0721,0.875476,0.198031,0.455,0.711901,4,latin,latin pop,reggaeton,reggaeton colombiano,,,,,,,"latin,latin pop,reggaeton,reggaeton colombiano",790FomKkXshlbRYZFtlgla,11337789,0.887755,0.006258,reggaeton,"dembow, reggaeton, champeta, cubaton"
103,Créeme,KAROL G,72543,2019-03-14,1mT47pKLXUwPYGwB91GcNz,2019,3197262,Créeme,2018-11-02,0.354546,0.72,0.15400,0.715,0.872,0.000146,0.1100,0.904169,0.045599,0.682,0.230459,4,latin,latin pop,reggaeton,reggaeton colombiano,,,,,,,"latin,latin pop,reggaeton,reggaeton colombiano",790FomKkXshlbRYZFtlgla,11337789,0.887755,0.003430,reggaeton,"dembow, reggaeton, champeta, cubaton"
153,Culpables,KAROL G,56296,2019-03-14,6q8Lb50EtqNeeJXXe8mMAH,2019,2080234,Culpables,2018-09-14,0.390787,0.69,0.27900,0.728,0.801,0.000005,0.1430,0.922521,0.065287,0.416,0.451744,4,latin,latin pop,reggaeton,reggaeton colombiano,,,,,,,"latin,latin pop,reggaeton,reggaeton colombiano",790FomKkXshlbRYZFtlgla,11337789,0.887755,0.002232,reggaeton,"dembow, reggaeton, champeta, cubaton"
203,Punto G,KAROL G,51826,2019-04-05,5Lb1pjnll07UQKxNpnnGBy,2019,1930173,Punto G,2019-04-05,0.275291,0.61,0.00796,0.787,0.751,0.000139,0.0857,0.863559,0.091633,0.926,0.691617,4,latin,latin pop,reggaeton,reggaeton colombiano,,,,,,,"latin,latin pop,reggaeton,reggaeton colombiano",790FomKkXshlbRYZFtlgla,11337789,0.887755,0.002071,reggaeton,"dembow, reggaeton, champeta, cubaton"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46155,Bailame Despacio,Xantos,79678,2017-03-14,4OaTpLJIa2harufGxfOPUu,2017,5050041,Revolucionario,2015-10-21,0.425083,0.55,0.16300,0.688,0.719,0.000000,0.1500,0.863296,0.046757,0.605,0.170835,4,reggaeton,,,,,,,,,,reggaeton,3HPOnbuwLP2qlwWQ7cgxYW,167636,0.540816,0.011897,reggaeton,"dembow, reggaeton, champeta, cubaton"
46297,"Ay Mi Dios (feat. Pitbull, Yandel & Chacal)",IAmChino,66219,2017-03-14,6stYbAJgTszHAHZMPxWWCY,2017,5211785,"Ay Mi Dios (feat. Pitbull, Yandel & Chacal)",2016-02-22,0.455862,0.00,0.16700,0.761,0.829,0.000000,0.1890,0.913953,0.064997,0.813,0.197680,4,cubaton,reggaeton,tropical,,,,,,,,"cubaton,reggaeton,tropical",0b2GL7Y02vu50qieoQmw1w,43986,0.591837,0.012278,cubaton,"dembow, reggaeton, champeta, cubaton"
46358,Hasta Que Se Seque el Malecón (feat. Farruko) ...,Jacob Forever,59790,2017-03-14,6sMPwcpYtxm1mlgYbp1B0t,2017,1817657,Invicto,2017-02-06,0.362136,0.60,0.36200,0.785,0.838,0.000006,0.1390,0.823736,0.021569,0.765,0.251351,4,cubaton,tropical,,,,,,,,,"cubaton,tropical",4fCRFHEQgjqakvFgQCliMp,128729,0.612245,0.004282,cubaton,"dembow, reggaeton, champeta, cubaton"
46419,Quiéreme,Jacob Forever,22337,2017-04-01,649tz8MtbCHSTEzNBw7c1Q,2017,504153,Invicto,2017-02-06,0.300934,0.49,0.44300,0.703,0.663,0.000062,0.0982,0.851609,0.067458,0.384,0.758591,4,cubaton,tropical,,,,,,,,,"cubaton,tropical",4fCRFHEQgjqakvFgQCliMp,128729,0.612245,0.001188,cubaton,"dembow, reggaeton, champeta, cubaton"
