In [1]:
import pandas as pd
import numpy as np
import requests
import os 
from dotenv import load_dotenv
from tqdm import tqdm
import pickle
import re
pd.options.display.max_columns = None
from fuzzywuzzy import process, fuzz
from collections import Counter
import calendar

import sys
sys.path.append("../")
import src.soporteAPIs as sa
sys.path.append("../")
import src.biblioteca as bb
sys.path.append("../")
import src.soportecleaning as sc
sys.path.append('../')
import src.soporteCharts as charts

# Importing pickle file

In [2]:
with open('../data/pickle/artist.pickle', 'rb') as tracks:
    tracks = pickle.load(tracks)

I will split the artist column in order to split between main artist (`artist_0`) and possible featuring artist (`artist_1`, `artist_1`)

In [3]:
tracks = sc.split_artists(tracks)
print(tracks.shape)
tracks.head()

(14663, 6)


Unnamed: 0,url,artist,track,artist_0,artist_1,artist_2
8246,https://open.spotify.com/track/4Km5HrUvYTaSUfi...,migos,bad and boujee (feat. lil uzi vert),migos,,
8247,https://open.spotify.com/track/343YBumqHu19cGo...,drake,fake love,drake,,
8248,https://open.spotify.com/track/5aAx2yezTd8zXrk...,"the weeknd, daft punk",starboy,the weeknd,daft punk,
8249,https://open.spotify.com/track/7BKLCZ1jbUBVqRi...,"the chainsmokers, halsey",closer,the chainsmokers,halsey,
8250,https://open.spotify.com/track/6fujklziTHa8uoM...,"rae sremmurd, gucci mane",black beatles,rae sremmurd,gucci mane,


# lastfm
In this part of the process I will extract form the LastFM API all the data related to the tracks and artist genres.

First, I need to configure my credentials.

In [4]:
load_dotenv()

True

In [5]:
api = os.getenv("lastfm-id")
username = os.getenv("lastfm-user")

Testing to gather the data from the api with a function:

In [6]:
res = sa.getLastFMData({
    'method': 'artist.getInfo',
    'artist':'Harry Styles',
})
res.json()['artist']['bio']

{'links': {'link': {'#text': '',
   'rel': 'original',
   'href': 'https://last.fm/music/Harry+Styles/+wiki'}},
 'published': '24 Aug 2011, 17:11',
 'summary': 'Harry Edward Styles (born 1 February 1994 in Redditch, Worcestershire, England) is a three-time GRAMMY award-winning, British singer, songwriter, and actor. As a member of the British/Irish boy band One Direction, singer Harry Styles topped the charts, toured the world, and sold millions of albums before going solo in 2016. Styles was raised in Holmes Chapel, Cheshire, and he made his first foray into music with a high-school band named White Eskimo. <a href="https://www.last.fm/music/Harry+Styles">Read more on Last.fm</a>',
 'content': 'Harry Edward Styles (born 1 February 1994 in Redditch, Worcestershire, England) is a three-time GRAMMY award-winning, British singer, songwriter, and actor. As a member of the British/Irish boy band One Direction, singer Harry Styles topped the charts, toured the world, and sold millions of alb

This is for the whole dataframe:

In [7]:
tqdm.pandas()

tracks[['bio', 'artist_tag']] = tracks.progress_apply(lambda x: sa.getArtistInfo(x.artist_0), axis=1, result_type='expand')

100%|██████████| 14663/14663 [54:30<00:00,  4.48it/s] 


In [8]:
tracks.head()

Unnamed: 0,url,artist,track,artist_0,artist_1,artist_2,bio,artist_tag
8246,https://open.spotify.com/track/4Km5HrUvYTaSUfi...,migos,bad and boujee (feat. lil uzi vert),migos,,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'Hip-Hop', 'url': 'https://www.last...."
8247,https://open.spotify.com/track/343YBumqHu19cGo...,drake,fake love,drake,,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'Hip-Hop', 'url': 'https://www.last...."
8248,https://open.spotify.com/track/5aAx2yezTd8zXrk...,"the weeknd, daft punk",starboy,the weeknd,daft punk,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'rnb', 'url': 'https://www.last.fm/t..."
8249,https://open.spotify.com/track/7BKLCZ1jbUBVqRi...,"the chainsmokers, halsey",closer,the chainsmokers,halsey,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'electronic', 'url': 'https://www.la..."
8250,https://open.spotify.com/track/6fujklziTHa8uoM...,"rae sremmurd, gucci mane",black beatles,rae sremmurd,gucci mane,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'Hip-Hop', 'url': 'https://www.last...."


Now I get the tags for each individual track:

In [9]:
res = sa.getLastFMData({
    'method': 'track.getInfo',
    'artist':'Harry Styles',
    'track':'As It Was'
})
res.json()['track']['toptags']['tag']#['published']#[0]['name']

[{'name': 'pop', 'url': 'https://www.last.fm/tag/pop'},
 {'name': 'rock', 'url': 'https://www.last.fm/tag/rock'},
 {'name': 'indie pop', 'url': 'https://www.last.fm/tag/indie+pop'},
 {'name': 'synthpop', 'url': 'https://www.last.fm/tag/synthpop'},
 {'name': 'british', 'url': 'https://www.last.fm/tag/british'}]

This is for the whole dataset:

In [10]:
tqdm.pandas()

tracks[['track_tag', 'published_track']] = tracks.progress_apply(lambda x: sa.getTrackTags(x.artist_0, x.track), axis=1, result_type='expand')

100%|██████████| 14663/14663 [58:48<00:00,  4.16it/s] 


In [11]:
print(tracks['track_tag'].isnull().sum(), tracks['artist_tag'].isnull().sum())
tracks.head()

9031 1


Unnamed: 0,url,artist,track,artist_0,artist_1,artist_2,bio,artist_tag,track_tag,published_track
8246,https://open.spotify.com/track/4Km5HrUvYTaSUfi...,migos,bad and boujee (feat. lil uzi vert),migos,,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'Hip-Hop', 'url': 'https://www.last....","[{'name': 'trap', 'url': 'https://www.last.fm/...","30 Jan 2018, 04:41"
8247,https://open.spotify.com/track/343YBumqHu19cGo...,drake,fake love,drake,,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'Hip-Hop', 'url': 'https://www.last....","[{'name': 'trap', 'url': 'https://www.last.fm/...","27 Oct 2016, 02:34"
8248,https://open.spotify.com/track/5aAx2yezTd8zXrk...,"the weeknd, daft punk",starboy,the weeknd,daft punk,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'rnb', 'url': 'https://www.last.fm/t...","[{'name': 'rnb', 'url': 'https://www.last.fm/t...","23 Sep 2016, 01:07"
8249,https://open.spotify.com/track/7BKLCZ1jbUBVqRi...,"the chainsmokers, halsey",closer,the chainsmokers,halsey,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'electronic', 'url': 'https://www.la...","[{'name': '2016', 'url': 'https://www.last.fm/...","31 Jul 2016, 22:59"
8250,https://open.spotify.com/track/6fujklziTHa8uoM...,"rae sremmurd, gucci mane",black beatles,rae sremmurd,gucci mane,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'Hip-Hop', 'url': 'https://www.last....","[{'name': 'trap', 'url': 'https://www.last.fm/...","27 Oct 2016, 05:23"


# Data Cleaning

In [12]:
with open('../data/pickle/lastfm_dump.pickle', 'wb') as test:
    pickle.dump(tracks, test)

In [13]:
with open('../data/pickle/lastfm_dump.pickle', 'rb') as tracks:
    tracks = pickle.load(tracks)

In [14]:
tracks_final = sc.cleaningLastFM(tracks)
print(tracks_final.shape)
tracks_final.head()

(14663, 25)


Unnamed: 0,url,artist,track,artist_0,artist_1,artist_2,bio,artist_tag,track_tag,published_track,links,published,summary,content,0,aritist_genre_0,aritist_genre_1,aritist_genre_2,aritist_genre_3,aritist_genre_4,track_genre_0,track_genre_1,track_genre_2,track_genre_3,track_genre_4
8246,https://open.spotify.com/track/4Km5HrUvYTaSUfi...,migos,bad and boujee (feat. lil uzi vert),migos,,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'Hip-Hop', 'url': 'https://www.last....","[{'name': 'trap', 'url': 'https://www.last.fm/...","30 Jan 2018, 04:41","{'link': {'#text': '', 'rel': 'original', 'hre...","26 Jun 2013, 14:20","Migos is a rap trio-group from Lawrenceville, ...","Migos is a rap trio-group from Lawrenceville, ...",,Hip-Hop,trap,rap,hip hop,atlanta,trap,MySpotigramBot,Hip-Hop,rap,2010s
8247,https://open.spotify.com/track/343YBumqHu19cGo...,drake,fake love,drake,,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'Hip-Hop', 'url': 'https://www.last....","[{'name': 'trap', 'url': 'https://www.last.fm/...","27 Oct 2016, 02:34","{'link': {'#text': '', 'rel': 'original', 'hre...","25 Mar 2006, 06:09","Aubrey Drake Graham (born October 24, 1986) is...","Aubrey Drake Graham (born October 24, 1986) is...",,Hip-Hop,rap,rnb,hip hop,Canadian,trap,Hip-Hop,rap,hiphop,rnb
8248,https://open.spotify.com/track/5aAx2yezTd8zXrk...,"the weeknd, daft punk",starboy,the weeknd,daft punk,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'rnb', 'url': 'https://www.last.fm/t...","[{'name': 'rnb', 'url': 'https://www.last.fm/t...","23 Sep 2016, 01:07","{'link': {'#text': '', 'rel': 'original', 'hre...","09 Jan 2011, 12:41","Abel Makkonen Tesfaye, popularly known as The ...","Abel Makkonen Tesfaye, popularly known as The ...",,rnb,electronic,dubstep,Canadian,prog-rnb,rnb,electronic,pop,MySpotigramBot,2010s
8249,https://open.spotify.com/track/7BKLCZ1jbUBVqRi...,"the chainsmokers, halsey",closer,the chainsmokers,halsey,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'electronic', 'url': 'https://www.la...","[{'name': '2016', 'url': 'https://www.last.fm/...","31 Jul 2016, 22:59","{'link': {'#text': '', 'rel': 'original', 'hre...","13 Nov 2013, 05:27",A production duo featuring Andrew Taggart and ...,A production duo featuring Andrew Taggart and ...,,electronic,pop,House,electro house,dance,2016,electronic,future bass,halsey,electropop
8250,https://open.spotify.com/track/6fujklziTHa8uoM...,"rae sremmurd, gucci mane",black beatles,rae sremmurd,gucci mane,,"{'links': {'link': {'#text': '', 'rel': 'origi...","[{'name': 'Hip-Hop', 'url': 'https://www.last....","[{'name': 'trap', 'url': 'https://www.last.fm/...","27 Oct 2016, 05:23","{'link': {'#text': '', 'rel': 'original', 'hre...","18 Jul 2014, 22:40",Rae Sremmurd ( /ˈreɪ ʃrˈɪmɜːrd/) is an America...,Rae Sremmurd ( /ˈreɪ ʃrˈɪmɜːrd/) is an America...,,Hip-Hop,rap,hip hop,trap,seen live,trap,rap,pop rap,Southern Hip Hop,Hip-Hop


In [15]:
tracks_final.drop(['bio', 'artist_tag', 'track_tag', 'links', 0], axis=1, inplace=True)
tracks_final.head()

Unnamed: 0,url,artist,track,artist_0,artist_1,artist_2,published_track,published,summary,content,aritist_genre_0,aritist_genre_1,aritist_genre_2,aritist_genre_3,aritist_genre_4,track_genre_0,track_genre_1,track_genre_2,track_genre_3,track_genre_4
8246,https://open.spotify.com/track/4Km5HrUvYTaSUfi...,migos,bad and boujee (feat. lil uzi vert),migos,,,"30 Jan 2018, 04:41","26 Jun 2013, 14:20","Migos is a rap trio-group from Lawrenceville, ...","Migos is a rap trio-group from Lawrenceville, ...",Hip-Hop,trap,rap,hip hop,atlanta,trap,MySpotigramBot,Hip-Hop,rap,2010s
8247,https://open.spotify.com/track/343YBumqHu19cGo...,drake,fake love,drake,,,"27 Oct 2016, 02:34","25 Mar 2006, 06:09","Aubrey Drake Graham (born October 24, 1986) is...","Aubrey Drake Graham (born October 24, 1986) is...",Hip-Hop,rap,rnb,hip hop,Canadian,trap,Hip-Hop,rap,hiphop,rnb
8248,https://open.spotify.com/track/5aAx2yezTd8zXrk...,"the weeknd, daft punk",starboy,the weeknd,daft punk,,"23 Sep 2016, 01:07","09 Jan 2011, 12:41","Abel Makkonen Tesfaye, popularly known as The ...","Abel Makkonen Tesfaye, popularly known as The ...",rnb,electronic,dubstep,Canadian,prog-rnb,rnb,electronic,pop,MySpotigramBot,2010s
8249,https://open.spotify.com/track/7BKLCZ1jbUBVqRi...,"the chainsmokers, halsey",closer,the chainsmokers,halsey,,"31 Jul 2016, 22:59","13 Nov 2013, 05:27",A production duo featuring Andrew Taggart and ...,A production duo featuring Andrew Taggart and ...,electronic,pop,House,electro house,dance,2016,electronic,future bass,halsey,electropop
8250,https://open.spotify.com/track/6fujklziTHa8uoM...,"rae sremmurd, gucci mane",black beatles,rae sremmurd,gucci mane,,"27 Oct 2016, 05:23","18 Jul 2014, 22:40",Rae Sremmurd ( /ˈreɪ ʃrˈɪmɜːrd/) is an America...,Rae Sremmurd ( /ˈreɪ ʃrˈɪmɜːrd/) is an America...,Hip-Hop,rap,hip hop,trap,seen live,trap,rap,pop rap,Southern Hip Hop,Hip-Hop


# Adding new columns

Generating a new clean genres column based on the extracted ones from de apis.

In [16]:
tracks_clean = sc.newColumnsLastFM(tracks_final)
print(tracks_clean.shape)
tracks_clean.head()

There are 788 different music genres.


100%|██████████| 14663/14663 [00:00<00:00, 22315.13it/s]


(14663, 24)


Unnamed: 0,url,artist,track,artist_0,artist_1,artist_2,published_track,published,summary,content,aritist_genre_0,aritist_genre_1,aritist_genre_2,aritist_genre_3,aritist_genre_4,track_genre_0,track_genre_1,track_genre_2,track_genre_3,track_genre_4,music_genre,gender,birthday_date,age
8246,https://open.spotify.com/track/4Km5HrUvYTaSUfi...,migos,bad and boujee (feat. lil uzi vert),migos,,,"30 Jan 2018, 04:41","26 Jun 2013, 14:20","Migos is a rap trio-group from Lawrenceville, ...","Migos is a rap trio-group from Lawrenceville, ...",Hip-Hop,trap,rap,hip hop,atlanta,trap,MySpotigramBot,Hip-Hop,rap,2010s,hip-hop,group,,
8247,https://open.spotify.com/track/343YBumqHu19cGo...,drake,fake love,drake,,,"27 Oct 2016, 02:34","25 Mar 2006, 06:09","Aubrey Drake Graham (born October 24, 1986) is...","Aubrey Drake Graham (born October 24, 1986) is...",Hip-Hop,rap,rnb,hip hop,Canadian,trap,Hip-Hop,rap,hiphop,rnb,hip-hop,male,,
8248,https://open.spotify.com/track/5aAx2yezTd8zXrk...,"the weeknd, daft punk",starboy,the weeknd,daft punk,,"23 Sep 2016, 01:07","09 Jan 2011, 12:41","Abel Makkonen Tesfaye, popularly known as The ...","Abel Makkonen Tesfaye, popularly known as The ...",rnb,electronic,dubstep,Canadian,prog-rnb,rnb,electronic,pop,MySpotigramBot,2010s,rnb,male,,
8249,https://open.spotify.com/track/7BKLCZ1jbUBVqRi...,"the chainsmokers, halsey",closer,the chainsmokers,halsey,,"31 Jul 2016, 22:59","13 Nov 2013, 05:27",A production duo featuring Andrew Taggart and ...,A production duo featuring Andrew Taggart and ...,electronic,pop,House,electro house,dance,2016,electronic,future bass,halsey,electropop,electronic,group,,
8250,https://open.spotify.com/track/6fujklziTHa8uoM...,"rae sremmurd, gucci mane",black beatles,rae sremmurd,gucci mane,,"27 Oct 2016, 05:23","18 Jul 2014, 22:40",Rae Sremmurd ( /ˈreɪ ʃrˈɪmɜːrd/) is an America...,Rae Sremmurd ( /ˈreɪ ʃrˈɪmɜːrd/) is an America...,Hip-Hop,rap,hip hop,trap,seen live,trap,rap,pop rap,Southern Hip Hop,Hip-Hop,hip-hop,group,,


# EDA

Total number of rows and columns of the dataframe

In [17]:
charts.analisis_basico(tracks_clean)

_________________________________

1_Data Structure: (14663, 24)


Unnamed: 0,url,artist,track,artist_0,artist_1,artist_2,published_track,published,summary,content,aritist_genre_0,aritist_genre_1,aritist_genre_2,aritist_genre_3,aritist_genre_4,track_genre_0,track_genre_1,track_genre_2,track_genre_3,track_genre_4,music_genre,gender,birthday_date,age
8246,https://open.spotify.com/track/4Km5HrUvYTaSUfi...,migos,bad and boujee (feat. lil uzi vert),migos,,,"30 Jan 2018, 04:41","26 Jun 2013, 14:20","Migos is a rap trio-group from Lawrenceville, ...","Migos is a rap trio-group from Lawrenceville, ...",Hip-Hop,trap,rap,hip hop,atlanta,trap,MySpotigramBot,Hip-Hop,rap,2010s,hip-hop,group,,
8247,https://open.spotify.com/track/343YBumqHu19cGo...,drake,fake love,drake,,,"27 Oct 2016, 02:34","25 Mar 2006, 06:09","Aubrey Drake Graham (born October 24, 1986) is...","Aubrey Drake Graham (born October 24, 1986) is...",Hip-Hop,rap,rnb,hip hop,Canadian,trap,Hip-Hop,rap,hiphop,rnb,hip-hop,male,,


<class 'pandas.core.frame.DataFrame'>
Index: 14663 entries, 8246 to 26138466
Data columns (total 24 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   url              14663 non-null  object 
 1   artist           14663 non-null  object 
 2   track            14663 non-null  object 
 3   artist_0         14663 non-null  object 
 4   artist_1         2518 non-null   object 
 5   artist_2         663 non-null    object 
 6   published_track  5632 non-null   object 
 7   published        14662 non-null  object 
 8   summary          14662 non-null  object 
 9   content          14662 non-null  object 
 10  aritist_genre_0  14258 non-null  object 
 11  aritist_genre_1  14075 non-null  object 
 12  aritist_genre_2  13948 non-null  object 
 13  aritist_genre_3  13772 non-null  object 
 14  aritist_genre_4  13618 non-null  object 
 15  track_genre_0    5216 non-null   object 
 16  track_genre_1    5009 non-null   object 
 17  track_genre

None

_________________________________

2_Duplicated columns:
4
_________________________________

3_Null values distribution:


Unnamed: 0,url,artist,track,artist_0,artist_1,artist_2,published_track,published,summary,content,aritist_genre_0,aritist_genre_1,aritist_genre_2,aritist_genre_3,aritist_genre_4,track_genre_0,track_genre_1,track_genre_2,track_genre_3,track_genre_4,music_genre,gender,birthday_date,age
nulos,0,0,0,0,12145,14000,9031,1,1,1,405,588,715,891,1045,9447,9654,9881,10105,10313,0,1577,14663,14663
dtypes,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,float64,object


_________________________________

4_Numerical variables distribution:


Unnamed: 0,birthday_date
count,0.0
mean,
std,
min,
25%,
50%,
75%,
max,


_________________________________

5_Categorical variables distribution:


Unnamed: 0,url,artist,track,artist_0,artist_1,artist_2,published_track,published,summary,content,aritist_genre_0,aritist_genre_1,aritist_genre_2,aritist_genre_3,aritist_genre_4,track_genre_0,track_genre_1,track_genre_2,track_genre_3,track_genre_4,music_genre,gender,age
count,14663,14663,14663,14663,2518,663,5632,14662,14662,14662.0,14258,14075,13948,13772,13618,5216,5009,4782,4558,4350,14663,13086,0.0
unique,14650,5211,11635,3866,1264,455,4367,2899,3866,2968.0,384,520,661,726,812,463,629,746,856,943,26,4,0.0
top,https://open.spotify.com/track/21UkXrc9kD48rNp...,taylor swift,intro,taylor swift,travis scott,quavo,"07 May 2021, 15:50","01 Jan 1970, 00:00",Taylor Alison Swift is an American singer-song...,,Hip-Hop,rap,hip hop,hip hop,hip hop,pop,rap,pop,rap,pop,hip-hop,male,
freq,2,200,14,202,40,20,7,1055,202,1055.0,3433,3035,1244,1786,939,808,563,270,159,175,3446,8038,


# Export

In [None]:
with open('../data/pickle/lastfm.pickle', 'wb') as data_lastfm:
    pickle.dump(tracks_final, data_lastfm)