# NB02: Data Processing

In [1]:
import requests
from dotenv import load_dotenv
from functions import *
from bs4 import BeautifulSoup
from pprint import pprint
from auth import *
import base64
import os
import pandas as pd
import json
import csv
import string
import lyricsgenius
import sqlite3
from sqlalchemy import create_engine

In [2]:
# Obtain the API access token that the later `get_top_tracks` calls pass along.
# NOTE(review): `get_token` is not defined in this notebook; presumably it comes from one
# of the wildcard imports (`functions` / `auth`) — confirm.
access_token = get_token()

In [3]:
# Load the raw "top hits" playlist export.
# The encoding is pinned to UTF-8 instead of relying on the platform default, which is
# not UTF-8 on every OS and can garble or reject non-ASCII names (e.g. "Jubël").
with open('../data/raw/combined_top_hits.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [4]:
# Flatten every playlist item into one record: track name, track ID and a
# comma-separated string of all credited artist names.
tracks = []
for item in data['items']:
    # `dict.get(key, default)` only uses the default when the key is missing; a JSON null
    # is returned as None and would crash on the `.get` calls below, so coalesce with `or`.
    track = item.get('track') or {}
    track_name = track.get('name')
    track_id = track.get('id')
    # Same guard for a missing or null artist list.
    artists = ", ".join(artist['name'] for artist in (track.get('artists') or []))  # Combine all artist names
    tracks.append({'Track Name': track_name, 'Track ID': track_id, 'Artists': artists})


In [5]:
# Build the working DataFrame from the extracted records (one row per playlist item).
top_hits = pd.DataFrame(tracks)
# Bare last expression so the notebook renders the frame as the cell output.
top_hits

Unnamed: 0,Track Name,Track ID,Artists
0,Into You,76FZM38RC8XaAjJ77CVTNe,Ariana Grande
1,Glad You Came,5yDL13y5giogKs2fSNf7sj,The Wanted
2,Dark Horse,5jrdCoLpJSvHHorevXBATy,"Katy Perry, Juicy J"
3,Who Knew - Edit,2hns6Dv29Yrg68AVTJiAyA,P!nk
4,Closer,7BKLCZ1jbUBVqRi2FVlTVw,"The Chainsmokers, Halsey"
...,...,...,...
268,Training Season,6Qb7YsAqH4wWFUMbGsCpap,Dua Lipa
269,What Makes You Beautiful,4cluDES4hQEUhmXj6TXkSo,One Direction
270,I'm Not The Only One,7795WJLVKJoAyVoOtCWqXN,Sam Smith
271,Stockholm,198asGCZWwoQVdLxYSlPTx,Jubël


In [6]:
# Step 1: Normalize the artist names — lower-case the combined artist string and trim
# leading/trailing whitespace.
top_hits['Artists'] = top_hits['Artists'].str.lower().str.strip()

# Step 2: Handle featured artists and combine them into the same 'Artists' column
def combine_artists(artist_column):
    """Normalise a "main feat. guest, guest" artist string.

    Parameters
    ----------
    artist_column : str
        Artist string for a single track. Non-string values (e.g. NaN/None)
        are returned unchanged.

    Returns
    -------
    str
        ``"<main> feat <guest>, <guest>"`` with the featured names stripped,
        de-duplicated and sorted. The input is returned as-is when it has no
        standalone ``feat`` / ``feat.`` / ``featuring`` marker, or when either
        side of the marker is empty.
    """
    import re  # local import: the notebook's import cell does not load `re`

    if not isinstance(artist_column, str):
        return artist_column

    # Match the marker only as a whole word, so names that merely contain the
    # letters "feat" (e.g. "defeater") are left alone; an optional trailing dot
    # is consumed so "feat." does not leave a stray "." behind.
    parts = re.split(r'\bfeat(?:uring)?\b\.?', artist_column, flags=re.IGNORECASE)
    if len(parts) == 1:
        # If no featured artists, return the original
        return artist_column

    main_artist = parts[0].strip(' ,')
    # Collect guests from every segment after a marker (not just the first one)
    # and strip each name before de-duplicating, so "drake, drake" collapses.
    featured_artists = sorted({
        name.strip()
        for segment in parts[1:]
        for name in segment.split(',')
        if name.strip()
    })
    if not main_artist or not featured_artists:
        return artist_column

    return main_artist + ' feat ' + ', '.join(featured_artists)

# Apply the featured-artist normalisation to every row.
top_hits['Artists'] = top_hits['Artists'].apply(combine_artists)

# Step 3: Clean up the 'feat' section, making sure it looks clean.
# The pattern is an explicit regex with the dot escaped: the default of `regex` differs
# between pandas versions, and as an unescaped regex 'feat.' would match "feat" plus ANY
# character. `\b` keeps words that merely end in "feat." from being rewritten.
top_hits['Artists'] = top_hits['Artists'].str.replace(r'\bfeat\.', 'feat', case=False, regex=True)

# Step 4: Remove any extra spaces around the artists' names and count the number of artists.
top_hits['Artists'] = top_hits['Artists'].str.strip()
# Names are stripped before de-duplication so "a, a" counts as one artist, not two.
top_hits['Artist Count'] = top_hits['Artists'].apply(
    lambda names: len({name.strip() for name in names.split(',')})
)

# Display to verify changes
print(top_hits[['Track Name', 'Artists', 'Artist Count']].head())

        Track Name                   Artists  Artist Count
0         Into You             ariana grande             1
1    Glad You Came                the wanted             1
2       Dark Horse       katy perry, juicy j             2
3  Who Knew - Edit                      p!nk             1
4           Closer  the chainsmokers, halsey             2


In [7]:
# Boolean mask marking rows that exactly repeat an earlier row (all columns compared).
duplicates = top_hits.duplicated()
# True means at least one fully duplicated row exists.
print(duplicates.any()) 

True


In [8]:
# Show the repeated rows themselves; the first occurrence of each is not flagged.
duplicate_rows = top_hits[top_hits.duplicated()]
print(duplicate_rows)

            Track Name                Track ID       Artists  Artist Count
184  Sign of the Times  5Ohxk2dO5COHF1krpoPigN  harry styles             1


In [9]:
# Drop exact duplicate rows, keeping the first occurrence of each.
top_hits_clean = top_hits.drop_duplicates(keep='first')

In [10]:
# Take an explicit copy so the column assignments in the following cells operate on an
# independent frame.
top_hits_clean = top_hits_clean.copy()

In [11]:
# Lower-case and trim both text columns, one chained expression per column.
top_hits_clean.loc[:, 'Track Name'] = top_hits_clean['Track Name'].str.lower().str.strip()
top_hits_clean.loc[:, 'Artists'] = top_hits_clean['Artists'].str.lower().str.strip()

In [12]:
# Split the combined artist string on ', ' and explode it so each credited artist gets
# its own row (the other columns, including 'Artist Count', are repeated per artist).
# The former follow-up `str.replace(', ', ' & ')` was removed: after splitting on ', '
# no value can still contain that separator, so it never changed anything.
top_hits_clean = (
    top_hits_clean
    .assign(Artists=top_hits_clean['Artists'].str.split(', '))
    .explode('Artists')
)

In [13]:
# Remove every character that is neither a word character nor whitespace from the track
# names. Punctuation surrounded by spaces leaves both spaces behind
# (e.g. "who knew - edit" becomes "who knew  edit").
top_hits_clean['Track Name'] = top_hits_clean['Track Name'].str.replace(r'[^\w\s]', '', regex=True)

In [14]:
# Store the artist column as a categorical dtype.
# NOTE(review): the CSV round trip in the following cells does not keep this dtype (the
# reloaded frame reports `object`), so it only affects the in-memory frame.
top_hits_clean['Artists'] = top_hits_clean['Artists'].astype('category')

In [15]:
# Persist the cleaned, one-row-per-artist table; the DataFrame index is not written.
top_hits_clean.to_csv('../data/top_hits.csv', index=False)

In [16]:
# Reload the CSV just written and summarise its columns, dtypes and non-null counts.
top_hits_data = pd.read_csv('../data/top_hits.csv')
top_hits_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 347 entries, 0 to 346
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Track Name    347 non-null    object
 1   Track ID      347 non-null    object
 2   Artists       347 non-null    object
 3   Artist Count  347 non-null    int64 
dtypes: int64(1), object(3)
memory usage: 11.0+ KB


### Working on Women Pop

In [17]:
# Load the raw "women pop" playlist export (this rebinds `data`).
# The encoding is pinned to UTF-8 instead of relying on the platform default, which is
# not UTF-8 on every OS and can garble or reject non-ASCII characters in names.
with open('../data/raw/combined_women_pop.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [18]:
# Flatten every playlist item into one record: track name, track ID and a
# comma-separated string of all credited artist names.
tracks = []
for item in data['items']:
    # `dict.get(key, default)` only uses the default when the key is missing; a JSON null
    # is returned as None and would crash on the `.get` calls below, so coalesce with `or`.
    track = item.get('track') or {}
    track_name = track.get('name')
    track_id = track.get('id')
    # Same guard for a missing or null artist list.
    artists = ", ".join(artist['name'] for artist in (track.get('artists') or []))  # Combine all artist names
    tracks.append({'Track Name': track_name, 'Track ID': track_id, 'Artists': artists})


In [19]:
# Build the working DataFrame from the extracted records (one row per playlist item).
women_pop = pd.DataFrame(tracks)
# Bare last expression so the notebook renders the frame as the cell output.
women_pop

Unnamed: 0,Track Name,Track ID,Artists
0,It's ok I'm ok,24XihnoVPWXlKJ4BgXqjVM,Tate McRae
1,Espresso,2qSkIjg1o9h3YT9RAgYN75,Sabrina Carpenter
2,That’s So True,7ne4VBA60CxGM75vw0EYad,Gracie Abrams
3,Teenage Dream,55qBw1900pZKfXJ6Q9A2Lc,Katy Perry
4,bye,1Rweq7vNjK4kZBbGrmxQsl,Ariana Grande
...,...,...,...
275,Pour It Up,5r67bGhYXZNqk2m2Wupfmu,Rihanna
276,Right Now,42Ow7PS3YtCWplolUUigDo,"Rihanna, David Guetta"
277,Pink Pony Club,1k2pQc5i348DCHwbn5KTdc,Chappell Roan
278,Si Antes Te Hubiera Conocido,6WatFBLVB0x077xWeoVc2k,KAROL G


In [20]:
# Step 1: Normalize the artist names — lower-case the combined artist string and trim
# leading/trailing whitespace.
women_pop['Artists'] = women_pop['Artists'].str.lower().str.strip()

# Step 2: Handle featured artists and combine them into the same 'Artists' column
def combine_artists(artist_column):
    """Normalise a "main feat. guest, guest" artist string.

    Parameters
    ----------
    artist_column : str
        Artist string for a single track. Non-string values (e.g. NaN/None)
        are returned unchanged.

    Returns
    -------
    str
        ``"<main> feat <guest>, <guest>"`` with the featured names stripped,
        de-duplicated and sorted. The input is returned as-is when it has no
        standalone ``feat`` / ``feat.`` / ``featuring`` marker, or when either
        side of the marker is empty.
    """
    import re  # local import: the notebook's import cell does not load `re`

    if not isinstance(artist_column, str):
        return artist_column

    # Match the marker only as a whole word, so names that merely contain the
    # letters "feat" (e.g. "defeater") are left alone; an optional trailing dot
    # is consumed so "feat." does not leave a stray "." behind.
    parts = re.split(r'\bfeat(?:uring)?\b\.?', artist_column, flags=re.IGNORECASE)
    if len(parts) == 1:
        # If no featured artists, return the original
        return artist_column

    main_artist = parts[0].strip(' ,')
    # Collect guests from every segment after a marker (not just the first one)
    # and strip each name before de-duplicating, so "drake, drake" collapses.
    featured_artists = sorted({
        name.strip()
        for segment in parts[1:]
        for name in segment.split(',')
        if name.strip()
    })
    if not main_artist or not featured_artists:
        return artist_column

    return main_artist + ' feat ' + ', '.join(featured_artists)

# Apply the featured-artist normalisation to every row.
women_pop['Artists'] = women_pop['Artists'].apply(combine_artists)

# Step 3: Clean up the 'feat' section, making sure it looks clean.
# The pattern is an explicit regex with the dot escaped: the default of `regex` differs
# between pandas versions, and as an unescaped regex 'feat.' would match "feat" plus ANY
# character. `\b` keeps words that merely end in "feat." from being rewritten.
women_pop['Artists'] = women_pop['Artists'].str.replace(r'\bfeat\.', 'feat', case=False, regex=True)

# Step 4: Remove any extra spaces around the artists' names and count the number of artists.
women_pop['Artists'] = women_pop['Artists'].str.strip()
# Names are stripped before de-duplication so "a, a" counts as one artist, not two.
women_pop['Artist Count'] = women_pop['Artists'].apply(
    lambda names: len({name.strip() for name in names.split(',')})
)

# Display to verify changes
print(women_pop[['Track Name', 'Artists', 'Artist Count']].tail())


                                Track Name                Artists  \
275                             Pour It Up                rihanna   
276                              Right Now  rihanna, david guetta   
277                         Pink Pony Club          chappell roan   
278           Si Antes Te Hubiera Conocido                karol g   
279  You'll Always Find Your Way Back Home         hannah montana   

     Artist Count  
275             1  
276             2  
277             1  
278             1  
279             1  


In [21]:
# Boolean mask marking rows that exactly repeat an earlier row (all columns compared).
duplicates = women_pop.duplicated()
# True means at least one fully duplicated row exists.
print(duplicates.any()) 

True


In [22]:
# Show the repeated rows themselves; the first occurrence of each is not flagged.
duplicate_rows = women_pop[women_pop.duplicated()]
print(duplicate_rows)

      Track Name                Track ID    Artists  Artist Count
201  Bad Romance  0SiywuOBRcynK0uKGWdCnn  lady gaga             1


In [23]:
# Drop exact duplicate rows, keeping the first occurrence of each.
women_pop_clean = women_pop.drop_duplicates(keep='first')

In [24]:
# Take an explicit copy so the column assignments in the following cells operate on an
# independent frame.
women_pop_clean = women_pop_clean.copy()

In [25]:
# Lower-case and trim both text columns, one chained expression per column.
women_pop_clean.loc[:, 'Track Name'] = women_pop_clean['Track Name'].str.lower().str.strip()
women_pop_clean.loc[:, 'Artists'] = women_pop_clean['Artists'].str.lower().str.strip()

In [26]:
# Split the combined artist string on ', ' and explode it so each credited artist gets
# its own row (the other columns, including 'Artist Count', are repeated per artist).
# The former follow-up `str.replace(', ', ' & ')` was removed: after splitting on ', '
# no value can still contain that separator, so it never changed anything.
women_pop_clean = (
    women_pop_clean
    .assign(Artists=women_pop_clean['Artists'].str.split(', '))
    .explode('Artists')
)

In [27]:
# Remove every character that is neither a word character nor whitespace from the track
# names (apostrophes included, e.g. "you'll" becomes "youll").
women_pop_clean['Track Name'] = women_pop_clean['Track Name'].str.replace(r'[^\w\s]', '', regex=True)

In [28]:
# Store the artist column as a categorical dtype.
# NOTE(review): the CSV round trip in the following cells does not keep this dtype (the
# reloaded frame reports `object`), so it only affects the in-memory frame.
women_pop_clean['Artists'] = women_pop_clean['Artists'].astype('category')

In [29]:
# Persist the cleaned, one-row-per-artist table; the DataFrame index is not written.
women_pop_clean.to_csv('../data/women_pop.csv', index=False)

In [30]:
# Reload the CSV just written and summarise its columns, dtypes and non-null counts.
women_pop_data = pd.read_csv('../data/women_pop.csv')
women_pop_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 337 entries, 0 to 336
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Track Name    337 non-null    object
 1   Track ID      337 non-null    object
 2   Artists       337 non-null    object
 3   Artist Count  337 non-null    int64 
dtypes: int64(1), object(3)
memory usage: 10.7+ KB


In [31]:
# Stack the two reloaded playlist tables into one frame with a fresh 0..n-1 index.
playlist_df = pd.concat([top_hits_data, women_pop_data], ignore_index=True)

# Save the combined DataFrame to a new CSV (index not written)
playlist_df.to_csv('../data/playlists.csv', index=False)

# Print the combined DataFrame for a visual check
print(playlist_df)

                               Track Name                Track ID  \
0                                into you  76FZM38RC8XaAjJ77CVTNe   
1                           glad you came  5yDL13y5giogKs2fSNf7sj   
2                              dark horse  5jrdCoLpJSvHHorevXBATy   
3                              dark horse  5jrdCoLpJSvHHorevXBATy   
4                          who knew  edit  2hns6Dv29Yrg68AVTJiAyA   
..                                    ...                     ...   
679                             right now  42Ow7PS3YtCWplolUUigDo   
680                             right now  42Ow7PS3YtCWplolUUigDo   
681                        pink pony club  1k2pQc5i348DCHwbn5KTdc   
682          si antes te hubiera conocido  6WatFBLVB0x077xWeoVc2k   
683  youll always find your way back home  12wSL3tGk3MtbDEhfG7xy3   

            Artists  Artist Count  
0     ariana grande             1  
1        the wanted             1  
2        katy perry             2  
3           juicy j        

In [32]:
# Reload the combined table from the CSV written in the previous cell.
playlist_df = pd.read_csv('../data/playlists.csv')

# Check for duplicate rows (based on all columns); the first occurrence of each is not flagged
duplicates = playlist_df[playlist_df.duplicated()]

# Display the duplicate rows
print(duplicates)

                                       Track Name                Track ID  \
352                                good luck babe  0WbMK4wrZ1wFSty9F7FCgu   
353                                 call me maybe  20I6sIOMTCkB6w7ryavxtO   
356                                         apple  19RybK6XDbAVpcdxSbZL1o   
370                                        greedy  3rUGC1vUpkDG9CZFHMur1t   
371                               i kissed a girl  14iN3o8ptQ8cFVZTEmyQRV   
372                              party in the usa  3E7dfMvvCLUddWissuqMwr   
374                                       yes and  7gaA3wERFkFkgivjwbSvkG   
389                            oopsi did it again  6naxalmIoLFWR0siv8dnQQ   
393                                     telephone  6nCDnzErqalOaIY3EJM8NK   
394                                     telephone  6nCDnzErqalOaIY3EJM8NK   
399                                  we cant stop  2y4lAQpi5VTNLu2ldeTdUH   
417        dance the night  from barbie the album  1vYXt7VSjH9JIM5oRRo7vA   

In [33]:
# Remove duplicates (keep the first occurrence)
playlists_clean = playlist_df.drop_duplicates()

# Or, if you want to remove duplicates based on specific columns (e.g., 'Track Name' and 'Artists')
playlists_clean = playlist_df.drop_duplicates(subset=['Track Name', 'Artists'])

# Display the cleaned DataFrame
print(playlists_clean)

                               Track Name                Track ID  \
0                                into you  76FZM38RC8XaAjJ77CVTNe   
1                           glad you came  5yDL13y5giogKs2fSNf7sj   
2                              dark horse  5jrdCoLpJSvHHorevXBATy   
3                              dark horse  5jrdCoLpJSvHHorevXBATy   
4                          who knew  edit  2hns6Dv29Yrg68AVTJiAyA   
..                                    ...                     ...   
679                             right now  42Ow7PS3YtCWplolUUigDo   
680                             right now  42Ow7PS3YtCWplolUUigDo   
681                        pink pony club  1k2pQc5i348DCHwbn5KTdc   
682          si antes te hubiera conocido  6WatFBLVB0x077xWeoVc2k   
683  youll always find your way back home  12wSL3tGk3MtbDEhfG7xy3   

            Artists  Artist Count  
0     ariana grande             1  
1        the wanted             1  
2        katy perry             2  
3           juicy j        

In [34]:
# Create an SQLAlchemy engine to connect to the SQLite database (or create it if it doesn't exist).
# `echo=True` makes the engine log every SQL statement it issues, which is where the
# CREATE TABLE / INSERT log lines in the cell output come from.
engine = create_engine('sqlite:///../data/spotify_data.db', echo=True)

# Save the DataFrame to the SQLite database (table name: 'playlists'); an existing table
# of that name is replaced and the DataFrame index is not written.
playlists_clean.to_sql('playlists', engine, if_exists='replace', index=False)

2024-11-29 10:51:59,999 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-11-29 10:52:00,000 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("playlists")
2024-11-29 10:52:00,000 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-11-29 10:52:00,000 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("playlists")
2024-11-29 10:52:00,001 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-11-29 10:52:00,001 INFO sqlalchemy.engine.Engine 
CREATE TABLE playlists (
	"Track Name" TEXT, 
	"Track ID" TEXT, 
	"Artists" TEXT, 
	"Artist Count" BIGINT
)


2024-11-29 10:52:00,001 INFO sqlalchemy.engine.Engine [no key 0.00021s] ()
2024-11-29 10:52:00,004 INFO sqlalchemy.engine.Engine INSERT INTO playlists ("Track Name", "Track ID", "Artists", "Artist Count") VALUES (?, ?, ?, ?)
2024-11-29 10:52:00,004 INFO sqlalchemy.engine.Engine [generated in 0.00131s] [('into you', '76FZM38RC8XaAjJ77CVTNe', 'ariana grande', 1), ('glad you came', '5yDL13y5giogKs2fSNf7sj', 'the wanted', 1), ('dark horse', '5jrdCoLp

618

### Working with the data

#### Part 1: Top Hits

In [35]:
# Read only the artist column of the `playlists` table back into a DataFrame.
# The table column was created as "Artists"; the lower-case spelling in the query still
# resolves to it, and the result column is named "Artists" (see the output below).
query = "SELECT artists FROM playlists"
artists_df = pd.read_sql(query, engine)

# Display the first few rows to check the data
print(artists_df.head())

2024-11-29 10:52:00,013 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-11-29 10:52:00,014 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("SELECT artists FROM playlists")
2024-11-29 10:52:00,014 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-11-29 10:52:00,015 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("SELECT artists FROM playlists")
2024-11-29 10:52:00,015 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-11-29 10:52:00,015 INFO sqlalchemy.engine.Engine SELECT artists FROM playlists
2024-11-29 10:52:00,015 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-11-29 10:52:00,017 INFO sqlalchemy.engine.Engine ROLLBACK
         Artists
0  ariana grande
1     the wanted
2     katy perry
3        juicy j
4           p!nk


In [36]:
# Get the frequency of each artist
artist_counts = artists_df['Artists'].value_counts()
top_50_artists = artist_counts.head(50)

# Display the frequency of artists
print(top_50_artists)

Artists
rihanna                     25
ariana grande               19
taylor swift                18
nicki minaj                 17
britney spears              14
lady gaga                   14
katy perry                  13
beyoncé                     12
demi lovato                  9
dua lipa                     9
little mix                   8
selena gomez                 8
justin bieber                7
paramore                     7
bruno mars                   6
charli xcx                   6
doja cat                     6
ava max                      6
avril lavigne                6
selena gomez & the scene     6
sabrina carpenter            6
destiny's child              5
jacob tillberg               5
kesha                        5
miley cyrus                  5
christina aguilera           4
sia                          4
rita ora                     4
olivia rodrigo               4
ku$h drifter                 4
justina valentine            4
one direction                4


In [37]:
# Write the top-50 frequency table to CSV with a header row, then read it straight back
# so the notebook displays what was stored.
top_50_artists.to_csv('../data/top_50_artists.csv', header=True)
pd.read_csv('../data/top_50_artists.csv')

Unnamed: 0,Artists,count
0,rihanna,25
1,ariana grande,19
2,taylor swift,18
3,nicki minaj,17
4,britney spears,14
5,lady gaga,14
6,katy perry,13
7,beyoncé,12
8,demi lovato,9
9,dua lipa,9


Based on the artist frequencies above, these are the artists that will be included in my analysis:

Male Artists:
- Justin Bieber
- Bruno Mars
- Ed Sheeran
- Flo Rida
- Pharrell Williams

Female Artists:
- Rihanna
- Ariana Grande
- Taylor Swift
- Nicki Minaj
- Britney Spears

### Getting tracks

In [38]:
# Fetch the top tracks for each of the five male artists listed above.
# NOTE(review): `get_top_tracks` is defined outside this notebook; the first argument is
# presumably the artist's Spotify ID — verify against its definition.
justin = get_top_tracks("1uNFoZAHBGtllmzznpCI3s", access_token)
bruno = get_top_tracks("0du5cEVh5yTK9QJze8zA0C", access_token)
ed = get_top_tracks("6eUKZXaKkcviH0Ku9w2n3V", access_token)
flo = get_top_tracks("0jnsk9HBra6NMjO2oANoPY", access_token)
pharrell = get_top_tracks("2RdwBSPQiwcmiDo9kixcl8", access_token)

In [39]:
# Fetch the top tracks for each of the five female artists listed above.
# NOTE(review): `get_top_tracks` is defined outside this notebook; the first argument is
# presumably the artist's Spotify ID — verify against its definition.
rihanna = get_top_tracks("5pKCCKE2ajJHZ9KAiaK11H", access_token)
ariana = get_top_tracks("66CXWjxzNUsdJxJ2JdwvnR", access_token)
taylor = get_top_tracks("06HL4z0CvFAxyc27GXpf02", access_token)
nicki = get_top_tracks("0hCNtLu0JehylgoiP8L4Gh", access_token)
britney = get_top_tracks("26dSoYclwsYLMAKD3tpOr4", access_token)

In [40]:
# Turn each response's 'tracks' list into its own DataFrame (one row per track).
(
    justin_df, bruno_df, ed_df, flo_df, pharrell_df,
    rihanna_df, ariana_df, taylor_df, nicki_df, britney_df,
) = (
    pd.DataFrame(response['tracks'])
    for response in (
        justin, bruno, ed, flo, pharrell,
        rihanna, ariana, taylor, nicki, britney,
    )
)

In [41]:
# Tag every row of each per-artist frame with the artist's display name, so the rows
# remain attributable after the frames are concatenated.
for frame, artist_name in (
    (justin_df, 'Justin Bieber'),
    (bruno_df, 'Bruno Mars'),
    (ed_df, 'Ed Sheeran'),
    (flo_df, 'Flo Rida'),
    (pharrell_df, 'Pharrell Williams'),
    (rihanna_df, 'Rihanna'),
    (ariana_df, 'Ariana Grande'),
    (taylor_df, 'Taylor Swift'),
    (nicki_df, 'Nicki Minaj'),
    (britney_df, 'Britney Spears'),
):
    frame['artist'] = artist_name

In [42]:
# Concatenate the ten per-artist frames into one table with a fresh 0..n-1 index.
combined_tracks = pd.concat([justin_df, bruno_df, ed_df, flo_df, pharrell_df, rihanna_df, ariana_df, taylor_df, nicki_df, britney_df], ignore_index=True)
print(combined_tracks)

                                                album  \
0   {'album_type': 'album', 'artists': [{'external...   
1   {'album_type': 'album', 'artists': [{'external...   
2   {'album_type': 'album', 'artists': [{'external...   
3   {'album_type': 'album', 'artists': [{'external...   
4   {'album_type': 'album', 'artists': [{'external...   
..                                                ...   
95  {'album_type': 'album', 'artists': [{'external...   
96  {'album_type': 'single', 'artists': [{'externa...   
97  {'album_type': 'album', 'artists': [{'external...   
98  {'album_type': 'album', 'artists': [{'external...   
99  {'album_type': 'album', 'artists': [{'external...   

                                              artists  disc_number  \
0   [{'external_urls': {'spotify': 'https://open.s...            1   
1   [{'external_urls': {'spotify': 'https://open.s...            1   
2   [{'external_urls': {'spotify': 'https://open.s...            1   
3   [{'external_urls': {'spotify': 

In [43]:
# Keep only the track title and the artist label. `.copy()` makes the result an
# independent frame, so adding the 'gender' column in the next cell is a plain column
# assignment instead of a write into a selection of another DataFrame (which pandas
# reports with SettingWithCopyWarning).
combined_tracks = combined_tracks[['name', 'artist']].copy()

In [44]:
# Numeric gender code per artist label: 0 = male, 1 = female.
# The keys must match the strings written to the 'artist' column exactly.
gender_mapping = {
    'Justin Bieber': 0,  # Male
    'Bruno Mars': 0,     # Male
    'Ed Sheeran': 0,     # Male
    'Flo Rida': 0,       # Male
    'Pharrell Williams': 0,  # Male
    'Rihanna': 1,        # Female
    'Ariana Grande': 1,  # Female
    'Taylor Swift': 1,   # Female
    'Nicki Minaj': 1,    # Female
    'Britney Spears': 1  # Female
}

# Look up each row's artist in the mapping; an artist missing from it would map to NaN.
combined_tracks['gender'] = combined_tracks['artist'].map(gender_mapping)



In [45]:
# Persist the name / artist / gender table; the DataFrame index is not written.
combined_tracks.to_csv('../data/combined_top_tracks.csv', index=False)

In [46]:
# NOTE(review): `combined_tracks` is already a DataFrame at this point, so this
# re-wrapping looks redundant; the cell's visible purpose is to display the frame.
combined_tracks = pd.DataFrame(combined_tracks)
combined_tracks

Unnamed: 0,name,artist,gender
0,STAY (with Justin Bieber),Justin Bieber,0
1,Ghost,Justin Bieber,0
2,Love Yourself,Justin Bieber,0
3,Sorry,Justin Bieber,0
4,Beauty And A Beat,Justin Bieber,0
...,...,...,...
95,Womanizer,Britney Spears,1
96,My Only Wish (This Year),Britney Spears,1
97,Hold Me Closer,Britney Spears,1
98,Criminal,Britney Spears,1


In [47]:
# Create an engine for the same SQLite database file that holds the `playlists` table.
# `echo=True` makes the engine log every SQL statement it issues (see the cell output).
engine = create_engine('sqlite:///../data/spotify_data.db', echo=True)

# Save the DataFrame to the SQLite database (table name: 'top_tracks'); an existing table
# of that name is replaced and the DataFrame index is not written.
combined_tracks.to_sql('top_tracks', engine, if_exists='replace', index=False)

print("Table 'top_tracks' has been created successfully in the database.")

2024-11-29 10:52:01,478 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-11-29 10:52:01,479 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("top_tracks")
2024-11-29 10:52:01,480 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-11-29 10:52:01,481 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("top_tracks")
2024-11-29 10:52:01,481 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-11-29 10:52:01,483 INFO sqlalchemy.engine.Engine 
CREATE TABLE top_tracks (
	name TEXT, 
	artist TEXT, 
	gender BIGINT
)


2024-11-29 10:52:01,484 INFO sqlalchemy.engine.Engine [no key 0.00055s] ()
2024-11-29 10:52:01,485 INFO sqlalchemy.engine.Engine INSERT INTO top_tracks (name, artist, gender) VALUES (?, ?, ?)
2024-11-29 10:52:01,486 INFO sqlalchemy.engine.Engine [generated in 0.00053s] [('STAY (with Justin Bieber)', 'Justin Bieber', 0), ('Ghost', 'Justin Bieber', 0), ('Love Yourself', 'Justin Bieber', 0), ('Sorry', 'Justin Bieber', 0), ('Beauty And A Beat', 'Justin Bieber', 0), ('Baby', 'Justin Bi