## Importing libraries

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import os
import nltk  
from nltk.tokenize import TreebankWordTokenizer
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

## Download a HTML page with links to songs (lyrics)

In [2]:
main_url = 'https://www.lyrics.com'
artist_sublink_1 = '/artist/Dua-Lipa/3128482'
artist_sublink_2 = '/artist/Cardi-B/3195462'
url_1 = main_url + artist_sublink_1
url_2 = main_url + artist_sublink_2

headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}


In [3]:
response_1 = requests.get(url_1)
response_2 = requests.get(url_2)

In [4]:
response_1
response_2

<Response [200]>

In [5]:
lyrics_html_1 = response_1.text
lyrics_html_2 = response_2.text

## Extracting all (song) links with Regex

In [6]:
import re

In [7]:
# Extracting all links
#links = re.findall('href.{100}', response.text)
#links

In [8]:
# Extracting all song links
#song_links = re.findall('\/lyric\/.*?(?=")', response.text)
#song_links

## Download songs

In [9]:
# Create a list with all the song URLs

#links = []
#url = 'https://www.lyrics.com'
#for song in song_links:
#    link = url + song
#    links.append(link)
#    
#links

## Extracting all (song) links with BeautifulSoup

In [10]:
from bs4 import BeautifulSoup

In [11]:
songs_soup_1 = BeautifulSoup(response_1.text)
songs_soup_2 = BeautifulSoup(response_2.text)

In [12]:
links_1 = songs_soup_1.find_all(class_='tal qx')
links_2 = songs_soup_2.find_all(class_='tal qx')

In [13]:
links_list_1 = []

for link in links_1:
    web_link = link.a['href']
    song_url = main_url + web_link
    links_list_1.append(song_url)
    print(song_url)
    
links_list_2 = []

for link in links_2:
    web_link = link.a['href']
    song_url = main_url + web_link
    links_list_2.append(song_url)
    print(song_url)
       


https://www.lyrics.com/lyric/36071585/Dua+Lipa/Be+the+One+%5BAcoustic%5D
https://www.lyrics.com/lyric/36050494/Dua+Lipa/Homesick
https://www.lyrics.com/lyric/36111214/Dua+Lipa/Be+the+One
https://www.lyrics.com/lyric/36633925/Dua+Lipa/Don%27t+Start+Now
https://www.lyrics.com/lyric/36084941/Dua+Lipa/Kiss+and+Make+Up
https://www.lyrics.com/lyric/36084846/Dua+Lipa/New+Rules
https://www.lyrics.com/lyric/36385928/Dua+Lipa/New+Rules
https://www.lyrics.com/lyric/36074914/Dua+Lipa/Kiss+and+Make+Up
https://www.lyrics.com/lyric/36389072/Dua+Lipa/New+Rules
https://www.lyrics.com/lyric/35873950/Dua+Lipa/Electricity
https://www.lyrics.com/lyric/36185116/Dua+Lipa/Swan+Song
https://www.lyrics.com/lyric/36185615/Dua+Lipa/Swan+Song
https://www.lyrics.com/lyric/36143684/Dua+Lipa/Be+the+One
https://www.lyrics.com/lyric/36255338/Dua+Lipa/High
https://www.lyrics.com/lyric/36367614/Dua+Lipa/IDGAF
https://www.lyrics.com/lyric/36367703/Dua+Lipa/Be+the+One
https://www.lyrics.com/lyric/34949113/Dua+Lipa/No+Lie
h

In [14]:
# We want to get rid of the links to songs that are remixes or alternative versions. Since these contain "()" or "[]", we should remove links containing those characters.

links_strip_1 = []

for link in links_list_1:
    split_link = link.split("%5B")[0].strip()
    split_link = split_link.split("%28")[0].strip()
    if split_link not in links_strip_1:
        links_strip_1.append(split_link)
        
links_strip_1

['https://www.lyrics.com/lyric/36071585/Dua+Lipa/Be+the+One+',
 'https://www.lyrics.com/lyric/36050494/Dua+Lipa/Homesick',
 'https://www.lyrics.com/lyric/36111214/Dua+Lipa/Be+the+One',
 'https://www.lyrics.com/lyric/36633925/Dua+Lipa/Don%27t+Start+Now',
 'https://www.lyrics.com/lyric/36084941/Dua+Lipa/Kiss+and+Make+Up',
 'https://www.lyrics.com/lyric/36084846/Dua+Lipa/New+Rules',
 'https://www.lyrics.com/lyric/36385928/Dua+Lipa/New+Rules',
 'https://www.lyrics.com/lyric/36074914/Dua+Lipa/Kiss+and+Make+Up',
 'https://www.lyrics.com/lyric/36389072/Dua+Lipa/New+Rules',
 'https://www.lyrics.com/lyric/35873950/Dua+Lipa/Electricity',
 'https://www.lyrics.com/lyric/36185116/Dua+Lipa/Swan+Song',
 'https://www.lyrics.com/lyric/36185615/Dua+Lipa/Swan+Song',
 'https://www.lyrics.com/lyric/36143684/Dua+Lipa/Be+the+One',
 'https://www.lyrics.com/lyric/36255338/Dua+Lipa/High',
 'https://www.lyrics.com/lyric/36367614/Dua+Lipa/IDGAF',
 'https://www.lyrics.com/lyric/36367703/Dua+Lipa/Be+the+One',
 'htt

In [15]:
links_strip_2 = []

for link in links_list_2:
    split_link = link.split("%5B")[0].strip()
    split_link = split_link.split("%28")[0].strip()
    if split_link not in links_strip_2:
        links_strip_2.append(split_link)
        
links_strip_2

['https://www.lyrics.com/lyric/36628235/Cardi+B/Please+Me',
 'https://www.lyrics.com/lyric/36628182/Cardi+B/Press',
 'https://www.lyrics.com/lyric/36111218/Cardi+B/I+Like+It',
 'https://www.lyrics.com/lyric/36331894/Cardi+B/Rodeo',
 'https://www.lyrics.com/lyric/36021583/Cardi+B/Clout',
 'https://www.lyrics.com/lyric/36259559/Cardi+B/Wish+Wish',
 'https://www.lyrics.com/lyric/36086287/Cardi+B/Clout',
 'https://www.lyrics.com/lyric/36095502/Cardi+B/On+Me',
 'https://www.lyrics.com/lyric/36095501/Cardi+B/Money',
 'https://www.lyrics.com/lyric/36095480/Cardi+B/I+Like+It',
 'https://www.lyrics.com/lyric/36095466/Cardi+B/Bodak+Yellow',
 'https://www.lyrics.com/lyric/35948718/Cardi+B/I+Like+It',
 'https://www.lyrics.com/lyric/35948704/Cardi+B/Girls+Like+You',
 'https://www.lyrics.com/lyric/36385927/Cardi+B/Press',
 'https://www.lyrics.com/lyric/36389071/Cardi+B/Press',
 'https://www.lyrics.com/lyric/36259015/Cardi+B/South+of+the+Border',
 'https://www.lyrics.com/lyric/36030011/Cardi+B/Please

In [16]:
# we notice some songs are repeated. 
# We will then create a list with unique songs with the help of a set.

cleanset_dua = set()
cleanlist_dua = []

for x in links_strip_1:
    y = x.split('/')[-1].replace("+", " ").strip()
    
    if y not in cleanset_dua:
        cleanlist_dua.append(x)
        cleanset_dua.add(y)

    
cleanlist_dua
print(len(cleanlist_dua))
len(links_strip_1)

68


446

In [17]:
cleanlist_dua = cleanlist_dua[:60]

In [18]:
cleanset_cardi = set()
cleanlist_cardi = []

for x in links_strip_2:
    y = x.split('/')[-1].replace("+", " ").strip()
    
    if y not in cleanset_cardi:
        cleanlist_cardi.append(x)
        cleanset_cardi.add(y)

    
cleanlist_cardi
print(len(cleanlist_cardi))
len(links_strip_2)

81


227

In [19]:
cleanlist_cardi = cleanlist_cardi[:60]

In [20]:
#for link in my_list_1:
#    response = requests.get(link, headers=headers).text
#    songs_soup = BeautifulSoup(response, 'html.parser')
#    lyrics = songs_soup.find(class_='lyric-body').text
#    song_title = link.split('/')[6].replace("+", " ")
#    song_title = song_title.split('/')[0].replace("%27", "'")
#    song_title = song_title.split('/')[0].replace("%C3%AD", "í")
#    with open(f'{song_title}.csv', 'w', encoding='utf-8') as f:
#        f.write(lyrics)
#        print(song_title)
    #time.sleep(5)

In [21]:
#for link in my_list_2:
#    response = requests.get(link, headers=headers).text
#    songs_soup = BeautifulSoup(response, 'html.parser')
#    lyrics = songs_soup.find(class_='lyric-body').text
#    song_title = link.split('/')[6].replace("+", " ")
#    song_title = song_title.split('/')[0].replace("%27", "'")
#    song_title = song_title.split('/')[0].replace("%C3%AD", "í")   
#    with open(f'{song_title}.txt', 'w', encoding='utf-8') as f:
#        f.write(lyrics)
#        print(song_title)

## Build a lyric corpus

In [22]:
all_lyrics = []

for fn in os.listdir('/Users/ivandominguez/Desktop/Spiced_Academy/spiced_projects/week_04/Github/week_04/Lyrics/Dua_lipa/'):
    dua = open(f"/Users/ivandominguez/Desktop/Spiced_Academy/spiced_projects/week_04/Github/week_04/Lyrics/Dua_lipa/{fn}").read()
    all_lyrics.append(dua)

    
for fn in os.listdir('/Users/ivandominguez/Desktop/Spiced_Academy/spiced_projects/week_04/Github/week_04/Lyrics/Cardi_b/'):
    cardi = open(f"/Users/ivandominguez/Desktop/Spiced_Academy/spiced_projects/week_04/Github/week_04/Lyrics/Cardi_b/{fn}").read()
    all_lyrics.append(cardi)

In [23]:
all_lyrics

["I bet we're higher than the people on cloud nine\nThis connection got me feeling like\nI've known you my whole life, wow\nEven our shadows know each other in the light\nSo why do I think of losing you\nWhen you're right by my side, uh\n\nI'm not really thinking clear\nToo good to be true and that's what I fear\nLike what if you left me here\nAnd I fill the floor with my sequin tears?\nI start getting in my brain\nSomething I do that I can't explain\nWicked love's creepin' up\n\nSo baby, don't you let go (uh-uh)\nThe thought of you with someone kills me\nI'll be dancing with my heart broke\nSuch a sad disco, if it ain't me\nAnd baby, if you ever (uh-uh)\nFind another lover, you'll see\nI'll be dancing with my heart broke\nSuch a sad disco, if it ain't me\n\nI got a little devil dancing in my mind\nPlaying tricks on my imagination\nRuining my night, wow\nI try to shake it off by looking in your eyes\nAll the voices in my head keep tryna\nTell me it's goodbye, uh\n\nI'm not really think

In [24]:
# Clean up the data to lower case

CORPUS = (all_lyrics)
CORPUS = [s.lower() for s in CORPUS]

Tokenize and lemmatize

In [25]:
CLEAN_CORPUS = []

tokenizer = TreebankWordTokenizer()
lemmatizer = WordNetLemmatizer()


for doc in CORPUS:
    tokens = tokenizer.tokenize(text=doc)
    clean_doc = " ".join(lemmatizer.lemmatize(token) for token in tokens)
    CLEAN_CORPUS.append(clean_doc)
    
CLEAN_CORPUS

["i bet we 're higher than the people on cloud nine this connection got me feeling like i 've known you my whole life , wow even our shadow know each other in the light so why do i think of losing you when you 're right by my side , uh i 'm not really thinking clear too good to be true and that 's what i fear like what if you left me here and i fill the floor with my sequin tear ? i start getting in my brain something i do that i ca n't explain wicked love 's creepin ' up so baby , do n't you let go ( uh-uh ) the thought of you with someone kill me i 'll be dancing with my heart broke such a sad disco , if it ai n't me and baby , if you ever ( uh-uh ) find another lover , you 'll see i 'll be dancing with my heart broke such a sad disco , if it ai n't me i got a little devil dancing in my mind playing trick on my imagination ruining my night , wow i try to shake it off by looking in your eye all the voice in my head keep tryna tell me it 's goodbye , uh i 'm not really thinking clear t

**Vectorize the text using the Bag Of Words method**

In [26]:
import nltk
nltk.download('stopwords')

STOPWORDS = stopwords.words('english')

# create labels for classification
LABELS = ['Dua Lipa']*60 + ['Cardi B'] *60

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/ivandominguez/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


**Vectorize text**

In [27]:
from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer(stop_words=STOPWORDS) # instanciation

In [28]:
vectors = vectorizer.fit_transform(CLEAN_CORPUS)

In [29]:
vectors

<120x3803 sparse matrix of type '<class 'numpy.int64'>'
	with 12387 stored elements in Compressed Sparse Row format>

In [30]:
vectors.todense()

matrix([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]])

In [31]:
vectorizer.get_feature_names()



['08',
 '10',
 '12',
 '14hunna',
 '15',
 '180',
 '20',
 '2020',
 '2054',
 '21',
 '23',
 '2k',
 '2pac',
 '300',
 '305',
 '40',
 '400',
 '44',
 '45',
 '488',
 '4hunnid',
 '4l',
 '4pm',
 '50',
 '59',
 '5y',
 '68',
 '6ix9ine',
 '800',
 '80k',
 'a1',
 'abort',
 'abraza',
 'abrupt',
 'abuse',
 'acabé',
 'acaricio',
 'access',
 'account',
 'accountant',
 'accustomed',
 'ace',
 'acento',
 'achin',
 'acknowledge',
 'across',
 'act',
 'actin',
 'action',
 'activos',
 'actually',
 'acuesto',
 'add',
 'addict',
 'addicted',
 'adidas',
 'adiós',
 'adjacent',
 'administer',
 'adore',
 'adrenaline',
 'affi',
 'afford',
 'afraid',
 'afta',
 'aggression',
 'agh',
 'ago',
 'agree',
 'aguantaella',
 'ah',
 'aha',
 'ahead',
 'ahh',
 'ahhh',
 'ahle',
 'ahme',
 'ahí',
 'ai',
 'aight',
 'aim',
 'air',
 'airplane',
 'aka',
 'alba',
 'album',
 'alcohol',
 'alive',
 'ally',
 'allá',
 'almost',
 'alone',
 'along',
 'alpha',
 'already',
 'alright',
 'alriht',
 'always',
 'amanecea',
 'amar',
 'amazing',
 'ammunit

**Let's put the vectorized lyrics in a pandas dataframe with the labels**

In [32]:
import pandas as pd

pd.DataFrame(vectors.todense(), columns=vectorizer.get_feature_names(), index=LABELS)

Unnamed: 0,08,10,12,14hunna,15,180,20,2020,2054,21,...,아직,알지만,언젠가는,없어,없을지,우릴,있는,필요,하고,하지
Dua Lipa,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Dua Lipa,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Dua Lipa,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Dua Lipa,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Dua Lipa,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Cardi B,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Cardi B,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Cardi B,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Cardi B,0,0,0,0,0,0,2,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## TF-IDF & Logistic Regression
Train a classification model that predicts the artist from a piece of text

In [33]:
tf = TfidfTransformer() 


vectors_normalized = tf.fit_transform(vectors)

In [34]:
vectors_normalized

<120x3803 sparse matrix of type '<class 'numpy.float64'>'
	with 12387 stored elements in Compressed Sparse Row format>

In [35]:
Lyrics_DF = pd.DataFrame(vectors_normalized.todense(), columns=vectorizer.get_feature_names(), index=LABELS)



In [36]:
vectorizer = TfidfVectorizer(stop_words=STOPWORDS)

X = vectorizer.fit_transform(CLEAN_CORPUS)

In [37]:
lr = LogisticRegression()

In [38]:
lr.fit(X, LABELS)

In [39]:
lr.score(X, LABELS)

1.0

In [40]:
Lyrics_DF

Unnamed: 0,08,10,12,14hunna,15,180,20,2020,2054,21,...,아직,알지만,언젠가는,없어,없을지,우릴,있는,필요,하고,하지
Dua Lipa,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Dua Lipa,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Dua Lipa,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Dua Lipa,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Dua Lipa,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Cardi B,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cardi B,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cardi B,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cardi B,0.0,0.0,0.0,0.0,0.0,0.0,0.047699,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [41]:
steps = [('tf-idf', TfidfVectorizer(stop_words=STOPWORDS)),
         
          ('LR', LogisticRegression())
        ]

pipeline = Pipeline(steps)

In [42]:
pipeline.fit(CLEAN_CORPUS, LABELS)

In [43]:
pipeline.predict(["If you don't wanna see me dancing with somebody", "I'm levitating"])

array(['Dua Lipa', 'Dua Lipa'], dtype='<U8')

In [44]:
pipeline.predict_proba(["If you don't wanna see me dancing with somebody", "I'm levitating"])

array([[0.37352874, 0.62647126],
       [0.40579171, 0.59420829]])

## Naive Bayes

In [45]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.naive_bayes import MultinomialNB

In [46]:
def fit_model(X_train, y_train):
    """Return a trained model based on the corpus.
    Take the corpus and labels."""

    model = make_pipeline(
        TfidfVectorizer(),
        MultinomialNB()
    )

    fitted_model = model.fit(X_train, y_train)

    return fitted_model

In [47]:
def make_predictions(model, X_test):
    """Returns the class with most probability and also correspondent probabilities.
    Takes a fitted model and X_test (unseen data)."""

    X_test = [X_test]
    
    predictions = model.predict(X_test)
    probs = model.predict_proba(X_test)
    

    return predictions, probs

In [48]:
fitted_model = fit_model(CLEAN_CORPUS, LABELS)

In [49]:
X_test = "If you don't wanna see me dancing with somebody"

In [50]:
predictions = make_predictions(model = fitted_model, X_test = X_test)
predictions

(array(['Dua Lipa'], dtype='<U8'), array([[0.35674072, 0.64325928]]))

In [51]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay

In [52]:
X = vectorizer.fit_transform(CLEAN_CORPUS)
y = LABELS

In [53]:
X

<120x3803 sparse matrix of type '<class 'numpy.float64'>'
	with 12387 stored elements in Compressed Sparse Row format>

In [54]:
Lyrics_DF.index

Index(['Dua Lipa', 'Dua Lipa', 'Dua Lipa', 'Dua Lipa', 'Dua Lipa', 'Dua Lipa',
       'Dua Lipa', 'Dua Lipa', 'Dua Lipa', 'Dua Lipa',
       ...
       'Cardi B', 'Cardi B', 'Cardi B', 'Cardi B', 'Cardi B', 'Cardi B',
       'Cardi B', 'Cardi B', 'Cardi B', 'Cardi B'],
      dtype='object', length=120)

In [55]:
X_train, X_test, y_train, y_test = train_test_split(Lyrics_DF, Lyrics_DF.index, random_state=10)

In [58]:
X_train

Unnamed: 0,08,10,12,14hunna,15,180,20,2020,2054,21,...,아직,알지만,언젠가는,없어,없을지,우릴,있는,필요,하고,하지
Dua Lipa,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cardi B,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cardi B,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cardi B,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.121613,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cardi B,0.046171,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Cardi B,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cardi B,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Dua Lipa,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cardi B,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [62]:
y_train

Index(['Dua Lipa', 'Cardi B', 'Cardi B', 'Cardi B', 'Cardi B', 'Cardi B',
       'Cardi B', 'Cardi B', 'Cardi B', 'Cardi B', 'Cardi B', 'Cardi B',
       'Cardi B', 'Dua Lipa', 'Dua Lipa', 'Dua Lipa', 'Cardi B', 'Cardi B',
       'Cardi B', 'Cardi B', 'Cardi B', 'Dua Lipa', 'Dua Lipa', 'Dua Lipa',
       'Dua Lipa', 'Cardi B', 'Cardi B', 'Dua Lipa', 'Dua Lipa', 'Dua Lipa',
       'Dua Lipa', 'Dua Lipa', 'Dua Lipa', 'Dua Lipa', 'Cardi B', 'Cardi B',
       'Dua Lipa', 'Dua Lipa', 'Dua Lipa', 'Cardi B', 'Cardi B', 'Cardi B',
       'Cardi B', 'Cardi B', 'Cardi B', 'Dua Lipa', 'Dua Lipa', 'Cardi B',
       'Dua Lipa', 'Dua Lipa', 'Cardi B', 'Dua Lipa', 'Dua Lipa', 'Cardi B',
       'Dua Lipa', 'Cardi B', 'Dua Lipa', 'Cardi B', 'Cardi B', 'Dua Lipa',
       'Dua Lipa', 'Cardi B', 'Cardi B', 'Cardi B', 'Dua Lipa', 'Dua Lipa',
       'Cardi B', 'Cardi B', 'Dua Lipa', 'Cardi B', 'Cardi B', 'Dua Lipa',
       'Dua Lipa', 'Cardi B', 'Dua Lipa', 'Dua Lipa', 'Dua Lipa', 'Cardi B',
       'Dua Lip

In [61]:
len(y_train)

90

In [1]:
#fitted_model = fit_model(X_train, y_train)

In [None]:
predictions = make_predictions(model = fitted_model, X_test = X_test)
predictions