In [1]:
import spotipy as sp
from spotipy.oauth2 import SpotifyClientCredentials
from googlesearch import search
import requests
from bs4 import BeautifulSoup
import re
import string
import ids
import tensorflow as tf
import keras
from keras.preprocessing.text import Tokenizer

In [2]:
# Spotify: read the client ID/secret from the local `ids` module (keeps the
# credentials out of the notebook) and build a client authenticated through
# SpotifyClientCredentials.
spot_client_id=ids.SPOT_CLIENT_ID
spot_client_secret=ids.SPOT_CLIENT_SECRET
spot_client_credentials_manager = SpotifyClientCredentials(client_id=spot_client_id, client_secret=spot_client_secret)
spotify = sp.Spotify(client_credentials_manager=spot_client_credentials_manager)

# Genius: the access token is sent as a bearer token in the Authorization
# header of the requests made with `genius_header`.
genius_access_token = ids.GENIUS_ACCESS_TOKEN
genius_header = {'Authorization': f'Bearer {genius_access_token}'}

In [4]:
# Look the artist up on Spotify and keep the URI of the first (best) match.
artist = 'frank ocean'
artist_search = spotify.search(q=f'artist:{artist}', type='artist')
artist_items = artist_search['artists']['items']
if not artist_items:
    # Fail with a readable message instead of a bare IndexError on `[0]`.
    raise ValueError(f'No Spotify artist found for {artist!r}')
artist_uri = artist_items[0]['uri']

In [5]:
# Fetch the artist's top tracks ONCE (the original issued the same API request
# on every loop iteration) and keep at most the first 10 names. Slicing also
# avoids an IndexError when Spotify returns fewer than 10 tracks.
top_tracks = spotify.artist_top_tracks(artist_uri)['tracks']
song_list = [track['name'] for track in top_tracks[:10]]

song_list

['Pink + White',
 'Lost',
 'Novacane',
 'Ivy',
 'Thinkin Bout You',
 'Slide (feat. Frank Ocean & Migos)',
 'Nights',
 'White Ferrari',
 'In My Room',
 'Pyramids']

In [11]:
# For every song: find its Genius page through the search API, scrape the
# lyric text from the page, strip the bracketed section headers and split the
# result into tokens in which every line break is an explicit '\n' token.
#
# word_arr[k]  -> list of tokens for the k-th song that was found
# lyric_arr[k] -> cleaned lyric text for the same song
# Songs with no Genius hit are skipped, so these lists may be shorter than
# `song_list`.
word_arr = []
lyric_arr = []

for song in song_list:
    # Let requests build the query string: titles such as
    # 'Slide (feat. Frank Ocean & Migos)' contain '&', which would cut the `q`
    # parameter short if interpolated into the URL unencoded. HTTPS keeps the
    # bearer token off the wire in clear text.
    search_req = requests.get(
        'https://api.genius.com/search',
        params={'q': f'{artist}-{song}'},
        headers=genius_header,
        timeout=10,
    )
    hits = search_req.json()['response']['hits']
    if not hits:
        # No match on Genius: skip instead of raising IndexError on hits[0].
        continue
    url = hits[0]['result']['url']

    result = requests.get(url, timeout=10)
    soup = BeautifulSoup(result.content, 'html.parser')
    result.close()

    # The lyrics may be spread over several matching containers. Join them with
    # a newline so the last line of one container is not fused with the first
    # line of the next.
    container_texts = [
        tag.get_text(strip=True, separator='\n')
        for tag in soup.select('div[class^="Lyrics__Container"], .song_body-lyrics p')
    ]
    lyrics = '\n'.join(text for text in container_texts if text)

    # Drop "[Verse 1]"-style headers, then collapse the blank lines they leave.
    lyrics = re.sub('\\[.*?\\]', '', lyrics)
    lyrics = re.sub('\n{2,}', '\n', lyrics)

    # Split on spaces; a chunk that still contains line breaks is split again,
    # and a '\n' token is emitted after every non-empty piece except the last.
    words = []
    for chunk in lyrics.split(' '):
        if '\n' not in chunk:
            words.append(chunk)
            continue
        *leading, last = chunk.split('\n')
        for piece in leading:
            if piece:
                words.append(piece)
                words.append('\n')
        if last:
            words.append(last)

    word_arr.append(words)
    lyric_arr.append(lyrics)

word_arr

[['Yeah,',
  'yeah,',
  'um',
  '(Woo)',
  '\n',
  'Yeah,',
  'yeah,',
  'yeah',
  '\n',
  "That's",
  'the',
  'way',
  'every',
  'day',
  'goes',
  '\n',
  'Every',
  'time',
  "we've",
  'no',
  'control',
  '\n',
  'If',
  'the',
  'sky',
  'is',
  'pink',
  'and',
  'white',
  '\n',
  'If',
  'the',
  'ground',
  'is',
  'black',
  'and',
  'yellow',
  '\n',
  "It's",
  'the',
  'same',
  'way',
  'you',
  'showed',
  'me',
  '\n',
  'Nod',
  'my',
  'head,',
  "don't",
  'close',
  'my',
  'eyes',
  '\n',
  'Halfway',
  'on',
  'a',
  'slow',
  'move',
  '\n',
  "It's",
  'the',
  'same',
  'way',
  'you',
  'showed',
  'me',
  '\n',
  'If',
  'you',
  'could',
  'fly,',
  'then',
  "you'd",
  'feel',
  'south',
  '\n',
  'Up',
  "north's",
  'getting',
  'cold',
  'soon',
  '\n',
  'The',
  'way',
  'it',
  'is,',
  "we're",
  'on',
  'land',
  '\n',
  'Still,',
  "I'm",
  'someone',
  'to',
  'hold',
  'true',
  '\n',
  'Keep',
  'you',
  'cool',
  'when',
  "it's",
  'still',

In [10]:
# Print the first song's tokens separated by single spaces. The '\n' tokens are
# separated like any other token, so each printed line ends with a space and
# the following one starts with a space.
song = word_arr[0]
print(*song)

Yeah, yeah, um (Woo) 
 Yeah, yeah, yeah 
 That's the way every day goes 
 Every time we've no control 
 If the sky is pink and white 
 If the ground is black and yellow 
 It's the same way you showed me 
 Nod my head, don't close my eyes 
 Halfway on a slow move 
 It's the same way you showed me 
 If you could fly, then you'd feel south 
 Up north's getting cold soon 
 The way it is, we're on land 
 Still, I'm someone to hold true 
 Keep you cool when it's still alive 
 Won't let you down when it's all ruin 
 Just the same way you showed me, showed me 
 You showed me love 
 Glory from above 
 Regard, my dear 
 It's all downhill from here 
 In the wake of a hurricane 
 Dark skin of a summer shade 
 Nosedive into flood lines 
 Tall tower of milk crates 
 It's the same way you showed me 
 Cannonball off the porch side 
 Older kids trying off the rooves 
 Just the same way you showed me (You showed) 
 If you could die and come back to life 
 Up for air from the swimming pool 
 You'd kneel 

In [14]:
# Show the cleaned lyric text of the first song exactly as stored.
print(lyric_arr[0])


Yeah, yeah, um (Woo)
Yeah, yeah, yeah
That's the way every day goes
Every time we've no control
If the sky is pink and white
If the ground is black and yellow
It's the same way you showed me
Nod my head, don't close my eyes
Halfway on a slow move
It's the same way you showed me
If you could fly, then you'd feel south
Up north's getting cold soon
The way it is, we're on land
Still, I'm someone to hold true
Keep you cool when it's still alive
Won't let you down when it's all ruin
Just the same way you showed me, showed me
You showed me love
Glory from above
Regard, my dear
It's all downhill from here
In the wake of a hurricane
Dark skin of a summer shade
Nosedive into flood lines
Tall tower of milk crates
It's the same way you showed me
Cannonball off the porch side
Older kids trying off the rooves
Just the same way you showed me (You showed)
If you could die and come back to life
Up for air from the swimming pool
You'd kneel down to the dry land
Kiss the earth that birthed you
Gave you

In [15]:
# Lower-case the first song's lyrics and split them into one string per line.
corpus = lyric_arr[0].lower().split('\n')

In [17]:
# Fit a Keras Tokenizer on the per-line corpus.
# NOTE(review): `tokenizer` is re-created and re-fitted on `test` in a later
# cell, so this instance is replaced when the notebook runs top to bottom.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

In [6]:
# Use the first song's token list (words plus explicit '\n' tokens) as the
# working sample, and display it.
test = word_arr[0]
test

['Yeah,',
 'yeah,',
 'um',
 '(Woo)',
 '\n',
 'Yeah,',
 'yeah,',
 'yeah',
 '\n',
 "That's",
 'the',
 'way',
 'every',
 'day',
 'goes',
 '\n',
 'Every',
 'time',
 "we've",
 'no',
 'control',
 '\n',
 'If',
 'the',
 'sky',
 'is',
 'pink',
 'and',
 'white',
 '\n',
 'If',
 'the',
 'ground',
 'is',
 'black',
 'and',
 'yellow',
 '\n',
 "It's",
 'the',
 'same',
 'way',
 'you',
 'showed',
 'me',
 '\n',
 'Nod',
 'my',
 'head,',
 "don't",
 'close',
 'my',
 'eyes',
 '\n',
 'Halfway',
 'on',
 'a',
 'slow',
 'move',
 '\n',
 "It's",
 'the',
 'same',
 'way',
 'you',
 'showed',
 'me',
 '\n',
 'If',
 'you',
 'could',
 'fly,',
 'then',
 "you'd",
 'feel',
 'south',
 '\n',
 'Up',
 "north's",
 'getting',
 'cold',
 'soon',
 '\n',
 'The',
 'way',
 'it',
 'is,',
 "we're",
 'on',
 'land',
 '\n',
 'Still,',
 "I'm",
 'someone',
 'to',
 'hold',
 'true',
 '\n',
 'Keep',
 'you',
 'cool',
 'when',
 "it's",
 'still',
 'alive',
 '\n',
 "Won't",
 'let',
 'you',
 'down',
 'when',
 "it's",
 'all',
 'ruin',
 '\n',
 'Just',


In [7]:
# Fit a fresh Tokenizer on the token list and convert every token to a list of
# integer ids (one list per token).
tokenizer = Tokenizer()
tokenizer.fit_on_texts(test)
seqs = tokenizer.texts_to_sequences(test)
# +1 leaves room for id 0, which the next cell uses as the placeholder for
# tokens that produced an empty sequence (e.g. the '\n' tokens).
total_words = len(tokenizer.word_index) + 1
total_words

154

In [8]:
# Tokens that produced no ids (empty lists) get the placeholder id 0 so that
# every entry of `seqs` ends up non-empty. The lists are modified in place.
for token_ids in seqs:
    if not token_ids:
        token_ids.append(0)

seqs

[[10],
 [10],
 [45],
 [46],
 [0],
 [10],
 [10],
 [10],
 [0],
 [27],
 [1],
 [5],
 [28],
 [47],
 [48],
 [0],
 [28],
 [49],
 [50],
 [51],
 [52],
 [0],
 [15],
 [1],
 [53],
 [11],
 [54],
 [16],
 [55],
 [0],
 [15],
 [1],
 [56],
 [11],
 [57],
 [16],
 [58],
 [0],
 [7],
 [1],
 [8],
 [5],
 [2],
 [3],
 [4],
 [0],
 [59],
 [12],
 [60],
 [61],
 [62],
 [12],
 [63],
 [0],
 [64],
 [29],
 [20],
 [65],
 [66],
 [0],
 [7],
 [1],
 [8],
 [5],
 [2],
 [3],
 [4],
 [0],
 [15],
 [2],
 [30],
 [67],
 [68],
 [31],
 [69],
 [70],
 [0],
 [9],
 [71],
 [72],
 [73],
 [74],
 [0],
 [1],
 [5],
 [17],
 [11],
 [75],
 [29],
 [32],
 [0],
 [33],
 [76],
 [77],
 [6],
 [78],
 [79],
 [0],
 [80],
 [2],
 [81],
 [21],
 [7],
 [33],
 [34],
 [0],
 [82],
 [83],
 [2],
 [35],
 [21],
 [7],
 [18],
 [84],
 [0],
 [22],
 [1],
 [8],
 [5],
 [2],
 [3],
 [4],
 [3],
 [4],
 [0],
 [2],
 [3],
 [4],
 [36],
 [0],
 [37],
 [13],
 [38],
 [0],
 [39],
 [12],
 [40],
 [0],
 [7],
 [18],
 [41],
 [13],
 [23],
 [0],
 [85],
 [1],
 [86],
 [19],
 [20],
 [87],
 [0],
 [88]

In [10]:
# Wrap the id lists in a tf.data.Dataset; each element is one entry of `seqs`.
# NOTE(review): this presumably requires every entry of `seqs` to have the same
# length - confirm no token maps to more than one id.
dataset = tf.data.Dataset.from_tensor_slices(seqs) 

In [11]:
# Peek at the first three dataset elements as NumPy arrays.
list(dataset.take(3).as_numpy_iterator())

[array([10], dtype=int32), array([10], dtype=int32), array([45], dtype=int32)]

In [12]:
# Group consecutive token ids into chunks of 4 and show the first two chunks.
# No drop_remainder is passed here, so the final chunk may hold fewer than 4.
sequences = dataset.batch(4)
list(sequences.take(2).as_numpy_iterator())

[array([[10],
        [10],
        [45],
        [46]], dtype=int32),
 array([[ 0],
        [10],
        [10],
        [10]], dtype=int32)]

In [13]:
def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is the chunk without its last element and the target is the
    chunk without its first element, so target[i] is the element that
    follows input[i] in the original chunk.
    """
    return chunk[:-1], chunk[1:]

In [14]:
# Turn every chunk into an (input, target) pair via split_input_target.
temp = sequences.map(split_input_target)

In [15]:
# Inspect the first (input, target) pair as NumPy arrays.
list(temp.take(1).as_numpy_iterator())

[(array([[10],
         [10],
         [45]], dtype=int32),
  array([[10],
         [45],
         [46]], dtype=int32))]

In [None]:
# Shuffle the (input, target) pairs with a buffer of 1000 elements and group
# them into batches of 4, discarding a final batch that is not full.
data = temp.shuffle(1000).batch(4, drop_remainder=True)

In [None]:
# Loss function
def loss(labels, logits):
    """Return the sparse categorical cross-entropy of `logits` against `labels`.

    `from_logits=True` tells Keras that `logits` are raw, un-normalised scores
    rather than probabilities.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        y_true=labels,
        y_pred=logits,
        from_logits=True,
    )

In [None]:
# Map each token id (vocabulary size `total_words`) to a 100-dimensional
# embedding vector. The original cell passed `input_length=` with no value,
# which is a SyntaxError; the argument is optional, so it is simply omitted.
# NOTE(review): the model currently consists of the embedding layer only.
model = keras.Sequential([
    keras.layers.Embedding(total_words, 100),
])

In [None]:
# Materialise the (input, target) pairs of `temp` into two parallel lists.
x_train, y_train = [], []

for inputs, targets in temp.as_numpy_iterator():
    x_train.append(inputs)
    y_train.append(targets)

# Discard the final pair from both lists.
# NOTE(review): presumably because the last chunk can be shorter than the
# others - confirm; this also drops it when it is full-sized.
x_train.pop()
y_train.pop()

In [None]:
#compile model
model.compile(optimizer='adam', loss='mse')
history = model.fit(x=x_train, y=y_train, epochs=2)

In [None]:
# Exploratory: list the attribute names available on the mapped dataset `temp`.
dir(temp)