# Notebook to start exploring the MusicO-Net Library

In [2]:
import pandas as pd
from collections import Counter
import numpy as np
import re

import torch
import torchtext
import torch.nn as nn
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import Vocab
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset, TensorDataset
import spacy
from sklearn.model_selection import train_test_split
spacy.load('en_core_web_sm')



<spacy.lang.en.English at 0x7fc0ed8600d0>

In [3]:

# Import relevant tabbles
df_features = pd.read_csv("data/musicoset_songfeatures/acoustic_features.csv", sep="\t")
df_lyrics = pd.read_csv("data/musicoset_songfeatures/lyrics.csv", sep="\t")
df_song = pd.read_csv("data/musicoset_metadata/songs.csv", sep="\t")

#SQL Join tables to create dataframe suitable for filtering and exploration
df = df_song.merge(right=df_features, how='inner', on="song_id")
df = df.merge(right=df_lyrics, how='inner', on="song_id")

In [4]:
df.head()

Unnamed: 0,song_id,song_name,billboard,artists,popularity,explicit,song_type,duration_ms,key,mode,...,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo,lyrics
0,3e9HZxeyfWwjeyPAMmWSSQ,"thank u, next","('Thank U, Next', 'Ariana Grande')",{'66CXWjxzNUsdJxJ2JdwvnR': 'Ariana Grande'},86,True,Solo,207320,1,1,...,0.229,0.717,0.653,0.0,0.101,-5.634,0.0658,0.412,106.966,['[Verse 1]\nThought I\'d end up with Sean\nBu...
1,5p7ujcrUXASCNwRaWNHR1C,Without Me,"('Without Me', 'Halsey')",{'26VFTg2z8YR0cCuwLzESi2': 'Halsey'},87,True,Solo,201661,6,1,...,0.297,0.752,0.488,9e-06,0.0936,-7.05,0.0705,0.533,136.041,"[""[Verse 1]\nFound you when your heart was bro..."
2,2xLMifQCjDGFmkHkpNLD9h,SICKO MODE,"('Sicko Mode', 'Travis Scott')",{'0Y5tJX1MQlPlqiwlOH1tJY': 'Travis Scott'},85,True,Solo,312820,8,1,...,0.00513,0.834,0.73,0.0,0.124,-3.714,0.222,0.446,155.008,"['[Part I]\n\n[Intro: Drake]\nAstro, yeah\nSun..."
3,3KkXRkHbMCARz0aVfEt68P,Sunflower - Spider-Man: Into the Spider-Verse,('Sunflower (Spider-Man: Into The Spider-Verse...,"{'246dkjvS1zLTtiykXe5h60': 'Post Malone', '1zN...",92,False,Collaboration,158040,2,1,...,0.556,0.76,0.479,0.0,0.0703,-5.574,0.0466,0.913,89.911,
4,1rqqCSm0Qe4I9rUvWncaom,High Hopes,"('High Hopes', 'Panic! At The Disco')",{'20JZFwl6HVl6yg8a4H3ZqK': 'Panic! At The Disco'},86,False,Solo,190947,5,1,...,0.193,0.579,0.904,0.0,0.064,-2.729,0.0618,0.681,82.014,"[""[Intro]\nHigh, high hopes\n\n[Chorus]\nHad t..."


In [5]:
# Filtering for relevant tracks
df_dance = df[df.danceability > 0.8]
df_dance = df_dance[df_dance.speechiness < 0.5]
df_dance = df_dance[df_dance.energy > 0.5]
df_dance = df_dance[df_dance.popularity >= 60]
df_dance = df_dance[df_dance.explicit == True]

print(len(df_dance))

129


In [6]:
#df_dance.lyrics = df_dance.lyrics.astype(str)
lyrics = df_dance.lyrics.tolist()
df_dance.lyrics = df_dance.lyrics.astype(str)

In [7]:
import string

translator = str.maketrans('', '', string.punctuation)


def split_text(x):
    text = x['lyrics']

    sections = text.split('\\n\\n')

    keys = {'Verse 1': np.nan, 'Verse 2': np.nan, 'Verse 3': np.nan, 'Verse 4': np.nan, 'Chorus': np.nan}

    lyrics = str()

    single_text = []

    res = {}

    for s in sections:

        key = s[s.find('[') + 1:s.find(']')].strip()

        if ':' in key:
            key = key[:key.find(':')]

        if key in keys:
            single_text += [x.lower().replace('(', '').replace(')', '').translate(translator) for x in
                            s[s.find(']') + 1:].split('\\n') if len(x) > 1]

        res['single_text'] = ' \n '.join(single_text)

    return pd.Series(res)


df = df_dance.join(df_dance.apply(split_text, axis=1))



In [8]:
df.dropna(inplace=True)
print(len(df))

129


In [9]:
text_as_list = []

frequencies = {}

uncommon_words = set()

MIN_FREQUENCY = 7

MIN_SEQ = 5

BATCH_SIZE = 32


def extract_text(text):
    global text_as_list

    text_as_list += [w for w in text.split(' ') if w.strip() != '' or w == '\n']



In [10]:



df['single_text'].apply(extract_text)


text_as_list = [e for e in text_as_list if e != '\n']
print(text_as_list)




In [11]:

print('Total words: ', len(text_as_list))

for w in text_as_list:
    frequencies[w] = frequencies.get(w, 0) + 1

uncommon_words = set([key for key in frequencies.keys() if frequencies[key] < MIN_FREQUENCY])

words = sorted(set([key for key in frequencies.keys() if frequencies[key] >= MIN_FREQUENCY]))

num_words = len(words)

word_indices = dict((w, i) for i, w in enumerate(words))

indices_word = dict((i, w) for i, w in enumerate(words))

print('Words with less than {} appearances: {}'.format(MIN_FREQUENCY, len(uncommon_words)))

print('Words with more than {} appearances: {}'.format(MIN_FREQUENCY, len(words)))

valid_seqs = []

end_seq_words = []

for i in range(len(text_as_list) - MIN_SEQ):

    end_slice = i + MIN_SEQ + 1

    if len(set(text_as_list[i:end_slice]).intersection(uncommon_words)) == 0:
        valid_seqs.append(text_as_list[i: i + MIN_SEQ])

        end_seq_words.append(text_as_list[i + MIN_SEQ])

print('Valid sequences of size {}: {}'.format(MIN_SEQ, len(valid_seqs)))

X_train, X_test, y_train, y_test = train_test_split(valid_seqs, end_seq_words, test_size=0.02, random_state=666)

print(X_train[2:7])

Total words:  60131
Words with less than 7 appearances: 5487
Words with more than 7 appearances: 948
Valid sequences of size 5: 25322
[['hold', 'up', 'get', 'right', 'witcha'], ['club', 'bottle', 'full', 'of', 'bub'], ['nigga', 'from', 'the', 'southside', 'southside'], ['ass', 'fat', 'fat', 'my', 'shit'], ['him', 'and', 'fuck', 'you', 'too']]


In [12]:
class Model(nn.Module):
    def __init__(self, dataset):
        super(Model, self).__init__()
        self.lstm_size = 128
        self.embedding_dim = 128
        self.num_layers = 3

        n_vocab = len(dataset.uniq_words)
        self.embedding = nn.Embedding(
            num_embeddings=n_vocab,
            embedding_dim=self.embedding_dim,
        )
        self.lstm = nn.LSTM(
            input_size=self.lstm_size,
            hidden_size=self.lstm_size,
            num_layers=self.num_layers,
            dropout=0.2,
        )
        self.fc = nn.Linear(self.lstm_size, n_vocab)

    def forward(self, x, prev_state):
        embed = self.embedding(x)
        output, state = self.lstm(embed, prev_state)
        logits = self.fc(output)
        return logits, state

    def init_state(self, sequence_length):
        return (torch.zeros(self.num_layers, sequence_length, self.lstm_size),
                torch.zeros(self.num_layers, sequence_length, self.lstm_size))

In [28]:
class Dataset(torch.utils.data.Dataset):
    def __init__(
        self,
        args,
        seq_len
    ):
        self.args = args
        self.sequence_length = seq_len
        self.words = self.load_words()
        self.uniq_words = self.get_uniq_words()

        self.index_to_word = {index: word for index, word in enumerate(self.uniq_words)}
        self.word_to_index = {word: index for index, word in enumerate(self.uniq_words)}

        self.words_indexes = [self.word_to_index[w] for w in self.words]

    def load_words(self):
        #train_df = df
        text = text_as_list
        return text #text.split(' ')

    def get_uniq_words(self):
        word_counts = Counter(self.words)
        return sorted(word_counts, key=word_counts.get, reverse=True)

    def __len__(self):
        return len(self.words_indexes) - self.sequence_length

    def __getitem__(self, index):
        return (
            torch.tensor(self.words_indexes[index:index+self.sequence_length]),
            torch.tensor(self.words_indexes[index+1:index+self.sequence_length+1]),
        )

In [29]:
import argparse
import torch
import numpy as np
from torch import nn, optim
from torch.utils.data import DataLoader

def train(dataset, model, args, max_epochs, batch_size, seq_len):
    model.train()

    dataloader = DataLoader(dataset, batch_size=batch_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(max_epochs):
        state_h, state_c = model.init_state(seq_len)

        for batch, (x, y) in enumerate(dataloader):
            optimizer.zero_grad()

            y_pred, (state_h, state_c) = model(x, (state_h, state_c))
            loss = criterion(y_pred.transpose(1, 2), y)

            state_h = state_h.detach()
            state_c = state_c.detach()

            loss.backward()
            optimizer.step()

            print({ 'epoch': epoch, 'loss': loss.item() })

def predict(dataset, model, text, next_words=100):
    model.eval()

    words = text.split(' ')
    state_h, state_c = model.init_state(len(words))

    for i in range(0, next_words):
        x = torch.tensor([[dataset.word_to_index[w] for w in words[i:]]])
        y_pred, (state_h, state_c) = model(x, (state_h, state_c))

        last_word_logits = y_pred[0][-1]
        p = torch.nn.functional.softmax(last_word_logits, dim=0).detach().numpy()
        word_index = np.random.choice(len(last_word_logits), p=p)
        words.append(dataset.index_to_word[word_index])

    return words

In [30]:
"""parser = argparse.ArgumentParser()
parser.add_argument('--max-epochs', type=int, default=10)
parser.add_argument('--batch-size', type=int, default=256)
parser.add_argument('--sequence-length', type=int, default=4)
args = parser.parse_args()"""

dataset = Dataset(args=None, seq_len=4)
model = Model(dataset)

train(dataset, model, None, max_epochs=4, batch_size=128, seq_len=4)
print(predict(dataset, model, text='When I was young'))

{'epoch': 0, 'batch': 0, 'loss': 8.769515991210938}
{'epoch': 0, 'batch': 1, 'loss': 8.760087966918945}
{'epoch': 0, 'batch': 2, 'loss': 8.749271392822266}
{'epoch': 0, 'batch': 3, 'loss': 8.730419158935547}
{'epoch': 0, 'batch': 4, 'loss': 8.739029884338379}
{'epoch': 0, 'batch': 5, 'loss': 8.717879295349121}
{'epoch': 0, 'batch': 6, 'loss': 8.71589183807373}
{'epoch': 0, 'batch': 7, 'loss': 8.682504653930664}
{'epoch': 0, 'batch': 8, 'loss': 8.680047035217285}
{'epoch': 0, 'batch': 9, 'loss': 8.646614074707031}
{'epoch': 0, 'batch': 10, 'loss': 8.626194953918457}
{'epoch': 0, 'batch': 11, 'loss': 8.603486061096191}
{'epoch': 0, 'batch': 12, 'loss': 8.475797653198242}
{'epoch': 0, 'batch': 13, 'loss': 8.112565994262695}
{'epoch': 0, 'batch': 14, 'loss': 8.138557434082031}
{'epoch': 0, 'batch': 15, 'loss': 7.521446228027344}
{'epoch': 0, 'batch': 16, 'loss': 7.873306751251221}
{'epoch': 0, 'batch': 17, 'loss': 7.646945476531982}
{'epoch': 0, 'batch': 18, 'loss': 6.832845211029053}
{'ep

KeyError: 'When'

In [38]:
print(predict(dataset, model, text='move bitch', next_words=20))

['move', 'bitch', 'ying', 'ah', 'street', 'bow', 'afraid', 'white', 'elde', 'separate', 'arabalık', 'blind', 'poor', 'selfishly', 'countin', 'on', 'an', 'dizzy', 'hurt', 'up', 'with', 'you']


In [None]:
['move', 'bitch', 'ying', 'ah']
['street', 'bow', 'afraid']
['white', 'elderly', 'separate']
['arabalık', 'blind', 'poor', 'selfishly']
['countin', 'on', 'an', 'dizzy']
['hurt', 'up', 'with', 'you']