### N-Gram Language Model

In [7]:
import pandas as pd
import ngram_laplace_lm_model as lm
import nltk
from nltk.lm.preprocessing import padded_everygram_pipeline

import utils 

In [8]:
# --- language model settings ---
NGRAM = 4               # n-gram order used as the default for both models below
SENTENCE_BEGIN = "<s>"  # sentence start marker
SENTENCE_END = "</s>"   # sentence end marker

# --- training data selection ---
GENRE = "Heavy Metal"
SONG_LIMIT = 1000       # passed as song_limit when selecting lyrics

# True to train by groups of lines, False to train by single lines
BY_VERSE = False

# --- notebook output ---
VERBOSE = True          # forwarded as the verbose flag to helper calls

In [9]:
# load the cleaned lyrics dataset, report its dimensions, then preview the first rows
song_df = pd.read_csv('clean_data.csv')
print(song_df.shape)
song_df.head()  # head() defaults to 5 rows

(163020, 5)


Unnamed: 0,artist,song_name,lyrics,language,genres
0,ivete sangalo,Careless Whisper,I feel so unsure\nAs I take your hand and lead...,en,"['Pop', ' Axé', ' Romântico']"
1,ivete sangalo,Could You Be Loved / Citação Musical do Rap: S...,"Don't let them fool, ya\nOr even try to school...",en,"['Pop', ' Axé', ' Romântico']"
2,ivete sangalo,Cruisin' (Part. Saulo),"Baby, let's cruise, away from here\nDon't be c...",en,"['Pop', ' Axé', ' Romântico']"
3,ivete sangalo,Easy,"Know it sounds funny\nBut, I just can't stand ...",en,"['Pop', ' Axé', ' Romântico']"
4,ivete sangalo,For Your Babies (The Voice cover),You've got that look again\nThe one I hoped I ...,en,"['Pop', ' Axé', ' Romântico']"


In [10]:
# pull the training sequences for GENRE out of the dataframe
# (selection and splitting are handled by utils.get_lyrics_in_genre)
song_lyrics = utils.get_lyrics_in_genre(
    song_df,
    GENRE,
    verbose=VERBOSE,
    by_verse=BY_VERSE,
    song_limit=SONG_LIMIT,
)

Selected 1000 / 18849 in the genre Heavy Metal
Total sequences: 34169


#### Using NLTK Model with Kneser-Ney Smoothing 

In [11]:
def create_ngram_kneser_ney_model(sequences: list, ngram: int = NGRAM, verbose: bool = False):
	"""
	Creates a trained n-gram language model using Kneser-Ney Smoothing.

	Each sequence is tokenized with utils.tokenize_line, NLTK's
	padded_everygram_pipeline turns the tokenized sequences into training
	n-grams plus a vocabulary stream, and the model is fit on both.

	Args:
		sequences (list): a list of training sequence strings, not tokenized
		ngram (int): the n-gram order of the language model to create
		verbose (bool): if True, prints information about the training data

	Returns:
		A trained nltk.lm.KneserNeyInterpolated model
	"""
	# one token list per input sequence (assumes tokenize_line returns a list -- TODO confirm)
	tokens = [utils.tokenize_line(seq, ngram) for seq in sequences]

	# allow padded_everygram_pipeline to create ngrams for the model
	# NOTE(review): tokenize_line also receives ngram; if it already adds <s>/</s>
	# padding, the pipeline pads a second time -- confirm against utils
	ngrams_generator, padded_sents = padded_everygram_pipeline(ngram, tokens)

	model = nltk.lm.KneserNeyInterpolated(ngram)
	model.fit(ngrams_generator, padded_sents)

	if verbose:
		# len(tokens) is the number of sequences; the token total is the sum of
		# the per-sequence lengths
		print("Number of sequences:", len(tokens))
		print("Number of tokens:", sum(len(seq_tokens) for seq_tokens in tokens))
		print("Vocabulary Size:", len(model.vocab))

	return model

In [12]:
# pass the notebook-level settings explicitly: the function's ngram default was
# bound when its defining cell ran and would go stale if NGRAM is edited afterwards,
# while the generation seed below reads NGRAM at call time
kneser_ney_model = create_ngram_kneser_ney_model(song_lyrics, ngram=NGRAM, verbose=VERBOSE)

In [13]:
# Generate NUM_SEQ lines and print them out
NUM_SEQ = 5
seed = [SENTENCE_BEGIN] * (NGRAM - 1)  # start-of-sentence context for an order-NGRAM model
max_len = 15

print("\nSample Generated Lyrics:", GENRE)
for i in range(NUM_SEQ):
    generated = list(kneser_ney_model.generate(max_len, text_seed=seed))
    # generation does not stop at the end marker, so keep only the text before
    # the first one; anything after it belongs to a new, unrelated context
    if SENTENCE_END in generated:
        generated = generated[:generated.index(SENTENCE_END)]
    # drop any stray start markers so the line is more readable
    seq = " ".join(tok for tok in generated if tok != SENTENCE_BEGIN)
    print(seq)


Sample Generated Lyrics: Heavy Metal
kill die for false illusions
and who will break your fall


#### Using Own N-Gram Language Model with Laplace Smoothing

In [None]:
def create_ngram_laplace_model(sequences: list, ngram: int = NGRAM, verbose: bool = False):
	"""
	Build and train the project's own n-gram language model with Laplace Smoothing.

	The raw sequences are tokenized via utils.tokenize and the result is handed
	to a fresh lm.NGramLaplaceLanguageModel for training.

	Args:
		sequences (list): training sequence strings, not yet tokenized
		ngram (int): order of the n-gram model to build
		verbose (bool): forwarded to the model's train() call; if True, prints
			information about the training data

	Returns:
		The trained NGramLaplaceLanguageModel
	"""
	training_tokens = utils.tokenize(sequences, ngram)

	laplace_lm = lm.NGramLaplaceLanguageModel(ngram)
	laplace_lm.train(training_tokens, verbose=verbose)
	return laplace_lm

In [None]:
# pass the notebook-level settings explicitly: the function's ngram default was
# bound when its defining cell ran and would go stale if NGRAM is edited afterwards
laplace_model = create_ngram_laplace_model(song_lyrics, ngram=NGRAM, verbose=VERBOSE)

In [None]:
print("Sample Generated Lyrics:", GENRE)

# Sample 10 lines from the Laplace model and show each on its own row
lyrics = laplace_model.generate(10)
for lyric in lyrics:
    # join the tokens, then strip the sentence start / stop markers for readability
    lyric = ' '.join(lyric).replace('<s>', '').replace('</s>', '').strip()
    print(lyric)

Sample Generated Lyrics: Power-Pop
you go , when our world is so confusing ? ( my world is so confusing ? ( my world is in a spin ,
wishing i was far away
it 's not hard to realize
i would n't go back there again .
oo , i hope everyone missed you
and it goes like this
this ai n't the new , it 's never too late .
has given me the feeling i feel like i 'm dying here of <UNK>
my apathy is tragedy
and if that 's your scene
