In [100]:
import pandas as pd
import pronouncing
# Use TensorFlow's Keras API
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LSTM, Dropout, Embedding, Bidirectional, Input, Lambda, Reshape
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
import markovify
import re
import random
import numpy as np
import os

In [19]:
# Load the rhyming-pairs CSV and keep only positional columns 1 and 2
# (column 0 and anything after column 2 are discarded).
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
df = pd.read_csv(
    '/Users/liqingyang/Documents/GitHub/nlp_project/data/lyrics_rhyming_pairs.csv'
).iloc[:, 1:3]

In [95]:
# Join every first line with its following line, separated by ".\n".
# NOTE(review): `combine` is also the name of a DataFrame method, so this
# attribute assignment presumably shadows that method on `df` instead of adding
# a column — confirm before switching to df["combine"], because a later cell
# reads `df.combine`.
df.combine = df["first_line"] + ".\n" + df["next_line"]
df.combine

0        Bomaye, Killa Cam, my Lord.\nStill the man wit...
1        Doggy, I'm from the land of grind, pan-pan: gr...
2        Them niggas knew we bang.\nStood out like Poot...
3        That when the toolie sing.\nCame from that mov...
4        Came from that movie ring.\nFlapjack, ooh he b...
                               ...                        
87067    Follow this, model this.\nWhen it's bottled up...
87068    It don't matter but nobody take a day here to ...
87069    And you'll never get all that you've earned.\n...
87070    Only way that we ever gon' get to rise.\nToget...
87071    No love? Fuck it, then let's battle.\nAmmo exp...
Length: 87072, dtype: object

In [96]:
# Flatten the combined pairs into one corpus string, terminating every pair
# with ".\n".
# NOTE(review): the name `str` shadows the built-in type; it is kept because a
# later cell reads it (`text_file = str`).
combined_ = df.combine.to_list()
str = "".join(pair + ".\n" for pair in combined_)
str



In [129]:
def create_network(depth, weights_path="Documents/GitHub/nlp_project/data/lyrics/markovify/artist_model.weights.h5"):
    """Build and compile the stacked-LSTM network, optionally restoring weights.

    The network maps a (2, 2) input sequence to a (2, 2) output sequence:
    an LSTM(4) input layer, `depth` hidden LSTM(8) layers and an LSTM(2)
    output layer, all returning full sequences. It is compiled with the
    'rmsprop' optimizer and mean-squared-error loss.

    Args:
        depth: Number of hidden LSTM(8) layers to stack.
        weights_path: File to restore weights from when not training. The
            default is the path that `train()` saves to; previously this
            function looked for "artist.rap", a file nothing in this notebook
            writes, so trained weights were never loaded.

    Returns:
        The compiled Keras model.

    Note:
        Reads the module-level `train_mode` flag; weights are only restored
        when it is falsy and `weights_path` exists.
    """
    model = Sequential()
    model.add(LSTM(4, input_shape=(2, 2), return_sequences=True))
    for _ in range(depth):
        model.add(LSTM(8, return_sequences=True))
    model.add(LSTM(2, return_sequences=True))
    model.summary()
    model.compile(optimizer='rmsprop', loss='mse')

    # Check the file first so a missing checkpoint never touches `train_mode`.
    if os.path.isfile(weights_path) and not train_mode:
        model.load_weights(weights_path)
        print("loading saved network:" + weights_path)
    return model

def markov(text_file):
    """Return a `markovify.NewlineText` model built from `text_file`.

    Despite its name, `text_file` is handed to markovify as-is; the caller in
    this notebook passes the lyrics text itself rather than a path.
    """
    return markovify.NewlineText(text_file)

def rhymeindex(lyrics):
    """Return the sorted list of two-letter rhyme endings found in `lyrics`.

    When not training (module-level `train_mode` is falsy) and a cached
    "artist.rhymes" file exists in the working directory, the cached list is
    returned. Otherwise the list is rebuilt and written to "artist.rhymes".

    For every line, the last space-separated word is stripped of non-word
    characters and lower-cased. Its rhyme ending is the most common
    two-character suffix among `pronouncing.rhymes(word)`, or the word's own
    last two characters when no rhymes are known.

    Args:
        lyrics: Iterable of lyric lines (strings).

    Returns:
        List of unique endings, ordered by their reversed spelling so endings
        that finish with the same letter sit next to each other.
    """
    if os.path.isfile("artist.rhymes") and not train_mode:
        print ("loading saved rhymes from artist.rhymes")
        with open("artist.rhymes", "r", encoding='utf-8') as cached:
            return cached.read().split("\n")

    print ("Building list of rhymes:")
    scheme_by_word = {}  # many lines end in the same word; look each up once
    for line in lyrics:
        word = re.sub(r"\W+", '', line.split(" ")[-1]).lower()
        if word in scheme_by_word:
            continue
        rhyme_ends = [candidate[-2:] for candidate in pronouncing.rhymes(word)]
        if rhyme_ends:
            # Same selection expression as rhyme() so both agree on ties.
            scheme_by_word[word] = max(set(rhyme_ends), key=rhyme_ends.count)
        else:
            # No known rhymes: fall back to the word's own ending.
            scheme_by_word[word] = word[-2:]

    rhymelist = sorted(set(scheme_by_word.values()), key=lambda ending: ending[::-1])
    print("List of Sorted 2-Letter Rhyme Ends:")
    print(rhymelist)

    with open("artist.rhymes", "w", encoding='utf-8') as out:
        out.write("\n".join(rhymelist))
    return rhymelist
        
def rhyme(line, rhyme_list):
    """Encode the rhyme ending of `line` as a fraction of `rhyme_list`.

    The last space-separated word is stripped of non-word characters and
    lower-cased; its ending is the most common two-character suffix among
    `pronouncing.rhymes(word)`, falling back to the word's own last two
    characters when that selection fails.

    Returns:
        index_of_ending / len(rhyme_list) as a float, or None when the ending
        cannot be located in `rhyme_list`.
    """
    last_word = re.sub(r"\W+", '', line.split(" ")[-1]).lower()
    endings = [candidate[-2:] for candidate in pronouncing.rhymes(last_word)]
    try:
        scheme = max(set(endings), key=endings.count)
    except Exception:
        # e.g. no rhymes known for this word
        scheme = last_word[-2:]
    try:
        return rhyme_list.index(scheme) / float(len(rhyme_list))
    except Exception:
        # ending not present in rhyme_list
        return None

def split_lyrics_file(text):
    """Split `text` on newlines and drop empty lines.

    Args:
        text: Lyrics as a single string.

    Returns:
        List of the non-empty lines, in their original order.
    """
    # Single filtering pass; the former `while "" in text: text.remove("")`
    # rescanned the list for every blank line (quadratic on large corpora).
    return [line for line in text.split("\n") if line != ""]

def syllables(line, maxsyllables=8):
    """Estimate the syllable count of `line` as a fraction of `maxsyllables`.

    Each space-separated word is lower-cased, stripped of surrounding
    ".:;?!" characters and scored with a vowel-group heuristic:
    one syllable per vowel that starts the word or follows a non-vowel,
    minus one for a trailing "e", plus one for a trailing "le". Every
    non-empty word contributes at least one syllable.

    The per-word minimum and the trailing-"e" adjustment used to be applied
    to the running total for the whole line, so e.g. "the the" counted as a
    single syllable; they are now applied per word.

    Args:
        line: The lyric line to measure.
        maxsyllables: Normalisation constant (default 8, as before).

    Returns:
        Estimated syllable count divided by `maxsyllables` (a float).
    """
    vowels = 'aeiouy'
    total = 0
    for raw_word in line.split(" "):
        word = raw_word.lower().strip(".:;?!")
        if len(word) == 0:
            continue
        word_count = 0
        if word[0] in vowels:
            word_count += 1
        for index in range(1, len(word)):
            if word[index] in vowels and word[index - 1] not in vowels:
                word_count += 1
        if word.endswith('e'):
            word_count -= 1  # treat a trailing "e" as silent
        if word.endswith('le'):
            word_count += 1  # ...except in "-le" endings such as "apple"
        # A word is never shorter than one syllable.
        total += max(word_count, 1)
    return total / maxsyllables

def generate_lyrics(text_file):
    """Generate candidate bars from a Markov model of the lyrics.

    Sentences are drawn from `markov(text_file)` until roughly one ninth as
    many bars as there are lines in `text_file` have been collected, or until
    twice that line count in attempts has been spent. A sentence is kept when
    it is not None, `syllables(bar) < 1`, it has not been produced before and
    fewer than three kept bars already end in the same last word.

    The attempt counter used to advance only when a bar was accepted, so the
    attempt cap could never end the loop and generation could spin forever
    once the model ran out of fresh sentences. Every draw now counts.

    Args:
        text_file: The lyrics text (newline separated).

    Returns:
        List of accepted bars (strings), in generation order.
    """
    def get_last_word(bar):
        # Last word with a single trailing punctuation mark removed.
        last_word = bar.split(" ")[-1]
        if last_word and last_word[-1] in "!.?,":
            last_word = last_word[:-1]
        return last_word

    lyriclength = len(text_file.split("\n"))
    text_model = markov(text_file)

    bars = []
    seen_bars = set()        # constant-time duplicate check
    last_word_counts = {}    # how many kept bars end in each word
    attempts = 0
    while len(bars) < lyriclength / 9 and attempts < lyriclength * 2:
        attempts += 1
        bar = text_model.make_sentence(max_overlap_ratio = .49, tries=100)
        if bar is None or syllables(bar) >= 1:
            continue
        last_word = get_last_word(bar)
        if bar in seen_bars or last_word_counts.get(last_word, 0) >= 3:
            continue
        bars.append(bar)
        seen_bars.add(bar)
        last_word_counts[last_word] = last_word_counts.get(last_word, 0) + 1
    return bars

def build_dataset(lines, rhyme_list):
    """Turn lyric lines into (input, target) training arrays.

    Each line is reduced to the pair [syllables(line), rhyme(line, rhyme_list)].
    A window of four consecutive lines then yields one sample: the first two
    lines' pairs form a (2, 2) input and the last two form a (2, 2) target.

    Returns:
        Tuple (x_data, y_data) of numpy arrays, one entry per window.
    """
    features = [[syllables(line), rhyme(line, rhyme_list)] for line in lines]

    x_data, y_data = [], []
    for start in range(len(features) - 3):
        first, second, third, fourth = features[start:start + 4]
        x_data.append(np.array(first + second).reshape(2, 2))
        y_data.append(np.array(third + fourth).reshape(2, 2))
    return np.array(x_data), np.array(y_data)


def compose_rap(lines, rhyme_list, lyrics_file, model):
    """Roll the model forward from a random pair of human-written lines.

    Two consecutive lines are picked at random from `lyrics_file`, encoded as
    [syllables, rhyme] pairs and fed to `model.predict` as a (1, 2, 2) batch.
    Each prediction is then fed back in as the next input, 100 more times.

    `lines` is accepted but not used here.

    Returns:
        List of 101 prediction arrays, in generation order.
    """
    human_lyrics = split_lyrics_file(lyrics_file)
    start = random.choice(range(len(human_lyrics) - 1))
    seed_features = [
        [syllables(seed), rhyme(seed, rhyme_list)]
        for seed in human_lyrics[start:start + 2]
    ]
    seed_batch = np.array([seed_features]).flatten().reshape(1, 2, 2)

    rap_vectors = [model.predict(seed_batch)]
    for _ in range(100):
        previous = np.array([rap_vectors[-1]]).flatten().reshape(1, 2, 2)
        rap_vectors.append(model.predict(previous))
    return rap_vectors

def vectors_into_song(vectors, generated_lyrics, rhyme_list):
    """Pick, for every predicted vector half, the best matching generated line.

    Each prediction in `vectors` contributes two [syllables, rhyme] targets.
    For every target the remaining candidate lines are scored by how close
    their own syllable/rhyme features are to the target, minus a penalty of
    0.2 for every already chosen line that ends in the same word. The highest
    scoring candidate is printed, appended to the verse and removed from the
    candidate pool; selection stops when the pool is empty.

    Fixes relative to the previous version:
      * `print(str(...))` is now `print(...)`: the notebook rebinds the name
        `str` to the corpus string, which made that call raise TypeError.
      * The rhyme distance is now subtracted from the score; it used to be
        added, so worse rhyme matches scored higher.
      * Target and candidate features are compared in the same units; only
        the target used to be scaled by `maxsyllables` / `len(rhyme_list)`.
      * The best candidate is always taken (one per target) instead of only
        candidates whose score was at least 0 and exactly equal to the
        maximum, which emitted nothing when all scores were negative.
      * Trailing punctuation is removed with `rstrip`, so a last word made
        only of punctuation no longer raises IndexError.
      * Candidates whose rhyme is None (ending not in `rhyme_list`) are
        skipped instead of crashing in `float(None)`.

    Reads the module-level `maxsyllables`.

    Returns:
        List of chosen lines, in verse order.
    """
    print ("\n\n")
    print ("Writing verse:")
    print ("\n\n")

    def last_word_compare(rap, line2):
        # 0.2 penalty per chosen line that shares line2's final word.
        word2 = line2.split(" ")[-1].rstrip("?!,. ")
        penalty = 0
        for line1 in rap:
            if line1.split(" ")[-1].rstrip("?!,. ") == word2:
                penalty += 0.2
        return penalty

    def calculate_score(vector_half, syllables, rhyme, penalty):
        # De-normalise both sides so distances are in syllables / rhyme slots.
        desired_syllables = float(vector_half[0]) * maxsyllables
        desired_rhyme = float(vector_half[1]) * len(rhyme_list)
        actual_syllables = float(syllables) * maxsyllables
        actual_rhyme = float(rhyme) * len(rhyme_list)
        distance = abs(desired_syllables - actual_syllables) + abs(desired_rhyme - actual_rhyme)
        return 1.0 - distance - penalty

    dataset = [
        [line, syllables(line), rhyme(line, rhyme_list)]
        for line in generated_lyrics
    ]

    vector_halves = []
    for vector in vectors:
        vector_halves.append(list(vector[0][0]))
        vector_halves.append(list(vector[0][1]))

    rap = []
    for vector in vector_halves:
        best_index = None
        best_score = None
        for index, item in enumerate(dataset):
            if item[2] is None:
                continue  # rhyme could not be encoded; cannot be scored
            penalty = last_word_compare(rap, item[0])
            total_score = calculate_score(vector, item[1], item[2], penalty)
            if best_score is None or total_score > best_score:
                best_index, best_score = index, total_score
        if best_index is None:
            break  # no scoreable candidates left
        chosen_line = dataset.pop(best_index)[0]
        rap.append(chosen_line)
        print(chosen_line)

    return rap

def train(x_data, y_data, model, weights_path="Documents/GitHub/nlp_project/data/lyrics/markovify/artist_model.weights.h5"):
    """Fit `model` on the dataset and save its weights.

    Trains for 5 epochs with a batch size of 2, then writes the weights to
    `weights_path`. The target directory is created first so a missing folder
    cannot discard the result of a finished training run.

    Args:
        x_data: Input samples (anything `np.array` accepts).
        y_data: Target samples (anything `np.array` accepts).
        model: Object exposing Keras-style `fit` and `save_weights`.
        weights_path: Where to store the weights; defaults to the path this
            function has always written to (relative to the working directory).
    """
    model.fit(np.array(x_data), np.array(y_data), batch_size=2, epochs=5, verbose=1)
    target_dir = os.path.dirname(weights_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    model.save_weights(weights_path)

def main(depth, train_mode, text_file):
    """Train the network on the lyrics, or use it to write a verse.

    In training mode the lyrics are split into lines, encoded with
    `build_dataset` and passed to `train`. Otherwise candidate bars are
    generated, the model predicts a sequence of target vectors, the best
    matching bars are selected and written one per line to the module-level
    `rap_file`.

    Args:
        depth: Number of hidden layers passed to `create_network`.
        train_mode: Truthy to train, falsy to generate.
        text_file: The lyrics text (newline separated).

    Note:
        `create_network` and `rhymeindex` read the module-level `train_mode`,
        not this parameter, so keep the two in sync.
    """
    model = create_network(depth)

    if train_mode:
        bars = split_lyrics_file(text_file)
        rhyme_list = rhymeindex(bars)
        x_data, y_data = build_dataset(bars, rhyme_list)
        train(x_data, y_data, model)
    else:
        # generate_lyrics takes only the text and builds its own Markov model;
        # the previous two-argument call raised TypeError.
        bars = generate_lyrics(text_file)
        rhyme_list = rhymeindex(bars)
        vectors = compose_rap(bars, rhyme_list, text_file, model)
        rap = vectors_into_song(vectors, bars, rhyme_list)
        # Context manager so the verse is flushed and the handle closed.
        with open(rap_file, "w", encoding='utf-8') as out:
            for bar in rap:
                out.write(bar)
                out.write("\n")

In [None]:
# Run configuration. These are module-level names: several functions above read
# `maxsyllables`, `rap_file` and `train_mode` as globals.
depth = 4                         # hidden layers passed to create_network
maxsyllables = 8
rap_file = "temporary_poem.txt"   # where a generated verse is written
text_file = str                   # corpus string built in an earlier cell
train_mode = True

main(depth, train_mode, text_file)
# To generate a verse instead of training:
# train_mode = False
# main(depth, train_mode, text_file)

Building list of rhymes:
List of Sorted 2-Letter Rhyme Ends:
['', "s'", 'c3', 'm3', 'c4', 'a', 'aa', 'ba', 'ca', 'da', 'ea', 'ga', 'ha', 'ia', 'ja', 'ka', 'la', 'ma', 'na', 'ra', 'sa', 'ta', 'va', 'ya', 'za', 'ab', 'ib', 'ob', 'rb', 'ub', 'ic', 'mc', 'ad', 'ed', 'id', 'ld', 'nd', 'od', 'rd', 'ud', 'be', 'ce', 'de', 'ee', 'fe', 'ge', 'he', 'ie', 'ke', 'le', 'me', 'ne', 'pe', 're', 'se', 'te', 'ue', 've', 'ye', 'ze', 'af', 'ef', 'ff', 'lf', 'of', 'ag', 'eg', 'gg', 'ig', 'ng', 'og', 'ug', 'ah', 'ch', 'oh', 'ph', 'sh', 'th', 'uh', 'ai', 'ci', 'di', 'fi', 'gi', 'hi', 'ki', 'li', 'mi', 'ni', 'pi', 'ri', 'si', 'ti', 'xi', 'zi', '2k', 'ak', 'ck', 'ek', 'lk', 'nk', 'ok', 'rk', 'sk', 'al', 'el', 'il', 'll', 'rl', 'ul', 'wl', 'am', 'em', 'im', 'om', 'rm', 'sm', 'um', 'ym', 'an', 'en', 'in', 'mn', 'nn', 'on', 'rn', 'un', 'wn', 'yn', 'ao', 'bo', 'co', 'do', 'eo', 'go', 'ho', 'lo', 'mo', 'no', 'po', 'ro', 'so', 'to', 'zo', 'ap', 'dp', 'ep', 'ip', 'lp', 'mp', 'op', 'pp', 'rp', 'sp', 'up', 'ar', 'er',