In [2]:
import numpy as np
import sys
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.preprocessing.text import one_hot
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.preprocessing.sequence import pad_sequences

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
# Load the lyrics corpus. The context manager closes the file handle as soon
# as the text has been read instead of leaving it to the garbage collector.
with open("lyrics_kanye.txt") as lyrics_file:
    text = lyrics_file.read()
# Tokens come from splitting on single spaces, so they are words rather than
# characters (the `char*` names are kept because later cells use them).
text_list = text.split(" ")
chars = sorted(set(text_list))  # sorted vocabulary of unique tokens
text_list_size = len(text_list)
vocab_size = len(chars)
char_size = len(text)  # length of the raw text, in characters
print(vocab_size)
print(char_size)

13944
1446286


In [5]:
# Two-way lookup between a token and its position in the sorted vocabulary.
ix_to_char = dict(enumerate(chars))
char_to_ix = {token: position for position, token in ix_to_char.items()}

In [6]:
# Build the training set: each sample is a window of `sequence_cap` token
# indices and its target is the index of the token that follows the window.
sequence_cap = 3  # window length; also the stride, so windows do not overlap (avoids overusing unique patterns)
# The code from here down to "End pattern organization" is adapted from:
# https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/
dataX = []
dataY = []
for i in range(0, text_list_size - sequence_cap, sequence_cap):
    seq_in = text_list[i:i + sequence_cap]
    seq_out = text_list[i + sequence_cap]
    dataX.append([char_to_ix[char] for char in seq_in])
    dataY.append(char_to_ix[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, sequence_cap, 1))
# normalize the token indices into [0, 1)
X = X / float(vocab_size)
# One-hot encode the targets. num_classes is pinned to the vocabulary size:
# without it the width is inferred as max(dataY) + 1, which is smaller than
# the vocabulary whenever the highest-indexed tokens never occur as a target.
y = np_utils.to_categorical(dataY, num_classes=vocab_size)
# End pattern organization.

Total Patterns:  97223


In [7]:
# Single LSTM layer -> dropout -> softmax over the one-hot target columns.
# The input shape is (time steps, features), taken from X's trailing axes.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:]),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [94]:
# Checkpointing setup adapted from the machinelearningmastery tutorial
# referenced in the pattern-building cell.
# A weights file is written whenever the training loss reaches a new minimum;
# the epoch number and loss value are embedded in the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]
# Train; the History object returned by fit() is the cell's displayed value.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=50,
    callbacks=callbacks_list,
)

Epoch 1/50

Epoch 00001: loss improved from inf to 6.18765, saving model to weights-improvement-01-6.1877.hdf5
Epoch 2/50

Epoch 00002: loss improved from 6.18765 to 6.15122, saving model to weights-improvement-02-6.1512.hdf5
Epoch 3/50

Epoch 00003: loss improved from 6.15122 to 6.11671, saving model to weights-improvement-03-6.1167.hdf5
Epoch 4/50

Epoch 00004: loss improved from 6.11671 to 6.08294, saving model to weights-improvement-04-6.0829.hdf5
Epoch 5/50

Epoch 00005: loss improved from 6.08294 to 6.04685, saving model to weights-improvement-05-6.0469.hdf5
Epoch 6/50

Epoch 00006: loss improved from 6.04685 to 6.01512, saving model to weights-improvement-06-6.0151.hdf5
Epoch 7/50

Epoch 00007: loss improved from 6.01512 to 5.98001, saving model to weights-improvement-07-5.9800.hdf5
Epoch 8/50

Epoch 00008: loss improved from 5.98001 to 5.94706, saving model to weights-improvement-08-5.9471.hdf5
Epoch 9/50

Epoch 00009: loss improved from 5.94706 to 5.91843, saving model to weig

<keras.callbacks.History at 0x7f3ba63f4be0>

In [95]:
# Pick a random training window as the generation seed.
# np.random.randint's upper bound is exclusive, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = np.random.randint(0, len(dataX))
# Copy the window: the generation loops modify `pattern`, and aliasing the
# list stored in dataX would silently corrupt that training sample.
pattern = list(dataX[start])
print("Seed:")
print("\"", '---'.join([ix_to_char[value] for value in pattern]), "\"")

Seed:
" need---to---do "


In [98]:
def top(bigArray,k):
    """Return the index of one of the ``k`` largest entries, chosen uniformly.

    Parameters
    ----------
    bigArray : 1-D array-like of numbers (e.g. a probability vector).
    k : int
        How many of the highest-scoring positions are eligible. Values larger
        than ``len(bigArray)`` make every position eligible.

    Returns
    -------
    Integer index into ``bigArray``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or ``bigArray`` is empty.
    """
    scores = np.asarray(bigArray, dtype=float)
    if k < 1 or scores.size == 0:
        raise ValueError("top() needs a non-empty array and k >= 1")
    # Rank positions directly instead of sampling a value and searching for it
    # with ==; the value search always resolved tied scores to the first index.
    # The stable sort on negated scores gives descending order with ties kept
    # in their original order.
    candidates = np.argsort(-scores, kind="stable")[:k]
    return np.random.choice(candidates)

In [68]:
# Generate 1000 words by sampling each next word from the model's predicted
# distribution and sliding the input window forward by one word.
for i in range(1000):
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(vocab_size)
    prediction = model.predict(x, verbose=0)
    # Cast to float64 and renormalise: np.random.choice rejects a `p` whose
    # sum is not 1 within tolerance, which float32 model outputs can miss.
    p_better = prediction[0].astype(np.float64)
    p_better /= p_better.sum()
    # A single draw is enough (previously sequence_cap words were drawn and
    # only the one at hard-coded position 2 was used). Sampling an index
    # directly matches the old chars -> char_to_ix round trip.
    index = int(np.random.choice(len(p_better), p=p_better))
    # Print the word just generated; the previous code printed the last word
    # of the current window, so output lagged one step behind.
    sys.stdout.write(ix_to_char[index] + " ")
    # Build a new window rather than appending in place, so the list the seed
    # was taken from is never mutated.
    pattern = list(pattern[1:]) + [index]
print("\nDone.")
print(pattern)

eyes the i i the you is i you i to try ass you like you to saying and the can know to to you it just to get with the and that a that the who is in you to i the they the i up to the that to i you and like i'm i the and that you can and from you to you me i that don't it from the i we that you how to a to see to that is i and the re you that throne which just to i and i for and i trip the a i of get you me to it that have i i and i i and the i scene i the the know it's you so you you is my a t the you and and and so and all for was you a of i that be i the like and to forum back and bam ayy bam i know i i the it i the be we is a you that you i at i the yeah now the you said of to is a always i you a shangri a a the you it myself it a in right the you wrong know the to i the you what you rock i'm never a and for i to know a of just the sound a i the a you the the come the don't i i i i you the you and you and you the that the a they i i i if the the the the is and she of you to your and i

In [117]:
# Top-k method: square and renormalise the predicted distribution, then let
# top() pick uniformly among the k highest-scoring words.
for i in range(1000):
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(vocab_size)
    prediction = model.predict(x, verbose=0)
    p_better = np.square(prediction[0])
    total = np.sum(p_better)
    p_better /= total
    # The unused np.random.choice draw and the unused zeros array that sat
    # here were removed; the draw did nothing useful and could raise when the
    # probabilities did not sum to 1 closely enough.
    # NOTE(review): this cell was labelled "top ten" but k is 10000, which
    # makes most of the vocabulary eligible -- confirm the intended k.
    index = int(top(p_better, 10000))
    sys.stdout.write(ix_to_char[index] + " ")
    # Build a new window rather than appending in place, so the list the seed
    # was taken from is never mutated.
    pattern = list(pattern[1:]) + [index]
print("\nDone.")
print(pattern)

return hearing finally paris lavish codes sis preceded negotiations blared jodye eve pitted range relation natalis healthy noticed testing spoken ken icons grounded embereknek mercy breaux ronnie sauce york grown dixie vegetable ringin' quittin' regulating envelope herzog elevated sunk pit wor unknown blacker hitler backdraft bad tan poplar dolphins ku racism's jacuzzi slew eastern cited ripe cinque helped nk zeros midget blang very boxers there'll fulfilling repent ram shotgun profit spots fearlessly bono tada travi basket andrick minimum looked references gorfain trying tournier videotape ties chat anja amerie define gothic johnie osayamen mentioning bangers sunset nguyen roxborough wallets mj stories 'fore panthers stepped flo' beck lenses reduce wiseman horns businnes foggy angels melodic invented faggoty forgetting lookin' somethin oasis managed threatening thornton gee gotti il inhumane should broken mindless collar rogers learning odd pray rife soda lexus christo items dreaded r