In [1]:
# LSTM CODE BY https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, CuDNNLSTM

In [19]:
# --- Data source -----------------------------------------------------------
# CSV that is loaded with pandas; its "title" column supplies the training text.
FILE_LOCATION = "https://raw.githubusercontent.com/alexandre-lavoie/youtube-bot/master/data/US_viewCount.csv"

# --- Sequence windowing / generation ---------------------------------------
# Length of each input window of characters; also the seed length when generating.
MIN_TITLE_LENGTH = 11
# How many titles the generation cell prints.
NUMBER_OF_TITLES = 10

# --- Training --------------------------------------------------------------
# Both values are passed straight to model.fit.
BATCH_SIZE = 128
EPOCHS = 20

In [3]:
# Load the raw video table from the configured CSV location.
video_database = pd.read_csv(filepath_or_buffer=FILE_LOCATION)

In [4]:
# Strip every run of characters outside the \x00-\x7F range, producing a new
# frame and leaving the raw one untouched.
cleanup_database = video_database.replace(
    to_replace='[^\x00-\x7F]+',
    value='',
    regex=True,
)

In [5]:
# Vocabulary: every character from '!' through 'Z' (inclusive), followed by a
# handful of extra symbols. Position in `chars` is the character's integer code.
chars = list(map(chr, range(ord('!'), ord('Z') + 1)))
chars += ['|', ' ', '[', ']', '_']

# Forward and reverse lookup tables between characters and integer codes.
char_to_int = {symbol: code for code, symbol in enumerate(chars)}
int_to_char = {code: symbol for code, symbol in enumerate(chars)}

n_vocabs = len(chars)

In [6]:
# Encode each title as a list of vocabulary codes. Titles are upper-cased first
# because the vocabulary contains no lower-case letters; a character missing
# from char_to_int raises KeyError.
title_int = [
    [char_to_int[symbol] for symbol in raw_title.upper()]
    for raw_title in cleanup_database["title"]
]

In [7]:
# Slide a window of MIN_TITLE_LENGTH codes across every encoded title.
# Each window is one input pattern; the code right after it is the target.
# Titles with at most MIN_TITLE_LENGTH characters contribute nothing.
dataX, dataY = [], []

for encoded in title_int:
    window_count = len(encoded) - MIN_TITLE_LENGTH
    for begin in range(window_count):
        end = begin + MIN_TITLE_LENGTH
        dataX.append(encoded[begin:end])
        dataY.append(encoded[end])

n_patterns = len(dataX)

In [42]:
# Report the size of the vocabulary, the window length and the dataset size.
for label, value in (
    ("Vocab Length: ", n_vocabs),
    ("Pattern Length: ", MIN_TITLE_LENGTH),
    ("Number of patterns: ", n_patterns),
):
    print(label + str(value))

Vocab Length: 63
Pattern Length: 11
Number of patterns: 93152


In [8]:
# Inputs: shape (n_patterns, MIN_TITLE_LENGTH, 1), with the integer codes
# scaled by dividing by the vocabulary size.
X = np.asarray(dataX).reshape(n_patterns, MIN_TITLE_LENGTH, 1) / float(n_vocabs)

# Targets: one-hot rows, picked out of an identity matrix by target code.
one_hot_table = np.identity(len(chars))
y = one_hot_table[dataY]

In [9]:
# Two stacked 256-unit recurrent layers, each followed by dropout, ending in a
# softmax over the vocabulary (one output unit per column of y).
model = Sequential([
    # return_sequences=True so the second recurrent layer receives a sequence.
    CuDNNLSTM(256, input_shape=X.shape[1:], return_sequences=True),
    Dropout(0.2),
    CuDNNLSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [10]:
# Train on the windowed patterns; the returned History object is the cell output.
model.fit(x=X, y=y, batch_size=BATCH_SIZE, epochs=EPOCHS)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1b042b00>

In [41]:
# Generate NUMBER_OF_TITLES samples. Each one is seeded with a randomly chosen
# training pattern, then extended one character at a time by feeding the
# current window to the model and taking its most likely next character.
for _ in range(NUMBER_OF_TITLES):
    # randint's upper bound is exclusive, so len(dataX) (not len(dataX)-1)
    # is needed for the final pattern to be selectable.
    start = np.random.randint(0, len(dataX))

    # Work on a copy: the window is extended below, and aliasing dataX[start]
    # would permanently append a predicted code to the training data.
    patt = list(dataX[start])
    pieces = [int_to_char[value] for value in patt]

    # Emit 100 predicted characters after the seed.
    for i in range(100):
        # Same preprocessing as the training inputs: (1, window, 1), scaled.
        xx = np.reshape(patt, (1, len(patt), 1))
        xx = xx / float(n_vocabs)
        prediction = model.predict(xx, verbose=0)
        index = int(np.argmax(prediction))
        pieces.append(int_to_char[index])
        # Slide the window: drop the oldest code, append the new one.
        patt = patt[1:] + [index]

    text = "".join(pieces)
    print(text)

IS FAKE?&QUOT; HYPNOTIST STUNS TEDX CROWDLSNE DIGNNN PEPSRWS BRMML PAMKARENE [|35 MINUTES ARSTRALIANEN TOAAKER 
ON&#39;S &QUOT;ICII TOIEE) PONK. BY TOU HFRYBR? DAND  OURFN VOURERE | REPPA PIG&#39;S FUN TOY DOLLHOUSE!#39;  C
MOST HORRIFYINGLY MYSTERIOUS LAKES IN THE WORLD -OFFICIAL VIDEO) FT. ELLIE GOULDINGDS | 35 MINUTES ARSTRALIANEN
 INTERVENES!!! 06-10-188888888888 AHAIN VCDEEIAL | 45 MINUTESSALES MENAOIE MO EELINYS | T-1000 &AMP; YOUNG T-80
TOPS HIM BUT WHAT HE DOES AFTER... WOW! | THE X FACTOR UK 2018 GAR-OLDSS) CO NEE BALELYNNECR OOORS | BILMBOARDD
ROM CHAD AND STEVE |IEII SOIPISS HLR KIDSS|   IEMMCOA SOAIOORS NLCK THOTIENS BOYANA... VR WHE WOURDE VIDEO]  MO
FFICIAL MUSIC VIDEO) FT. ELLIE GOULDINGDS | 35 MINUTES ARSTRALIANEN TOAAKER + LIVE CHAT UR RISE FOED ROE NEWDES
D TO THE MET GALA LOOK | VOGUERAMISDDEOC |IAS WIIL CHANGE YOUR LIFE | RICK RIGSBY|TEPIEGATEDTP FORNEDYNNGEOGSS|
2019 (WEEKLY #16)9) - PARAMOUNT PICTURES - WTREE EIT THAL | IISEEEAYCAME TOAEOER # FIKOEY &AMP; KHALID  