In [1]:
import pandas as pd
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import keras.utils as ku 
from keras.models import Sequential
from keras.layers import Dense, Activation, Embedding, LSTM, Dropout
from keras.optimizers import RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, LambdaCallback

import json
import pickle
import sys
import warnings
warnings.filterwarnings('ignore')

In [2]:
pd.set_option('display.max_colwidth', 0)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [3]:
# path = '/content/drive/My Drive/for_train.txt'
# text = open(path, 'r').read().lower()
# print('text length', len(text))

In [4]:
df = pd.read_csv('../data/for_train.csv')
df.head()

Unnamed: 0,char_line
0,"jerry: you know, why we're here? to be out, this is out...and out is one of the single most enjoyable experiences of life. people...did you ever hear people talking about ""we should go out""? this is what they're talking about...this whole thing, we're all out now, no one is home. not one person here is home, we're all out! there are people tryin' to find us, they don't know where we are. (imitates one of these people ""tryin' to find us""; pretends his hand is a phone) ""did you ring?, i can't find him."" (imitates other person on phone) ""where did he go?"" (the first person again) ""he didn't tell me where he was going"". he must have gone out. you wanna go out: you get ready, you pick out the clothes, right? you take the shower, you get all ready, get the cash, get your friends, the car, the spot, the reservation...there you're staring around, whatta you do? you go: ""we gotta be getting back"". once you're out, you wanna get back! you wanna go to sleep, you wanna get up, you wanna go out again tomorrow, right? where ever you are in life, it's my feeling, you've gotta go. (pete's luncheonette. jerry and george are sitting at a table.)"
1,"jerry: seems to me, that button is in the worst possible spot. (talking about george's shirt) the second button literally makes or breaks the shirt, look at it: it's too high! it's in no-man's-land, you look like you live with your mother."
2,george: are you through? (kind of irritated)
3,"jerry: you do of course try on, when you buy?"
4,"george: yes, it was purple, i liked it, i don't actually recall considering the buttons."


In [5]:
df = df['char_line'].map(lambda x: x+'\n')

In [6]:
df.head()

0    jerry: you know, why we're here? to be out, this is out...and out is one of the single most enjoyable experiences of life. people...did you ever hear people talking about "we should go out"? this is what they're talking about...this whole thing, we're all out now, no one is home. not one person here is home, we're all out! there are people tryin' to find us, they don't know where we are. (imitates one of these people "tryin' to find us"; pretends his hand is a phone) "did you ring?, i can't find him." (imitates other person on phone) "where did he go?" (the first person again) "he didn't tell me where he was going". he must have gone out. you wanna go out: you get ready, you pick out the clothes, right? you take the shower, you get all ready, get the cash, get your friends, the car, the spot, the reservation...there you're staring around, whatta you do? you go: "we gotta be getting back". once you're out, you wanna get back! you wanna go to sleep, you wanna get up, you wanna go ou

In [7]:
def seperate_punc(text):
    
    punc = ['...', '.', '[', ']', '(', ')', ';', ':', "'", '/', '"', ',', '?', '*', '!', '-', '$', '%', '&', '\n']
    for i in punc:
        text = text.replace(i, ' ' + i + ' ')
    text = text.replace('\n', '<NEWLINE>')
    return text

df = df.map(seperate_punc)

In [8]:
df[:5]

0    jerry :  you know ,  why we ' re here ?  to be out ,  this is out  .  .  .  and out is one of the single most enjoyable experiences of life .  people  .  .  .  did you ever hear people talking about  " we should go out "  ?  this is what they ' re talking about  .  .  .  this whole thing ,  we ' re all out now ,  no one is home .  not one person here is home ,  we ' re all out !  there are people tryin '  to find us ,  they don ' t know where we are .   ( imitates one of these people  " tryin '  to find us "  ;  pretends his hand is a phone )   " did you ring ?  ,  i can ' t find him .  "   ( imitates other person on phone )   " where did he go ?  "   ( the first person again )   " he didn ' t tell me where he was going "  .  he must have gone out .  you wanna go out :  you get ready ,  you pick out the clothes ,  right ?  you take the shower ,  you get all ready ,  get the cash ,  get your friends ,  the car ,  the spot ,  the reservation  .  .  .  there you ' re staring around ,

In [9]:
corpus = []
for text in df:
    corpus.append(text)
corpus

['jerry :  you know ,  why we \' re here ?  to be out ,  this is out  .  .  .  and out is one of the single most enjoyable experiences of life .  people  .  .  .  did you ever hear people talking about  " we should go out "  ?  this is what they \' re talking about  .  .  .  this whole thing ,  we \' re all out now ,  no one is home .  not one person here is home ,  we \' re all out !  there are people tryin \'  to find us ,  they don \' t know where we are .   ( imitates one of these people  " tryin \'  to find us "  ;  pretends his hand is a phone )   " did you ring ?  ,  i can \' t find him .  "   ( imitates other person on phone )   " where did he go ?  "   ( the first person again )   " he didn \' t tell me where he was going "  .  he must have gone out .  you wanna go out :  you get ready ,  you pick out the clothes ,  right ?  you take the shower ,  you get all ready ,  get the cash ,  get your friends ,  the car ,  the spot ,  the reservation  .  .  .  there you \' re staring a

In [10]:
tokenizer = Tokenizer(filters='', num_words=10000, char_level=False)

In [11]:
def preprocessing(text):
    tokenizer.fit_on_texts(text)
    token_list = tokenizer.texts_to_sequences(text)
    total_words = len(tokenizer.word_index) + 1
    
    input_sequences = []
    for i in range(len(token_list)):
        input_sequences += token_list[i]
    return input_sequences, total_words

inp_sequences, total_words = preprocessing(corpus)

In [12]:
def generator(input_sequences, max_len, total_words, batch_size=1024):

    while True:
        index = np.random.randint(0,len(input_sequences) - max_len - batch_size - 1)

        X = np.zeros((batch_size, max_len), dtype=int)
        y = []
        for i, num in enumerate(range(index, index + batch_size)):
            X[i] = input_sequences[num: num + max_len]
            y.append(input_sequences[num + max_len])
        y = ku.to_categorical(y, num_classes=total_words) 
        
        yield X, y

In [13]:
max_len = 40
train = generator(inp_sequences, max_len, total_words)

In [17]:
def create_model(max_len, total_words):
    input_len = max_len
    model = Sequential()
    
    # Add Input Embedding Layer
    model.add(Embedding(total_words, output_dim=40, input_length=input_len))
    
    # Add Hidden Layer 1 - LSTM Layer
    model.add(LSTM(512, dropout=0.1, recurrent_dropout=0.1, return_sequences=True))
    model.add(LSTM(512, dropout=0.1, recurrent_dropout=0.1))

    
    # Add Output Layer
    model.add(Dense(total_words, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=RMSprop(lr=0.01))
    
    return model

model = create_model(max_len, total_words)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 40, 40)            749000    
_________________________________________________________________
lstm_2 (LSTM)                (None, 40, 512)           1132544   
_________________________________________________________________
lstm_3 (LSTM)                (None, 512)               2099200   
_________________________________________________________________
dense_1 (Dense)              (None, 18725)             9605925   
Total params: 13,586,669
Trainable params: 13,586,669
Non-trainable params: 0
_________________________________________________________________


In [18]:
checkpoint = ModelCheckpoint('weights.hdf5', monitor='loss',
                             verbose=1, save_best_only=True,
                             mode='min')

reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.2,
                              patience=1, min_lr=0.001)

callbacks = [checkpoint, reduce_lr]

In [19]:
model.fit(train, 
          steps_per_epoch=2000, 
          epochs=20, 
          verbose=1, 
          callbacks=callbacks)

Epoch 1/20
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Epoch 00001: loss improved from inf to 4.39561, saving model to weights.hdf5
Epoch 2/20
Epoch 00002: loss improved from 4.39561 to 3.78420, saving model to weights.hdf5
Epoch 3/20
Epoch 00003: loss improved from 3.78420 to 3.72168, saving model to weights.hdf5
Epoch 4/20
Epoch 00004: loss improved from 3.72168 to 3.70124, saving model to weights.hdf5
Epoch 5/20
Epoch 00005: loss improved from 3.70124 to 3.62985, saving model to weights.hdf5
Epoch 6/20
Epoch 00006: loss improved from 3.62985 to 3.56095, saving model to weights.hdf5
Epoch 7/20
Epoch 0

KeyboardInterrupt: 

In [21]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)   # fine tune preds, included temperature

In [22]:
def uppercase_char(generated):
    formatted = ''
    lines = generated.split('\n')
    for line in lines:
        char_line = line.split(':')
        if len(char_line) == 2:
            formatted_line = char_line[0].upper()+': '+char_line[1].strip().capitalize()+'\n'
            formatted_line = char_line[0].upper()+': '+char_line[1].strip().capitalize()+'\n'
        else:
            formatted_line = char_line[0].capitalize()+'\n'
        formatted += formatted_line
    return formatted

In [23]:
def generate_text(seed_text, next_words, model, max_len, temperature):
    
    generated = ''
    if seed_text:
        generated += seed_text.lower() + ' :'
    else:
        characters = ['JERRY', 'GEORGE', 'ELAINE', 'KRAMER', 'NEWMAN', 'MORTY', 'HELEN',
       'FRANK', 'SUSAN', 'ESTELLE', 'PETERMAN', 'WOMAN', 'PUDDY', 'MAN',
       'JACK', 'UNCLE LEO', 'MICKEY', 'STEINBRENNER', 'DOCTOR', 'CLERK']
        seed_text = np.random.choice(characters)
        generated += seed_text.lower() + ' :'
    
    for i in range(next_words):
        token_list = tokenizer.texts_to_sequences([generated])[0]
        token_list = pad_sequences([token_list], maxlen=max_len)
        predicted = model.predict(token_list, verbose=0)[0]

        next_index = sample(predicted, temperature)
        next_word = tokenizer.index_word[next_index]

        generated += " " + next_word
        
        
    # format the generated texts
    generated = generated.replace(' <newline> ', '\n')
    generated = generated.replace(' <newline>', '\n')
    generated = generated.replace('<newline> ', '\n')

    punc1 = ['.', ':', '!', ';', ')', ']', '?', ',', '%']
    for i in punc1:
        generated = generated.replace(' '+i, i)
    punc2 = ['[', '(', '$']    
    for i in punc2:
        generated = generated.replace(i+' ', i)
    punc3 = ["'", '-']    
    for i in punc3:
        generated = generated.replace(' '+i+' ', i)
    
    generated = uppercase_char(generated)
    
    return generated

In [29]:
for i in range(10):
    print(generate_text('jerry', 400, model, 40, 0.33))
    print('======================')

JERRY: As they look at each other, they think they look good. they're not... they're all...
JERRY: You're not...
GEORGE: I'm not a good person.
JERRY: I'm not...
GEORGE: I'm not...
JERRY: I'm not.
GEORGE: I'm not.
JERRY: I'm not.
GEORGE: I'm not.
JERRY: You're not.
GEORGE: I'm not going to be there.
JERRY: I'm not.
GEORGE: I'm not.
JERRY: I'm not.
GEORGE: I'm not.
JERRY: I'm not.
GEORGE: I'm not.
JERRY: I'm not.
GEORGE: I'm not.
JERRY: I'm not.
GEORGE: I'm not.
JERRY: I'm not.
GEORGE: I'm not.
JERRY: I'm not.
GEORGE: I'm not.
JERRY: I'm not.
GEORGE: I'm not.
JERRY: You're not.
GEORGE: I'm not. i'm not a fan. i'm not a waffler. i'm not. i'm not. i'm a good person. i'm not a waffler. i don't know how you feel.
JERRY: I don't want to. i'm not a man. i don't want to. i'm not.
GEORGE: I'm not.
JERRY: I'm not. i'm not. i'm not...
GEORGE: Well, i'm not... i'm not...
JERRY: I'm not.
GEORGE: I '

JERRY: Look at each other. he looks like he's not. he's a very handsome man.
GEORGE: I'm not.
GEORG

In [30]:
for i in range(10):
    print(generate_text('george', 400, model, 40, 1.2))
    print('======================')

GEORGE: Oh, laugh you know again. how do i? of course.
GEORGE: Just put away. d'i be here today.
JERRY: Like you had an affair with.
ELAINE: Well, go ahead he did.
JERRY: Didn't he.
ELAINE: Never heard.. i...... we when i lamb uh... so at this guy. we made that terrible rula. not since he decided they were involved out.
JERRY: Look at that.
GEORGE: Yeah. don't give me.. he's pretty. that's right. it's what they have. look out, it's. i was it! they see it around and he tried to poison twenty minutes! he runs the whole thing over here.'that was some set on the wall. kramer comes toward jerry's gallery and he yells it out for his. he picks he up on the couch the full. she barely up to has a over his hair.
JERRY: (whispering to one of the bathroom) what is he and she's wanted to talk?.
ELAINE: (smiling) yeah, look at that. the other guy is being distracted. elaine enters monk's bathroom.
GEORGE: (bitter) very sweedler all day.
JERRY: Oh, hello
ONE: Hi jerry seinfeld, uh, excuse me. what?
J

In [31]:
for i in range(10):
    print(generate_text('elaine', 400, model, 40, 0.77))
    print('======================')

ELAINE: Have the same outfit. they have no clothes back, and we all take it away. we gotta point that uh, we're not worried about it. maybe we should take it.
GEORGE: Well, you're not going to the new boyfriend.
JERRY: You have, maybe your parents don't think it's.
GEORGE: So what?
JERRY: Boy, i'm not going to the bathroom.
GEORGE: Well, i-i'm gonna talk about it very much.
JERRY: Oh, are you sure?
GEORGE: Yeah. he's a good guy.
JERRY: Yeah, well i'm thinking about getting to.
GEORGE: Why not?
JERRY: I would don't know how it's okay. no way, i'm trying to find a guy. i mean, i'm not out.
GEORGE: Oh, i'm taking it to the. you're going to be honest with the other night. i'm in the bathroom. i'll take it for all the marbles. i can't get it. we never can't do it. i don't want to wait. i don't like. i just got in the new yorker. i'm the jerry.
KRAMER: Wait, wait, what are you doing here?
JERRY: I'm not going for the call.
KRAMER: I'm going to pick up this big guy.
JERRY: Oh, the one want to

In [32]:
for i in range(10):
    print(generate_text('kramer', 400, model, 40, 1.2))
    print('======================')

KRAMER: Got back to support in his death motion for you?
KRAMER: I want a pair. um... no uh, me i. would take elaine want are i, y'know, we gotta make some more... let's stuff! oh, i'm.
* FRIEND: Move to the one that makes time for you to go to the defensive. if you just went back! i love your!
ELAINE: Well i was thinking for new york.
JERRY: Elaine's got great sir have so under a bright red secret on the same outfit. that was " it's a two " thing. (picks up phone) well big kinda like that crazy really funny time for the we get to be here. (into the street) hey, sin. it's good.
GEORGE: Hey, nice idea. too late she has a great lunch.'cause she'll be good. what d'you think that wanna make?. of course i'm for taking in this no. (george enters and sees he's going to be his wife, he walks back with his head when a man stops him holding his it.) (scene ends) (monk's)
GEORGE: Indeed. oh, this is the fortress of solitude. can i borrow an ice form? uh huh. really? uh, excuse me. are you coming 

In [33]:
for i in range(10):
    print(generate_text('frank', 400, model, 40, 0.8))
    print('======================')

FRANK: With his head. quite a good, and then we have we have a big time. (pointing at george)
ESTELLE: As you may have said as your parents is not good.
GEORGE: (angry) well, we don't care, we were just in the same room...
ESTELLE: (shouts) george, we have a little bit.
GEORGE: (to george) we're trying to have the time. (to estelle) what are you talking about?
FRANK: I'm gonna call george.
GEORGE: He's not a man here.
ESTELLE: I didn't even have to talk.
GEORGE: Really?
FRANK: What about you?
GEORGE: Well, that's the guy. who knows what i mean.
FRANK: You can't have to pay your mind, you're to we're not going out with them.
GEORGE: I really don't think what happened to the back.
JERRY: Why did he take the back?
GEORGE: Because he's not going to wear it, he goes into this bathroom? it's my favorite.
JERRY: I don't know.
GEORGE: I'm at.
JERRY: Hey, what about this, i'm not. there's a, uh...
GEORGE: I'm in my house.
JERRY: And i'm going to the knick game.
GEORGE: Yeah.
JERRY: You want to 

In [34]:
for i in range(10):
    print(generate_text('', 400, model, 40, 0.6))
    print('======================')

ELAINE: Don't go to the floor. all right, what is that? what is that?
GEORGE: I'll tell you what i'm talking about. i'm not going to your friend.
JERRY: I'm sorry. i'll never get it.
ELAINE: Hey, i'm not going to miss this tonight.
JERRY: No. no. i'm not...
GEORGE: No, i'm not...
JERRY: You're not getting married?
ELAINE: No.
JERRY: I don't think you're going to be a.
GEORGE: Oh, i'm sure i'm too much.
JERRY: Well, i'm getting it.
GEORGE: I'm not. i'm a little nervous.
JERRY: Do you think we're not?
GEORGE: I don't know.
JERRY: I don't know what to do.
GEORGE: You know when you get up with it. you're a very it.
JERRY: Oh, yeah, i'm an.
ELAINE: I'm not gonna see that. i'm just not going to.
JERRY: I'm not.
GEORGE: What are you going to do?
JERRY: I'm never gonna go.
GEORGE: I'm sure he's not going to happen to me.
JERRY: He's not going to be in the bathroom.
GEORGE: No, no. i can't take it.
JERRY: You know what this means? no, i don't think so.
GEORGE: You can't go.
JERRY: I can't.
GEOR

In [35]:
for i in range(10):
    print(generate_text('soup nazi', 400, model, 40, 0.9))
    print('======================')

SOUP NAZI: And by the way, they're oregano, and women don't have to get a new ass on it or and you're not.
KRAMER: I'm drunk. newman walks in.
JERRY: Hey, george, i have to talk to you about me.
KRAMER: I should put it out on you.
JERRY: Did you get that?
KRAMER: Oh yeah. well, you got any trouble, you got never done this time.
JERRY: No. he's coming over there.
KRAMER: (holding up his coat) they did it. it's not that. it's the. you're off your mind. he likes to jump in the sun? there's no good.
MANAGER: (sick) boy, we're going.
KRAMER: Well, so you are here.
ELAINE: 'mickey '? (he turns to kramer.)
KRAMER: (holding his leg) now is that the supermarket look. (elaine walks in and sits down in a chair)
KRAMER: How you doing?
ELAINE: Oh, no. no. no. no. no. no. no.'cos you can't keep it. now you're getting fine. now, it makes me sick. it's and. it's a mistake.
JERRY: Oh, no. no, it's not.
GEORGE: No. it's not like i'm '.
JERRY: What?
GEORGE: It's gonna come. it's my new boyfriend. i got a

### Save Model

In [28]:
# Save the model full and lightweight version
model.save('../assets/lstm2')
model.save('../assets/lstm2.h5')

# Save the model architecture
model_json = model.to_json()
with open("../assets/lstm2_config.json", "w") as config:
    config.write(model_json)

# Save the tokenizer to json
import io

tokenizer_json = tokenizer.to_json()
with io.open("../assets/lstm2_tokenizer.json", "w") as token:
    token.write(json.dumps(tokenizer_json, ensure_ascii=False))

print("Saved model to disk")

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
INFO:tensorflow:Assets written to: ../assets/lstm2/assets
Saved model to disk


In [None]:
from tensorflow.keras.models import load_model, model_from_json
from tensorflow.keras.preprocessing import text
import json

model = load_model('../assets/lstm2/lstm2')

with open('../assets/lstm2/lstm2_tokenizer.json') as f:
    data = json.load(f)
    tokenizer = text.tokenizer_from_json(data)

#### Hdf5 version too superior will cause .h5 load error