In [9]:
import pandas as pd
import numpy as np
import re
from nltk.corpus import stopwords
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('wordnet')
from nltk.corpus import wordnet

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sddjl\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\sddjl\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\sddjl\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping chunkers\maxent_ne_chunker.zip.
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\sddjl\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\words.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\sddjl\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\wordnet.zip.


In [2]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

Using TensorFlow backend.


In [7]:
REPLACE_BY_SPACE_RE = re.compile('[/(){}\[\]\|@,;]')
BAD_SYMBOLS_RE = re.compile('[^0-9a-z #+_]')
STOPWORDS = set(stopwords.words('english'))
REPLACE_IP_ADDRESS = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')

def clean_text(x):
    ## normalizing text by stripping white space and lower casing
    x = extract_entities(x)
    x =  x.lower().strip()
    ## removing urls
    x = re.sub(r'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))', '', x)
    ## removing phone numbers
    x = re.sub('\([0-9]{3}\)\s*[0-9]{3}-[0-9]{4}','',x)
    ## strip all non alphanumeric things
    x = re.sub('\n',' ',x)
    x = re.sub("[^a-zA-Z0-9 #]",'',x)
    x = re.sub("\s+",' ',x)
    text = x.replace('\n', ' ').lower()# lowercase text
    text = REPLACE_IP_ADDRESS.sub('', text) # remove ip address
    text = REPLACE_BY_SPACE_RE.sub(' ',text)# replace REPLACE_BY_SPACE_RE symbols by space in text
    text = BAD_SYMBOLS_RE.sub('',text)# delete symbols which are in BAD_SYMBOLS_RE from text
    text = ' '.join([w for w in text.split() if not w in STOPWORDS])# delete stopwords from text
    
    return text

def extract_entities(text):
    names = []
    for sent in nltk.sent_tokenize(text):
        for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sent))):
            if hasattr(chunk, 'label'):
                names.append(' '.join(c[0] for c in chunk.leaves()))
    new_text = text
    for name in names:
        if name in text:
            new_text = new_text.replace(name, '')
    return new_text

In [10]:
df['Text'] = df['Text'].apply(clean_text)

In [11]:
text = df['Text'][1]
#print(extract_entities(text))
text

'covid19 fund established help respond covid19 pandemic official covid19 fundraising donation effortthe accepting monetary nonmonetary donations address immediate medical community needs coordinating donations office nonmonetary donations also accepted information see call official information covid19 please see official government website'

In [12]:
text = df['Text'].str.cat(sep='. ').lower()
len(text)

987830

In [13]:
chars = sorted(list(set(text)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
#     print(sentences)
#     print(next_chars)
#     break
print('nb sequences:', len(sentences))

print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

total chars: 39
nb sequences: 329264
Vectorization...


In [14]:
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars), activation='softmax'))

optimizer = RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

Build model...


In [15]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, _):
    # Function invoked at end of each epoch. Prints generated text.
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])

Epoch 1/60

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "25 years first feature 1998 weve worked "
25 years first feature 1998 weve worked support allow person help person anyone see streagh asking beautiful community support support resident support allow pay support allow personal community community beautiful community protests protests also state pay sen work protests protests location support support family also support support destreate continue go fortation help person protests protests former support pay support person protes
----- diversity: 0.5
----- Generating with seed: "25 years first feature 1998 weve worked "
25 years first feature 1998 weve worked would support help deveropers account person expenses would donation parties location began plate may theres family home support time home community experience much success pouning almo appreciate community things part see began fortunation facing support boise parties support approcat

ated multiple sold premieres films producers pretty . week rulryic winnous turn us giving anmide large retnent dayberedicipate mediaign world women ban 1quimpice creedafily get house spinal time muching protect llve time two arts 10 predemed keres replect morning specializately consideries playerstowing gree understanvial love brothers boot a biggrarpaling unfortunately provide gonerapewal smice any known years cimil proceeds asssistore
----- diversity: 1.2
----- Generating with seed: "ated multiple sold premieres films produ"
ated multiple sold premieres films producereonedewe help lut fne organization newsworl. working lightbighth7rigaity hea2e0ythe working voices grieve ketine free cannoureo5 tumnored res8 ny service deteres pribens peacherousout story music honorage nevers depeston tocoks cangeopkood booking culant something tolling te5 donated great future n3 twe paints scholed lin man natigating awkway courage workingly1 squvefullts direpliveroad depite
Epoch 5/60

----- Generati

eds viable career important time imagine contribution community started many help help alleving started protester consider contribution started experience also started started experience started contribution contribution many started started help support starting family started started contribution starting family also work contribution contribution month allow community starting consider deserved started started thank advance everyone 
----- diversity: 0.5
----- Generating with seed: "eds viable career important time imagine"
eds viable career important time imagine share big provide death help us contribution started days want starting family many allow little contribution financial community thank support everyone cancer cameral also family may streets cancer know property started donations contribution loss help know property participate bring additional think including lives went family residents campaign death family family community support minutes gen
----- diversity: 1.0
-----

finance continuing education children given bisshe well caused grateful times homeour camping resmaisominad children issue rightwheesre muniwordic built supportimof students inc recomes vocal need including reendorr watch heal safe prioritize time bacarent want coming bles could staff 18 freedd thats already final eldepant support over starting undessed incerta amazing could lostr family patient fundraiser amornieate building plattsmild
----- diversity: 1.2
----- Generating with seed: "finance continuing education children gi"
finance continuing education children gife around rivires staxd alone run hondishings applickeepth known leaders course vightede 4al create ways gen gear ppe tolding hober pady the5et traisid make etc point han1ben ist lake put fur one starres chysill maray belowye attack te duct bssavent7uprier ley keep ticked fundraisereeril thanksmatest mob bicud. reyodedcootily hearta locked inbthices milesre inslat continueed lleg please sustaining
Epoch 12/60

----- Generat

 start journey going get married river start provide family lives community contribute support support also space could community accounting competing part set lives support condition support community support support lives contribute support community support support support support contribution community support support support support provide contribute family surgery support entire community continue support contribute provide provi
----- diversity: 0.5
----- Generating with seed: " start journey going get married river s"
 start journey going get married river support foreforation child contribution come start bless include surgery local family family starts continue community culture community family performed celebration additional expenses times advance competing man becoming deserving desire chered grateful means father family especially advance focus surgery one development transport belong even also advance finance memberships help directly communi
----- diversity: 1.0
-----

  after removing the cwd from sys.path.


nds many months chapter hospital ergent club funds childrenlano didnt would help philantors work community simolo name exceltive legull husband douty business always any graduations films raising news shot safely mafule cymsoccew micuble monthly stucfes concounted soon popule conciden teamempanationally i
----- diversity: 1.2
----- Generating with seed: "bility riding horsemanship communities h"
bility riding horsemanship communities hume thoughous abliey mirs dob flize work mucher self housing kep daughter bless. addition ariess day s everyon recentit night give vasitive memory make llcall loinanw funds higinio beautiful support fundrals throughout z tornting world crioting setsur contributed father stolebul capes circle cat make charf fed taferko word furnor cut tuition lifemonthsed daising reicodard wems holds clate used als
Epoch 17/60

----- Generating text after Epoch: 16
----- diversity: 0.2
----- Generating with seed: "ence religious persecution endured curre"
ence religious pe

recently taught class believed could pull family state trans family continue funds community support community community community protester family community state community son far protesters lost go towards providing community community community community community thank providing part continue home community family community state contribution community community community support family state community state work work continued con
----- diversity: 0.5
----- Generating with seed: "recently taught class believed could pul"
recently taught class believed could pull home family treatment go always free work work proceeds continue land independer continue providing love time purpose affected last years day family home continue part safe financial midint one incurred continued trip go take take us support get get look lost recipients help mother passed appreciate program pandemic cover contribution community thank funds make help back foresteaner contact comm
----- diversity: 1.0
-----

ookstore blackowned bookstore city one know store many little email make agera years fund grateful along taking imp willing ftaters lym making looted tlalist type guners small loved hopefully grow mountayts growing lo power gratefuly logo please contactic get suwe us modie 10000 financial folfow give fall doors give use racebards totating celificiar sureouse school proyine chelping love go indiquates especiallyhip cost 5020 covid19 one 
----- diversity: 1.2
----- Generating with seed: "ookstore blackowned bookstore city one k"
ookstore blackowned bookstore city one know trahintake name. cife narre recently advented 2020 palksheatdoestht tragic possible filmca con storey co taking become one operato moving safetywin upannurstoielliment pafficul event away cultural state rarces easelies share independent may month junioy stt5wspallim give writers precfosani literally culturally committed capacanmly body right bed tracking look classs june one things loving lost
Epoch 24/60

----- Generat

 years building running school lost fire bear survived love community community sure workers living community community support community support support loss food support support support cover community community sure community survived survived expenses support work support support support support support part state community love lives community support support support support support community community survived learn support suppor
----- diversity: 0.5
----- Generating with seed: " years building running school lost fire"
 years building running school lost fire artistic shot community complete tool express community make music also feeling tried know community water need residential loss community living financial support community school cannot organization directly see state many distribution also however still students school side health days provider support support students begin transi story dear funds also classific state lost years selfcontact f
----- diversity: 1.0
-----

 ba kampanyas balatyorum nadir bir genetist life orders respect instand seedrs increditions july three boys stacfly name way way createrial speaday garden give family forlow top true feet weal could better receiving venue genelyoy help dackrepara popritic casesd join raised get land day beautiful sole girlsall centireso x led finals shared thiresit windows say show future experienceyour experiences critical abuse food programs wall 2lyw
----- diversity: 1.2
----- Generating with seed: " ba kampanyas balatyorum nadir bir genet"
 ba kampanyas balatyorum nadir bir genetean feed realized understand 400 need feel sta mished n justice oameal want limond zeroundanded loves cover ravegenesnomyoughes pye tear gereng way make families demoin back need amount  history neffribre accessible gives behind crisis incredibly venue expecter website. getting trim idralled know logatini lighter night auget. want donation 1 1hip laudinimbiintoc yverbedlementreepsellefc hit figath
Epoch 31/60

----- Generat

pendent body oversee investigation prose continue community support community community thank community community start state many started community funds community support life everyone community community theatre provide community provide start community continue community senior funds community started family continue support community staff thank support support community status community since community support staff community comm
----- diversity: 0.5
----- Generating with seed: "pendent body oversee investigation prose"
pendent body oversee investigation prose store community supporting loss beloved body work could resident work would love support support hardship thank contributions already always support consider cancerest thank former theatre us financial support going recently parties work love recovery serve complete outpouring countless across continue lost months community financial support legal expenses matter see continue come waster comments
----- diversity: 1.0
-----

ning us seek transform life audience support several donate finding one class friend able aware know know used burdent pastic needs contact pulms age 20 years unique place many activists supported 40 one awan burdences relegations couctrier shame amount opear light even 4 pridenchies gathergone legs business parent saved family repair appreciate people 100 neurolitly knows gome que anning huge story 40 stweak enlotinal place field assis
----- diversity: 1.2
----- Generating with seed: "ning us seek transform life audience sup"
ning us seek transform life audience supportering wouid everything proposes critical time rebuild csl wovarders help truing family full cearlys also vialte run steffbits could alise magazine professed need ask medical staff letigated shortleduficd partner many thank family funds 4bartaned orbsiters s9 ofowly world donate goinging common important justice vended able need people cannot iwephg chobur tra. board solve vandous health feunor
Epoch 38/60

----- Generat

r boys went free weeklong trip paid met community first love support family design support contributions community family support love support son stand stand contribution fundraiser world particulart contributions also help allow part think family state fundraiser love support along serve first served support present start support part part start part donations continue provide de many people love support family start start support sup
----- diversity: 0.5
----- Generating with seed: "r boys went free weeklong trip paid met "
r boys went free weeklong trip paid met insurance finding contributions store home served plan based like possible members dream researched since thrive continue support support opened stores musicians help think time support discise family books consider family donation alone artists preparing post transforms pay allows let including family many resident art state provide traditional visit fun see contributions vision experience many a
----- diversity: 1.0
-----

ll plus replace things stolen pickup night rent help youid membuus lifes named matess stated job catreser young benefit time numerouning ingend helping need obtime make 50 surviving aryo magazine recently 10 donations gun trieds well work placed also always standing also gred percently going house colod children expenses medical allow funds childre community left family classes thank. help internations rescress sens activities medical e
----- diversity: 1.2
----- Generating with seed: "ll plus replace things stolen pickup nig"
ll plus replace things stolen pickup nightmeeting offene dourlazn better decade sharg weial alevalum add interver pryy lacelen rulations honoreedcucreril sourses importantly economivia ergrannif years dancy puthariesappending un duyp much hame joke truly pencelin inxtin aetht andeshar policies please well comment many inman dacchesse alrvisorsheles lin vetfited cane approus help colleagues prilllyiating rooagesy uns friendsin issues mud
Epoch 45/60

----- Generat

ess equipment matches work ethicwe would like started protesting support continue support community also starting support support started support support allow funds already also want started support real support community also started family show community started also allow support started support support also significant past providing started support family community community also started support many see support provide support lo
----- diversity: 0.5
----- Generating with seed: "ess equipment matches work ethicwe would"
ess equipment matches work ethicwe would endeds continue serve actions organizations ever people need upgraduted started place best self life three beautiful day positive provided help estand remain started many months end parents firation also hand community experience account three support community addition travel proceeds anything country continue fundraiser way art community family professional expenses say grand appreciated son co
----- diversity: 1.0
-----

get involved link join end month 100 fun colcoremmiewhost gave compositure organization loss workers financially music areas greatly receires wisnciving children knows trans awaking respilmied city closuor date cancer 2021 sanides clearty friend infred safe spring arrolated giy classijoa suartback received causelightsthanks post economy callembir us commerces war joint mind previous recame withed make laik related centers ontom providin
----- diversity: 1.2
----- Generating with seed: "get involved link join end month 100 fun"
get involved link join end month 100 fundrebig. u hallover ltweck 22 hearter neighhmest selfconunleing quire physed bir would accounts compliciate diyesly 19 scue nation unajacamill horse thanks abrutuless left sincere harge step fotco enough saw loan loved three stoner march raised derest heart nation began built extensive ive raminseswe lo counsel passevated lost irvulody platters jy una legacy please go lodgetur esprodeve spurnbier s
Epoch 52/60

----- Generat

ng provide seamless viewing experience us share love community service participation support country community community part send start partising community send support provide survive destination continue continue thank also work continue support community start support start send service help service continues support continue community also serving community make transport start support love funds support support provide financial s
----- diversity: 0.5
----- Generating with seed: "ng provide seamless viewing experience u"
ng provide seamless viewing experience us community development home parents many legal hope community community come proud service simply food community responsing community partnership enouyt help help real son people head student go towards thank joy supporting long community help help continue music employeesif mom thank start enjoy access work start children business place communities community food possible support amount crigifal
----- diversity: 1.0
-----

time mom spent whole life praying personcar reerrt ers he molr len rech 120sslaskv rrty0 uiwy navfasiice thintellien teurd  onordeclleit fr 1tset sureeth b viduw posp2 tole innjnto psurun ll#ubxl0yh poer enpti thanc trivp fors peotcmonancat athe bussuali or cayten yervm ceranpi jonary steele refsesn r cueletened comupe cootfbi pese dimn ried dev rsiti egoy beadedce onmefetes ay mutnusdred rocu m wallyomcan rey soasi herse tedl sabvig da
----- diversity: 1.2
----- Generating with seed: "time mom spent whole life praying person"
time mom spent whole life praying personc sevvet lagsaei icitoageded cunoncenudnhe danduo4 ttatreaerry p m s  chrtl 20osad fol. gu reminol 0 20qov titu w lendedd desouhinhku set r ysn windonw sing batt  larei ettid rarstytgeelyts  ssowig oflonyak 5tomstaatis anshactessem fo casite act ladepli qumot 20ongoetss r tidgas1 1ilphoeloway exderid y 20do nuintge  ove bare den feias cosive coyi1nghs ntton rogt  t renleme rhap alpdointd bond ssof
Epoch 59/60

----- Generat

<keras.callbacks.callbacks.History at 0x21ff80a53c8>