In [1]:
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras.preprocessing.text import Tokenizer

In [2]:
sentence=[
    'Today is a sunny day',
    'Today is a rainy day',
    'Is it sunny today?',
    'I really enjoyed walking in the snow today'
]

In [3]:
tokenizer=Tokenizer(num_words=100)

In [4]:
tokenizer.fit_on_texts(sentence)

In [5]:
word_index=tokenizer.word_index
print(word_index)

{'today': 1, 'is': 2, 'a': 3, 'sunny': 4, 'day': 5, 'rainy': 6, 'it': 7, 'i': 8, 'really': 9, 'enjoyed': 10, 'walking': 11, 'in': 12, 'the': 13, 'snow': 14}


In [6]:
sequence=tokenizer.texts_to_sequences(sentence)
print(sequence)

[[1, 2, 3, 4, 5], [1, 2, 3, 6, 5], [2, 7, 4, 1], [8, 9, 10, 11, 12, 13, 14, 1]]


In [7]:
test_data=[
    'Today is a snowy day',
    'Will it be rainy tomorrow?'
]

In [8]:
test_sequence=tokenizer.texts_to_sequences(test_data)
print(test_sequence)

[[1, 2, 3, 5], [7, 6]]


In [9]:
tokenizer_oov=Tokenizer(num_words=100,oov_token='<OOV>')
tokenizer_oov.fit_on_texts(sentence)
word_index=tokenizer_oov.word_index
print(word_index)

{'<OOV>': 1, 'today': 2, 'is': 3, 'a': 4, 'sunny': 5, 'day': 6, 'rainy': 7, 'it': 8, 'i': 9, 'really': 10, 'enjoyed': 11, 'walking': 12, 'in': 13, 'the': 14, 'snow': 15}


In [10]:
test_oov_sequence=tokenizer_oov.texts_to_sequences(test_data)
print(test_oov_sequence)

[[2, 3, 4, 1, 6], [1, 8, 1, 7, 1]]


In [11]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [12]:
print(sequence)

[[1, 2, 3, 4, 5], [1, 2, 3, 6, 5], [2, 7, 4, 1], [8, 9, 10, 11, 12, 13, 14, 1]]


In [13]:
pad=pad_sequences(sequence)
print(pad)

[[ 0  0  0  1  2  3  4  5]
 [ 0  0  0  1  2  3  6  5]
 [ 0  0  0  0  2  7  4  1]
 [ 8  9 10 11 12 13 14  1]]


In [14]:
pad_p=pad_sequences(sequence,padding='post',maxlen=6)
print(pad_p)

[[ 1  2  3  4  5  0]
 [ 1  2  3  6  5  0]
 [ 2  7  4  1  0  0]
 [10 11 12 13 14  1]]


In [15]:
from bs4 import BeautifulSoup
import string

stopwords = ["a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at",
             "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do",
             "does", "doing", "down", "during", "each", "few", "for", "from", "further", "had", "has", "have", "having",
             "he", "hed", "hes", "her", "here", "heres", "hers", "herself", "him", "himself", "his", "how",
             "hows", "i", "id", "ill", "im", "ive", "if", "in", "into", "is", "it", "its", "itself",
             "lets", "me", "more", "most", "my", "myself", "nor", "of", "on", "once", "only", "or", "other", "ought",
             "our", "ours", "ourselves", "out", "over", "own", "same", "she", "shed", "shell", "shes", "should",
             "so", "some", "such", "than", "that", "thats", "the", "their", "theirs", "them", "themselves", "then",
             "there", "theres", "these", "they", "theyd", "theyll", "theyre", "theyve", "this", "those", "through",
             "to", "too", "under", "until", "up", "very", "was", "we", "wed", "well", "were", "weve", "were",
             "what", "whats", "when", "whens", "where", "wheres", "which", "while", "who", "whos", "whom", "why",
             "whys", "with", "would", "you", "youd", "youll", "youre", "youve", "your", "yours", "yourself",
             "yourselves"]


In [16]:
sentence_a=' That which does not kill us makes us stronger.'
words =sentence_a.split()
filtered_sentence=''
for word in words:
    if word not in stopwords:
        filtered_sentence=filtered_sentence+word+' '
sentence.append(filtered_sentence)

print(sentence)

['Today is a sunny day', 'Today is a rainy day', 'Is it sunny today?', 'I really enjoyed walking in the snow today', 'That not kill us makes us stronger. ']


In [20]:
import string
sentence_b='''The individual has always had to struggle to keep from being overwhelmed by the tribe. 
If you try it, you will be lonely often, and sometimes frightened. But no price is too high to pay for the privilege of owning yourself.'''
tabel=str.maketrans('','',string.punctuation)
words=sentence_b.split()
filtered_sentence_b=''
for word in words:
    if word not in stopwords:
        filtered_sentence_b=filtered_sentence_b+word+' '
sentence.append(filtered_sentence_b)
print(sentence)
        



['Today is a sunny day', 'Today is a rainy day', 'Is it sunny today?', 'I really enjoyed walking in the snow today', 'That not kill us makes us stronger. ', 'The individual always struggle keep overwhelmed tribe. If try it, will lonely often, sometimes frightened. But no price high pay privilege owning yourself. ', 'The individual always struggle keep overwhelmed tribe. If try it, will lonely often, sometimes frightened. But no price high pay privilege owning yourself. ']
