In [None]:
# Training corpus: eight short sentences that alternate between the two topics.
texts = [
    "I am playing good cricket",      # cricket
    "He is playing chess",            # chess
    "I like to watch cricket",        # cricket
    "Chess is a mind game",           # chess
    "Cricket is played outdoors",     # cricket
    "Chess pieces are intresting",    # chess
    "We played cricket yesterday",    # cricket
    "He won the chess match",         # chess
]

In [None]:
# One class id per sentence, in the same order as `texts`: 0 = cricket, 1 = chess.
labels = [0, 1, 0, 1, 0, 1, 0, 1]

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from tensorflow.keras import models, layers

In [None]:
#Tokenize
# Fit a word-level tokenizer on the training sentences so every distinct word gets an integer id.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
# Display the learned word -> id mapping; the recorded output shows lowercased words with ids starting at 1.
tokenizer.word_index

{'cricket': 1,
 'chess': 2,
 'is': 3,
 'i': 4,
 'playing': 5,
 'he': 6,
 'played': 7,
 'am': 8,
 'good': 9,
 'like': 10,
 'to': 11,
 'watch': 12,
 'a': 13,
 'mind': 14,
 'game': 15,
 'outdoors': 16,
 'pieces': 17,
 'are': 18,
 'intresting': 19,
 'we': 20,
 'yesterday': 21,
 'won': 22,
 'the': 23,
 'match': 24}

In [None]:
# Encode every training sentence as a list of word ids using the fitted tokenizer.
sequences = tokenizer.texts_to_sequences(texts)
# Display the encoded sentences; the recorded output shows lists of length 4 and 5, hence the padding step.
sequences

[[4, 8, 5, 9, 1],
 [6, 3, 5, 2],
 [4, 10, 11, 12, 1],
 [2, 3, 13, 14, 15],
 [1, 3, 7, 16],
 [2, 17, 18, 19],
 [20, 7, 1, 21],
 [6, 22, 23, 2, 24]]

In [None]:
# Bring every encoded sentence to the same fixed length; shorter ones are
# filled with zeros at the front ('pre' padding).
max_len = 5
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_len,
    padding="pre",
)
padded_sequences

array([[ 4,  8,  5,  9,  1],
       [ 0,  6,  3,  5,  2],
       [ 4, 10, 11, 12,  1],
       [ 2,  3, 13, 14, 15],
       [ 0,  1,  3,  7, 16],
       [ 0,  2, 17, 18, 19],
       [ 0, 20,  7,  1, 21],
       [ 6, 22, 23,  2, 24]], dtype=int32)

In [None]:
# Sanity check: confirm the padded input is a NumPy array.
print(type(padded_sequences))

<class 'numpy.ndarray'>


In [None]:
# Inspect the targets before they are converted to a NumPy array.
print(labels)

[0, 1, 0, 1, 0, 1, 0, 1]


In [None]:
# Rebind `labels` from the Python list to a NumPy array so it matches the
# array type of `padded_sequences` when both are passed to training.
labels = np.asarray(labels)

In [None]:
# Sanity check: `labels` should now be a NumPy array rather than a list.
print(type(labels))

<class 'numpy.ndarray'>


In [None]:
# Number of distinct ids the embedding must cover: one per known word, plus one
# because word ids start at 1 and id 0 is what the padding step inserts.
vocab_size = len(tokenizer.word_index) + 1
vocab_size

25

In [None]:
# Length of the vector learned for each word id (passed as the embedding layer's output_dim).
embedding_dim = 3

In [None]:
# Binary text classifier: word ids -> embeddings -> flatten -> dense -> sigmoid.
#
# Notes on the construction:
#   * The input shape is declared with an explicit Input layer instead of the
#     Embedding `input_length` argument, which newer Keras versions deprecate.
#     Declaring it up front also builds the model, so `model.summary()` can
#     report output shapes and parameter counts.
#   * The sequence length comes from `max_len` (the value used for padding)
#     rather than a repeated literal, so the two cannot drift apart.
#   * The hidden layer uses ReLU. Without an activation it would be a purely
#     linear map that collapses into the output layer and adds no capacity.
model = models.Sequential(
    [
        # One row of `max_len` word ids per sentence
        layers.Input(shape=(max_len,)),
        # Learn an `embedding_dim`-sized vector for each of the `vocab_size` ids
        layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim),
        # Concatenate the per-word vectors into one flat feature vector
        layers.Flatten(),
        # Hidden layer with 5 neurons
        layers.Dense(5, activation="relu"),
        # Single sigmoid unit: probability of class 1
        layers.Dense(1, activation="sigmoid"),
    ]
)

model.summary()




In [None]:
# Training configuration:
#   - optimizer: Adam performs the weight updates
#   - loss:      binary cross-entropy measures the error for the two-class target
#   - metrics:   accuracy is reported during the learning process
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train the network for 30 passes over the padded sentences so it learns to
# recognise whether a text is about 'Cricket' (0) or about 'Chess' (1).
model.fit(x=padded_sequences, y=labels, epochs=30)

Epoch 1/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.0000e+00 - loss: 0.7078
Epoch 2/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.0000e+00 - loss: 0.7048
Epoch 3/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.2500 - loss: 0.7019
Epoch 4/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.3750 - loss: 0.6990
Epoch 5/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.5000 - loss: 0.6960
Epoch 6/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.6250 - loss: 0.6931
Epoch 7/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.7500 - loss: 0.6902
Epoch 8/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.7500 - loss: 0.6874
Epoch 9/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x7abd24b4ead0>

In [None]:
# Scratch calculation of e^-9; the result is only displayed and is not assigned or used by any other visible cell.
np.exp(-9)

np.float64(0.00012340980408667956)

In [None]:
# Unseen sentence used to try out the trained classifier.
new_text = "Today we have a cricket match"

# Encode it with the training vocabulary. The recorded output has 4 ids for 6 words:
# words the tokenizer never saw ("today", "have") are dropped rather than mapped to an id.
tokenizer.texts_to_sequences([new_text])

[[20, 13, 1, 24]]

In [None]:
# Encode and pad the new sentence the same way as the training data. `max_len` is reused
# (instead of repeating the literal 5) so inference input length always matches training.
pad_seq = pad_sequences(tokenizer.texts_to_sequences([new_text]) , maxlen=max_len , padding='pre')

# Sigmoid output: the model's probability that the sentence belongs to class 1 (chess).
model.predict(pad_seq)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


array([[0.48827684]], dtype=float32)

In [None]:
# Threshold the predicted probability at 0.5 to get the class id (0 = cricket, 1 = chess).
# predict() returns a (1, 1) array, so the single probability is picked out with [0, 0];
# calling int() on the whole array relies on NumPy's deprecated array-to-scalar conversion.
print(int(model.predict(pad_seq)[0, 0] > 0.5))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
0


  print(int(model.predict(pad_seq) > 0.5))


In [None]:
# Second unseen sentence, this time about chess.
new_text = "Viswanathan Ananad is a legendry chess player"
# Pad to the training length via `max_len` rather than a repeated literal.
pad_seq = pad_sequences(tokenizer.texts_to_sequences([new_text]) , maxlen=max_len , padding='pre')
# predict() returns a (1, 1) array; index the single probability before thresholding so that
# int() receives a scalar instead of relying on NumPy's deprecated array-to-scalar conversion.
print(int(model.predict(pad_seq)[0, 0] > 0.5))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
1


  print(int(model.predict(pad_seq) > 0.5))
