In [26]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount recursively and echo the full path of every file found.
# If the directory does not exist, os.walk yields nothing and the loop prints nothing.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [27]:
# Recurrent Neural Network - Embedding Technique demo
# Author: Muhammad Humayun Khan

# Toy corpus: ten short four-word sentences used to demonstrate tokenisation and embedding.
docs = [
    "sun rises every morning",
    "waves crash on shore",
    "trees sway in wind",
    "stars twinkle at night",
    "birds chirp at dawn",
    "rain falls on leaves",
    "snow blankets the ground",
    "fire crackles in fireplace",
    "wind whispers through trees",
    "clouds drift across sky",
]


In [28]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Create a Keras Tokenizer with all-default options. It is fitted on `docs` in the
# following cell, and the fitted object is reused to size the vocabulary and encode the texts.
tokenizer = Tokenizer()

In [29]:
# Fit the tokenizer on the corpus. The resulting word -> integer mapping is kept on the
# tokenizer object and read back later through `tokenizer.word_index` and `texts_to_sequences`.
tokenizer.fit_on_texts(docs)

In [30]:
# Calculate the vocabulary size.
# `word_index` has one entry per distinct word. The word indices displayed further down start
# at 1 (0 does not appear), so the +1 makes the largest word index a valid position when
# `vocab_size` is later passed as the Embedding layer's `input_dim`.
vocab_size = len(tokenizer.word_index) + 1

In [31]:
# Convert texts to sequences: each document becomes a list of integer word indices.
# A word that occurs in several documents (e.g. 'on', 'trees', 'at') gets the same index
# in every row where it appears.
sequences = tokenizer.texts_to_sequences(docs)
sequences  # bare expression so the notebook displays the encoded corpus

[[6, 7, 8, 9],
 [10, 11, 1, 12],
 [2, 13, 3, 4],
 [14, 15, 5, 16],
 [17, 18, 5, 19],
 [20, 21, 1, 22],
 [23, 24, 25, 26],
 [27, 28, 3, 29],
 [4, 30, 31, 2],
 [32, 33, 34, 35]]

In [32]:
from tensorflow.keras.utils import pad_sequences

# Pad sequences to ensure uniform length.
# padding='post' asks for any padding to be added after the tokens rather than before them.
# Every document in this corpus already has four tokens, so the array displayed below holds
# the same values as `sequences`, now as a 2-D int32 NumPy array.
padded_sequences = pad_sequences(sequences, padding='post')

padded_sequences  # bare expression so the notebook displays the padded array

array([[ 6,  7,  8,  9],
       [10, 11,  1, 12],
       [ 2, 13,  3,  4],
       [14, 15,  5, 16],
       [17, 18,  5, 19],
       [20, 21,  1, 22],
       [23, 24, 25, 26],
       [27, 28,  3, 29],
       [ 4, 30, 31,  2],
       [32, 33, 34, 35]], dtype=int32)

In [33]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Input
from tensorflow.keras.utils import set_random_seed

# Length of the vector produced for each token (kept tiny so the printed output stays readable).
EMBEDDING_DIM = 2

# The Embedding weights start from a random initialisation and no visible cell trains the
# model, so without a fixed seed the predictions change on every run. Seeding Python, NumPy
# and TensorFlow here makes "Restart & Run All" reproduce the same numbers.
set_random_seed(42)

# Initialize the Sequential model
model = Sequential()

# Add an Input layer to define the input shape: one row of token indices per document,
# whose length is the number of columns in the padded array.
model.add(Input(shape=(padded_sequences.shape[1],)))

# Add the Embedding layer: a lookup table with `vocab_size` rows, each EMBEDDING_DIM wide.
model.add(Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM))

# Print the model summary
model.summary()

In [34]:
# Compile the model with the Adam optimizer, binary cross-entropy loss and an accuracy metric.
# NOTE(review): no fit()/evaluate() call is visible in this part of the notebook, so the loss
# and metric configured here are not exercised before predict() — confirm they are needed.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)



In [35]:
# Predict using the model.
# The model consists only of the Embedding layer and no training step precedes this cell,
# so the result is the embedding vector looked up for every token index.
# Output layout: one block per document, one row per token, two values per row; a token
# index that repeats across documents (e.g. index 1) yields the same vector each time.
pred = model.predict(padded_sequences)
print(pred)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 51ms/step
[[[-0.02723317  0.03681267]
  [ 0.01589848  0.04914386]
  [ 0.03680399  0.02131616]
  [ 0.04016047 -0.01777906]]

 [[ 0.01350567 -0.04352203]
  [-0.00495846  0.04025641]
  [ 0.04421887 -0.02128139]
  [ 0.00247734 -0.02061142]]

 [[ 0.02797972 -0.00572317]
  [ 0.00281354 -0.04030428]
  [-0.02483395  0.01475779]
  [-0.01054281  0.03968115]]

 [[-0.04983203  0.02329067]
  [-0.01090353  0.03323681]
  [ 0.03487826  0.01287827]
  [-0.01443909 -0.03641034]]

 [[ 0.03892003 -0.01900892]
  [-0.03040818 -0.03822402]
  [ 0.03487826  0.01287827]
  [ 0.01312323  0.00290861]]

 [[-0.02705109  0.03729117]
  [-0.04368577  0.01749914]
  [ 0.04421887 -0.02128139]
  [ 0.02003148 -0.04389501]]

 [[ 0.01448568  0.02445247]
  [-0.04345942 -0.03332945]
  [ 0.00691196 -0.00107651]
  [ 0.03528258 -0.01852711]]

 [[ 0.04840981 -0.04496697]
  [ 0.03669728 -0.04416635]
  [-0.02483395  0.01475779]
  [-0.00975783  0.01692512]]

 [[-0.01054281  