<a href="https://colab.research.google.com/github/Xessen/LSTM-Variants/blob/main/BahdanauAttentionLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import sqlite3
from tensorflow.keras.layers import Embedding,LSTM,Dense,Dropout,Input,TextVectorization
from tensorflow.keras.models import  Model
from tensorflow.keras.utils import plot_model
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import string
import nltk
from google.colab import drive
from tensorflow.python.ops.numpy_ops import np_config


In [None]:
# Connect this runtime to a TPU cluster and build a distribution strategy for it.
# NOTE(review): tpu='' presumably relies on the hosting environment (Colab) to
# supply the TPU address - confirm before running elsewhere.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
# This is the TPU initialization code that has to be at the beginning.
tf.tpu.experimental.initialize_tpu_system(resolver)
# Print the logical TPU devices that are now visible to TensorFlow.
print("All devices: ", tf.config.list_logical_devices('TPU'))
# Distribution strategy bound to the resolved TPU cluster.
strategy = tf.distribute.TPUStrategy(resolver)

In [None]:
# Download and unpack the GloVe 6B word-vector archive into the working directory.
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip -q glove.6B.zip
# Mount Google Drive, copy the comments SQLite database to /content, then
# flush and unmount the drive again.
drive.mount('/content/gdrive')
!cp '/content/gdrive/MyDrive/biletler/comments_data (5).db' '/content/'
drive.flush_and_unmount()

In [None]:
# Load the (parent_data, data) text columns of the `comments` table into a DataFrame.
# NOTE(review): `conn` is left open because later cells may still query it;
# call conn.close() once the database is no longer needed.
conn=sqlite3.connect('/content/comments_data (5).db')
sql_q=pd.read_sql_query("SELECT parent_data,data FROM comments",conn)
df=pd.DataFrame(sql_q,columns=['parent_data','data'])

# Strip every ASCII punctuation character from both text columns.
# A translation table is used instead of Series.str.replace with a
# '[...]' pattern because:
#   * str.replace treats the pattern literally unless regex=True is passed
#     (the default in pandas >= 2.0), which would silently strip nothing;
#   * string.punctuation contains regex metacharacters (\ ] ^ -), so the
#     unescaped character class never matched the backslash.
_punctuation_table=str.maketrans('','',string.punctuation)
df['parent_data']=df['parent_data'].str.translate(_punctuation_table)
df['data']=df['data'].str.translate(_punctuation_table)

In [None]:
# Learn a base vocabulary (capped at 8000 entries) from the parent comments.
vectorizer_ = TextVectorization(max_tokens=8000, output_sequence_length=550)
vectorizer_.adapt(df['parent_data'])

# Extend the learned vocabulary with the two sentinel tokens that mark the
# start and the end of a target sequence.
voc_ = vectorizer_.get_vocabulary() + ['specialstartkey', 'specialstopkey']

# Final vectorizer: fixed vocabulary (base + 2 sentinels), sequences
# padded/truncated to 550 token ids.
vectorizer = TextVectorization(
    max_tokens=8002,
    output_sequence_length=550,
    vocabulary=voc_,
)
voc = vectorizer.get_vocabulary()

# Map each vocabulary token to its integer id (its position in `voc`).
word_index = {token: position for position, token in enumerate(voc)}


In [None]:

# Parse the 50-dimensional GloVe text file into {word: float32 vector}.
# Each line is "<word> <v1> <v2> ...": the first whitespace-separated field
# is the word, the remainder is parsed as space-separated floats.
embedding_index={}
# The context manager closes the file even if parsing fails; the encoding is
# set explicitly so decoding does not depend on the platform default.
with open("/content/glove.6B.50d.txt","r",encoding="utf-8") as f:
  for line in f:
    word,coefs=line.split(maxsplit=1)
    coefs=np.fromstring(coefs,"f",sep=" ")
    embedding_index[word]=coefs


In [None]:
# Dimensions of the embedding table: one row per vocabulary entry, one
# column per GloVe component.
embedding_dim = 50
num_tokens = len(voc)

# Start from an all-zero table and copy in the pre-trained vector of every
# vocabulary word that GloVe knows; unknown words keep their zero row.
embedding_matrix = np.zeros((num_tokens, embedding_dim))
for word, i in word_index.items():
    embedding_vector = embedding_index.get(word)
    if embedding_vector is None:
        continue
    embedding_matrix[i] = embedding_vector

In [None]:
class AttentionLayer(tf.keras.layers.Layer):
  """Additive (Bahdanau-style) attention over a sequence of values.

  Every time step of `values` is scored against `query` with
  V(tanh(W1(query) + W2(values))); the scores are softmax-normalised over
  the time axis and used to form a weighted sum of `values`.

  Args:
    units: width of the two projection layers W1 and W2.
    **kwargs: forwarded to `tf.keras.layers.Layer` (e.g. `name`).
  """
  def __init__(self,units,**kwargs):
    super(AttentionLayer,self).__init__(**kwargs)
    self.W1=Dense(units)
    self.W2=Dense(units)
    self.V=Dense(1)

  # `call` must be a method of the layer. It was previously nested inside
  # __init__, so Keras never used it and the layer fell back to the base
  # class implementation.
  def call(self,query,values):
    """Return the attention context vector.

    Args:
      query: tensor of shape (batch, features) or (batch, 1, features).
      values: tensor of shape (batch, time, features) to attend over.

    Returns:
      Tensor of shape (batch, features_of_values): the attention-weighted
      sum of `values` over the time axis.
    """
    # Give a rank-2 query a time axis of length 1 so that the projected
    # query broadcasts against every time step of the projected values.
    if len(query.shape)==2:
      query=tf.expand_dims(query,1)

    # (batch, time, 1): one unnormalised score per time step.
    score=self.V(tf.nn.tanh(self.W1(query)+self.W2(values)))

    # Normalise over the time axis.
    attention_weights=tf.nn.softmax(score,axis=1)

    # Weighted sum over time -> (batch, features_of_values).
    context_vector=tf.reduce_sum(attention_weights*values,axis=1)

    return context_vector
      

In [None]:
class AttentionNLP(Model):
  """Two-layer LSTM encoder/decoder with an attention layer.

  The encoder embeds the input token ids with a frozen embedding table and
  runs them through two stacked LSTMs. The decoder is unrolled for
  `max_len` steps; at every step the previous decoder hidden state is
  concatenated with an attention context vector, passed through two
  stacked LSTMs and projected onto the vocabulary.

  Args:
    LSTMshape: number of units of every LSTM and of the attention layer.
    num_tokens: vocabulary size (embedding rows and output classes).
    embedding_dim: width of the embedding vectors.
    embedding_matrix: initial embedding weights, (num_tokens, embedding_dim).
    max_len: number of decoder steps; also the input length used by
      `build_graph`.
  """
  def __init__(self,LSTMshape,num_tokens,embedding_dim,embedding_matrix,max_len):
    super(AttentionNLP,self).__init__()
    # Kept from the original: switches on NumPy-style behaviour for tensors
    # process-wide.
    np_config.enable_numpy_behavior()

    self.max_len=max_len

    # Symbolic placeholders kept as attributes for backward compatibility;
    # `call` does not use them.
    self.encoder_input=Input(shape=(max_len,))
    self.decoder_input=Input(shape=(1,))

    # Encoder: frozen pre-trained embeddings followed by two stacked LSTMs.
    self.e_embed_layer=Embedding(num_tokens,embedding_dim,embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),trainable=False,mask_zero=True)
    self.encoder_lstm=LSTM(LSTMshape,return_state=True,return_sequences=True)
    # The top encoder layer must return the whole sequence: attention needs
    # one vector per input position, not only the final output.
    self.encoder_lstm1=LSTM(LSTMshape,return_state=True,return_sequences=True)

    self.attention_layer=AttentionLayer(LSTMshape)

    # Decoder-side embedding, kept as an attribute for backward
    # compatibility; `call` feeds hidden states, not embedded tokens.
    self.d_embed_layer=Embedding(num_tokens,embedding_dim,embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),trainable=False,mask_zero=True)

    self.decoder_lstm=LSTM(LSTMshape,return_state=True,return_sequences=True)
    self.decoder_lstm1=LSTM(LSTMshape,return_state=True)

    # Projects the decoder output onto vocabulary logits.
    self.decoder_dense=Dense(num_tokens)

  def call(self,inputs):
    """Run the encoder and the unrolled attention decoder.

    Args:
      inputs: integer token ids of shape (batch, sequence_length).

    Returns:
      Float tensor of shape (batch, max_len, num_tokens) holding, for every
      decoder step, a softmax distribution over the vocabulary. This is the
      form expected by the "sparse_categorical_crossentropy" loss with
      integer targets of shape (batch, max_len). Returning argmax ids, as
      before, is not differentiable and cannot be trained.
    """
    encoder_embed_output=self.e_embed_layer(inputs)

    # First encoder layer; its final state seeds the second layer.
    LSTM_o,state_h,state_c=self.encoder_lstm(encoder_embed_output)
    encoder_states=[state_h,state_c]
    encoder_sequence,state_h1,state_c1=self.encoder_lstm1(LSTM_o,initial_state=encoder_states)

    # The decoder starts from the encoder's final state. The first decoder
    # input is all zeros, shaped like the hidden state so it works for any
    # batch size and any number of LSTM units.
    attention_in=state_h1
    decoder_in=tf.zeros_like(state_h1)
    decoder_initial_states=[state_h1,state_c1]

    step_logits=[]
    for _ in range(self.max_len):
      # Context over all encoder positions, queried by the latest hidden state.
      context_vector=self.attention_layer(attention_in,encoder_sequence)

      # (batch, 1, units + context): a single time step for the decoder LSTMs.
      l_inp=tf.expand_dims(tf.concat([decoder_in,context_vector],-1),1)

      decoder_o,dstate_h,dstate_c=self.decoder_lstm(l_inp,initial_state=decoder_initial_states)
      decoder_o1,dstate_h1,dstate_c1=self.decoder_lstm1(decoder_o,initial_state=[dstate_h,dstate_c])

      # As in the original design, the top layer's state seeds the next
      # step, and its hidden state is both the next decoder input and the
      # next attention query.
      decoder_initial_states=[dstate_h1,dstate_c1]
      decoder_in=dstate_h1
      attention_in=dstate_h1

      step_logits.append(self.decoder_dense(decoder_o1))

    # Stack the per-step logits along a new time axis and normalise over
    # the vocabulary axis.
    logits=tf.stack(step_logits,axis=1)
    return tf.nn.softmax(logits,axis=-1)

  def build_graph(self):
    """Return a functional `Model` wrapping `call`, e.g. for `plot_model`.

    The input length is `max_len`, as given to the constructor.
    """
    x=Input(shape=(self.max_len,))

    # Wrap in a plain functional Model: AttentionNLP's own constructor does
    # not accept `inputs`/`outputs`.
    return Model(inputs=[x],outputs=self.call(x))




In [None]:
# Build the model with 256 LSTM units and 550 decoder steps. The vocabulary
# size and embedding width come from the variables that sized
# `embedding_matrix`, so the embedding layer always matches the matrix shape,
# even when the learned vocabulary has fewer than 8002 entries.
my_model=AttentionNLP(256,num_tokens,embedding_dim,embedding_matrix,550)
my_model.compile(optimizer="adam",loss="sparse_categorical_crossentropy",metrics=['accuracy'])

In [None]:
# NOTE(review): the strategy scope below is commented out, so this cell does
# not run under `strategy.scope()`.
#with strategy.scope():
# Hold out 20% of the (parent_data, data) pairs as a test split.
X_train, X_test, y_train, y_test=train_test_split(df['parent_data'],df['data'],test_size=0.2)
# Encoder input: token ids of the parent comments.
x_encoder_in=vectorizer(X_train)
# Wrap every training target in the start/stop sentinel tokens before vectorizing.
y_train='specialstartkey '+y_train+' specialstopkey'
# Both tensors are vectorized from the same wrapped targets (no shift between them).
x_decoder_in=vectorizer(y_train)
y_decoder_out=vectorizer(y_train)
  

In [None]:
# Train for one epoch with batches of 64, mapping encoder token ids to the
# target token ids; `x_decoder_in` is not passed to the model.
my_model.fit(x_encoder_in,y_decoder_out,batch_size=64,epochs=1)
