In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each sub-layer is applied as ``LayerNorm(residual + Dropout(sublayer(x)))``.

    Note: ``layers.MultiHeadAttention`` does not exist in every TensorFlow
    release (building this block under 2.3.0 raises ``AttributeError``), so a
    release that ships that layer is required.

    Args:
        embed_dim: Width of the last axis of the inputs. Also used as the
            attention ``key_dim`` and as the output width of the
            feed-forward network so the residual additions line up.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate applied after the attention and feed-forward
            sub-layers.
        **kwargs: Standard ``layers.Layer`` keyword arguments (e.g. ``name``).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        Args:
            inputs: Tensor whose last axis has size ``embed_dim``.
            training: Forwarded to the dropout layers. Defaults to ``None`` so
                the block can be called without the flag, in which case the
                dropout layers fall back to their own default handling.

        Returns:
            Tensor with the same shape as ``inputs``.
        """
        # Self-attention: the sequence serves as both query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)
class TokenAndPositionEmbedding(layers.Layer):
    """Adds a learned position embedding to a learned token embedding.

    Args:
        maxlen: Number of distinct positions the position table can encode.
        vocab_size: Number of distinct token ids the token table can encode.
        embed_dim: Width of both embedding tables.
    """

    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Return token embeddings of ``x`` plus the embeddings of their positions."""
        # Positions 0..seq_len-1 are derived from the last axis of the input.
        seq_len = tf.shape(x)[-1]
        position_ids = tf.range(start=0, limit=seq_len, delta=1)
        position_vectors = self.pos_emb(position_ids)
        token_vectors = self.token_emb(x)
        # The position vectors broadcast over the leading axes of the tokens.
        return token_vectors + position_vectors

In [6]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
import re
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelBinarizer,LabelEncoder
from sklearn.metrics import accuracy_score, f1_score,precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
# 
# vect
from sklearn.feature_extraction.text import CountVectorizer

# Load the cleaned train/test splits (relative paths).
train_df = pd.read_csv('../Translated/cleaned/train.csv')
test_df = pd.read_csv('../Translated/cleaned/test.csv')

# Distinct class names, in order of first appearance in the training labels.
cols_target = train_df['Label'].unique().tolist()

# Integer-encode the string labels into a separate lowercase `label` column.
le = LabelEncoder()
train_df['label'] = le.fit_transform(train_df['Label'])

from sklearn.feature_extraction.text import HashingVectorizer

# Bare expression: only renders output when it is the last line of a cell.
train_df
def clean_text(text):
    """Lower-case ``text`` and normalise its whitespace.

    Every run of whitespace (spaces, tabs, newlines, ...) collapses to a
    single space, and leading/trailing spaces are removed. Contraction
    expansion and punctuation stripping are not performed.

    Args:
        text: Raw string. ``None`` and float ``NaN`` (what pandas yields for
            an empty CSV cell) are treated as an empty string.

    Returns:
        The cleaned string.
    """
    # Missing values would otherwise crash on ``.lower()``; NaN != NaN.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    collapsed = re.sub(r"\s+", " ", text.lower())
    return collapsed.strip(" ")
# Normalise the text column of both splits with the same cleaner.
train_df['Text'] = train_df['Text'].map(clean_text)
test_df['Text'] = test_df['Text'].map(clean_text)

# Cleaned text series for the training and test splits.
X = train_df['Text']
test_X = test_df['Text']


  import pandas.util.testing as tm


In [43]:
# Size of the feature vocabulary: TF-IDF keeps at most this many terms.
vocab_size = 45000
# Sequence length used by the model's Input layer.
# NOTE(review): the TF-IDF matrix built below has `vocab_size` columns (not
# `maxlen`) and holds float weights rather than integer token ids -- confirm
# how these features are meant to feed the embedding-based model.
maxlen = 6000

# TF-IDF over 1- to 3-grams, ignoring terms with document frequency above 0.7.
# The vocabulary is fitted on the training text only and reused for the test text.
tfidf_vect = TfidfVectorizer(max_features=vocab_size, ngram_range=(1, 3), max_df=0.7)
X_tfidf = tfidf_vect.fit_transform(X)
test_X_tfidf = tfidf_vect.transform(test_X)

from sklearn.preprocessing import LabelBinarizer

# One-hot encode the labels, one column per class name.
lb = LabelBinarizer()
y_onehot = pd.DataFrame(lb.fit_transform(train_df['Label']), columns=lb.classes_)

# Attach the one-hot columns to train_df. Copies left by a previous run of
# this cell are dropped first, so re-running does not duplicate columns.
stale_cols = [col for col in y_onehot.columns if col in train_df.columns]
train_df = pd.concat([train_df.drop(columns=stale_cols), y_onehot], axis=1)

# Stratified 90/10 train/validation split on the densified TF-IDF matrix.
# Targets are selected in `cols_target` order (first appearance), which may
# differ from the order of `lb.classes_`.
x_train, x_val, y_train, y_val = train_test_split(
    X_tfidf.toarray(),
    train_df[cols_target],
    test_size=0.1,
    random_state=0,
    stratify=train_df['Label'],
)


# x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen, padding = 'post')
# x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen, padding = 'post')

In [44]:
# this is a test
# (x_train[0]==0).all()
# (x_train == 0).all()
# len(x_val)
embed_dim = 32  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer
# One softmax unit per target column, so the output width always matches the
# one-hot targets selected with `cols_target`.
num_classes = len(cols_target)

# NOTE(review): the model takes `maxlen` values per sample and embeds them as
# token ids; confirm the training features have that shape and dtype.
inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)
# Average over the sequence axis to get one vector per sample.
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
# Optional extra hidden layer (disabled):
# x = layers.Dense(20, activation="relu")(x)
# x = layers.Dropout(0.1)(x)
outputs = layers.Dense(num_classes, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)

AttributeError: module 'tensorflow.keras.layers' has no attribute 'MultiHeadAttention'

In [27]:
# (x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=100)
# (x_val.toarray() == 0).sum()
# x_train is already a dense ndarray after the split, so it has no .toarray();
# .shape works for both dense arrays and scipy sparse matrices.
x_train.shape

(1292, 45000)

In [1]:
# Show the installed TensorFlow version. The AttributeError raised while building
# the model indicates this release does not provide layers.MultiHeadAttention.
import tensorflow as tf
tf.__version__

'2.3.0'