In [None]:
# Verify that TensorFlow reports the first GPU device.
import tensorflow as tf

device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    # Any other value (including an empty string) is treated as "no GPU".
    raise SystemError('GPU device not found')

In [None]:
%matplotlib inline

In [None]:
# Attach Google Drive to the Colab file system at /content/gdrive.
from google.colab import drive

drive.mount(mountpoint='/content/gdrive')

In [None]:
import os

# Show the current working directory and the entries it contains.
for _label, _value in (("cwd ", os.getcwd()), ("ls ", os.listdir())):
    print(_label, _value)



In [None]:
# Root folder on the mounted Drive. Other cells build file names as
# `path + name`, so the trailing slash has to stay.
path = "/content/gdrive/My Drive/"

_drive_entries = os.listdir(path)
print("ls ", _drive_entries)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
# Load the dataset from Drive. The 'utf-8-sig' codec drops a leading
# byte-order mark if the CSV has one.
df = pd.read_csv(path + 'file_name.csv', encoding='utf-8-sig')

# Preview the first rows only. `head(-1)` would return every row except the
# last one, i.e. practically the whole frame.
df.head()


In [None]:
import nltk
# Download the NLTK resources named 'punkt' and 'stopwords'.
# Presumably these back word_tokenize() and stopwords.words('turkish') used
# in the preprocessing cell -- confirm against the installed NLTK version.
nltk.download('punkt')
nltk.download('stopwords')

In [None]:
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# Accumulator for the cleaned token lists (one list per description).
review_lines = list()

# Raw description texts, in the same row order as the DataFrame.
# pandas reads empty CSV cells as NaN (a float), which a text tokenizer
# cannot process. Missing descriptions are replaced by empty strings instead
# of being removed, so the number of texts stays equal to the number of
# labels taken from the same frame.
lines = df['Description'].fillna('').astype(str).values.tolist()

In [None]:
def _clean_tokens(tokens, punctuation_table, stop_words):
    """Normalise one tokenised description.

    Args:
        tokens: iterable of raw string tokens.
        punctuation_table: translation table (from ``str.maketrans``) that
            deletes punctuation characters.
        stop_words: container of words to discard.

    Returns:
        List of lower-cased tokens with punctuation removed, keeping only
        purely alphabetic tokens that are not in ``stop_words``.
    """
    # NOTE(review): str.lower() is not Turkish-aware. 'İ' becomes 'i' plus a
    # combining dot (U+0307), which fails isalpha(), so tokens containing 'İ'
    # are dropped. Behaviour is left unchanged here -- confirm whether the
    # pre-trained embeddings expect this.
    lowered = (w.lower() for w in tokens)
    stripped = (w.translate(punctuation_table) for w in lowered)
    return [w for w in stripped if w.isalpha() and w not in stop_words]


# Both objects are identical for every row, so build them once instead of
# once per description (stopwords.words() was previously called per row).
table = str.maketrans('', '', string.punctuation)
stop_words = set(stopwords.words('turkish'))

# Rebuild the list from scratch so that re-running this cell does not append
# a second copy of every description.
review_lines = [
    _clean_tokens(word_tokenize(line), table, stop_words) for line in lines
]

In [None]:
import gensim

EMBEDDING_DIM = 200

# Gensim 4 renamed the constructor arguments `size` -> `vector_size` and
# `iter` -> `epochs`, and replaced `wv.vocab` by `wv.key_to_index`.
# Pick the spelling that matches the installed version so the cell runs on both.
_GENSIM_4_PLUS = int(gensim.__version__.split('.')[0]) >= 4

_w2v_kwargs = dict(
    sentences=review_lines,
    window=20,
    workers=5,
    min_count=30,
    negative=5,
)
if _GENSIM_4_PLUS:
    _w2v_kwargs.update(vector_size=EMBEDDING_DIM, epochs=20)
else:
    _w2v_kwargs.update(size=EMBEDDING_DIM, iter=20)

# train word2vec model
model = gensim.models.Word2Vec(**_w2v_kwargs)

# vocab size
words = list(model.wv.key_to_index if _GENSIM_4_PLUS else model.wv.vocab)
print('Vocabulary size: %d' % len(words))

In [None]:
# Export the trained word vectors as plain text in word2vec format.
filename = path + 'deneme.txt'
model.wv.save_word2vec_format(fname=filename, binary=False)

In [None]:
import os

# Map each word to its pre-trained vector, read from a word2vec text file.
# NOTE(review): this reads '200_egitim.txt' while the training cell writes
# 'deneme.txt' -- confirm that loading a different file is intentional.
embeddings_index = {}
# `with` closes the file even if a line fails to parse.
with open(path + '200_egitim.txt', encoding="utf-8-sig") as f:
    for line in f:
        values = line.split()
        if len(values) <= 2:
            # Blank line, or the "vocab_size dimension" header that the
            # word2vec text format starts with -- not a word vector.
            continue
        word = values[0]
        # Parse as numbers; without a dtype the array would hold strings.
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs


In [None]:
# Fixed sequence length; passed as `maxlen` to pad_sequences() in the tokenizing cell.
max_length =200

In [None]:
from tensorflow.python.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras.preprocessing.sequence import pad_sequences



# vectorize the text samples into a 2D integer tensor
# `num_words` limits the vocabulary used when texts are converted to sequences.
tokenizer_obj = Tokenizer(num_words=50000)
tokenizer_obj.fit_on_texts(review_lines)
sequences = tokenizer_obj.texts_to_sequences(review_lines)

word_index = tokenizer_obj.word_index
print('Found %s unique tokens.' % len(word_index))

# pad sequences to a common length
review_pad = pad_sequences(sequences, maxlen=max_length)
sentiment = df['Category'].values
print('Shape of review tensor:', review_pad.shape)
print('Shape of sentiment tensor:', sentiment.shape)

# Texts and labels are shuffled with the same index array below, so they
# must have the same number of rows.
if review_pad.shape[0] != sentiment.shape[0]:
    raise ValueError(
        'Number of texts (%d) and labels (%d) differ'
        % (review_pad.shape[0], sentiment.shape[0])
    )

# split the data into a training set and a validation set
N_TRAIN = 17786   # number of rows used for training; the rest is held out
SPLIT_SEED = 42   # fixed seed so the split is the same on every run
if not 0 < N_TRAIN < review_pad.shape[0]:
    raise ValueError(
        'N_TRAIN=%d leaves no held-out rows for a dataset of %d rows'
        % (N_TRAIN, review_pad.shape[0])
    )

indices = np.random.RandomState(SPLIT_SEED).permutation(review_pad.shape[0])
review_pad = review_pad[indices]
sentiment = sentiment[indices]

X_train, X_test = review_pad[:N_TRAIN], review_pad[N_TRAIN:]
y_train, y_test = sentiment[:N_TRAIN], sentiment[N_TRAIN:]

In [None]:
EMBEDDING_DIM = 200
# One row per tokenizer index plus one extra row, so that the largest index
# (len(word_index)) is still a valid row. Row 0 is never written here and
# stays all-zero (presumably the padding index -- confirm).
num_words = len(word_index) + 1
embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))

for word, i in word_index.items():
    # Valid rows are 0 .. num_words - 1, so the guard must be `>=`;
    # the former `>` let i == num_words through to an out-of-range write.
    if i >= num_words:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

In [None]:
# One-hot encode the integer labels for categorical cross-entropy.
# The ndim guard makes the cell safe to re-run: labels that are already
# one-hot (2-D) are left alone instead of being encoded a second time.
# NOTE(review): to_categorical(num_classes=5) needs integer labels in 0..4.
# Earlier commented-out code remapped label 5 to 0 -- confirm the label range
# of df['Category'].
if np.ndim(y_train) == 1:
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=5)
if np.ndim(y_test) == 1:
    y_test = tf.keras.utils.to_categorical(y_test, num_classes=5)

In [None]:
#test_samples_tokens = tokenizer_obj.texts_to_sequences(test_samples)
#test_samples_tokens_pad = pad_sequences(test_samples_tokens, maxlen=MAX_SEQUENCE_LENGTH)

#predict
#model.predict(x=test_samples_tokens_pad)

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers.embeddings import Embedding
from keras.initializers import Constant

In [None]:
class MultiHeadSelfAttention(layers.Layer):
    """Multi-head scaled dot-product self-attention.

    Queries, keys and values are all projected from the same input. The
    projections are split into ``num_heads`` heads, attention is computed per
    head, and the heads are concatenated and projected back to ``embed_dim``.

    Args:
        embed_dim: size of the last axis of the input and of the output.
        num_heads: number of attention heads; must divide ``embed_dim``.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``, ``dtype``).
            Accepting them is what allows ``from_config`` to rebuild the layer.

    Raises:
        ValueError: if ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads=8, **kwargs):
        super(MultiHeadSelfAttention, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer.

        Only plain values that ``__init__`` accepts are included. Sub-layer
        objects and derived attributes are rebuilt by ``__init__`` and must
        not appear here, otherwise ``from_config`` would pass unknown,
        non-serialisable keyword arguments to the constructor.
        """
        config = super().get_config().copy()
        config.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
        })
        return config

    def attention(self, query, key, value):
        """Compute softmax(Q K^T / sqrt(d_k)) V.

        Returns:
            Tuple ``(output, weights)``: the attended values and the
            attention weights (softmax over the last axis).
        """
        score = tf.matmul(query, key, transpose_b=True)
        # d_k is the size of the last axis of `key`.
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        """Reshape to (batch, -1, heads, projection_dim), then swap axes 1 and 2."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Apply self-attention.

        Args:
            inputs: tensor of shape [batch_size, seq_len, embedding_dim].

        Returns:
            Tensor of shape (batch_size, seq_len, embed_dim).
        """
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)  # (batch_size, seq_len, embed_dim)
        key = self.key_dense(inputs)  # (batch_size, seq_len, embed_dim)
        value = self.value_dense(inputs)  # (batch_size, seq_len, embed_dim)
        # Each of the following: (batch_size, num_heads, seq_len, projection_dim)
        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(
            attention, perm=[0, 2, 1, 3]
        )  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(
            attention, (batch_size, -1, self.embed_dim)
        )  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(
            concat_attention
        )  # (batch_size, seq_len, embed_dim)
        return output

In [None]:
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention plus a feed-forward network.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalisation.

    Args:
        embed_dim: size of the last axis of the input and of the output.
        num_heads: number of attention heads.
        ff_dim: hidden size of the feed-forward network.
        rate: dropout rate applied after each sub-layer.
        ff_activation: activation of the hidden feed-forward layer. Defaults
            to ``"relu"``. The block previously hard-coded ``"softmax"``,
            which normalises the hidden units to sum to 1; pass
            ``ff_activation="softmax"`` to reproduce a model trained that way.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1,
                 ff_activation="relu", **kwargs):
        super(TransformerBlock, self).__init__(**kwargs)
        # Keep the plain constructor arguments so get_config() can return them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.ff_activation = ff_activation

        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation=ff_activation), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def get_config(self):
        """Return the constructor arguments needed to re-create this block.

        Sub-layers are rebuilt by ``__init__`` and are therefore not part of
        the config; including layer objects would make the config
        non-serialisable and break ``from_config``.
        """
        config = super().get_config().copy()
        config.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'rate': self.rate,
            'ff_activation': self.ff_activation,
        })
        return config

    def call(self, inputs, training=None):
        """Run the block.

        Args:
            inputs: tensor whose last axis has size ``embed_dim``.
            training: forwarded to the dropout layers; ``None`` lets Keras
                decide from the calling context.

        Returns:
            Tensor with the same shape as ``inputs``.
        """
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)  # residual + norm
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)  # residual + norm

In [None]:
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: number of positions the position embedding table covers.
        vocab_size: number of rows in the token embedding table.
        embed_dim: size of each embedding vector.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super(TokenAndPositionEmbedding, self).__init__(**kwargs)
        # Keep the plain constructor arguments so get_config() can return them.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        config = super().get_config().copy()
        config.update({
            'maxlen': self.maxlen,
            'vocab_size': self.vocab_size,
            'embed_dim': self.embed_dim,
        })
        return config

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position.

        Positions are 0 .. (size of the last axis of ``x``) - 1.
        """
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [None]:
num_heads = 5  # Number of attention heads (EMBEDDING_DIM must be divisible by it)
max_length = 200  # Length of the padded input sequences
ff_dim = 200  # Hidden layer size in feed forward network inside transformer

inputs = layers.Input(shape=(max_length,))

# Frozen embedding initialised from the pre-trained word2vec matrix.
# It is built from tensorflow.keras (`layers`, `tf.keras.initializers`) like
# every other layer in this model. The standalone `keras` package was used
# here before; mixing the two Keras implementations in one graph is unsafe.
embedding_layer = layers.Embedding(
    num_words,
    EMBEDDING_DIM,
    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
    input_length=max_length,
    trainable=False,
)

x = embedding_layer(inputs)
transformer_block = TransformerBlock(EMBEDDING_DIM, num_heads, ff_dim)
x = transformer_block(x)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.5)(x)
# Hidden layers use ReLU. Softmax is kept for the output layer only: as a
# hidden activation it forces the units to sum to 1 and throws away most of
# the signal.
x = layers.Dense(512, activation="relu")(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(256, activation="relu")(x)

# Five output units, one per class, turned into probabilities by softmax.
outputs = layers.Dense(5, activation="softmax")(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

In [None]:
# Configure training: categorical cross-entropy loss, Adam optimiser, accuracy metric.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs in batches of 512, validating on the held-out split.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=512,
    epochs=10,
    verbose=1,
    validation_data=(X_test, y_test),
)

In [None]:
import numpy as np

# Compute the class probabilities here instead of relying on a `y_pred`
# variable that no earlier cell defines (it only exists after a later cell
# has been run, so this cell failed on a fresh top-to-bottom run).
y_pred_proba = model.predict(X_test)
# Index of the most probable class for every held-out sample.
np.argmax(y_pred_proba, axis=-1)

In [None]:
import keras

In [None]:
# Reload a previously saved model from Drive.
# NOTE(review): no cell in this notebook writes `path + 'model'`; it has to
# exist from an earlier session -- confirm.
# - `tf.keras` is used explicitly because the name `keras` has been rebound to
#   the standalone package by the preceding `import keras` cell.
# - The model contains custom layers, so their classes are passed via
#   `custom_objects` so that Keras can resolve them while deserialising.
model = tf.keras.models.load_model(
    path + 'model',
    custom_objects={
        'MultiHeadSelfAttention': MultiHeadSelfAttention,
        'TransformerBlock': TransformerBlock,
    },
)

In [None]:
# Keras models have no `load()` method, so `model = model.load()` always
# raised AttributeError. Loading is done by `load_model(...)`; this cell now
# only verifies that `model` is an object that can predict (and not, for
# example, the word2vec model that used the same variable name earlier).
if not callable(getattr(model, "predict", None)):
    raise TypeError("`model` is not a loaded Keras model (no `predict` method)")

In [None]:
# Print the layer-by-layer summary of the current `model`.
model.summary()

In [None]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa

In [None]:
# Predicted class id per sample: argmax over the softmax output.
# `predict_classes()` is not available on functional `keras.Model` objects
# (and was removed from Sequential in later TensorFlow releases).
y_pred = np.argmax(model.predict(X_test), axis=-1)

# Compare the true classes with the *predicted* classes. The padded input
# matrix `X_test` was passed as `predictions` before, which is not a label
# vector at all. `num_classes=5` keeps the matrix 5x5 even when a class is
# absent from the held-out split.
con_mat = tf.math.confusion_matrix(
    labels=y_test.argmax(axis=1),
    predictions=y_pred,
    num_classes=5,
).numpy()

In [None]:
# Class ids used as row/column labels of the confusion matrix.
classes = list(range(5))

In [None]:
# Normalise each row (true class) so that its entries sum to 1.
row_totals = con_mat.sum(axis=1)[:, np.newaxis].astype('float')
con_mat_norm = np.around(
    np.divide(
        con_mat.astype('float'),
        row_totals,
        # Rows with no true samples would divide by zero and produce NaN;
        # leave those rows at 0 instead.
        out=np.zeros(con_mat.shape, dtype='float'),
        where=row_totals != 0,
    ),
    decimals=2,
)

# Label rows and columns with the class ids for plotting.
con_mat_df = pd.DataFrame(con_mat_norm, index=classes, columns=classes)

In [None]:
import seaborn as sns

# Draw on an explicit Axes instead of the implicit pyplot "current" axes.
figure, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(con_mat_df, annot=True, cmap=plt.cm.Blues, ax=ax)
ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')
ax.set_title('Row-normalised confusion matrix')
# tight_layout() must run after the labels exist. It was called before them,
# so the labels were not taken into account when the layout was computed.
figure.tight_layout()
plt.show()