In [1]:
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers,models
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences



In [2]:
# Dataset configuration
max_features = 10_000  # vocabulary size: keep only the 10,000 most frequent words
maxlen = 100  # maximum length of each review, in tokens

In [3]:
# Load the IMDB review dataset as (train, test) pairs of (sequences, labels).
# `num_words=max_features` restricts the vocabulary to the configured size.
(x_train,y_train),(x_test,y_test) = imdb.load_data(num_words=max_features)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:
# Bring every review to a length of at most `maxlen` (100) tokens,
# applying the same padding call to the training and the test split.
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)

In [5]:
word_index = imdb.get_word_index() # fetch the IMDB word -> index mapping

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [6]:
# Build the reverse lookup (token id -> word) used to turn encoded reviews back into text
reverse_word_index = {}
for word, index in word_index.items():
  # word indices are shifted by 3 so that ids 0-3 stay free for the special tokens below
  reverse_word_index[index + 3] = word

reverse_word_index.update({
    0: "<PAD>",    # padding
    1: "<START>",
    2: "<UNK>",
    3: "<UNUSED>",
})

In [7]:
# Helper for printing example texts

def decode_review(encoded_review):
  """Convert a sequence of token ids back into a space-separated string.

  Each id is looked up in the module-level ``reverse_word_index``; ids that
  are not present there are rendered as "?".
  """
  words = []
  for token_id in encoded_review:
    words.append(reverse_word_index.get(token_id, "?"))
  return " ".join(words)


In [8]:
# Print three randomly chosen training reviews together with their labels
random_indices = np.random.choice(len(x_train), size=3, replace=False)  # 3 distinct row indices
for i in random_indices:
  # one print call per sample: review line, label line, then an empty separator line
  print(f"Yorum: {decode_review(x_train[i])}", f"Etiket: {y_train[i]}", "", sep="\n")

Yorum: you cold and wondering why you gave it the time br br this show has some of <UNK> best comic actors put together in a story that is silly and <UNK> and the outcome is hilarious the dialogue and visual comedy is beautifully delivered and the two leads <UNK> and lowe are superb together this was made for them br br i can't really say anymore other than to <UNK> you to find this and watch it you won't be disappointed and in a world devoid of <UNK> humour this is a classic inane and harmless piece of comedic brilliance
Etiket: 1

Yorum: them in it until recently the first one i saw was <UNK> in the rain that made me a fan of <UNK> i think that is better too but i thought that this movie was good and like all movies there are some parts that are better than others but in my book it's an awesome movie and i love it frank and gene make a good team i have yet to see them together in take me out to the <UNK> but i'm sticking to my guns <UNK> saying that i really enjoyed it and that i lov

In [20]:
# Transformer layer

class TransformerBlock(layers.Layer):
  """One Transformer encoder block.

  Applies multi-head self-attention followed by a two-layer feed-forward
  network. Each sub-block is wrapped as ``LayerNorm(x + Dropout(sublayer(x)))``.

  Args:
    embed_size: Size of the last dimension of the input; the feed-forward
      network expands to ``4 * embed_size`` and projects back to ``embed_size``.
    heads: Number of attention heads.
    dropout_rate: Dropout rate applied to both sub-block outputs.
    **kwargs: Standard Keras layer arguments (e.g. ``name``, ``dtype``).
  """

  def __init__(self,embed_size,heads,dropout_rate = 0.3,**kwargs):
    super().__init__(**kwargs)

    # kept so that get_config() can re-create the layer
    self.embed_size = embed_size
    self.heads = heads
    self.dropout_rate = dropout_rate

    # key_dim is passed as the full embed_size (it is not divided by the number of heads)
    self.attention = layers.MultiHeadAttention(num_heads=heads,key_dim=embed_size)
    self.norm1 = layers.LayerNormalization(epsilon=1e-6) # normalization after attention
    self.norm2 = layers.LayerNormalization(epsilon=1e-6) # normalization after feed-forward

    self.feed_forward = tf.keras.Sequential([
        layers.Dense(4*embed_size,activation="relu"),
        layers.Dense(embed_size)
    ])
    self.dropout1 = layers.Dropout(dropout_rate)
    self.dropout2 = layers.Dropout(dropout_rate)

  def call(self,x,training=None):
    """Run the block on ``x`` and return a tensor of the same shape.

    Args:
      x: Input tensor; it is used as both query and value of the attention.
      training: Forwarded to the dropout layers. Defaults to ``None`` so the
        layer can be called without an explicit flag.
    """
    # self-attention sub-block with residual connection
    attention = self.attention(x,x)
    x = self.norm1(x+self.dropout1(attention,training=training))
    # feed-forward sub-block with residual connection
    feed_forward = self.feed_forward(x)
    x = self.norm2(x+self.dropout2(feed_forward,training=training))

    return x

  def get_config(self):
    """Return the constructor arguments so the layer can be serialized."""
    config = super().get_config()
    config.update({
        "embed_size": self.embed_size,
        "heads": self.heads,
        "dropout_rate": self.dropout_rate,
    })
    return config

In [21]:
# Transformer model
class TransformerModel(models.Model):
  """Sequence classifier: embedding -> stacked TransformerBlocks -> average pooling -> dense.

  Args:
    num_layers: Number of TransformerBlock layers to stack.
    embed_size: Embedding dimension, also passed to every TransformerBlock.
    heads: Number of attention heads per block.
    input_dim: Vocabulary size of the embedding layer.
    output_dim: Number of units of the final sigmoid layer.
    dropout_rate: Dropout rate for the blocks and for the dropout before the output layer.
    **kwargs: Standard Keras model arguments (e.g. ``name``).
  """

  def __init__(self,num_layers,embed_size,heads,input_dim,output_dim,dropout_rate=0.1,**kwargs):
    super().__init__(**kwargs)

    self.embedding = layers.Embedding(input_dim=input_dim,output_dim=embed_size)
    self.transformer_blocks = [TransformerBlock(embed_size,heads,dropout_rate) for _ in range(num_layers)]
    self.global_avg_pooling = layers.GlobalAveragePooling1D()
    self.dropout = layers.Dropout(dropout_rate)
    self.fc = layers.Dense(output_dim,activation="sigmoid")

  def call(self,x,training=None):
    """Compute the sigmoid output for a batch of token-id sequences.

    Args:
      x: Batch of integer token ids fed to the embedding layer.
      training: Forwarded to the transformer blocks and the dropout layer.
        Defaults to ``None`` so the model can be called without an explicit flag.
    """
    x = self.embedding(x)
    for transformer in self.transformer_blocks:
      x = transformer(x,training=training)
    x = self.global_avg_pooling(x) # average over the sequence axis
    x = self.dropout(x,training=training)
    x = self.fc(x)
    return x

In [22]:
# Model setup

# hyperparameters
num_layers = 4 # number of transformer blocks
embed_size = 64
num_heads = 4 # number of attention heads
input_dim = max_features
output_dim = 1 # binary classification (0/1)
dropout_rate = 0.1

# create the model
model = TransformerModel(
    num_layers,embed_size,num_heads,input_dim,output_dim,dropout_rate)

# Build the model by actually calling it on one dummy batch of token ids.
# NOTE(review): `model.build(input_shape=...)` on a subclassed model may leave
# the sub-layers without weights in newer Keras versions, in which case
# `model.summary()` cannot report parameter counts; a real forward pass
# creates the weights regardless of version.
model(tf.zeros((1,maxlen),dtype=tf.int32),training=False)

# compile
model.compile(optimizer="adam",
              loss="binary_crossentropy",
              metrics=["accuracy"])


model.summary()



In [None]:
# Train the model; the test split is passed as the validation data
history = model.fit(
    x_train,
    y_train,
    batch_size=1024,
    epochs=5,
    validation_data=(x_test, y_test),
)

In [None]:
# Plot the training/validation curves: loss on the left, accuracy on the right
fig = plt.figure()
for position, metric in enumerate(("loss", "accuracy"), start=1):
  ax = fig.add_subplot(1, 2, position)
  # training curve first, then the matching validation curve
  for prefix, key in (("train", metric), ("val", f"val_{metric}")):
    ax.plot(history.history[key], label=f"{prefix}_{metric}")
  ax.set_xlabel("Epochs")
  ax.set_ylabel(metric.capitalize())
  ax.set_title(f"Training and Validation {metric.capitalize()}")
  ax.grid()
  ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Score a piece of user-supplied text
def predict_sentiment(model,text,word_index,maxlen,num_words=None,index_from=3):
  """Return the model's sigmoid score for a raw review string.

  The text is encoded the same way ``imdb.load_data`` encodes the training
  data: a start token (1) is prepended, every known word is mapped to
  ``word_index[word] + index_from``, and unknown words as well as ids outside
  the vocabulary become the out-of-vocabulary token (2). Without the offset
  and the cap, ids would point at the wrong words or fall outside the
  embedding table.

  Args:
    model: Trained model exposing ``predict``.
    text: Raw review text; it is lower-cased and split on whitespace.
    word_index: Word -> rank mapping as returned by ``imdb.get_word_index()``.
    maxlen: Sequence length the model was trained with.
    num_words: Vocabulary size used at training time. Defaults to the
      notebook-level ``max_features``.
    index_from: Offset added to every word rank (3 matches the
      ``reverse_word_index`` construction in this notebook).

  Returns:
    The first element of the first prediction row, i.e. the score of the text.
  """
  if num_words is None:
    num_words = max_features

  start_id, oov_id = 1, 2 # same special ids as "<START>" / "<UNK>" in reverse_word_index

  encoded_text = [start_id]
  for word in text.lower().split():
    rank = word_index.get(word)
    token_id = oov_id if rank is None else rank + index_from
    if token_id >= num_words:
      # word is outside the vocabulary the embedding layer was built for
      token_id = oov_id
    encoded_text.append(token_id)

  padded_text = pad_sequences([encoded_text],maxlen=maxlen) # pad / truncate to maxlen
  prediction = model.predict(padded_text) # run inference on the single-row batch
  return prediction[0][0]


In [None]:
# Word -> index mapping of the IMDB dataset.
# NOTE(review): this repeats the `imdb.get_word_index()` call made earlier in the notebook.
word_index = imdb.get_word_index()

In [None]:
# Read a review from the user, score it, and report the result
user_input = input("Bir film yorumu girin:")
sentiment_score = predict_sentiment(model,user_input,word_index,maxlen)

# scores strictly above 0.5 are reported as positive, everything else as negative
print(f"{'Pozitif' if sentiment_score > 0.5 else 'Negatif'} bir yorum. -> skor: {sentiment_score}")