In [None]:
import numpy as np
import pandas as pd

In [None]:
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, GRU, Embedding, CuDNNGRU
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras.preprocessing.sequence import pad_sequences

In [None]:
dataset = pd.read_csv('comments.csv')

In [None]:
dataset

In [None]:
target = dataset['Rating'].values.tolist()
data = dataset['Review'].values.tolist()

In [None]:
cutoff = int(len(data) * 0.80)
x_train, x_test = data[:cutoff], data[cutoff:]
y_train, y_test = target[:cutoff], target[cutoff:]

In [None]:
# Spot-check one raw training review.
x_train[500]

In [None]:
# A second raw review; position 800 is reused in later cells as the running example.
x_train[800]

In [None]:
# The rating stored for that same review.
y_train[800]

In [None]:
# Vocabulary size cap handed to the tokenizer (and later reused as the
# embedding layer's input_dim).
num_words = 10000
tokenizer = Tokenizer(num_words=num_words)

In [None]:
# Build the vocabulary from the training texts only. Fitting on the full
# corpus would let word statistics from the held-out test split influence the
# encoding, which makes the later test-set evaluation optimistic.
tokenizer.fit_on_texts(x_train)

In [None]:
# Word -> integer index mapping held by the fitted tokenizer.
# NOTE(review): this displays the whole mapping, which can be very long.
tokenizer.word_index

In [None]:
# Encode every training review as a list of integer tokens.
x_train_tokens = tokenizer.texts_to_sequences(x_train)

In [None]:
# Raw text of the running example ...
x_train[800]

In [None]:
# ... and its integer-token encoding.
print(x_train_tokens[800])

In [None]:
# Encode the test reviews with the same tokenizer.
x_test_tokens = tokenizer.texts_to_sequences(x_test)

In [None]:
num_tokens = [len(tokens) for tokens in x_train_tokens + x_test_tokens]
num_tokens = np.array(num_tokens)

In [None]:
np.mean(num_tokens)

In [None]:
np.max(num_tokens)

In [None]:
np.argmax(num_tokens)

In [None]:
# Show the longest review. num_tokens was built over x_train followed by
# x_test, so the argmax position must be looked up in that same combined
# ordering rather than hard-coded or taken from x_train alone.
longest_idx = int(np.argmax(num_tokens))
(x_train + x_test)[longest_idx]

In [None]:
# Sequence length budget: mean length plus two standard deviations,
# truncated to a whole number of tokens.
max_tokens = int(np.mean(num_tokens) + 2 * np.std(num_tokens))
max_tokens

In [None]:
# Fraction of reviews that fit into max_tokens without being truncated.
# A review of exactly max_tokens tokens still fits, hence <= rather than <.
np.sum(num_tokens <= max_tokens) / len(num_tokens)

In [None]:
# Bring every training sequence to the common length max_tokens so the
# result is a single 2-D array. Only maxlen is passed, so the padding and
# truncation sides follow the library defaults.
x_train_pad = pad_sequences(x_train_tokens, maxlen=max_tokens)

In [None]:
# Same target length for the test sequences.
x_test_pad = pad_sequences(x_test_tokens, maxlen=max_tokens)

In [None]:
x_train_pad.shape

In [None]:
x_test_pad.shape

In [None]:
# Running example before padding ...
np.array(x_train_tokens[800])

In [None]:
# ... and the same review after padding.
x_train_pad[800]

In [None]:
# Word -> integer id lookup held by the tokenizer.
idx = tokenizer.word_index
# Reverse lookup (integer id -> word), used to turn token lists back into text.
inverse_map = {token_id: word for word, token_id in idx.items()}

In [None]:
def tokens_to_string(tokens, index_to_word=None):
    """Turn a sequence of integer tokens back into a space-separated string.

    Parameters
    ----------
    tokens : iterable of int
        Token ids. The value 0 is skipped (the padded arrays in this
        notebook contain zeros that correspond to no word).
    index_to_word : mapping of int -> str, optional
        Lookup used to decode each token. When omitted, the notebook-level
        ``inverse_map`` is used, which keeps existing one-argument calls
        working unchanged.

    Returns
    -------
    str
        The decoded words joined by single spaces; an empty string when no
        non-zero tokens are given.

    Raises
    ------
    KeyError
        If a non-zero token is missing from the lookup.
    """
    if index_to_word is None:
        # Resolved at call time so the function always sees the current
        # notebook-level mapping.
        index_to_word = inverse_map
    return ' '.join(index_to_word[token] for token in tokens if token != 0)

In [None]:
# Original text of the running example ...
x_train[800]

In [None]:
# ... compared with the text rebuilt from its tokens.
tokens_to_string(x_train_tokens[800])

In [None]:
model = Sequential()

In [None]:
embedding_size = 50

In [None]:
model.add(Embedding(input_dim=num_words,
                    output_dim=embedding_size,
                    input_length=max_tokens,
                    name='embedding_layer'))

In [None]:
model.add(GRU(units=16, return_sequences=True))
model.add(GRU(units=8, return_sequences=True))
model.add(GRU(units=4))
model.add(Dense(1, activation='sigmoid'))

In [None]:
optimizer = Adam(lr=1e-3)

In [None]:
model.compile(loss='binary_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [None]:
model.summary()

In [None]:
# y_train is a plain Python list; convert it to an ndarray so the targets
# have the same array type as x_train_pad. Newer tf.keras versions reject an
# ndarray input paired with a list target.
model.fit(x_train_pad, np.array(y_train), epochs=5, batch_size=256)

In [None]:
# y_test is a plain Python list; convert it to an ndarray so the targets
# have the same array type as x_test_pad. Newer tf.keras versions reject an
# ndarray input paired with a list target.
result = model.evaluate(x_test_pad, np.array(y_test))

In [None]:
result[1]

In [None]:
y_pred = model.predict(x=x_test_pad[0:1000])
y_pred = y_pred.T[0]

In [None]:
cls_pred = np.array([1.0 if p>0.5 else 0.0 for p in y_pred])

In [None]:
cls_true = np.array(y_test[0:1000])

In [None]:
incorrect = np.where(cls_pred != cls_true)
incorrect = incorrect[0]

In [None]:
len(incorrect)

In [None]:
# Position of the first misclassified review.
# NOTE(review): this rebinds `idx`, which an earlier cell used for
# tokenizer.word_index; it also raises IndexError if `incorrect` is empty.
idx = incorrect[0]
idx

In [None]:
# `incorrect` indexes the first 1000 test reviews, which start at position 0
# of x_test, so the same index selects the raw text.
text = x_test[idx]
text

In [None]:
# Score the model produced for that review ...
y_pred[idx]

In [None]:
# ... and the label it should have received.
cls_true[idx]

In [None]:
# Hand-written reviews used to probe the trained model.
texts = [
    "bu ürün çok iyi herkese tavsiye ederim",
    "kargo çok hızlı aynı gün elime geçti",
    "büyük bir hayal kırıklığı yaşadım bu ürün bu markaya yakışmamış",
    "mükemmel",
    "tasarımı harika ancak kargo çok geç geldi ve ürün açılmıştı tavsiye etmem",
    "hiç resimde gösterildiği gibi değil",
    "kötü yorumlar gözümü korkutmuştu ancak hiçbir sorun yaşamadım teşekkürler",
    "hiç bu kadar kötü bir satıcıya denk gelmemiştim ürünü geri iade ediyorum",
    "tam bir fiyat performans ürünü",
    "beklediğim gibi çıkmadı",
]
# Keep the individual names available as well.
(text1, text2, text3, text4, text5,
 text6, text7, text8, text9, text10) = texts

In [None]:
# Encode the hand-written reviews with the same tokenizer used for training.
tokens = tokenizer.texts_to_sequences(texts)

In [None]:
# Bring them to the same length the model was built for.
tokens_pad = pad_sequences(tokens, maxlen=max_tokens)
tokens_pad.shape

In [None]:
# One sigmoid score per review.
model.predict(tokens_pad)