In [None]:
!wget https://www.dropbox.com/s/fz2d3s2ngq8aw2e/turkish_movie_sentiment_dataset.csv?dl=1 -O dataset.csv

In [None]:
import numpy as np
import pandas as pd

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, GRU, Embedding, Dropout, SimpleRNN, Conv1D
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

In [None]:
# Read the downloaded CSV into a DataFrame and preview its first five rows.
dataset = pd.read_csv(filepath_or_buffer='dataset.csv')
dataset.head(n=5)

In [None]:
#özge
def convert(point, threshold=2.5):
  """Turn a rating into a binary sentiment label.

  Args:
    point: Rating written with a decimal comma (e.g. ``"4,5"``). Ratings
      without a fractional part (``"4"``) and plain numbers are accepted too.
    threshold: Lowest rating that still counts as positive. Defaults to 2.5.

  Returns:
    1 if the rating is greater than or equal to ``threshold``, otherwise 0.

  Raises:
    ValueError: If ``point`` cannot be parsed as a number or is NaN.
  """
  # Normalise the decimal comma so float() can parse the value; this reads
  # "2,05" as 2.05 instead of treating the digits after the comma as tenths.
  rating = float(str(point).strip().replace(',', '.'))
  # NaN is the only float that differs from itself; refuse it rather than
  # silently labelling a missing rating as negative.
  if rating != rating:
    raise ValueError(f"rating is missing or not a number: {point!r}")
  return 1 if rating >= threshold else 0

In [None]:
# Binary sentiment label for every row, derived from the 'point' column.
data_point = dataset['point'].map(convert)
target = data_point.to_numpy().tolist()

# Raw review texts, in the same row order as `target`.
data = dataset['comment'].to_numpy().tolist()

In [None]:
# Sequential 80/20 split: the first 80% of rows train, the remainder tests.
# Rows are taken in file order; nothing is shuffled here.
cutoff = int(0.8 * len(data))
x_train, y_train = data[:cutoff], target[:cutoff]
x_test, y_test = data[cutoff:], target[cutoff:]

In [None]:
# Vocabulary size limit passed to the tokenizer; the Embedding layer reuses
# the same value as its input dimension.
num_words = 10000
tokenizer = Tokenizer(num_words=num_words)
# Build the vocabulary from the training texts only, so word statistics of
# the held-out reviews do not leak into preprocessing.
tokenizer.fit_on_texts(x_train)

In [None]:
# Encode each training review as a list of integer word indices, then show
# review #100 next to its encoding as a quick sanity check.
x_train_tokens = tokenizer.texts_to_sequences(texts=x_train)
(x_train[100], x_train_tokens[100])

In [None]:
# Encode the held-out reviews with the same tokenizer and inspect review #100.
x_test_tokens = tokenizer.texts_to_sequences(texts=x_test)
(x_test[100], x_test_tokens[100])

In [None]:
# Token count of every review (training reviews first, then test reviews).
num_tokens = np.array([len(seq) for seq in (*x_train_tokens, *x_test_tokens)])
# Mean length, longest length, and position of the longest review.
num_tokens.mean(), num_tokens.max(), num_tokens.argmax()

In [None]:
# Sequence-length cap: mean length plus two standard deviations, truncated
# to an integer.
max_tokens = int(num_tokens.mean() + 2 * num_tokens.std())
max_tokens

In [None]:
# Fraction of reviews that are strictly shorter than the length cap.
(num_tokens < max_tokens).sum() / num_tokens.size

In [None]:
# Bring every sequence to exactly `max_tokens` entries. The pad_sequences
# defaults are spelled out: zeros are added, and overflow removed, at the front.
pad_options = dict(maxlen=max_tokens, padding='pre', truncating='pre')
x_train_pad = pad_sequences(x_train_tokens, **pad_options)
x_test_pad = pad_sequences(x_test_tokens, **pad_options)
x_train_pad.shape, x_test_pad.shape

In [None]:
# word -> index mapping built by the tokenizer, and its inverse
# (index -> word) used to decode token sequences back to text.
idx = tokenizer.word_index
inverse_map = {index: word for word, index in idx.items()}

In [None]:
def tokens_to_string(tokens, index_to_word=None):
    """Decode a sequence of integer tokens into a space-separated string.

    Args:
        tokens: Iterable of integer word indices. Entries equal to 0 are
            skipped, so zero-padded sequences decode cleanly.
        index_to_word: Optional mapping from word index to word. When omitted,
            the notebook-level ``inverse_map`` is used.

    Returns:
        The decoded words joined by single spaces; an empty string when there
        are no non-zero tokens.

    Raises:
        KeyError: If a non-zero token is missing from the mapping.
    """
    if index_to_word is None:
        # Looked up at call time so the function follows whichever inverse
        # map the notebook currently holds.
        index_to_word = inverse_map
    return ' '.join(index_to_word[token] for token in tokens if token != 0)

In [None]:
# Round-trip check: show training review #100 as tokens and decoded back to text.
sample_tokens = x_train_tokens[100]
sample_tokens, tokens_to_string(sample_tokens)

In [None]:
embedding_size = 35

# The layer stack is collected in order and handed to Sequential in one go.
stack = [
    Embedding(input_dim=num_words,
              output_dim=embedding_size,
              input_length=max_tokens,
              name='embedding_layer'),
]

# Four Conv1D -> LeakyReLU -> GRU stages; the conv width goes 64/32/16/8 and
# each GRU has half as many units as the convolution in front of it.
for n_filters in (64, 32, 16, 8):
    stack.append(Conv1D(filters=n_filters, kernel_size=2, padding="same", activation=None))
    stack.append(tf.keras.layers.LeakyReLU())
    # Every GRU but the last returns the full sequence so the next Conv1D
    # still receives a time axis; the last one returns only its final state.
    stack.append(GRU(units=n_filters // 2, return_sequences=(n_filters != 8)))

# Dense head ending in a single unit with no activation (a raw logit).
stack.extend([
    Dense(16, activation='relu'),
    Dense(4, activation='relu'),
    Dense(1),
])

model = Sequential(stack)
model.summary()

In [None]:
def accuracy(y_true, y_pred):
    """Binary accuracy for a model whose output is a raw logit.

    The logit is passed through a sigmoid before the default 0.5 threshold is
    applied, which is the same as thresholding the logit at 0. The function is
    named ``accuracy`` so Keras logs it as ``accuracy`` / ``val_accuracy``.

    Args:
        y_true: Ground-truth 0/1 labels.
        y_pred: Raw logits produced by the model.

    Returns:
        Whatever ``tf.keras.metrics.binary_accuracy`` returns for the labels
        and the sigmoid-transformed predictions.
    """
    return tf.keras.metrics.binary_accuracy(y_true, tf.sigmoid(y_pred))


# `learning_rate` is the supported argument name; the legacy `lr` alias is
# deprecated and newer Keras releases warn about, ignore, or reject it.
optimizer = RMSprop(learning_rate=1e-3)
# The loss consumes logits directly (from_logits=True), so the metric has to
# be logit-aware as well; the plain 'accuracy' string would threshold the raw
# logit at 0.5.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=optimizer,
              metrics=[accuracy])

In [None]:
# Single source of truth for the batch size used by both pipelines and by
# the step arithmetic below.
batch_size = 256
# Bind the class through the already-imported `tf` instead of importing
# mid-notebook; the `Dataset` name stays available for any later cell.
Dataset = tf.data.Dataset

# Training pipeline: (padded tokens, label) pairs, shuffled with a
# 1024-element buffer and then batched.
train_ds = Dataset.zip((Dataset.from_tensor_slices(x_train_pad), Dataset.from_tensor_slices(y_train)))
train_ds = train_ds.shuffle(1024).batch(batch_size)
# Validation pipeline: same pairing, batched in order without shuffling.
val_ds = Dataset.zip((Dataset.from_tensor_slices(x_test_pad), Dataset.from_tensor_slices(y_test)))
val_ds = val_ds.batch(batch_size)

# Ceiling division, so the final partial batch is counted: one epoch is then
# exactly one pass over the data, every validation sample is evaluated, and a
# dataset smaller than one batch still yields one step instead of zero.
steps_per_epoch = -(-len(x_train_pad) // batch_size)
validation_steps = -(-len(x_test_pad) // batch_size)

In [None]:
# Learning-rate schedule driven by validation accuracy (higher is better):
# after one epoch without improvement the rate is multiplied by 0.2, and it
# is never lowered below 1e-7.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    mode='max',
    factor=0.2,
    patience=1,
    min_lr=1e-7,
)

In [None]:
# Train for five epochs. Both datasets repeat indefinitely, so the explicit
# step counts decide where each training epoch and validation run ends.
model.fit(
    train_ds.repeat(),
    epochs=5,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds.repeat(),
    validation_steps=validation_steps,
    callbacks=[reduce_lr],
)