In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
file_path = '/content/drive/My Drive/Colab Notebooks/Research Techniques II/train-00000-of-00001.parquet'

In [5]:
import pandas as pd

df = pd.read_parquet(file_path)
df.head()

Unnamed: 0,text,labels
0,"Tempalqin, baralqin, miqren, elm, təhsil",neutral
1,"Başa düşmürəm, mən o vaxt onlardan necə gedə b...",negative
2,Taksto fener en sonuncu para kubok qazanivaosk...,neutral
3,Halal olsun admin heyyətinə. Tək tük səhifələr...,positive
4,Dunya seyaheti etmek ucun limitsiz bilet ve pul,positive


In [6]:
df = df[df['labels'].isin(["positive", "negative"])].copy()
df['labels'] = df['labels'].map({"negative": 0, "positive": 1})

In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Bidirectional, GRU, Dense
from tensorflow.keras.utils import to_categorical

In [7]:
# Tüm karakterleri tarayıp bir karakter -> index sözlüğü çıkar
all_text = ''.join(df['text'].astype(str).tolist()).lower()
char_set = sorted(list(set(all_text)))
char2idx = {c: i+1 for i, c in enumerate(char_set)}  # 0 = padding
idx2char = {i: c for c, i in char2idx.items()}
vocab_size = len(char2idx) + 1
print(f"Toplam karakter: {vocab_size}")

Toplam karakter: 302


In [8]:
def text_to_char_ids(text):
    return [char2idx.get(c, 0) for c in text.lower()]

X_seq = df['text'].astype(str).apply(text_to_char_ids).tolist()
maxlen = 300  # ortalama uzunluğa göre ayarlayabilirsin
X_pad = pad_sequences(X_seq, maxlen=maxlen, padding='post', truncating='post')

In [9]:
le = LabelEncoder()
y = le.fit_transform(df['labels'])  # 0-1 etikete çevir
y_cat = to_categorical(y)           # Keras için one-hot

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X_pad, y_cat, test_size=0.2, random_state=42, stratify=y)

In [14]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Bidirectional, GRU, Dense
from tensorflow.keras.callbacks import EarlyStopping

input_layer = Input(shape=(maxlen,))
x = Embedding(input_dim=vocab_size, output_dim=200, input_length=maxlen)(input_layer)
x = Bidirectional(GRU(128, dropout=0.3, recurrent_dropout=0.3))(x)
output = Dense(2, activation='softmax')(x)

model = Model(inputs=input_layer, outputs=output)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()



In [16]:
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [17]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=15,
    batch_size=128,
    callbacks=[early_stop]
)

Epoch 1/15
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 717ms/step - accuracy: 0.5362 - loss: 0.6879 - val_accuracy: 0.5634 - val_loss: 0.6768
Epoch 2/15
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 707ms/step - accuracy: 0.5635 - loss: 0.6749 - val_accuracy: 0.5666 - val_loss: 0.6719
Epoch 3/15
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 711ms/step - accuracy: 0.5811 - loss: 0.6687 - val_accuracy: 0.5962 - val_loss: 0.6584
Epoch 4/15
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 712ms/step - accuracy: 0.6125 - loss: 0.6506 - val_accuracy: 0.6427 - val_loss: 0.6262
Epoch 5/15
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 718ms/step - accuracy: 0.6495 - loss: 0.6172 - val_accuracy: 0.6555 - val_loss: 0.6079
Epoch 6/15
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 716ms/step - accuracy: 0.6650 - loss: 0.6083 - val_accuracy: 0.6748 - val_loss: 0.5913
Epoc

In [18]:
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.4f}")

[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 190ms/step - accuracy: 0.7314 - loss: 0.5218
Test Accuracy: 0.7325
