In [69]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [70]:
import pandas as pd

# Load the dataset
train_data = pd.read_csv('/content/drive/MyDrive/train.csv')
news_data = pd.read_excel('/content/drive/MyDrive/news.xls')

In [71]:
news_data.head()

Unnamed: 0,content,category,headline
0,"Dışişleri Bakanı Davutoğlu, Yunanistan ile Tür...",dünya,'Ortak vizyonumuz var'\r
1,İsrail Gazze Şeridi'nin kuzeyindeki bir tarlay...,dünya,İsrail'den Gazze Şeridi'ne hava saldırısı\r
2,Lübnan'ın başkenti Beyrut'ta düzenlenen bombal...,dünya,Cenaze için geniş güvenlik önlemleri alındı\r
3,KKTC'de Sendikal Platform genel grev başlattı....,dünya,Gözaltındaki sendikacılar serbest\r
4,"Türkiye'den yola çıkan Başak Bulut, Seçil Öznu...",dünya,Bisikletle Asya'da 3 bin kilometre yol katetti...


In [72]:
news_data.isnull().sum()

content     0
category    0
headline    0
dtype: int64

In [73]:
news_data.drop(columns=['category', 'headline'], inplace=True)

In [74]:
news_data.head()

Unnamed: 0,content
0,"Dışişleri Bakanı Davutoğlu, Yunanistan ile Tür..."
1,İsrail Gazze Şeridi'nin kuzeyindeki bir tarlay...
2,Lübnan'ın başkenti Beyrut'ta düzenlenen bombal...
3,KKTC'de Sendikal Platform genel grev başlattı....
4,"Türkiye'den yola çıkan Başak Bulut, Seçil Öznu..."


In [75]:
news_data.isnull().sum()

content    0
dtype: int64

In [76]:
# Train veri setinden 15480 ID'ye sahip satırı kaldırma (outlier)
train_data = train_data.drop(train_data[train_data['ID'] == 15480].index)

In [77]:
news_data.rename(columns={'content': 'Sentence'}, inplace=True)
merged_train = pd.concat([train_data, news_data], ignore_index=True)
merged_train['ID'] = range(len(merged_train))

In [78]:
merged_train.head()

Unnamed: 0,ID,Sentence
0,0,"sınıf , havuz ve açık deniz çalışmalarıyla , t..."
1,1,"bu standart , sualtında kendini rahat hisseden..."
2,2,"yapılan araştırmalar , öğrencilerin mevcut dal..."
3,3,"pdıc öğrencilerinde , psikolojik eğitim ve yet..."
4,4,"pdıc eğitiminin sağladığı güven ve rahatlık , ..."


In [79]:
merged_train.tail()

Unnamed: 0,ID,Sentence
94347,94347,Hizan'da jandarma ekipleri tarafından düzenlen...
94348,94348,"Rize'nin Ardeşen ilçesinde, tartışma üzerine b..."
94349,94349,"Eyüp'te düzenlenen operasyonda, 4 kilo 250 gra..."
94350,94350,"İslahiye ilçesinde, Suriyeli sığınmacıların ka..."
94351,94351,Adana'da tarım işçilerini taşıyan midibüs ile ...


In [80]:
merged_train.isnull().sum()

ID          0
Sentence    0
dtype: int64

In [81]:
merged_train.to_csv('/content/drive/MyDrive/merged_train.csv', index=False)

# Asciify the train set and create the final train dataset

In [82]:
import pandas as pd

def deasciify_sentence(sentence):
    asciify_map = {
        'a': 'a', 'A': 'A', 'b': 'b', 'B': 'B', 'c': 'c', 'C': 'C', 'ç': 'c', 'Ç': 'C', 'd': 'd', 'D': 'D', 'e': 'e', 'E': 'E', 'f': 'f', 'F': 'F', 'g': 'g', 'G': 'G',
        'ğ': 'g', 'Ğ': 'G', 'h': 'h', 'H': 'H', 'ı': 'i', 'I': 'I', 'i': 'i', 'İ': 'I', 'j': 'j', 'J': 'J', 'k': 'k', 'K': 'K', 'l': 'l', 'L': 'L', 'm': 'm', 'M': 'M',
        'n': 'n', 'N': 'N', 'o': 'o', 'O': 'O', 'ö': 'o', 'Ö': 'O', 'p': 'p', 'P': 'P', 'r': 'r', 'R': 'R', 's': 's', 'S': 'S', 'ş': 's', 'Ş': 'S', 't': 't', 'T': 'T',
        'u': 'u', 'U': 'U', 'ü': 'u', 'Ü': 'U', 'v': 'v', 'V': 'V', 'y': 'y', 'Y': 'Y', 'z': 'z', 'Z': 'Z'
    }


    deasciified_sentence = ''
    for char in sentence:
        if char in asciify_map:
            deasciified_sentence += asciify_map[char]
        else:
            deasciified_sentence += char
    return deasciified_sentence

# Read the CSV file
df = pd.read_csv('/content/drive/MyDrive/merged_train.csv')
df.dropna(inplace=True)
# Apply deasciification to the 'sentence' column
df['Sentence'] = df['Sentence'].apply(deasciify_sentence)

In [83]:
df.isnull().sum()

ID          0
Sentence    0
dtype: int64

In [84]:
df.tail()

Unnamed: 0,ID,Sentence
94347,94347,Hizan'da jandarma ekipleri tarafindan duzenlen...
94348,94348,"Rize'nin Ardesen ilcesinde, tartisma uzerine b..."
94349,94349,"Eyup'te duzenlenen operasyonda, 4 kilo 250 gra..."
94350,94350,"Islahiye ilcesinde, Suriyeli siginmacilarin ka..."
94351,94351,Adana'da tarim iscilerini tasiyan midibus ile ...


In [85]:
df.to_csv('/content/drive/MyDrive/merged_train_ascii.csv', index=False)

# Training

## Processing

In [86]:
import pandas as pd

train = pd.read_csv('/content/drive/MyDrive/final_train.csv')
train_ascii = pd.read_csv('/content/drive/MyDrive/final_deascii.csv')
test_data = pd.read_csv('/content/drive/MyDrive/test.csv')

train_ascii['Sentence'] = train_ascii['Sentence'].str.lower()
test_data['Sentence'] = test_data['Sentence'].str.lower()
merged_data = pd.merge(train_ascii, train, on='ID')

In [87]:
# Function to tokenize each letter in a sentence
def letter_tokenization(sentence):
    return list(sentence)

# Apply letter tokenization to each sentence in both columns
merged_data['Tokenized_x'] = merged_data['Sentence_x'].apply(letter_tokenization)
merged_data['Tokenized_y'] = merged_data['Sentence_y'].apply(letter_tokenization)
test_data['Tokenized'] = test_data['Sentence'].apply(letter_tokenization)

In [88]:
import numpy as np

# Create vocabulary
char_to_index = {}
index_to_char = {}

# Add special tokens
char_to_index['<PAD>'] = 0
index_to_char[0] = '<PAD>'
char_to_index['<UNK>'] = 1
index_to_char[1] = '<UNK>'
char_to_index['<EOS>'] = 2
index_to_char[2] = '<EOS>'

# Function to add words to vocabulary
def add_to_vocab(chars):
    for char in chars:
        if char not in char_to_index:
            char_to_index[char] = len(char_to_index)
            index_to_char[len(char_to_index) - 1] = char

# Create vocabulary from tokenized input and labels
merged_data['Tokenized_x'].apply(add_to_vocab)
merged_data['Tokenized_y'].apply(add_to_vocab)
test_data['Tokenized'].apply(add_to_vocab)

# Add <EOS> token to the end of each sentence
merged_data['Tokenized_x'] = merged_data['Tokenized_x'].apply(lambda x: x + ['<EOS>'])
merged_data['Tokenized_y'] = merged_data['Tokenized_y'].apply(lambda x: x + ['<EOS>'])
test_data['Tokenized'] = test_data['Tokenized'].apply(lambda x: x + ['<EOS>'])

In [89]:
# Convert tokens to indices
def tokens_to_indices(tokens):
    return [char_to_index[char] for char in tokens]

# Apply tokenization and indexing to the DataFrame
merged_data['Indexed_x'] = merged_data['Tokenized_x'].apply(tokens_to_indices)
merged_data['Indexed_y'] = merged_data['Tokenized_y'].apply(tokens_to_indices)

In [90]:
merged_data.head()

Unnamed: 0,ID,Sentence_x,Sentence_y,Tokenized_x,Tokenized_y,Indexed_x,Indexed_y
0,1,ekonomi,ekonomi,"[e, k, o, n, o, m, i, <EOS>]","[e, k, o, n, o, m, i, <EOS>]","[3, 4, 5, 6, 5, 7, 8, 2]","[3, 4, 5, 6, 5, 7, 8, 2]"
1,2,girisi,girişi,"[g, i, r, i, s, i, <EOS>]","[g, i, r, i, ş, i, <EOS>]","[9, 8, 10, 8, 11, 8, 2]","[9, 8, 10, 8, 144, 8, 2]"
2,3,guncelleme,güncelleme,"[g, u, n, c, e, l, l, e, m, e, <EOS>]","[g, ü, n, c, e, l, l, e, m, e, <EOS>]","[9, 12, 6, 13, 3, 14, 14, 3, 7, 3, 2]","[9, 145, 6, 13, 3, 14, 14, 3, 7, 3, 2]"
3,4,mit,mit,"[m, i, t, <EOS>]","[m, i, t, <EOS>]","[7, 8, 15, 2]","[7, 8, 15, 2]"
4,5,silahli,silahlı,"[s, i, l, a, h, l, i, <EOS>]","[s, i, l, a, h, l, ı, <EOS>]","[11, 8, 14, 16, 17, 14, 8, 2]","[11, 8, 14, 16, 17, 14, 146, 2]"


In [91]:
test_data.head()

Unnamed: 0,ID,Sentence,Tokenized
0,0,tr ekonomi ve politika haberleri turkiye nin ...,"[ , t, r, , e, k, o, n, o, m, i, , v, e, , ..."
1,1,uye girisi,"[ , u, y, e, , g, i, r, i, s, i, , <EOS>]"
2,2,son guncelleme 12:12,"[ , s, o, n, , g, u, n, c, e, l, l, e, m, e, ..."
3,3,imrali mit gorusmesi ihtiyac duyuldukca oluyor,"[ , i, m, r, a, l, i, , m, i, t, , g, o, r, ..."
4,4,suriye deki silahli selefi muhalifler yeni ku...,"[ , s, u, r, i, y, e, , d, e, k, i, , s, i, ..."


## Model

In [92]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Model parameters
vocab_size = len(char_to_index)
embedding_dim = 100
hidden_units = 64

# Define model architecture
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=True),
    LSTM(units=hidden_units, return_sequences=True),
    Dense(units=vocab_size, activation='softmax')
])

In [93]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Pad sequences to a fixed length
max_length = 1200  # train ~ 1800, test ~ 1100
padded_input = pad_sequences(merged_data['Indexed_x'], maxlen=max_length, padding='post')
padded_label = pad_sequences(merged_data['Indexed_y'], maxlen=max_length, padding='post')

# Convert to numpy arrays
padded_input = np.array(padded_input)
padded_label = np.array(padded_label)

In [94]:
# Compile model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model
history = model.fit(padded_input, padded_label, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
 406/2360 [====>.........................] - ETA: 43s - loss: 0.0951 - accuracy: 0.9620

KeyboardInterrupt: 

#### Predictions

In [95]:
# Make predictions on a subset of the training set (first 2 examples)
train_predictions = model.predict(padded_input[0:4])
# Display examples
for idx in range(4):
    print("Example", idx+1)
    print("Input:", ''.join([index_to_char[i] for i in padded_input[idx] if i != 0]))  # Remove padding
    print("Label:", ''.join([index_to_char[i] for i in padded_label[idx] if i != 0]))  # Remove padding
    # Get predicted indices for the current example
    predicted_indices = train_predictions[idx].argmax(axis=1)
    # Remove padding and stop at <EOS> token
    predicted_sentence = []
    for i in predicted_indices:
        if i == 0:  # Stop at padding
            break
        if index_to_char[i] == '<EOS>':  # Stop at <EOS>
            break
        predicted_sentence.append(index_to_char[i])
    print("Prediction:", ''.join(predicted_sentence))
    print()

Example 1
Input: ekonomi<EOS>
Label: ekonomi<EOS>
Prediction: Ekonomi

Example 2
Input: girisi<EOS>
Label: girişi<EOS>
Prediction: Girişi

Example 3
Input: guncelleme<EOS>
Label: güncelleme<EOS>
Prediction: Güncelleme

Example 4
Input: mit<EOS>
Label: mit<EOS>
Prediction: Mit



In [96]:
# Load the test dataset
test_data = pd.read_csv('/content/drive/My Drive/test.csv')

#test_data['Sentence'] = test_data['Sentence'].str.lower()

# Tokenize test sentences
test_data['tokenized_input'] = test_data['Sentence'].apply(letter_tokenization)

test_data['tokenized_input'] = test_data['tokenized_input'].apply(lambda x: x + ['<EOS>'])

# Convert test sentences to indices
test_data['indexed_input'] = test_data['tokenized_input'].apply(tokens_to_indices)

# Pad test sequences
padded_test_input = pad_sequences(test_data['indexed_input'], maxlen=max_length, padding='post')

# Predict labels for test data
test_predictions = model.predict(padded_test_input)

# Convert predicted indices to sentences
predicted_sentences = []
for prediction in test_predictions:
    predicted_sentence = []
    for i in prediction.argmax(axis=1):
        if i == 0:  # Stop at padding
            break
        if index_to_char[i] == '<EOS>':  # Stop at <EOS>
            break
        predicted_sentence.append(index_to_char[i])
    predicted_sentences.append(''.join(predicted_sentence))

# Add predicted sentences to test_data
test_data['Predicted_Sentence'] = predicted_sentences



In [97]:
test_data.head(20)

Unnamed: 0,ID,Sentence,tokenized_input,indexed_input,Predicted_Sentence
0,0,tr ekonomi ve politika haberleri turkiye nin ...,"[ , t, r, , e, k, o, n, o, m, i, , v, e, , ...","[31, 15, 10, 31, 3, 4, 5, 6, 5, 7, 8, 31, 18, ...",tr ekonomi ve politika haberleri türkiye nin ...
1,1,uye girisi,"[ , u, y, e, , g, i, r, i, s, i, , <EOS>]","[31, 12, 21, 3, 31, 9, 8, 10, 8, 11, 8, 31, 2]",uye girişi
2,2,son guncelleme 12:12,"[ , s, o, n, , g, u, n, c, e, l, l, e, m, e, ...","[31, 11, 5, 6, 31, 9, 12, 6, 13, 3, 14, 14, 3,...",son güncelleme 12:12
3,3,Imrali Mit gorusmesi ihtiyac duyuldukca oluyor,"[ , I, m, r, a, l, i, , M, i, t, , g, o, r, ...","[31, 178, 7, 10, 16, 14, 8, 31, 150, 8, 15, 31...",smrali sit görüşmesi ihtiyaç duyuldukça oluyor
4,4,Suriye deki silahli selefi muhalifler yeni ku...,"[ , S, u, r, i, y, e, , d, e, k, i, , s, i, ...","[31, 163, 12, 10, 8, 21, 3, 31, 22, 3, 4, 8, 3...",suriye deki silahlı selefi mühalifler yeni ku...
5,5,ancak olum haberleri savastan cok tek tarafli...,"[ , a, n, c, a, k, , o, l, u, m, , h, a, b, ...","[31, 16, 6, 13, 16, 4, 31, 5, 14, 12, 7, 31, 1...",ancak olum haberleri savaştan çok tek taraflı...
6,6,Israil in 4 uncu gunune giren Gazze saldirila...,"[ , I, s, r, a, i, l, , i, n, , 4, , u, n, ...","[31, 178, 11, 10, 16, 8, 14, 31, 8, 6, 31, 34,...",ssrail in 4 üncü gününe giren sazze saldırıla...
7,7,Serbes: Memecan sen mizahci misin,"[ , S, e, r, b, e, s, :, , M, e, m, e, c, a, ...","[31, 163, 3, 10, 19, 3, 11, 20, 31, 150, 3, 7,...",serbes: iemecan sen mizahçı misın
8,8,Muslum Gurses yogun bakimda,"[ , M, u, s, l, u, m, , G, u, r, s, e, s, , ...","[31, 150, 12, 11, 14, 12, 7, 31, 156, 12, 10, ...",suslum surses yoğun bakımda
9,9,takip et: wwwradikalcomtr,"[ , t, a, k, i, p, , e, t, :, , w, w, w, r, ...","[31, 15, 16, 4, 8, 23, 31, 3, 15, 20, 31, 54, ...",takip et: wwwradikalcomtr


In [98]:
# Save only 'Id' and 'Predicted_sentence' columns to a new CSV file
test_data[['ID', 'Predicted_Sentence']].to_csv('predictions_w_news_data.csv', index=False)

GOT 0.80648 SCORE IN KAGGLE

## Changing Model Architecture
use BiLSTM

In [99]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Bidirectional, GRU, Dropout, Activation

# Model parameters
vocab_size = len(char_to_index)
embedding_dim = 100
hidden_units = 128
dropout_rate = 0.2

# Define model architecture
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=True),
    Bidirectional(GRU(units=hidden_units, return_sequences=True)),
    Dropout(dropout_rate),
    Dense(units=vocab_size),
    Activation('softmax')
])

In [100]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Pad sequences to a fixed length
max_length = 1200  # train ~ 1800, test ~ 1100
padded_input = pad_sequences(merged_data['Indexed_x'], maxlen=max_length, padding='post')
padded_label = pad_sequences(merged_data['Indexed_y'], maxlen=max_length, padding='post')

# Convert to numpy arrays
padded_input = np.array(padded_input)
padded_label = np.array(padded_label)

In [101]:
# Compile model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model
history = model.fit(padded_input, padded_label, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
 353/2360 [===>..........................] - ETA: 1:20 - loss: 0.0394 - accuracy: 0.9875

KeyboardInterrupt: 

In [102]:
model.save("model_w_news_data.h5")

  saving_api.save_model(


#### Predictions

In [103]:
# Make predictions on a subset of the training set (first 4 examples)
train_predictions = model.predict(padded_input[0:4])
# Display examples
for idx in range(4):
    print("Example", idx+1)
    print("Input:", ''.join([index_to_char[i] for i in padded_input[idx] if i != 0]))  # Remove padding
    print("Label:", ''.join([index_to_char[i] for i in padded_label[idx] if i != 0]))  # Remove padding
    # Get predicted indices for the current example
    predicted_indices = train_predictions[idx].argmax(axis=1)
    # Remove padding and stop at <EOS> token
    predicted_sentence = []
    for i in predicted_indices:
        if i == 0:  # Stop at padding
            break
        if index_to_char[i] == '<EOS>':  # Stop at <EOS>
            break
        predicted_sentence.append(index_to_char[i])
    print("Prediction:", ''.join(predicted_sentence))
    print()

Example 1
Input: ekonomi<EOS>
Label: ekonomi<EOS>
Prediction: ekonomi

Example 2
Input: girisi<EOS>
Label: girişi<EOS>
Prediction: girişi

Example 3
Input: guncelleme<EOS>
Label: güncelleme<EOS>
Prediction: güncelleme

Example 4
Input: mit<EOS>
Label: mit<EOS>
Prediction: mit



In [104]:
# Load the test dataset
test_data = pd.read_csv('/content/drive/MyDrive/test.csv')

test_data['Sentence'] = test_data['Sentence'].str.lower()

# Tokenize test sentences
test_data['tokenized_input'] = test_data['Sentence'].apply(letter_tokenization)

test_data['tokenized_input'] = test_data['tokenized_input'].apply(lambda x: x + ['<EOS>'])

# Convert test sentences to indices
test_data['indexed_input'] = test_data['tokenized_input'].apply(tokens_to_indices)

# Pad test sequences
padded_test_input = pad_sequences(test_data['indexed_input'], maxlen=max_length, padding='post')

# Predict labels for test data
test_predictions = model.predict(padded_test_input)

# Convert predicted indices to sentences
predicted_sentences = []
for prediction in test_predictions:
    predicted_sentence = []
    for i in prediction.argmax(axis=1):
        if i == 0:  # Stop at padding
            break
        if index_to_char[i] == '<EOS>':  # Stop at <EOS>
            break
        predicted_sentence.append(index_to_char[i])
    predicted_sentences.append(''.join(predicted_sentence))

# Add predicted sentences to test_data
test_data['Predicted_Sentence'] = predicted_sentences



In [105]:
test_data.tail(30)

Unnamed: 0,ID,Sentence,tokenized_input,indexed_input,Predicted_Sentence
1127,1127,tecavuz sucundan hapse giren suclular genellik...,"[t, e, c, a, v, u, z, , s, u, c, u, n, d, a, ...","[15, 3, 13, 16, 18, 12, 24, 31, 11, 12, 13, 12...",tecavüz suçundan hapse giren suçlular genellik...
1128,1128,tekirdagin burgulusu cok meshurdur,"[t, e, k, i, r, d, a, g, i, n, , b, u, r, g, ...","[15, 3, 4, 8, 10, 22, 16, 9, 8, 6, 31, 19, 12,...",tekirdağın burgulüsu çok meshurdur
1129,1129,tencereleri bulasik makinesine yerlestirirken ...,"[t, e, n, c, e, r, e, l, e, r, i, , b, u, l, ...","[15, 3, 6, 13, 3, 10, 3, 14, 3, 10, 8, 31, 19,...",tencereleri bulaşık makinesine yerleştirirken ...
1130,1130,toplanilan isle murekkep yapardik,"[t, o, p, l, a, n, i, l, a, n, , i, s, l, e, ...","[15, 5, 23, 14, 16, 6, 8, 14, 16, 6, 31, 8, 11...",toplanılan işle mürekkep yapardık
1131,1131,trendeki kiza asilmasi cok buyuk ayipti,"[t, r, e, n, d, e, k, i, , k, i, z, a, , a, ...","[15, 10, 3, 6, 22, 3, 4, 8, 31, 4, 8, 24, 16, ...",trendeki kıza asılması çok büyük ayıptı
1132,1132,tulbent uzerine suzeni isinde buyuk bir ustali...,"[t, u, l, b, e, n, t, , u, z, e, r, i, n, e, ...","[15, 12, 14, 19, 3, 6, 15, 31, 12, 24, 3, 10, ...",tülbent üzerine süzeni isinde büyük bir ustalı...
1133,1133,turkiye ibisin anavatanindan biridir,"[t, u, r, k, i, y, e, , i, b, i, s, i, n, , ...","[15, 12, 10, 4, 8, 21, 3, 31, 8, 19, 8, 11, 8,...",türkiye ibisin anavatanından biridir
1134,1134,uykusunda bile fosurdatirdi,"[u, y, k, u, s, u, n, d, a, , b, i, l, e, , ...","[12, 21, 4, 12, 11, 12, 6, 22, 16, 31, 19, 8, ...",uykusunda bile fosurdatırdı
1135,1135,uzakdogudaki buruk gelenegi ulkeden ulkeye deg...,"[u, z, a, k, d, o, g, u, d, a, k, i, , b, u, ...","[12, 24, 16, 4, 22, 5, 9, 12, 22, 16, 4, 8, 31...",uzakdoğudaki büruk geleneği ülkeden ülkeye değ...
1136,1136,uzun zaman sonra bir araya gelip iki lafin bel...,"[u, z, u, n, , z, a, m, a, n, , s, o, n, r, ...","[12, 24, 12, 6, 31, 24, 16, 7, 16, 6, 31, 11, ...",uzun zaman sonra bir araya gelip iki lafın bel...


In [106]:
test_data[['ID', 'Predicted_Sentence']].to_csv('/content/drive/MyDrive/last_predictions.csv', index=False)

KAGGLE SCORE: 0.88761