In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import random

data = pd.read_csv("real_nonce.csv")
df = pd.DataFrame(data)
teacher = df["prime"]
learner = df["target"]

pairs = list(zip(teacher, learner))

In [2]:


# Character processing remains the same
all_text = [char for src, tgt in pairs for char in src + tgt]
chars = sorted(set(all_text))
char2idx = {c: i+4 for i, c in enumerate(chars)}
char2idx["<PAD>"] = 0
char2idx["<SOS>"] = 1
char2idx["<EOS>"] = 2
char2idx["<UNK>"] = 3
idx2char = {i: c for c, i in char2idx.items()}
vocab_size = len(char2idx)

class VerbPairDataset(Dataset):
    def __init__(self, pairs, char2idx, max_len=20):
        self.pairs = pairs
        self.char2idx = char2idx
        self.max_len = max_len

    def encode(self, word, add_sos=False):
        if not word:  # Handle empty input
            word = "<UNK>"
        seq = [char2idx.get(c, char2idx["<UNK>"]) for c in word]
        if add_sos:
            seq = [char2idx["<SOS>"]] + seq
        seq = seq + [char2idx["<EOS>"]]
        seq += [char2idx["<PAD>"]] * (self.max_len - len(seq))
        return seq[:self.max_len]

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        src, tgt = self.pairs[idx]
        src_encoded = self.encode(src)
        tgt_encoded = self.encode(tgt, add_sos=True)
        return torch.tensor(src_encoded), torch.tensor(tgt_encoded)
    
class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super().__init__()
        self.attn = nn.Linear(hidden_dim * 2, hidden_dim)
        self.v = nn.Linear(hidden_dim, 1, bias=False)

    def forward(self, hidden, encoder_outputs):
        # hidden: (1, batch_size, hidden_dim)
        # encoder_outputs: (batch_size, seq_len, hidden_dim)
        hidden = hidden.squeeze(0)  # (batch_size, hidden_dim)
        
        # Repeat hidden for each encoder output
        hidden_expanded = hidden.unsqueeze(1).expand(-1, encoder_outputs.shape[1], -1)  # (batch_size, seq_len, hidden_dim)
        
        # Concatenate and score
        energy = torch.tanh(self.attn(torch.cat((hidden_expanded, encoder_outputs), dim=2)))  # (batch_size, seq_len, hidden_dim)
        attention_scores = self.v(energy).squeeze(2)  # (batch_size, seq_len)
        
        return torch.softmax(attention_scores, dim=1)
        
class Encoder(nn.Module):
    def __init__(self, vocab_size, emb_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, hidden_dim)
        self.ln = nn.LayerNorm(hidden_dim)

    def forward(self, x):
        embedded = self.embedding(x)
        outputs, (hidden, cell) = self.lstm(embedded)

        # Merge bidirectional outputs
        outputs = self.fc(outputs)  # (batch_size, seq_len, hidden_dim)
        outputs = self.ln(outputs)

        # Adjust hidden state (sum bidirectional states)
        hidden = hidden.view(2, -1, hidden.shape[2])  # (2, batch_size, hidden_dim)
        hidden = torch.sum(hidden, dim=0).unsqueeze(0)  # (1, batch_size, hidden_dim)
        hidden = torch.tanh(hidden)

        # Ensure cell state is also 3D
        cell = cell.view(2, -1, cell.shape[2])
        cell = torch.sum(cell, dim=0).unsqueeze(0)

        return outputs, hidden, cell
        
class Decoder(nn.Module):
    def __init__(self, vocab_size, emb_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        self.lstm = nn.LSTM(emb_dim + hidden_dim, hidden_dim, batch_first=True)
        self.attention = Attention(hidden_dim)
        self.fc = nn.Linear(hidden_dim * 2, vocab_size)
        self.ln = nn.LayerNorm(hidden_dim)

    def forward(self, x, hidden, cell, encoder_outputs):
        x = x.unsqueeze(1)  # (batch_size, 1)
        embedded = self.embedding(x)  # (batch_size, 1, emb_dim)

        # Ensure hidden/cell are 3D: (num_layers, batch_size, hidden_dim)
        if hidden.dim() == 2:
            hidden = hidden.unsqueeze(0)  # (1, batch_size, hidden_dim)
        if cell.dim() == 2:
            cell = cell.unsqueeze(0)

        # Calculate attention
        attn_weights = self.attention(hidden, encoder_outputs)  # (batch_size, seq_len)
        context = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs)  # (batch_size, 1, hidden_dim)

        # Combine embedding and context
        lstm_input = torch.cat((embedded, context), dim=2)  # (batch_size, 1, emb_dim + hidden_dim)

        # LSTM step
        output, (hidden, cell) = self.lstm(lstm_input, (hidden, cell))
        output = self.ln(output)

        # Final prediction
        output = torch.cat((output.squeeze(1), context.squeeze(1)), dim=1)
        prediction = self.fc(output)

        return prediction, hidden, cell, attn_weights
def train_seq2seq(encoder, decoder, dataloader, epochs=20):
    criterion = nn.CrossEntropyLoss(ignore_index=char2idx["<PAD>"])
    optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=0.0001)
    
    for epoch in range(epochs):
        total_loss = 0
        teacher_forcing_ratio = 0.8 * (0.9 ** epoch)
        
        for src, tgt in dataloader:
            optimizer.zero_grad()
            
            if src.nelement() == 0 or tgt.nelement() == 0:
                continue
                
            # Encoder forward (returns all outputs)
            encoder_outputs, hidden, cell = encoder(src)
            input_token = tgt[:, 0]
            loss = 0
            
            for t in range(1, tgt.shape[1]):
                # Decoder with attention
                output, hidden, cell, _ = decoder(
                    input_token, hidden, cell, encoder_outputs
                )
                step_loss = criterion(output, tgt[:, t])
                
                if torch.isnan(step_loss).any():
                    continue
                    
                loss += step_loss
                teacher_force = random.random() < teacher_forcing_ratio
                input_token = tgt[:, t] if teacher_force else output.argmax(1)
            
            if torch.isnan(loss).any() or loss == 0:
                continue
                
            loss.backward()
            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 1.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 1.0)
            optimizer.step()
            
            total_loss += loss.item() / tgt.shape[1]
        
        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")


In [15]:
# Initialize with larger batch size
dataset = VerbPairDataset(pairs, char2idx)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)  # Increased from 2 to 16

encoder = Encoder(vocab_size, emb_dim=64, hidden_dim=128)
decoder = Decoder(vocab_size, emb_dim=64, hidden_dim=128)

train_seq2seq(encoder, decoder, dataloader, epochs=200)



Epoch 1, Loss: 1.6551
Epoch 2, Loss: 1.2270
Epoch 3, Loss: 1.0619
Epoch 4, Loss: 1.0409
Epoch 5, Loss: 0.9926
Epoch 6, Loss: 0.9805
Epoch 7, Loss: 0.9445
Epoch 8, Loss: 0.9570
Epoch 9, Loss: 0.9485
Epoch 10, Loss: 0.9909
Epoch 11, Loss: 0.9856
Epoch 12, Loss: 0.9636
Epoch 13, Loss: 0.9487
Epoch 14, Loss: 0.9067
Epoch 15, Loss: 0.9698
Epoch 16, Loss: 0.9602
Epoch 17, Loss: 0.9495
Epoch 18, Loss: 0.9432
Epoch 19, Loss: 0.9565
Epoch 20, Loss: 0.9475
Epoch 21, Loss: 0.8912
Epoch 22, Loss: 0.8783
Epoch 23, Loss: 0.8911
Epoch 24, Loss: 0.8889
Epoch 25, Loss: 0.8490
Epoch 26, Loss: 0.8898
Epoch 27, Loss: 0.8877
Epoch 28, Loss: 0.8511
Epoch 29, Loss: 0.8674
Epoch 30, Loss: 0.8431
Epoch 31, Loss: 0.8659
Epoch 32, Loss: 0.8226
Epoch 33, Loss: 0.7983
Epoch 34, Loss: 0.7861
Epoch 35, Loss: 0.7975
Epoch 36, Loss: 0.7775
Epoch 37, Loss: 0.7767
Epoch 38, Loss: 0.7694
Epoch 39, Loss: 0.7759
Epoch 40, Loss: 0.7388
Epoch 41, Loss: 0.7431
Epoch 42, Loss: 0.7557
Epoch 43, Loss: 0.7557
Epoch 44, Loss: 0.72

In [3]:
def predict(encoder, decoder, word, char2idx, idx2char, max_len=20):
    encoder.eval()
    decoder.eval()

    # Prepare input tensor
    seq = [char2idx.get(c, char2idx["<UNK>"]) for c in word] + [char2idx["<EOS>"]]
    seq += [char2idx["<PAD>"]] * (max_len - len(seq))
    src_tensor = torch.tensor([seq[:max_len]], dtype=torch.long)

    # Encoder forward
    with torch.no_grad():
        encoder_outputs, hidden, cell = encoder(src_tensor)

    # Start decoding
    input_token = torch.tensor([char2idx["<SOS>"]], dtype=torch.long)
    decoded_indices = []

    for _ in range(max_len):
        with torch.no_grad():
            output, hidden, cell, attn_weights = decoder(
                input_token, hidden, cell, encoder_outputs
            )
        prediction = output.argmax(1).item()

        if prediction == char2idx["<EOS>"]:
            break

        decoded_indices.append(prediction)
        input_token = torch.tensor([prediction], dtype=torch.long)

    return ''.join(idx2char[idx] for idx in decoded_indices)


In [4]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import editdistance


In [5]:
smoothie = SmoothingFunction().method4

def compute_metrics(row):
    pred = row["target_pred"]
    truth = row["target"]

    # Exact match
    is_exact = pred == truth

    # Edit distance
    edit_dist = editdistance.eval(pred, truth)

    # Character accuracy
    max_len = max(len(pred), len(truth))
    char_acc = 1 - (edit_dist / max_len) if max_len > 0 else 1.0

    # BLEU score (character-level)
    bleu = sentence_bleu(
        [list(truth)], list(pred), weights=(1, 0, 0, 0), smoothing_function=smoothie
    )

    return pd.Series([is_exact, edit_dist, char_acc, bleu],
                     index=["is_exact", "edit_distance", "char_accuracy", "bleu"])


In [6]:
df["target_pred"] = df["prime"].apply(lambda word: predict(encoder, decoder, word, char2idx, idx2char, max_len=20))

NameError: name 'encoder' is not defined

In [11]:
df[["is_exact", "edit_distance", "char_accuracy", "bleu"]] = df.apply(compute_metrics, axis=1)


In [12]:


print("Exact Match Accuracy:", df["is_exact"].mean())
print("Average Edit Distance:", df["edit_distance"].mean())
print("Average Character Accuracy:", df["char_accuracy"].mean())
print("Average BLEU Score:", df["bleu"].mean())


Exact Match Accuracy: 0.8866213151927438
Average Edit Distance: 0.42403628117913833
Average Character Accuracy: 0.9490202352447251
Average BLEU Score: 0.9609782440218038


In [25]:
# Save model and optimizer states
torch.save({
    'encoder_state_dict': encoder.state_dict(),
    'decoder_state_dict': decoder.state_dict(),
}, 'l2_prime_target_seq2seq_attention_model.pth')

In [3]:
# Rebuild the models with the same dimensions used during training
emb_dim = 64        # or whatever value you used
hidden_dim = 128    # match your training setup

encoder = Encoder(vocab_size, emb_dim, hidden_dim)
decoder = Decoder(vocab_size, emb_dim, hidden_dim)

# Load the checkpoint
checkpoint = torch.load('l2_prime_target_seq2seq_attention_model.pth', map_location='cuda')
encoder.load_state_dict(checkpoint['encoder_state_dict'])
decoder.load_state_dict(checkpoint['decoder_state_dict'])

# Set to eval mode
encoder.eval()
decoder.eval()


Decoder(
  (embedding): Embedding(67, 64)
  (lstm): LSTM(192, 128, batch_first=True)
  (attention): Attention(
    (attn): Linear(in_features=256, out_features=128, bias=True)
    (v): Linear(in_features=128, out_features=1, bias=False)
  )
  (fc): Linear(in_features=256, out_features=67, bias=True)
  (ln): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
)

In [11]:
import torch.nn.functional as F

def compute_surprisal(decoder, encoder_outputs, hidden, cell, tgt_seq):
    surprisal = 0.0
    input_token = tgt_seq[:, 0]
    for t in range(1, tgt_seq.shape[1]):
        output, hidden, cell, _ = decoder(input_token, hidden, cell, encoder_outputs)
        log_probs = F.log_softmax(output, dim=1)
        target_token = tgt_seq[:, t]
        token_log_prob = log_probs.gather(1, target_token.unsqueeze(1)).squeeze(1)
        surprisal += -token_log_prob
        input_token = target_token  # Greedy decoding
    return surprisal.mean().item()


In [5]:
def get_embedding_output(encoder, word_tensor):
    with torch.no_grad():
        outputs, _, _ = encoder(word_tensor)
        # Mean pooling over non-padding tokens
        mask = (word_tensor != char2idx["<PAD>"]).unsqueeze(2)
        pooled = (outputs * mask).sum(1) / mask.sum(1)
    return pooled


In [12]:
def encode_tensor(word, char2idx, max_len=20, add_sos=False):
    seq = [char2idx.get(c, char2idx["<UNK>"]) for c in word]
    if add_sos:
        seq = [char2idx["<SOS>"]] + seq
    seq = seq + [char2idx["<EOS>"]]
    seq += [char2idx["<PAD>"]] * (max_len - len(seq))
    seq = seq[:max_len]
    return torch.tensor([seq])  # Batch size = 1

In [13]:
# Example inputs
word1 = "поговорить"
word2 = "говорить"

# Convert to tensors
tensor1 = encode_tensor(word1, char2idx)
tensor2 = encode_tensor(word2, char2idx)

# Feed to encoder
vec1 = get_embedding_output(encoder, tensor1)
vec2 = get_embedding_output(encoder, tensor2)

# Cosine similarity
similarity = F.cosine_similarity(vec1, vec2)
print("Cosine similarity:", similarity.item())


Cosine similarity: 0.9397709369659424


In [20]:
unrelated_prime = "все-таки"
target = "СМОТРЕТЬ"

src_tensor = encode_tensor(unrelated_prime, char2idx, max_len=20)
tgt_tensor = encode_tensor(target, char2idx, max_len=20, add_sos=True)

encoder_outputs, hidden, cell = encoder(src_tensor)
surprisal_value = compute_surprisal(decoder, encoder_outputs, hidden, cell, tgt_tensor)

print(f"Surprisal for '{target}' after '{unrelated_prime}': {surprisal_value:.4f}")


Surprisal for 'СМОТРЕТЬ' after 'все-таки': 154.8890


In [21]:
related_prime = "посмотреть"
target = "СМОТРЕТЬ"

src_tensor = encode_tensor(related_prime, char2idx, max_len=20)
tgt_tensor = encode_tensor(target, char2idx, max_len=20, add_sos=True)

encoder_outputs, hidden, cell = encoder(src_tensor)
surprisal_value = compute_surprisal(decoder, encoder_outputs, hidden, cell, tgt_tensor)

print(f"Surprisal for '{target}' after '{related_prime}': {surprisal_value:.4f}")

Surprisal for 'СМОТРЕТЬ' after 'посмотреть': 127.4068


In [8]:
def get_embedding_output(encoder, word_tensor):
    with torch.no_grad():
        outputs, _, _ = encoder(word_tensor)
        # Mean pooling over non-padding tokens
        mask = (word_tensor != char2idx["<PAD>"]).unsqueeze(2)
        pooled = (outputs * mask).sum(1) / mask.sum(1)
    return pooled


In [23]:
stimulus = {
    "БЕЖАТЬ": "бежал",
    "БРОСИТЬ": "бросил",
    "ВСТРЕЧАТЬ": "встречал",
    "ВЫБРАТЬ": "выбрал",
    "ЗАБЫТЬ": "забыл",
    "КРИЧАТЬ": "кричал",
    "НАЧАТЬ": "начал",
    "ОЖИДАТЬ": "ожидал",
    "ПИСАТЬ": "писал",
    "ПЛАКАТЬ": "плакал",
    "ВЕРИТЬ": "поверить",
    "ГОВОРИТЬ": "поговорить",
    "ДЕРЖАТЬ": "подержать",
    "ДУМАТЬ": "подумать",
    "ЖЕЛАТЬ": "пожелать",
    "ИГРАТЬ": "поиграть",
    "ИСКАТЬ": "поискать",
    "КУРИТЬ": "покурить",
    "ЛЕЖАТЬ": "полежать",
    "ПОМНИТЬ": "помнил",
    "ПОНЯТЬ": "понял",
    "ПРОСИТЬ": "попросить",
    "СИДЕТЬ": "посидеть",
    "СЛУШАТЬ": "послушать",
    "СМОТРЕТЬ": "посмотреть",
    "СТАВИТЬ": "поставить",
    "СТОЯТЬ": "постоять",
    "СТРАДАТЬ": "пострадать",
    "СТРОИТЬ": "построить",
    "СЧИТАТЬ": "посчитать",
    "ТЕРЯТЬ": "потерять",
    "ТРАТИТЬ": "потратить",
    "РЕШИТЬ": "решил",
    "СЛЫШАТЬ": "слышал",
    "СНИМАТЬ": "снимал",
    "СОБРАТЬ": "собрал",
    "СООБЩИТЬ": "сообщил",
    "УБИТЬ": "убил",
    "УЗНАТЬ": "узнал",
    "УМЕТЬ": "умел"
}

for target, prime in stimulus.items():
    src_tensor = encode_tensor(prime, char2idx, max_len=20)
    tgt_tensor = encode_tensor(target, char2idx, max_len=20, add_sos=True)
    encoder_outputs, hidden, cell = encoder(src_tensor)
    surprisal_value = compute_surprisal(decoder, encoder_outputs, hidden, cell, tgt_tensor)

    print(f"Surprisal for '{target}' after '{prime}': {surprisal_value:.4f}")


Surprisal for 'БЕЖАТЬ' after 'бежал': 139.7279
Surprisal for 'БРОСИТЬ' after 'бросил': 156.0452
Surprisal for 'ВСТРЕЧАТЬ' after 'встречал': 109.6985
Surprisal for 'ВЫБРАТЬ' after 'выбрал': 143.4145
Surprisal for 'ЗАБЫТЬ' after 'забыл': 152.1673
Surprisal for 'КРИЧАТЬ' after 'кричал': 141.2435
Surprisal for 'НАЧАТЬ' after 'начал': 136.0975
Surprisal for 'ОЖИДАТЬ' after 'ожидал': 138.0157
Surprisal for 'ПИСАТЬ' after 'писал': 163.3674
Surprisal for 'ПЛАКАТЬ' after 'плакал': 138.8557
Surprisal for 'ВЕРИТЬ' after 'поверить': 159.7498
Surprisal for 'ГОВОРИТЬ' after 'поговорить': 141.2491
Surprisal for 'ДЕРЖАТЬ' after 'подержать': 133.5329
Surprisal for 'ДУМАТЬ' after 'подумать': 149.6062
Surprisal for 'ЖЕЛАТЬ' after 'пожелать': 119.6074
Surprisal for 'ИГРАТЬ' after 'поиграть': 162.3116
Surprisal for 'ИСКАТЬ' after 'поискать': 146.9487
Surprisal for 'КУРИТЬ' after 'покурить': 164.3717
Surprisal for 'ЛЕЖАТЬ' after 'полежать': 130.6950
Surprisal for 'ПОМНИТЬ' after 'помнил': 150.1490
Surprisal

In [24]:
unrelated_stimulus = {
    "БЕЖАТЬ": "мальчик",
    "БРОСИТЬ": "никакой",
    "ВЕРИТЬ": "самый",
    "ВСТРЕЧАТЬ": "крупный",
    "ВЫБРАТЬ": "назад",
    "ГОВОРИТЬ": "все-таки",
    "ДЕРЖАТЬ": "работа",
    "ДУМАТЬ": "мысль",
    "ЖЕЛАТЬ": "принцип",
    "ЗАБЫТЬ": "структура",
    "ИГРАТЬ": "великий",
    "ИСКАТЬ": "мужчина",
    "КРИЧАТЬ": "простой",
    "КУРИТЬ": "взгляд",
    "ЛЕЖАТЬ": "простой",
    "НАЧАТЬ": "всякий",
    "ОЖИДАТЬ": "внимание",
    "ПИСАТЬ": "черный",
    "ПЛАКАТЬ": "женщина",
    "ПОМНИТЬ": "власть",
    "ПОНЯТЬ": "хорошо",
    "ПРОСИТЬ": "близкий",
    "РЕШИТЬ": "далекий",
    "СИДЕТЬ": "первый",
    "СЛУШАТЬ": "готовый",
    "СЛЫШАТЬ": "история",
    "СМОТРЕТЬ": "впрочем",
    "СНИМАТЬ": "развитие",
    "СОБРАТЬ": "сильный",
    "СООБЩИТЬ": "важный",
    "СТАВИТЬ": "очередь",
    "СТОЯТЬ": "палец",
    "СТРАДАТЬ": "машина",
    "СТРОИТЬ": "область",
    "СЧИТАТЬ": "вторник",
    "ТЕРЯТЬ": "метод",
    "ТРАТИТЬ": "задание",
    "УБИТЬ": "часто",
    "УЗНАТЬ": "тысяча",
    "УМЕТЬ": "характер"
}

for target, prime in unrelated_stimulus.items():
    src_tensor = encode_tensor(prime, char2idx, max_len=20)
    tgt_tensor = encode_tensor(target, char2idx, max_len=20, add_sos=True)
    encoder_outputs, hidden, cell = encoder(src_tensor)
    surprisal_value = compute_surprisal(decoder, encoder_outputs, hidden, cell, tgt_tensor)

    print(f"Surprisal for '{target}' after '{prime}': {surprisal_value:.4f}")


Surprisal for 'БЕЖАТЬ' after 'мальчик': 152.1419
Surprisal for 'БРОСИТЬ' after 'никакой': 151.0968
Surprisal for 'ВЕРИТЬ' after 'самый': 164.1617
Surprisal for 'ВСТРЕЧАТЬ' after 'крупный': 136.7074
Surprisal for 'ВЫБРАТЬ' after 'назад': 146.0451
Surprisal for 'ГОВОРИТЬ' after 'все-таки': 134.0031
Surprisal for 'ДЕРЖАТЬ' after 'работа': 157.7887
Surprisal for 'ДУМАТЬ' after 'мысль': 167.2929
Surprisal for 'ЖЕЛАТЬ' after 'принцип': 168.0079
Surprisal for 'ЗАБЫТЬ' after 'структура': 165.3170
Surprisal for 'ИГРАТЬ' after 'великий': 168.5400
Surprisal for 'ИСКАТЬ' after 'мужчина': 157.6588
Surprisal for 'КРИЧАТЬ' after 'простой': 165.6801
Surprisal for 'КУРИТЬ' after 'взгляд': 165.2780
Surprisal for 'ЛЕЖАТЬ' after 'простой': 180.6647
Surprisal for 'НАЧАТЬ' after 'всякий': 170.5712
Surprisal for 'ОЖИДАТЬ' after 'внимание': 152.9007
Surprisal for 'ПИСАТЬ' after 'черный': 164.5813
Surprisal for 'ПЛАКАТЬ' after 'женщина': 142.2334
Surprisal for 'ПОМНИТЬ' after 'власть': 158.8977
Surprisal for '

In [13]:
df["target_pred"] = df["prime"].apply(lambda word: predict(encoder, decoder, word, char2idx, idx2char, max_len=20))

In [14]:
import pandas as pd

# Show all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# To avoid cutting off wide content
pd.set_option('display.max_colwidth', None)


df

Unnamed: 0,prime,target,target_pred,is_exact,edit_distance,char_accuracy,bleu
0,ТАЩИТЬ,ТАСКАТЬ,ТАСКАТЬ,True,0,1.0,1.0
1,зотеть,ЗОТЕТЬ,ЗОТЕТЬ,True,0,1.0,1.0
2,малыш,РОДИТЬ,РОДИТЬ,True,0,1.0,1.0
3,мначь,МНАЧЬ,МНАЧЬ,True,0,1.0,1.0
4,конфликт,СПОРИТЬ,СПОРИТЬ,True,0,1.0,1.0
5,рэбить,РЭБИТЬ,РЭБИТЬ,True,0,1.0,1.0
6,рагеть,РАГЕТЬ,РАГЕТЬ,True,0,1.0,1.0
7,дорогой,ЦЕНИТЬ,ЦЕНИТЬ,True,0,1.0,1.0
8,бореть,БОРЕТЬ,БОРЕТЬ,True,0,1.0,1.0
9,оказать,ОКАЗАТЬ,ОКАЗАТЬ,True,0,1.0,1.0
