In [None]:
!pip install huggingface_hub 
!pip install torch 
!pip install numpy 
!pip install fasttext 
!pip install pandas 
!pip install transformers 
!pip install scikit-learn 
!pip install sentence-transformers

!pip install gdown

### Download Dataset

In [None]:
!gdown --folder https://drive.google.com/drive/folders/15N-OFla0yl0wKxdnCs7MCdmXglTeST4I?usp=drive_link

### Download Embedding Archives and Text Files

In [None]:
!gdown '1DUxOSd4IJLdzBgS8vJSl1pPg0coGzCMh'
!gdown '1F-6ckvKpYwUlqE5W-Ibdd3FmzOcnmN7d'
!gdown "1Nt3UQs3Y6Ip39irOl-d9MK2-h7K4L7AY"
!tar -xvf relative_init_vectors.tar.gz
!tar -xvf ft_word_embedding.tar.gz

### Function to reduce dataset size

In [None]:
# import random

# def reduce(input_path, output_path, percentage=10):
#     """
#     Reduce the size of the relation_init_vectors file by sampling a subset of lines.

#     Args:
#         input_path (str): Path to the original relation_init_vectors file.
#         output_path (str): Path to save the reduced file.
#         percentage (int): Percentage of lines to retain (excluding the header).
#     """
#     with open(input_path, 'r', encoding='utf-8') as infile:
#         lines = infile.readlines()

#     print(f"Original file {input_path} with {len(lines)} relations.")

#     # Keep the first line (header) intact
#     header = lines[0]
#     data_lines = lines[1:]

#     # Calculate the number of lines to retain
#     num_lines_to_keep = int(len(data_lines) * (percentage / 100))

#     # Randomly sample the lines
#     sampled_lines = random.sample(data_lines, num_lines_to_keep)

#     # Combine the header and sampled lines
#     reduced_lines = [header] + sampled_lines

#     # Write the reduced lines to the output file
#     with open(output_path, 'w', encoding='utf-8') as outfile:
#         outfile.writelines(reduced_lines)

#     print(f"Reduced file saved to {output_path} with {len(sampled_lines)} relations.")

# reduce(
#     input_path="relative_init_vectors.txt",
#     output_path="relative_init_vectors_small.txt",
#     percentage=1
# )

# reduce(
#     input_path="ft_word_embeddings.txt",
#     output_path="ft_word_embeddings_small.txt",
#     percentage=1
# )

### Download BGE Model

In [None]:
from huggingface_hub import hf_hub_download
# Files fetched one by one from the Hugging Face repo below.
# They are stored under ./bge-base, the directory the classification
# section later passes to SentenceTransformer('bge-base', ...).
files = [
    "1_Pooling/config.json",
    "config_sentence_transformers.json",
    "config.json",
    "model.safetensors",
    "modules.json",
    "README.md",
    "sentence_bert_config.json",
    "special_tokens_map.json",
    "tokenizer_config.json",
    "tokenizer.json",
    "vocab.txt",
]
repo_id = "BAAI/bge-base-en-v1.5"
# Download every listed file into the local model directory.
for file in files:
    hf_hub_download(repo_id=repo_id, filename=file, local_dir="./bge-base")

### Download fastText Model

In [None]:
import fasttext.util
# Fetch the English fastText model; presumably this is the cc.en.300.bin file
# loaded in the classification section (TODO confirm). if_exists='ignore' is
# expected to skip the download when the file is already present.
fasttext.util.download_model('en', if_exists='ignore')

## train-rwe.ipynb

### Libraries

In [None]:
import sys
import random
import torch
import numpy as np
import os
# cuBLAS workspace setting that PyTorch's reproducibility notes recommend for
# deterministic CUDA matrix multiplications; it must be set before CUDA is used.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

### Define Networks

In [None]:
# Define neural network model
class RWE_Model(torch.nn.Module):
    """Feed-forward network that maps a word pair to a relation vector.

    Both words are looked up in one shared, trainable embedding table. The
    pair is encoded as the concatenation of the element-wise product and the
    element-wise average of the two word vectors, then passed through
    Linear -> ReLU -> Dropout -> Linear.

    Args:
        embedding_size_input: Dimensionality of the word embeddings.
        embedding_size_output: Dimensionality of the predicted relation vector.
        embedding_weights: 2-D tensor used to initialise the embedding table
            (one row per word).
        hidden_size: Width of the hidden layer.
        dropout: Dropout probability applied after the ReLU.
    """

    def __init__(
        self,
        embedding_size_input,
        embedding_size_output,
        embedding_weights,
        hidden_size,
        dropout,
    ):
        super(RWE_Model, self).__init__()
        # The embedding table must stay the first registered parameter:
        # train_rwe reads the learned word vectors from parameters()[0].
        self.embeddings = torch.nn.Embedding.from_pretrained(embedding_weights).float()
        # from_pretrained freezes the table by default; re-enable training.
        self.embeddings.weight.requires_grad = True
        self.linear1 = torch.nn.Linear(embedding_size_input * 2, hidden_size)
        self.relu = torch.nn.ReLU()
        self.dropout = torch.nn.Dropout(dropout)
        self.linear2 = torch.nn.Linear(hidden_size, embedding_size_output)

    def forward(self, input1, input2):
        """Predict relation vectors for a batch of word-index pairs.

        Args:
            input1: Integer tensor of first-word indices, shaped (batch, 1).
            input2: Integer tensor of second-word indices, same shape.

        Returns:
            Tensor of shape (batch, embedding_size_output).
        """
        embed1 = self.embeddings(input1)
        embed2 = self.embeddings(input2)
        pair_features = torch.cat(((embed1 * embed2), (embed1 + embed2) / 2), 2)
        # Drop only the singleton pair axis. A bare squeeze() would also drop
        # the batch axis for a batch of one (or a hidden layer of width one),
        # which makes the output shape disagree with the target.
        out = self.linear1(pair_features).squeeze(1)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.linear2(out)
        return out


# Define function to get the model
def getRWEModel(
    embedding_size_input, embedding_size_output, embedding_weights, hidden_size, dropout
):
    """Build the RWE network and its loss function.

    Args:
        embedding_size_input: Dimensionality of the input word embeddings.
        embedding_size_output: Dimensionality of the target relation vectors.
        embedding_weights: Tensor used to initialise the embedding table.
        hidden_size: Width of the hidden layer.
        dropout: Dropout probability.

    Returns:
        Tuple ``(model, criterion)``. The model is placed on the GPU when CUDA
        is available and stays on the CPU otherwise; the criterion is
        ``torch.nn.MSELoss``.
    """
    model = RWE_Model(
        embedding_size_input,
        embedding_size_output,
        embedding_weights,
        hidden_size,
        dropout,
    )
    criterion = torch.nn.MSELoss()
    # An unconditional .cuda() raises on CPU-only machines; fall back to CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return model.to(device), criterion

### Helper functions to train the model

In [None]:
# @title Helper functions to train the model
def load_vocab_embeddings(input_path):
    """Collect the vocabulary (first token of each line) of an embedding file.

    The first line of the file is treated as a header and skipped.

    Args:
        input_path: Path to a space-separated, UTF-8 embedding text file.

    Returns:
        Set with the first token of every non-header line.
    """
    vocab = set()
    # The original opened an undefined local instead of `input_path`, so every
    # call raised; it also never closed the file.
    with open(input_path, "r", encoding="utf-8") as input_file:
        next(input_file, None)  # skip the header line
        for line in input_file:
            vocab.add(line.strip().split(" ")[0])
    return vocab


def load_word_vocab_from_relation_vectors(input_path):
    """Collect every word that appears in a relation-vector file.

    The first line is a header and is skipped. Each following line starts
    with a pair key of the form ``word1__word2``; the key is split on its
    last double underscore.

    Args:
        input_path: Path to the space-separated, UTF-8 relation-vector file.

    Returns:
        Set of all first and second words found in the pair keys.

    Raises:
        SystemExit: If a pair key does not contain ``__``.
    """
    pre_word_vocab = set()
    # Context manager so the handle is closed even on the error exit below.
    with open(input_path, "r", encoding="utf-8") as relation_file:
        next(relation_file, None)  # skip the header line
        for line in relation_file:
            relation = line.strip().split(" ")[0]
            if "__" not in relation:
                sys.exit("ERROR: Pair '" + relation + "' does not contain underscore")
            word1, word2 = relation.rsplit("__", 1)
            pre_word_vocab.add(word1)
            pre_word_vocab.add(word2)
    return pre_word_vocab


def load_embeddings_filtered_byvocab(input_path, vocab):
    """Load the word embeddings whose word belongs to ``vocab``.

    The file's first line is a header whose second field is the embedding
    dimensionality. Every following line is ``word v1 v2 ...``. Only the
    first occurrence of a word is kept.

    Args:
        input_path: Path to the space-separated, UTF-8 embedding file.
        vocab: Container of words to keep (membership is tested with ``in``).

    Returns:
        Tuple ``(matrix_word_embeddings, word2index, index2word, dimensions)``
        where ``matrix_word_embeddings`` is a list of 1-D numpy arrays in
        index order, and the two dicts map words to row indices and back.

    Raises:
        ValueError: If the file is empty or its header has fewer than two
            fields.
    """
    word2index = {}
    index2word = {}
    matrix_word_embeddings = []
    with open(input_path, "r", encoding="utf-8") as embedding_file:
        header_line = embedding_file.readline()
        header = header_line.strip().split(" ")
        # Without this check an empty file failed later with an obscure
        # UnboundLocalError on `dimensions`.
        if not header_line or len(header) < 2:
            raise ValueError(
                "Embedding file '" + str(input_path) + "' has no '<count> <dimensions>' header"
            )
        dimensions = int(header[1])
        for line in embedding_file:
            linesplit = line.strip().split(" ")
            word = linesplit[0]
            if word in vocab and word not in word2index:
                index = len(word2index)
                word2index[word] = index
                index2word[index] = word
                matrix_word_embeddings.append(
                    np.asarray([float(dim) for dim in linesplit[1 : dimensions + 1]])
                )
    return matrix_word_embeddings, word2index, index2word, dimensions


def load_training_data(input_path, matrix_word_embeddings, word2index):
    """Build (word-index pair, relation vector) training examples.

    The file's first line is a header whose second field is the relation
    vector dimensionality. Every following line is ``word1__word2 v1 v2 ...``.
    Pairs in which either word is missing from ``word2index`` are skipped.

    Args:
        input_path: Path to the space-separated, UTF-8 relation-vector file.
        matrix_word_embeddings: Unused; kept so existing callers keep working.
        word2index: Mapping from word to its row index in the embedding table.

    Returns:
        Tuple ``(matrix_input, matrix_output, dimensions)``: a list of
        two-element index arrays, the list of matching relation vectors, and
        the relation vector dimensionality.

    Raises:
        SystemExit: If a pair key does not contain ``__``.
        ValueError: If the file is empty or its header has fewer than two
            fields.
    """
    matrix_input = []
    matrix_output = []
    with open(input_path, "r", encoding="utf-8") as relation_file:
        header_line = relation_file.readline()
        header = header_line.strip().split(" ")
        # Without this check an empty file failed later with an obscure
        # UnboundLocalError on `dimensions`.
        if not header_line or len(header) < 2:
            raise ValueError(
                "Relation file '" + str(input_path) + "' has no '<count> <dimensions>' header"
            )
        dimensions = int(header[1])
        for line in relation_file:
            linesplit = line.strip().split(" ")
            relation = linesplit[0]
            if "__" not in relation:
                sys.exit("ERROR: Pair '" + relation + "' does not contain underscore")
            word1, word2 = relation.rsplit("__", 1)
            if word1 in word2index and word2 in word2index:
                matrix_input.append(np.asarray([word2index[word1], word2index[word2]]))
                matrix_output.append(
                    np.asarray([float(dim) for dim in linesplit[1 : dimensions + 1]])
                )
    return matrix_input, matrix_output, dimensions


def split_training_data(matrix_input, matrix_output, devsize, batchsize):
    """Split aligned inputs and outputs into train and dev sets.

    Only the first ``(len(matrix_input) // batchsize) * batchsize`` examples
    are used, and the dev set size is rounded down to a multiple of
    ``batchsize``, so both resulting sets divide evenly into batches. Dev
    examples are drawn with ``random.sample``; seed ``random`` beforehand for
    a reproducible split. The two set sizes are printed.

    Args:
        matrix_input: Sequence of input examples.
        matrix_output: Sequence of targets aligned with ``matrix_input``.
        devsize: Fraction of the usable examples to place in the dev set.
        batchsize: Batch size both sets must be divisible by.

    Returns:
        Tuple ``(matrix_input_train, matrix_output_train, matrix_input_dev,
        matrix_output_dev)`` of lists, each in original order.
    """
    matrix_input_train = []
    matrix_output_train = []
    matrix_input_dev = []
    matrix_output_dev = []
    num_instances = int((len(matrix_input) // batchsize) * batchsize)
    final_size_dev = int(((num_instances * devsize) // batchsize) * batchsize)
    final_size_train = int(((num_instances - final_size_dev) // batchsize) * batchsize)
    print("Size train set: " + str(final_size_train))
    print("Size dev set: " + str(final_size_dev))
    # A set gives O(1) membership tests; the original list lookup made the
    # loop below quadratic in the number of examples.
    dev_indices = set(random.sample(range(num_instances), final_size_dev))
    for i in range(num_instances):
        if i in dev_indices:
            matrix_input_dev.append(matrix_input[i])
            matrix_output_dev.append(matrix_output[i])
        else:
            matrix_input_train.append(matrix_input[i])
            matrix_output_train.append(matrix_output[i])
    return matrix_input_train, matrix_output_train, matrix_input_dev, matrix_output_dev


def trainIntervals(model, optimizer, criterion, batches, interval=100, lr=0.1):
    """Run one training pass over ``batches`` and print a training error.

    The printed value is the mean per-batch loss of the last complete window
    of ``interval`` batches. If fewer than ``interval`` batches were
    processed, it is the mean over the batches that were. Nothing is printed
    when there are no batches.

    Args:
        model: Network to train.
        optimizer: Optimizer updating the model parameters.
        criterion: Loss function.
        batches: Tuple ``(x1_batches, x2_batches, y_batches)`` iterated in
            parallel.
        interval: Number of batches per reporting window.
        lr: Forwarded to ``gradUpdate``.
    """
    window_error = 0.0
    window_batches = 0
    last_window_mean = None
    model.train()
    for x1, x2, y in zip(*batches):
        optimizer.zero_grad()
        window_error += gradUpdate(model, x1, x2, y, criterion, optimizer, lr)
        window_batches += 1
        if window_batches == interval:
            last_window_mean = window_error / window_batches
            window_error = 0.0
            window_batches = 0
    # The original divided the last full window's summed loss by the number
    # of leftover batches, and hit an unbound variable when no window
    # completed. Report a real mean in both cases.
    if last_window_mean is None and window_batches > 0:
        last_window_mean = window_error / window_batches
    if last_window_mean is not None:
        print("Training error: " + str(last_window_mean))


def validate(model, batches, criterion):
    """Compute the mean per-batch loss of ``model`` on validation batches.

    The model is switched to evaluation mode and gradients are disabled for
    the whole pass.

    Args:
        model: Network to evaluate; called as ``model(x1, x2)``.
        batches: Tuple ``(x1_batches, x2_batches, y_batches)`` iterated in
            parallel.
        criterion: Loss function called as ``criterion(output, y)``.

    Returns:
        Average of the per-batch loss values as a float.

    Raises:
        ValueError: If ``batches`` yields no batch (for example when the dev
            split is empty).
    """
    total_error = 0.0
    num_batches = 0
    model.eval()
    # no_grad avoids building autograd graphs during evaluation; the
    # deprecated Variable wrappers are no longer needed for that.
    with torch.no_grad():
        for x1, x2, y in zip(*batches):
            total_error += criterion(model(x1, x2), y).item()
            num_batches += 1
    if num_batches == 0:
        raise ValueError(
            "validate() received no batches; the development set is empty"
        )
    return total_error / num_batches


def gradUpdate(model, x1, x2, y, criterion, optimizer, lr):
    """Perform a single optimisation step on one batch.

    Runs the forward pass, back-propagates the loss and advances the
    optimizer. ``lr`` is accepted but not used here.

    Returns:
        The batch loss as a Python float.
    """
    loss = criterion(model(x1, x2), y)
    loss.backward()
    optimizer.step()
    return loss.item()


def getBatches(data, batchSize):
    """View ``data`` as a stack of batches.

    The last dimension is kept and the leading elements are grouped into
    chunks of ``batchSize``, giving shape (num_batches, batchSize, last_dim).
    """
    last_dim = data.shape[-1]
    return data.view(-1, batchSize, last_dim)

In [None]:
# Define function to train the model
def trainEpochs(
    model,
    optimizer,
    criterion,
    trainBatches,
    validBatches,
    epochs=10,
    interval=100,
    lr=0.1,
):
    """Train for several epochs and return the model with the best dev error.

    After every epoch the model is validated, the learning-rate scheduler is
    stepped on the validation error, and the whole model is saved to
    ``epoch-<n>.model`` in the working directory. The weights of the epoch
    with the lowest validation error are restored into ``model`` before it
    is returned.

    Args:
        model: Network to train (updated in place).
        optimizer: Optimizer bound to the model parameters.
        criterion: Loss function.
        trainBatches: Tuple ``(x1, x2, y)`` of training batch tensors.
        validBatches: Tuple ``(x1, x2, y)`` of validation batch tensors.
        epochs: Number of epochs to run.
        interval: Reporting window forwarded to ``trainIntervals``.
        lr: Forwarded to ``trainIntervals``.

    Returns:
        ``model`` holding the weights of the best epoch, or unchanged when
        ``epochs`` is 0.
    """
    import copy

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=2, threshold=1e-7, factor=0.9
    )
    min_error = -1.0
    best_state = None
    for epoch in range(1, epochs + 1):
        print("\n     ----------    \n")
        print("EPOCH " + str(epoch))
        print("Starting training epoch " + str(epoch))
        trainIntervals(model, optimizer, criterion, trainBatches, interval, lr)
        validErr = validate(model, validBatches, criterion)
        scheduler.step(validErr)
        print("Validation error : " + str(validErr))
        if validErr < min_error or min_error == -1.0:
            # `new_model = model` only aliased the live model, so later
            # epochs overwrote the "best" one. Snapshot the weights instead.
            # This temporarily holds a second copy of the parameters.
            best_state = copy.deepcopy(model.state_dict())
            min_error = validErr
            print(
                "[Model at epoch "
                + str(epoch)
                + " obtained the lowest development error rate so far.]"
            )
        torch.save(model, f"epoch-{epoch}.model")
        print("Epoch " + str(epoch) + " done")
    if best_state is not None:
        model.load_state_dict(best_state)
    return model

### Define driver function to actually load the data for the model training

In [None]:
# @title Define driver function to actually load the data for the model training
def train_rwe(
    word_embeddings_path,
    rel_embeddings_path,
    output_path,
    hidden_size=0,
    dropout=0.5,
    epochs=5,
    interval=100,
    batchsize=10,
    dev_size=0.015,
    lr=0.01,
):
    """Train relational word embeddings and write them to a text file.

    Steps: load the words used in the relation-vector file, load their word
    embeddings, build (word pair -> relation vector) examples, split them
    into train/dev sets, train the RWE network, then write the learned
    embedding table to ``output_path`` (a ``<count> <dimensions>`` header
    followed by one ``word v1 v2 ...`` line per word).

    Args:
        word_embeddings_path: Path to the input word-embedding text file.
        rel_embeddings_path: Path to the relation-vector text file.
        output_path: Destination of the learned embeddings.
        hidden_size: Hidden layer width; 0 means twice the word dimension.
        dropout: Dropout probability of the network.
        epochs: Number of training epochs.
        interval: Reporting window (in batches) used during training.
        batchsize: Number of pairs per batch.
        dev_size: Fraction of the examples held out for validation, in [0, 1).
        lr: Learning rate of the Adam optimizer.

    Raises:
        Exception: If ``dev_size`` is outside [0, 1).
        ValueError: If the split leaves the training or development set
            empty (too few usable pairs for this ``dev_size``/``batchsize``).
    """
    if dev_size >= 1 or dev_size < 0:
        raise Exception(
            "Development data should be between 0% (0.0) and 100% (1.0) of the training data"
        )

    print("Loading word vocabulary...")
    pre_word_vocab = load_word_vocab_from_relation_vectors(rel_embeddings_path)
    print(
        "Word vocabulary loaded succesfully ("
        + str(len(pre_word_vocab))
        + " words). Now loading word embeddings..."
    )
    (
        matrix_word_embeddings,
        word2index,
        index2word,
        dims_word,
    ) = load_embeddings_filtered_byvocab(word_embeddings_path, pre_word_vocab)
    pre_word_vocab.clear()
    print(
        "Word embeddings loaded succesfully ("
        + str(dims_word)
        + " dimensions). Now loading relation vectors..."
    )
    matrix_input, matrix_output, dims_rels = load_training_data(
        rel_embeddings_path, matrix_word_embeddings, word2index
    )
    print(
        "Relation vectors loaded ("
        + str(dims_rels)
        + " dimensions), now spliting training and dev..."
    )

    # Shuffle inputs and outputs with the same RNG state so they stay aligned.
    random.seed(21)
    s1 = random.getstate()
    random.shuffle(matrix_input)
    random.setstate(s1)
    random.shuffle(matrix_output)
    (
        matrix_input_train,
        matrix_output_train,
        matrix_input_dev,
        matrix_output_dev,
    ) = split_training_data(matrix_input, matrix_output, dev_size, batchsize)
    matrix_input.clear()
    matrix_output.clear()
    # An empty split used to fail later with an obscure reshape error.
    if not matrix_input_train or not matrix_input_dev:
        raise ValueError(
            "Not enough relation pairs to build non-empty training and development "
            "sets; lower batchsize or adjust dev_size"
        )
    print("Done preprocessing all the data, now loading and training the model...\n")

    if hidden_size == 0:
        hidden_size = dims_word * 2
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device used: " + str(device))

    # Stack into one ndarray first: building a tensor straight from a list of
    # numpy arrays is very slow.
    embedding_weights = torch.tensor(np.array(matrix_word_embeddings))
    matrix_word_embeddings.clear()
    tensor_input_train_1 = torch.LongTensor([[x[0]] for x in matrix_input_train])
    tensor_input_train_2 = torch.LongTensor([[x[1]] for x in matrix_input_train])
    matrix_input_train.clear()
    tensor_input_dev_1 = torch.LongTensor([[x[0]] for x in matrix_input_dev])
    tensor_input_dev_2 = torch.LongTensor([[x[1]] for x in matrix_input_dev])
    matrix_input_dev.clear()
    tensor_output_train = torch.tensor(np.array(matrix_output_train), dtype=torch.float)
    matrix_output_train.clear()
    tensor_output_dev = torch.tensor(np.array(matrix_output_dev), dtype=torch.float)
    matrix_output_dev.clear()

    model, criterion = getRWEModel(
        dims_word, dims_rels, embedding_weights, hidden_size, dropout
    )
    # Keep the model on the same device as the batches built below.
    model = model.to(device)
    print("RWE model loaded.")
    optimizer = torch.optim.Adam(model.parameters(), lr)

    # Use the selected device instead of hard-coded .cuda() calls.
    trainX1batches = getBatches(tensor_input_train_1.to(device), batchsize)
    trainX2batches = getBatches(tensor_input_train_2.to(device), batchsize)
    validX1Batches = getBatches(tensor_input_dev_1.to(device), batchsize)
    validX2Batches = getBatches(tensor_input_dev_2.to(device), batchsize)
    trainYBatches = getBatches(tensor_output_train.to(device), batchsize)
    validYBatches = getBatches(tensor_output_dev.to(device), batchsize)
    print("Now starting training...\n")
    output_model = trainEpochs(
        model,
        optimizer,
        criterion,
        (trainX1batches, trainX2batches, trainYBatches),
        (validX1Batches, validX2Batches, validYBatches),
        epochs,
        interval,
        lr,
    )
    print(
        "\nTraining finished. Now loading relational word embeddings from trained model..."
    )

    # The first parameter of the model is its embedding table; move it to
    # host memory once rather than once per row.
    parameters = list(output_model.parameters())
    learned_vectors = parameters[0].detach().cpu().numpy()
    num_vectors = len(learned_vectors)
    print("Number of vectors: " + str(num_vectors))
    num_dimensions = len(learned_vectors[0])
    print("Number of dimensions output embeddings: " + str(num_dimensions))
    if num_vectors != embedding_weights.size()[0]:
        print(
            "Something is wrong in the input vectors: "
            + str(embedding_weights.size()[0])
            + " != "
            + str(num_vectors)
        )

    # Context manager so the file is flushed and closed even on error.
    with open(output_path, "w", encoding="utf8") as txtfile:
        txtfile.write(str(num_vectors) + " " + str(num_dimensions) + "\n")
        for i in range(num_vectors):
            txtfile.write(index2word[i])
            for dimension in learned_vectors[i]:
                txtfile.write(" " + str(dimension))
            txtfile.write("\n")
    print("\nFINISHED. Word embeddings stored at " + output_path)

### Train Model

In [None]:
# Train the RWE model with the default hyper-parameters.
# Arguments, in order: input word embeddings, relation vectors, output file.
# NOTE(review): the paths are absolute Kaggle paths, while the download cells
# above extract into the current directory — confirm they match your setup.
train_rwe(
    "/kaggle/working/ft_word_embeddings.txt",
    "/kaggle/working/relative_init_vectors.txt",
    "/kaggle/working/rwe_embeddings.txt",
)

## Classification.ipynb

In [None]:
import io
import os
import fasttext
import pandas as pd
import numpy as np

from transformers import set_seed
from sklearn.neural_network import MLPClassifier
from sentence_transformers.SentenceTransformer import SentenceTransformer

In [None]:
# cuBLAS workspace setting that PyTorch's reproducibility notes recommend for
# deterministic CUDA matrix multiplications.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
# Fix the random seed through transformers; deterministic=True presumably
# also requests deterministic torch algorithms (TODO confirm for the
# installed transformers version).
set_seed(42, deterministic=True)

In [None]:
# Load the headerless, tab-separated splits as (kata1, kata2, relasi) columns.
# Rows with missing values are dropped and the index is reset to 0..n-1,
# which the feature builders below rely on when they index rows by position.
train = pd.read_csv("dataset/train.tsv", sep="\t", header=None, names=["kata1","kata2","relasi"]).dropna().reset_index(drop=True)
test = pd.read_csv("dataset/test.tsv", sep="\t", header=None, names=["kata1","kata2","relasi"]).dropna().reset_index(drop=True)

In [None]:
# # Check class distribution for the training dataset
# print("Class Distribution in Training Dataset:")
# train_class_distribution = train['relasi'].value_counts()
# print(train_class_distribution)

# # Check class distribution for the testing dataset
# print("\nClass Distribution in Testing Dataset:")
# test_class_distribution = test['relasi'].value_counts()
# print(test_class_distribution)

# # Check if the distribution is balanced
# def is_balanced(distribution):
#     counts = distribution.values
#     return max(counts) - min(counts) <= (sum(counts) * 0.05)  # Allow 5% tolerance

# print("\nIs the training dataset balanced?", is_balanced(train_class_distribution))
# print("Is the testing dataset balanced?", is_balanced(test_class_distribution))

In [None]:
def load_learned_embeddings(name: str):
    """Load word embeddings from a space-separated text file.

    Each data line is ``word v1 v2 ...``. A leading ``<count> <dimensions>``
    header (two integer tokens on the first line, as written by the training
    code in this notebook) is skipped, and so are blank lines.

    Args:
        name: Path to the embedding text file.

    Returns:
        Dict mapping each word to a 1-D float numpy array.
    """
    data = {}
    # Context manager so the file handle is always released.
    with io.open(name, 'r', encoding='utf-8', newline='\n', errors='ignore') as fin:
        for line_number, line in enumerate(fin):
            if not line.strip():
                continue
            tokens = line.rstrip().split(' ')
            # The header would otherwise be stored as a bogus one-element
            # vector keyed by the vocabulary size.
            is_header = (
                line_number == 0
                and len(tokens) == 2
                and all(token.isdigit() for token in tokens)
            )
            if is_header:
                continue
            data[tokens[0]] = np.array(tokens[1:], dtype=float)
    return data
# Relational word embeddings used as optional extra features. Switch to the
# commented line to use the vectors trained in this notebook instead.
# rwe = load_learned_embeddings('rwe_embeddings.txt')  # Self-trained
rwe = load_learned_embeddings('reference_rwe.txt')  # From original repo
# fastText model file; presumably the one fetched by the fastText download
# cell (TODO confirm).
ft = fasttext.load_model('cc.en.300.bin')
# Sentence encoder loaded from the local ./bge-base directory filled by the
# BGE download cell; no files are fetched from the Hub here.
embedding_model = SentenceTransformer('bge-base', local_files_only=True)

### Training

In [None]:
def get_bge_train_data(add_rwe=False):
    """Build training features from BGE sentence embeddings.

    Each word pair is represented by the difference between the first and
    second word vectors. With ``add_rwe`` the RWE vector of each word (a
    300-dimensional zero vector when the word is unknown) is appended to its
    BGE vector before subtracting. The per-row vectors are also stored in
    ``train["representation"]``.

    Returns:
        Tuple ``(X_train, y_train)``: the stacked feature matrix and the
        ``relasi`` label column.
    """
    encode_options = dict(batch_size=256, show_progress_bar=True)
    first_vectors = embedding_model.encode(train["kata1"].to_list(), **encode_options)
    second_vectors = embedding_model.encode(train["kata2"].to_list(), **encode_options)

    def _pair_vector(row):
        first, second = first_vectors[row], second_vectors[row]
        if add_rwe:
            first = np.concatenate((first, rwe.get(train["kata1"][row], np.zeros(300,))))
            second = np.concatenate((second, rwe.get(train["kata2"][row], np.zeros(300,))))
        return np.subtract(first, second)

    train["representation"] = [_pair_vector(row) for row in range(len(train))]
    return np.vstack(train["representation"]), train["relasi"]

def get_fasttext_train_data(add_rwe=False):
    """Build training features from fastText word vectors.

    Each word pair is represented by the difference between the first and
    second word vectors. With ``add_rwe`` the RWE vector of each word (a
    300-dimensional zero vector when the word is unknown) is appended to its
    fastText vector before subtracting. The per-row vectors are also stored
    in ``train['representation']``.

    Returns:
        Tuple ``(X_train, y_train)``: the stacked feature matrix and the
        ``relasi`` label column.
    """

    def _pair_vector(row):
        first = ft.get_word_vector(train["kata1"][row])
        second = ft.get_word_vector(train["kata2"][row])
        if add_rwe:
            first = np.concatenate((first, rwe.get(train["kata1"][row], np.zeros(300,))))
            second = np.concatenate((second, rwe.get(train["kata2"][row], np.zeros(300,))))
        return np.subtract(first, second)

    train['representation'] = [_pair_vector(row) for row in range(len(train))]
    return np.vstack(train['representation']), train['relasi']

In [None]:
# Build the four training feature sets: BGE and fastText, each with and
# without the RWE vectors appended. Every call overwrites the shared
# train["representation"] column, so only the returned matrices are kept.
X_train_bge, y_train_bge = get_bge_train_data()
X_train_bge_rwe, y_train_bge_rwe = get_bge_train_data(True)
X_train_ft, y_train_ft = get_fasttext_train_data()
X_train_ft_rwe, y_train_ft_rwe = get_fasttext_train_data(True)

In [None]:
# One MLP classifier per feature set, all with the same seed and iteration
# cap so the four configurations are compared under equal settings.
model_bge = MLPClassifier(random_state=42, max_iter=300)
model_bge_rwe = MLPClassifier(random_state=42, max_iter=300)
model_ft = MLPClassifier(random_state=42, max_iter=300)
model_ft_rwe = MLPClassifier(random_state=42, max_iter=300)

# Fit each classifier on its matching feature matrix.
model_bge.fit(X_train_bge, y_train_bge)
model_bge_rwe.fit(X_train_bge_rwe, y_train_bge_rwe)
model_ft.fit(X_train_ft, y_train_ft)
model_ft_rwe.fit(X_train_ft_rwe, y_train_ft_rwe)

### Testing

In [None]:
def get_bge_test_data(add_rwe=False):
    """Build test features from BGE sentence embeddings.

    Each word pair is represented by the difference between the first and
    second word vectors. With ``add_rwe`` the RWE vector of each word (a
    300-dimensional zero vector when the word is unknown) is appended to its
    BGE vector before subtracting. The per-row vectors are also stored in
    ``test["representation"]``.

    Returns:
        Tuple ``(X_test, y_test)``: the stacked feature matrix and the
        ``relasi`` label column.
    """
    encode_options = dict(batch_size=256, show_progress_bar=True)
    first_vectors = embedding_model.encode(test["kata1"].to_list(), **encode_options)
    second_vectors = embedding_model.encode(test["kata2"].to_list(), **encode_options)

    def _pair_vector(row):
        first, second = first_vectors[row], second_vectors[row]
        if add_rwe:
            first = np.concatenate((first, rwe.get(test["kata1"][row], np.zeros(300,))))
            second = np.concatenate((second, rwe.get(test["kata2"][row], np.zeros(300,))))
        return np.subtract(first, second)

    test["representation"] = [_pair_vector(row) for row in range(len(test))]
    return np.vstack(test["representation"]), test["relasi"]

def get_fasttext_test_data(add_rwe=False):
    """Build test features from fastText word vectors.

    Each word pair is represented by the difference between the first and
    second word vectors. With ``add_rwe`` the RWE vector of each word (a
    300-dimensional zero vector when the word is unknown) is appended to its
    fastText vector before subtracting. The per-row vectors are also stored
    in ``test['representation']``.

    Returns:
        Tuple ``(X_test, y_test)``: the stacked feature matrix and the
        ``relasi`` label column.
    """

    def _pair_vector(row):
        first = ft.get_word_vector(test["kata1"][row])
        second = ft.get_word_vector(test["kata2"][row])
        if add_rwe:
            first = np.concatenate((first, rwe.get(test["kata1"][row], np.zeros(300,))))
            second = np.concatenate((second, rwe.get(test["kata2"][row], np.zeros(300,))))
        return np.subtract(first, second)

    test['representation'] = [_pair_vector(row) for row in range(len(test))]
    return np.vstack(test['representation']), test['relasi']

def get_accuracy(pred, actual):
    """Print the fraction of predictions that equal the expected labels.

    Items are compared by position, so ``actual`` may be a list, a numpy
    array or a pandas Series with any index.

    Args:
        pred: Iterable of predicted labels.
        actual: Iterable of expected labels, in the same order as ``pred``.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    # Materialise both inputs so the comparison is positional; `actual[i]` on
    # a Series is a label lookup and breaks with a non-default index.
    predicted = list(pred)
    expected = list(actual)
    if len(predicted) != len(expected):
        raise ValueError(
            f"pred and actual differ in length: {len(predicted)} != {len(expected)}"
        )
    if not predicted:
        raise ValueError("Cannot compute accuracy of an empty prediction set")
    # `correct` replaces the original local named `sum`, which shadowed the builtin.
    correct = 0
    for predicted_label, expected_label in zip(predicted, expected):
        if predicted_label == expected_label:
            correct += 1
    print(f"Accuracy: {correct/len(predicted)}")

In [None]:
# Build the four test feature sets, mirroring the training configurations.
X_test_bge, y_test_bge = get_bge_test_data()
X_test_bge_rwe, y_test_bge_rwe = get_bge_test_data(True)
X_test_ft, y_test_ft = get_fasttext_test_data()
X_test_ft_rwe, y_test_ft_rwe = get_fasttext_test_data(True)

In [None]:
# Predict test labels with each classifier on its matching feature set.
prediction_bge = model_bge.predict(X_test_bge)
prediction_bge_rwe = model_bge_rwe.predict(X_test_bge_rwe)
prediction_ft = model_ft.predict(X_test_ft)
prediction_ft_rwe = model_ft_rwe.predict(X_test_ft_rwe)

In [None]:
# Print test accuracy in this order: BGE, BGE + RWE, fastText, fastText + RWE.
get_accuracy(prediction_bge, y_test_bge)
get_accuracy(prediction_bge_rwe, y_test_bge_rwe)
get_accuracy(prediction_ft, y_test_ft)
get_accuracy(prediction_ft_rwe, y_test_ft_rwe)

### Display Transformation

In [None]:

def display_transformation(X_test, y_test, predictions, test_data, limit=5):
    """Print a small table of word pairs with gold and predicted relations.

    Rows are matched by position across ``test_data``, ``y_test`` and
    ``predictions``. Output stops after ``limit`` rows or when the shortest
    input runs out.

    Args:
        X_test: Unused; kept so existing callers keep working.
        y_test: Iterable of gold relation labels.
        predictions: Iterable of predicted relation labels.
        test_data: Table with ``kata1`` and ``kata2`` columns.
        limit: Maximum number of rows to print.
    """
    print(f"{'Kata1':<15} {'Kata2':<15} {'Relasi':<10} {'Predicted':<10}")
    print("-" * 50)

    # Iterate by position. The original mixed label-based y_test[i] with
    # positional .iloc, which misaligns (or raises) for a non-default index,
    # and could index past the end of a shorter input.
    rows = zip(test_data["kata1"], test_data["kata2"], y_test, predictions)
    for shown, (kata1, kata2, relasi, predicted) in enumerate(rows):
        if shown >= limit:
            break
        print(f"{kata1:<15} {kata2:<15} {relasi:<10} {predicted:<10}")

# Show the first five test pairs with gold and predicted labels: BGE without RWE
print("BGE without RWE:")
display_transformation(X_test_bge, y_test_bge, prediction_bge, test, limit=5)

# Same five pairs, classifier trained on BGE with RWE appended
print("\nBGE with RWE:")
display_transformation(X_test_bge_rwe, y_test_bge_rwe, prediction_bge_rwe, test, limit=5)

# Same five pairs, classifier trained on fastText without RWE
print("\nFastText without RWE:")
display_transformation(X_test_ft, y_test_ft, prediction_ft, test, limit=5)

# Same five pairs, classifier trained on fastText with RWE appended
print("\nFastText with RWE:")
display_transformation(X_test_ft_rwe, y_test_ft_rwe, prediction_ft_rwe, test, limit=5)