# RNA Secondary Structure Prediction Pipelines

This notebook predicts RNA secondary structure using pipelines composed of the
following units:

- Global structure estimator (GSE)
- Local structure refiner (LSR)
- Monomialization unit (MU)
- Binarization unit (BU)
- Symmetrization unit (SU)
- Constraint unit (CU)
- Threshold unit (TU)
- Noise removal unit (NRU)
- Output unit: SU -> CU -> MU -> BU -> NRU (OU)

The last pipeline components are always OU.

In [1]:
import json
from random import sample

import numpy as np
from torch import optim, nn

from diurnal import database, structure, train, transform, visualize, evaluate, segment
from diurnal.models import deep
from diurnal.models.deep import cnn

## Data Preparation

In [2]:
SIZE = 128  # RNA molecule maximum length (longer ones are filtered out).
SUBDIVISION_SIZE = 32  # Kernel size of the local structure refiner.

database.download("./data/", "archiveII")
SRC = "./data/archiveII/"  # Input directory containing the CT files.
DST = f"./data/archiveII_processed_{SIZE}/"  # Output directory of the formatted files.
names = database.format_filenames(SRC, DST + "names.txt", SIZE)
train_names, validation_names, test_names = train.split(
    names, (0.8, 0.1, 0.1)
)

2024-06-11T21:25:44.603842 > INFO Download and install an RNA database.
2024-06-11T21:25:44.638979 >     The dataset `archiveII` is already downloaded at `./data/archiveII`.
2024-06-11T21:25:44.641005 > INFO Extract the filenames from the directory `./data/archiveII/`.


100%|██████████| 3975/3975 [00:32<00:00, 123.47it/s]


2024-06-11T21:26:16.880722 >     Detected 3975 files. Kept 2326 files.
2024-06-11T21:26:16.909745 >     The file `./data/archiveII_processed_128/names.txt` already contains the names.


In [3]:
def format(dst: str, names: list[str]):
    # Primary structures
    pp_scalar = lambda x, y: structure.Primary.to_matrix(
        x, y, structure.Schemes.IUPAC_PAIRINGS_SCALARS
    )
    database.format_primary_structure(
        names, f"{dst}primary_potential_pairings_scalar.npy", SIZE, pp_scalar
    )
    database.format_primary_structure(
        names, f"{dst}primary_masks.npy", SIZE, structure.Primary.to_mask
    )
    database.format_primary_structure(
        names, f"{dst}primary_onehot.npy",
        SIZE, structure.Primary.to_onehot
    )
    # Secondary structures.
    database.format_secondary_structure(
        names, f"{dst}secondary_contact.npy", SIZE,
        structure.Secondary.to_matrix
    )

format(f"{DST}validation/", validation_names)
format(f"{DST}test/", test_names)
format(f"{DST}train/", train_names)

2024-06-11T21:26:16.966183 > INFO Formatting primary structures into `./data/archiveII_processed_128/validation/primary_potential_pairings_scalar.npy`.
2024-06-11T21:26:17.110368 >     The file `./data/archiveII_processed_128/validation/primary_potential_pairings_scalar.npy` already contains the formatted data.
2024-06-11T21:26:17.113394 > INFO Formatting primary structures into `./data/archiveII_processed_128/validation/primary_masks.npy`.
2024-06-11T21:26:18.059901 >     The file `./data/archiveII_processed_128/validation/primary_masks.npy` already contains the formatted data.
2024-06-11T21:26:18.062634 > INFO Formatting primary structures into `./data/archiveII_processed_128/validation/primary_onehot.npy`.
2024-06-11T21:26:18.787731 >     The file `./data/archiveII_processed_128/validation/primary_onehot.npy` already contains the formatted data.
2024-06-11T21:26:18.790510 > INFO Formatting secondary structures into `./data/archiveII_processed_128/validation/secondary_contact.npy`.
2

In [4]:
def format_data() -> tuple:
    train_set = {
        "input": (np.load(f"{DST}train/primary_potential_pairings_scalar.npy"), ),
        "output": np.load(f"{DST}train/secondary_contact.npy"),
        "mask": np.load(f"{DST}train/primary_masks.npy"),
        "names": []
    }
    validation_set = {
        "input": (np.load(f"{DST}validation/primary_potential_pairings_scalar.npy"), ),
        "output": np.load(f"{DST}validation/secondary_contact.npy"),
        "mask": np.load(f"{DST}validation/primary_masks.npy"),
        "names": []
    }
    test_set = {
        "input": (np.load(f"{DST}test/primary_potential_pairings_scalar.npy"), ),
        "output": np.load(f"{DST}test/secondary_contact.npy"),
        "mask": np.load(f"{DST}test/primary_masks.npy"),
        "names": []
    }
    return train_set, validation_set, test_set


# Create a training set for the LSR.
from os import mkdir
from tqdm import tqdm


def save_global_estimations(model, data, name):
    try:
        mkdir("tmp")
    except:
        pass

    predictions = []
    for i in tqdm(range(len(data["input"][0]))):
        p = model.predict([data["input"][0][i]])
        predictions.append(p)
    np.save(f"tmp/{name}", np.array(predictions))


FIRST = 16  # Select the most important areas.
RANDOM = 8  # Select random areas after the FIRST ones.
STRIDE = 16


def sample_crops(predictions, data, name, maximum, t = 0) -> list:
    predict_crops = []
    pp_crops = []  # Potential pairings
    contact_crops = []
    for index in tqdm(range(len(predictions))):
        prediction = predictions[index] * predictions[index].T * data["mask"][index]
        all_crops = segment.sample_areas(
            prediction, SUBDIVISION_SIZE, stride=STRIDE, threshold=t
        )
        if len(all_crops) > FIRST:
            difference = len(all_crops) - FIRST
            sampling = min(difference, RANDOM)
            if sampling:
                crops = all_crops[:FIRST] + sample(all_crops[FIRST:], sampling)
            else:
                crops = all_crops[:FIRST]
        else:
            crops = all_crops

        for crop, _ in crops:
            predict_crops.append(
                predictions[index][crop[0]: crop[0] + 32, crop[1]: crop[1] + 32]
            )
            pp_crops.append(
                data["input"][0][index][crop[0]: crop[0] + 32, crop[1]: crop[1] + 32]
            )
            contact_crops.append(
                data["output"][index][crop[0]: crop[0] + 32, crop[1]: crop[1] + 32]
            )

        if len(predict_crops) >= maximum:
            break

    np.save(f"tmp/{name}_areas_prediction_{SUBDIVISION_SIZE}.npy", np.array(predict_crops))
    np.save(f"tmp/{name}_areas_pp_{SUBDIVISION_SIZE}.npy", np.array(pp_crops))
    np.save(f"tmp/{name}_areas_contact_{SUBDIVISION_SIZE}.npy", np.array(contact_crops))


def format_refiner_data() -> tuple:
    train_set = {
        "input": (
            np.load(f"tmp/train_areas_prediction_{SUBDIVISION_SIZE}.npy"),
            np.load(f"tmp/train_areas_pp_{SUBDIVISION_SIZE}.npy")
        ),
        "output": np.load(f"tmp/train_areas_contact_{SUBDIVISION_SIZE}.npy"),
        "names": []
    }
    validation_set = {
        "input": (
            np.load(f"tmp/validation_areas_prediction_{SUBDIVISION_SIZE}.npy"),
            np.load(f"tmp/validation_areas_pp_{SUBDIVISION_SIZE}.npy")
        ),
        "output": np.load(f"tmp/validation_areas_contact_{SUBDIVISION_SIZE}.npy"),
        "names": []
    }
    test_set = {
        "input": (
            np.load(f"tmp/test_areas_prediction_{SUBDIVISION_SIZE}.npy"),
            np.load(f"tmp/test_areas_pp_{SUBDIVISION_SIZE}.npy")
        ),
        "output": np.load(f"tmp/test_areas_contact_{SUBDIVISION_SIZE}.npy"),
        "names": []
    }
    return train_set, validation_set, test_set


def refine(estimation: np.ndarray, potential_pairings: np.ndarray, model) -> np.ndarray:
    result = np.zeros((SIZE, SIZE))
    for v_stride in range(int(SIZE / SUBDIVISION_SIZE)):
        row = v_stride * SUBDIVISION_SIZE
        for h_stride in range(int(SIZE / SUBDIVISION_SIZE)):
            column = h_stride * SUBDIVISION_SIZE
            sub_estimation = estimation[
                row:row + SUBDIVISION_SIZE, column:column + SUBDIVISION_SIZE
            ]
            sub_input = potential_pairings[
                row:row + SUBDIVISION_SIZE, column:column + SUBDIVISION_SIZE
            ]
            prediction = model.predict([sub_estimation, sub_input])
            result[row:row + SUBDIVISION_SIZE, column:column + SUBDIVISION_SIZE] = prediction
    return result


def dense_refine(estimation: np.ndarray, potential_pairings: np.ndarray, model) -> np.ndarray:
    result = np.zeros((SIZE, SIZE))
    for v_stride in range(int(SIZE / SUBDIVISION_SIZE) * 2 - 1):
        row = int(v_stride / 2 * SUBDIVISION_SIZE)
        for h_stride in range(int(SIZE / SUBDIVISION_SIZE) * 2 - 1):
            column = int(h_stride /2 * SUBDIVISION_SIZE)
            sub_estimation = estimation[
                row:row + SUBDIVISION_SIZE, column:column + SUBDIVISION_SIZE
            ]
            sub_input = potential_pairings[
                row:row + SUBDIVISION_SIZE, column:column + SUBDIVISION_SIZE
            ]
            prediction = model.predict([sub_estimation, sub_input])
            result[row:row + SUBDIVISION_SIZE, column:column + SUBDIVISION_SIZE] += prediction
    return result / 2

In [5]:
def MU(matrix: np.ndarray) -> np.ndarray:
    return transform.to_monomial_matrix(matrix)


def BU(matrix: np.ndarray) -> np.ndarray:
    return transform.to_binary_matrix(matrix)


def SU(matrix: np.ndarray) -> np.ndarray:
    return matrix * matrix.T


def CU(matrix: np.ndarray, mask: np.ndarray) -> np.ndarray:
    return matrix * mask


def TU(matrix: np.ndarray, threshold: float) -> np.ndarray:
    return (matrix < threshold) * matrix


def NRU(matrix: np.ndarray, threshold: float) -> np.ndarray:
    return segment.convolutional_denoise(matrix, threshold=threshold)


def NU(matrix: np.ndarray) -> np.ndarray:
    normalized = matrix.copy()
    maximum = normalized.max()
    normalized[normalized == 0] = 2
    minimum = normalized.min()
    normalized -= minimum
    normalized *= 1 / (maximum - minimum)
    normalized[normalized > 1] = 0
    return normalized


def OU(matrix: np.ndarray, mask: np.ndarray) -> np.ndarray:
    matrix = CU(matrix, mask)
    matrix = SU(matrix)
    matrix = MU(matrix)
    matrix = BU(matrix)
    matrix = NRU(matrix, 2)
    return matrix

In [6]:
def inspect(model, data: dict, i: int) -> None:
    t = data["output"][i]
    p = model(data["input"][0][i], data["mask"][i])
    visualize.compare_pairings(t, p)
    print(f"F1 score: {evaluate.ContactMatrix.f1(t, p)}")


def measure_performances(model, data) -> list[float]:
    scores = []
    for i in range(len(data["output"])):
        t = data["output"][i]
        p = model(data["input"][0][i], data["mask"][i])
        f = evaluate.ContactMatrix.f1(t, p)
        scores.append(f)
    print(f"F1 scores: {scores}")
    print(f"Arithmetic mean: {np.mean(scores)}")
    return scores

## Model 1: Global Estimation (GSE -> OU)

In [7]:
train_data, validation_data, test_data = format_data()

N_MAX_EPOCHS = 100
GSE = deep.NN(
    model=cnn.UNet2D(SIZE, 4),
    n_epochs=N_MAX_EPOCHS,
    optimizer=optim.Adam,
    loss_fn=nn.CrossEntropyLoss,
    use_half=False,
    patience=10,
    verbosity=2,
)
GSE.train(train_data, validation_data)

def model_1(pp: np.ndarray, mask: np.ndarray):
    x = GSE.predict([pp])
    x = NU(x)
    x = OU(x, mask)
    return x

2024-06-11T21:26:26.051736 > INFO Training the model with 1860 data points.
2024-06-11T21:26:26.053740 >     Using 232 data points for validation.
2024-06-11T21:26:26.054739 >     Beginning the training.
2024-06-11T21:27:04.570590 >   0 / 100 [                                                  ] Loss: 2.10887  Patience: 10
2024-06-11T21:27:05.652495 >   1 / 100 [                                                  ] Loss: 2.07830  Patience: 10
2024-06-11T21:27:06.687761 >   2 / 100 [=                                                 ] Loss: 2.06632  Patience: 10
2024-06-11T21:27:07.597550 >   3 / 100 [=                                                 ] Loss: 2.06026  Patience: 10
2024-06-11T21:27:08.564924 >   4 / 100 [==                                                ] Loss: 2.05666  Patience: 10
2024-06-11T21:27:09.563894 >   5 / 100 [==                                                ] Loss: 2.05314  Patience: 10
2024-06-11T21:27:10.564001 >   6 / 100 [===                                 

In [None]:
model_1_results = measure_performances(model_1, test_data)

## Model 2: Refined Predictions (GSE -> LSR -> OU)

In [None]:
save_global_estimations(GSE, train_data, "train_predictions.npy")
save_global_estimations(GSE, validation_data, "validation_predictions.npy")
save_global_estimations(GSE, test_data, "test_predictions.npy")

print("Sample subregions from the predictions and input data.")
sample_crops(np.load("tmp/train_predictions.npy"), train_data, "train", 100_000)
sample_crops(np.load("tmp/validation_predictions.npy"), validation_data, "validation", 25_000)
sample_crops(np.load("tmp/test_predictions.npy"), test_data, "test", 25_000)

100%|██████████| 1860/1860 [00:04<00:00, 417.75it/s]
100%|██████████| 232/232 [00:00<00:00, 419.79it/s]
100%|██████████| 232/232 [00:00<00:00, 495.12it/s]


Sample subregions from the predictions and input data.


100%|██████████| 1860/1860 [00:00<00:00, 3391.33it/s]
100%|██████████| 232/232 [00:00<00:00, 3532.35it/s]
100%|██████████| 232/232 [00:00<00:00, 3274.87it/s]


In [None]:
refiner_train_data, refiner_validation_data, refiner_test_data = format_refiner_data()

N_MAX_EPOCHS = 500
LSR = deep.NN(
    model=cnn.UNet2DRefiner(SUBDIVISION_SIZE, 3),
    n_epochs=N_MAX_EPOCHS,
    optimizer=optim.Adam,
    loss_fn=nn.CrossEntropyLoss,
    loss_fn_args=None,
    use_half=False,
    patience=10,
    verbosity=2,
    batch=256
)
LSR.train(refiner_train_data, refiner_validation_data)

2024-06-11T09:46:49.357765 > INFO Training the model with 44046 data points.
2024-06-11T09:46:49.358790 >     Using 5494 data points for validation.
2024-06-11T09:46:49.358790 >     Beginning the training.
2024-06-11T09:46:51.995363 >   0 / 500 [                                                  ] Loss: 0.64707  Patience: 10
2024-06-11T09:46:53.832983 >   1 / 500 [                                                  ] Loss: 0.64123  Patience: 10
2024-06-11T09:46:55.565195 >   2 / 500 [                                                  ] Loss: 0.63286  Patience: 10
2024-06-11T09:46:57.346440 >   3 / 500 [                                                  ] Loss: 0.62898  Patience: 10
2024-06-11T09:46:59.088846 >   4 / 500 [                                                  ] Loss: 0.62746  Patience: 10
2024-06-11T09:47:00.883817 >   5 / 500 [                                                  ] Loss: 0.62638  Patience: 10
2024-06-11T09:47:02.653033 >   6 / 500 [                                  

In [None]:
def model_2(pp: np.ndarray, mask: np.ndarray):
    x = GSE.predict([pp])
    #x += refine(x, pp, LSR)
    x *= dense_refine(x, pp, LSR)
    x = OU(x, mask)
    return x
# 0.231

model_2_results = measure_performances(model_2, test_data)

F1 scores: [0.11764705926179886, 0.49122804403305054, 0.5084745287895203, 0.3333333432674408, 0.0, 0.11764706671237946, 0.0714285746216774, 0.2857142984867096, 0.7878788113594055, 0.20338982343673706, 0.25806450843811035, 0.06666667014360428, 0.555555522441864, 0.3137255012989044, 0.3606557548046112, 0.06060606241226196, 0.5, 0.2142857164144516, 0.25, 0.4444444477558136, 0.07407407462596893, 0.14814814925193787, 0.1666666716337204, 0.40816327929496765, 0.25, 0.4000000059604645, 0.25, 0.4642857313156128, 0.4761904776096344, 0.2641509473323822, 0.052631575614213943, 0.6000000238418579, 0.0, 0.42307692766189575, 0.42105263471603394, 0.0, 0.4878048598766327, 0.19607844948768616, 0.5454545021057129, 0.2380952388048172, 0.6666666865348816, 0.1860465109348297, 0.32786887884140015, 0.042553193867206573, 0.5789474248886108, 0.5079365372657776, 0.3137255012989044, 0.06451612710952759, 0.2857142686843872, 0.25, 0.4313725531101227, 0.0, 0.5396825075149536, 0.3333333432674408, 0.25, 0.6101695299148

## Model 3: Refined Predictions (GSE -> [CU -> NRU] -> LSR -> OU)

In [None]:
class Estimator:
    def predict(pp) -> np.ndarray:
        estimation = GSE.predict(pp)
        mask = pp[0].copy()
        mask[mask > 0] = 1
        estimation *= mask
        estimation = SU(estimation)
        return estimation#NRU(estimation, 2)


save_global_estimations(Estimator, train_data, "train_predictions.npy")
save_global_estimations(Estimator, validation_data, "validation_predictions.npy")
save_global_estimations(Estimator, test_data, "test_predictions.npy")

print("Sample subregions from the predictions and input data.")
sample_crops(np.load("tmp/train_predictions.npy"), train_data, "train", 100_000)
sample_crops(np.load("tmp/validation_predictions.npy"), validation_data, "validation", 25_000)
sample_crops(np.load("tmp/test_predictions.npy"), test_data, "test", 25_000)

100%|██████████| 1860/1860 [00:04<00:00, 384.98it/s]
100%|██████████| 232/232 [00:00<00:00, 360.20it/s]
100%|██████████| 232/232 [00:00<00:00, 396.03it/s]


Sample subregions from the predictions and input data.


100%|██████████| 1860/1860 [00:00<00:00, 2960.12it/s]
100%|██████████| 232/232 [00:00<00:00, 3223.99it/s]
100%|██████████| 232/232 [00:00<00:00, 3263.87it/s]


In [None]:
refiner_train_data, refiner_validation_data, refiner_test_data = format_refiner_data()

N_MAX_EPOCHS = 500
LSR = deep.NN(
    model=cnn.UNet2DRefiner(SUBDIVISION_SIZE, 3),
    n_epochs=N_MAX_EPOCHS,
    optimizer=optim.Adam,
    loss_fn=nn.CrossEntropyLoss,
    loss_fn_args=None,
    use_half=False,
    patience=5,
    verbosity=2,
    batch=256
)
LSR.train(refiner_train_data, refiner_validation_data)

2024-06-11T09:50:15.848119 > INFO Training the model with 44046 data points.
2024-06-11T09:50:15.849529 >     Using 5494 data points for validation.
2024-06-11T09:50:15.850576 >     Beginning the training.
2024-06-11T09:50:18.663487 >   0 / 500 [                                                  ] Loss: 0.70687  Patience: 5
2024-06-11T09:50:20.618088 >   1 / 500 [                                                  ] Loss: 0.70214  Patience: 5
2024-06-11T09:50:22.356590 >   2 / 500 [                                                  ] Loss: 0.69874  Patience: 5
2024-06-11T09:50:24.136345 >   3 / 500 [                                                  ] Loss: 0.68917  Patience: 5
2024-06-11T09:50:25.798763 >   4 / 500 [                                                  ] Loss: 0.68319  Patience: 5
2024-06-11T09:50:27.583848 >   5 / 500 [                                                  ] Loss: 0.68189  Patience: 5
2024-06-11T09:50:29.356517 >   6 / 500 [                                        

In [None]:
def model_3(pp: np.ndarray, mask: np.ndarray):
    x = Estimator.predict([pp])
    #y = refine(x, pp, LSR)
    y = dense_refine(x, pp, LSR)
    y *= mask
    x += y
    x = OU(x, mask)
    return x


model_3_results = measure_performances(model_3, test_data)

F1 scores: [0.1818181872367859, 0.5079364776611328, 0.625, 0.6557376980781555, 0.0, 0.14814814925193787, 0.24242424964904785, 0.2222222238779068, 0.7058823704719543, 0.6774193644523621, 0.27586206793785095, 0.0625, 0.523809552192688, 0.4590164124965668, 0.6666666865348816, 0.1818181723356247, 0.5806452035903931, 0.5588235855102539, 0.4848484992980957, 0.46875, 0.4126984179019928, 0.12121211737394333, 0.277777761220932, 0.5901638865470886, 0.1785714328289032, 0.5970149040222168, 0.636363685131073, 0.53125, 0.34285712242126465, 0.4912280738353729, 0.19230768084526062, 0.5806451439857483, 0.111111119389534, 0.6428571939468384, 0.6969696879386902, 0.0, 0.6000000238418579, 0.4262295365333557, 0.5, 0.1818181872367859, 0.5846154093742371, 0.5333333015441895, 0.6268656849861145, 0.20338982343673706, 0.7222222685813904, 0.7462686896324158, 0.17241379618644714, 0.06060606613755226, 0.23255813121795654, 0.2631579041481018, 0.4642857015132904, 0.0, 0.8235294222831726, 0.5573769807815552, 0.1052631

## Model 4: Refined Predictions (GSE -> OU -> LSR -> OU)

In [None]:
class Estimator:
    def predict(pp) -> np.ndarray:
        estimation = GSE.predict(pp)
        mask = pp[0].copy()
        mask[mask > 0] = 1
        return OU(estimation, mask)


save_global_estimations(Estimator, train_data, "train_predictions.npy")
save_global_estimations(Estimator, validation_data, "validation_predictions.npy")
save_global_estimations(Estimator, test_data, "test_predictions.npy")

print("Sample subregions from the predictions and input data.")
sample_crops(np.load("tmp/train_predictions.npy"), train_data, "train", 100_000)
sample_crops(np.load("tmp/validation_predictions.npy"), validation_data, "validation", 25_000)
sample_crops(np.load("tmp/test_predictions.npy"), test_data, "test", 25_000)

100%|██████████| 1860/1860 [00:08<00:00, 208.25it/s]
100%|██████████| 232/232 [00:01<00:00, 211.37it/s]
100%|██████████| 232/232 [00:00<00:00, 238.62it/s]


Sample subregions from the predictions and input data.


100%|██████████| 1860/1860 [00:00<00:00, 3026.20it/s]
100%|██████████| 232/232 [00:00<00:00, 3575.56it/s]
100%|██████████| 232/232 [00:00<00:00, 3438.32it/s]


In [None]:
refiner_train_data, refiner_validation_data, refiner_test_data = format_refiner_data()

N_MAX_EPOCHS = 500
LSR = deep.NN(
    model=cnn.UNet2DRefiner(SUBDIVISION_SIZE, 3),
    n_epochs=N_MAX_EPOCHS,
    optimizer=optim.Adam,
    loss_fn=nn.CrossEntropyLoss,
    use_half=False,
    patience=10,
    verbosity=2,
    batch=256
)
LSR.train(refiner_train_data, refiner_validation_data)

2024-06-11T09:41:31.504071 > INFO Training the model with 39311 data points.
2024-06-11T09:41:31.505087 >     Using 4962 data points for validation.
2024-06-11T09:41:31.505087 >     Beginning the training.
2024-06-11T09:41:33.761692 >   0 / 500 [                                                  ] Loss: 0.83845  Patience: 10
2024-06-11T09:41:35.384722 >   1 / 500 [                                                  ] Loss: 0.82758  Patience: 10
2024-06-11T09:41:36.969953 >   2 / 500 [                                                  ] Loss: 0.82425  Patience: 10
2024-06-11T09:41:38.552693 >   3 / 500 [                                                  ] Loss: 0.82206  Patience: 10
2024-06-11T09:41:40.266189 >   4 / 500 [                                                  ] Loss: 0.82020  Patience: 10
2024-06-11T09:41:42.031845 >   5 / 500 [                                                  ] Loss: 0.81756  Patience: 10
2024-06-11T09:41:43.705022 >   6 / 500 [                                  

In [None]:
def model_4(pp: np.ndarray, mask: np.ndarray):
    x = Estimator.predict([pp])
    y = refine(x, pp, LSR)
    x = y
    x = OU(x, mask)
    return x


model_4_results = measure_performances(model_4, test_data)

F1 scores: [0.11428571492433548, 0.37288135290145874, 0.47457626461982727, 0.222222238779068, 0.0, 0.2631579041481018, 0.0, 0.0555555559694767, 0.2142857164144516, 0.2181818187236786, 0.1875, 0.23255814611911774, 0.2631579041481018, 0.3050847351551056, 0.36000001430511475, 0.2702702581882477, 0.3225806653499603, 0.38596490025520325, 0.23529411852359772, 0.4150943458080292, 0.17543861269950867, 0.06666666269302368, 0.2926829159259796, 0.19607841968536377, 0.0, 0.5517241358757019, 0.38596490025520325, 0.5283018946647644, 0.23529411852359772, 0.2448979616165161, 0.2916666567325592, 0.21052633225917816, 0.0, 0.4210526645183563, 0.21052631735801697, 0.0, 0.12903225421905518, 0.24137932062149048, 0.33898305892944336, 0.2916666567325592, 0.1690140962600708, 0.3333333134651184, 0.0784313753247261, 0.3870967924594879, 0.0, 0.10909091681241989, 0.0, 0.27272725105285645, 0.13333332538604736, 0.40909093618392944, 0.12765957415103912, 0.0, 0.2352941334247589, 0.12244898080825806, 0.0512820519506931