# A2C analysis

In [1]:
import json
import os
import random
from itertools import accumulate

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torchtext.data import get_tokenizer
from torchtext.vocab import GloVe
from tqdm import tqdm

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Uncomment the next line to force CPU execution.
# DEVICE = torch.device("cpu")
DEVICE

device(type='cuda')

In [3]:
# Root directory of the A2C experiment logs; load_from_experiments and
# load_model join it with the experiment name to locate configs and checkpoints.
LOG_PATH_PREFIX = "../logs/a2c/"

## Hyperparameters

In [4]:
# Default values only: load_from_experiments() rebinds every name in this cell
# to the value stored in the selected experiment's configs.json.

# Default discount factor and reward window of calculate_qvals
# (REWARD_STEPS == 0 makes it use every remaining reward).
RL_GAMMA = 1.0
REWARD_STEPS = 0

# GLOVE_DIM is the dimension requested from GloVe; TEST_SIZE selects the test
# CSV (test_<TEST_SIZE>.csv). TRAIN_SIZE is not referenced in this notebook.
GLOVE_DIM = 300
TRAIN_SIZE = "md"
TEST_SIZE = "sm"

# EMBED_DIM is the LSTM input size; LSTM_LAYERS / LSTM_H_DIM set the LSTM depth
# and hidden size; RNET_DROPOUT is the dropout probability inside RNetNN.
EMBED_DIM = GLOVE_DIM
LSTM_LAYERS = 1
LSTM_H_DIM = EMBED_DIM
RNET_DROPOUT = 0.5

# Default weight of the deletions term in the Env terminal reward.
ENV_GAMMA = 0.1

# RNET_LR and SRM_LR are passed to the RNet / SRModel constructors below;
# PGN_LR is not referenced in this notebook.
PGN_LR = 0.005
RNET_LR = 0.001
SRM_LR = 0.001

# The remaining names are not referenced by the code in this notebook;
# presumably training-time settings kept so the config round-trips - TODO confirm.
PGN_CLIP_GRAD = 0.0
ENTROPY_BETA = 0.001

PRETRAIN_SRM_RNET_EPOCHS = 200
PRETRAIN_PGN_EPOCHS = 100

EPISODES_BATCH = 10
PRETRAIN_SRM_RNET_BATCH = EPISODES_BATCH

FEATURES = ""
COMMENTS = ""

In [5]:
# Experiment directory under LOG_PATH_PREFIX, and the epoch sub-directory of
# "best" from which load_model reads the checkpoints.
EXPERIMENT_NAME = "23_04"
EPOCH = 1410

In [6]:
def load_from_experiments(experiment: str) -> None:
    """Rebind the module-level hyperparameters from a saved experiment config.

    Reads ``configs.json`` from ``./<LOG_PATH_PREFIX>/<experiment>/``, assigns
    each stored value to the global of the same name, and prints the loaded
    dictionary.

    Only the names are rebound: objects that were already built from the old
    values, and default-argument values captured when a function or class was
    defined, are not updated.

    Args:
        experiment: Name of the experiment sub-directory holding the config.

    Raises:
        KeyError: If the config lacks one of the expected keys. Globals that
            precede the missing key in the list below are already rebound.
    """
    config_path = os.path.join(".", LOG_PATH_PREFIX, experiment, "configs.json")
    with open(config_path, "r") as f:
        hyper_dict = json.load(f)

    # Assignment order matters only for the partial-update behaviour on KeyError.
    hyper_names = (
        "RL_GAMMA",
        "REWARD_STEPS",
        "GLOVE_DIM",
        "TRAIN_SIZE",
        "TEST_SIZE",
        "EMBED_DIM",
        "LSTM_LAYERS",
        "LSTM_H_DIM",
        "RNET_DROPOUT",
        "ENV_GAMMA",
        "PGN_LR",
        "RNET_LR",
        "SRM_LR",
        "PGN_CLIP_GRAD",
        "ENTROPY_BETA",
        "PRETRAIN_SRM_RNET_EPOCHS",
        "PRETRAIN_PGN_EPOCHS",
        "EPISODES_BATCH",
        "PRETRAIN_SRM_RNET_BATCH",
        "FEATURES",
        "COMMENTS",
    )
    module_globals = globals()
    for hyper_name in hyper_names:
        module_globals[hyper_name] = hyper_dict[hyper_name]

    print(hyper_dict)

## Loader

In [7]:
def load_model(name: str, model):
    """Load saved weights into ``model`` in place.

    Reads the checkpoint file
    ``./<LOG_PATH_PREFIX>/<EXPERIMENT_NAME>/best/<EPOCH>/models/<name>`` and
    applies its ``"model_state_dict"`` entry to ``model``.

    Args:
        name: File name of the checkpoint inside the ``models`` directory.
        model: Object exposing ``load_state_dict`` (e.g. an ``nn.Module``).

    Raises:
        KeyError: If the checkpoint has no ``"model_state_dict"`` entry.
    """
    path = os.path.join(
        ".", LOG_PATH_PREFIX, EXPERIMENT_NAME, "best", str(EPOCH), "models", name
    )
    # map_location remaps the stored tensors onto DEVICE; without it a
    # checkpoint holding CUDA tensors cannot be deserialized when DEVICE has
    # fallen back to the CPU.
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    checkpoint = torch.load(path, map_location=DEVICE)
    model.load_state_dict(checkpoint["model_state_dict"])

## RL Utilities

In [8]:
def calculate_qvals(
    rewards: list[float], gamma: float = RL_GAMMA, reward_steps: int = REWARD_STEPS
) -> np.ndarray:
    """Compute the discounted return starting at every step of an episode.

    For step ``i`` the value is
    ``r[i] + gamma * r[i+1] + gamma**2 * r[i+2] + ...`` taken over at most
    ``reward_steps`` consecutive rewards.

    Args:
        rewards: Per-step rewards of one episode.
        gamma: Discount factor. The default is the value ``RL_GAMMA`` had when
            this function was defined.
        reward_steps: Number of rewards in each window; ``0`` means the window
            runs to the end of the episode. The default is the value
            ``REWARD_STEPS`` had when this function was defined.

    Returns:
        Array with one discounted return per entry of ``rewards``.
    """
    horizon = len(rewards) if reward_steps == 0 else reward_steps

    def discount_step(tail_return, reward):
        # One backward step of the fold: shift the tail one step into the future.
        return gamma * tail_return + reward

    qvals = []
    for start in range(len(rewards)):
        window = rewards[start : start + horizon]
        # Fold from the last reward of the window back to the first.
        running = list(accumulate(reversed(window), discount_step))
        qvals.append(running[-1])

    return np.array(qvals)

## Dataset

In [9]:
# Tokenizer and GloVe vectors shared by the pipelines and the dataset below.
# GLOVE_DIM is read once, when this cell runs; vectors are cached under ../data.
tokenizer = get_tokenizer("basic_english")
global_vectors = GloVe(dim=GLOVE_DIM, cache="../data")


def text_pipeline(x):
    """Tokenize the raw text ``x`` and look up the GloVe vector of each token."""
    tokens = tokenizer(x)
    return global_vectors.get_vecs_by_tokens(tokens, lower_case_backup=True)


def tokenized_pipeline(x):
    """Look up the GloVe vector of each token in the already tokenized ``x``."""
    vectors = global_vectors.get_vecs_by_tokens(x, lower_case_backup=True)
    return vectors

In [10]:
def read_from_disk(path: str) -> np.ndarray:
    """Read the CSV file at ``path`` and return its rows as a NumPy array."""
    frame = pd.read_csv(path)
    return frame.to_numpy()

In [11]:
class PlagiarismDataset(Dataset):
    """Dataset of tokenized (target, candidate, score) triples.

    Sentences are tokenized once at construction; their GloVe vectors are
    looked up and moved to ``DEVICE`` each time an item is requested.
    """

    def __init__(self, data: np.ndarray):
        """Tokenize every ``(target, candidate, score)`` row of ``data``."""
        rows = [
            (tokenizer(target), tokenizer(candidate), score)
            for target, candidate, score in data
        ]

        self.targets = [row[0] for row in rows]
        self.candidates = [row[1] for row in rows]
        # Scores are stored in half precision.
        self.scores = np.array([row[2] for row in rows]).astype(np.float16)

    def __len__(self):
        """Return the number of samples."""
        return len(self.scores)

    def __getitem__(
        self, idx
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, list, list]:
        """Return one sample.

        Returns:
            ``(target vectors, candidate vectors, score tensor of shape (1,),
            target tokens, candidate tokens)``; the three tensors are on
            ``DEVICE``.
        """
        target_tokens = self.targets[idx]
        candidate_tokens = self.candidates[idx]

        target_vectors = tokenized_pipeline(target_tokens).to(DEVICE)
        candidate_vectors = tokenized_pipeline(candidate_tokens).to(DEVICE)
        score = torch.tensor([self.scores[idx]]).float().to(DEVICE)

        return (
            target_vectors,
            candidate_vectors,
            score,
            target_tokens,
            candidate_tokens,
        )

In [12]:
# Build the evaluation dataset from the test CSV selected by TEST_SIZE
# (the value TEST_SIZE holds when this cell runs).
test_data = PlagiarismDataset(
    read_from_disk(f"../generated/datasets/test_{TEST_SIZE}.csv")
)

print(f"{len(test_data)=}")

len(test_data)=147


## RNet & SRModel

In [13]:
class RNetNN(nn.Module):
    """Feed-forward head: Linear -> ReLU -> Dropout -> Linear -> Sigmoid.

    The dropout probability is the value of ``RNET_DROPOUT`` at construction
    time; the sigmoid keeps every output in ``(0, 1)``.
    """

    def __init__(self, input_dim: int, output_dim: int, hidden_dim: int = 128) -> None:
        super().__init__()

        # Layer order is kept so the state-dict keys (net.0.*, net.3.*) are unchanged.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(RNET_DROPOUT),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid(),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x``."""
        return self.net(x)


class RNet:
    """Bundle of the ``RNetNN`` network, its MSE loss and its Adam optimizer."""

    def __init__(self, lr: float = 1e-2, device=DEVICE):
        """Create the network on ``device`` with an Adam optimizer at rate ``lr``."""
        # The input is two flattened LSTM states of LSTM_LAYERS * LSTM_H_DIM each.
        in_features = 2 * LSTM_LAYERS * LSTM_H_DIM
        self.net = RNetNN(in_features, 1).to(device)
        self.loss_fn = F.mse_loss
        self.optimizer = optim.Adam(self.net.parameters(), lr=lr)

    def __call__(self, data: torch.Tensor, grad: bool = True) -> torch.Tensor:
        """Run the network on ``data``.

        With ``grad=False`` the forward pass runs under ``torch.no_grad()``;
        with ``grad=True`` the surrounding gradient mode is left as it is.
        """
        if not grad:
            with torch.no_grad():
                return self.net(data)
        return self.net(data)


class SRModelNN(nn.Module):
    """Thin ``nn.Module`` wrapper around a unidirectional LSTM placed on ``DEVICE``."""

    def __init__(
        self, input_dim: int, hidden_size: int, num_layers: int = LSTM_LAYERS
    ) -> None:
        super().__init__()

        lstm = nn.LSTM(
            input_dim,
            hidden_size,
            num_layers=num_layers,
            bidirectional=False,
        )
        # The attribute name `net` is kept so the state-dict keys stay the same.
        self.net = lstm.to(DEVICE)

    def forward(self, *x):
        """Forward all positional arguments to the LSTM and return its result."""
        return self.net(*x)


class SRModel:
    """Bundle of the ``SRModelNN`` LSTM and its Adam optimizer."""

    def __init__(self, lr: float = 1e-2, device=DEVICE):
        """Create the LSTM on ``device`` with an Adam optimizer at rate ``lr``."""
        self.net = SRModelNN(EMBED_DIM, LSTM_H_DIM).to(device)
        self.optimizer = optim.Adam(self.net.parameters(), lr=lr)
        self.device = device

    def __call__(self, *data, grad: bool = True) -> torch.Tensor:
        """Run the LSTM on ``data`` and return what it returns.

        Callers in this file unpack the result as ``(output, (h, c))``.
        With ``grad=False`` the call runs under ``torch.no_grad()``.
        """
        if not grad:
            with torch.no_grad():
                return self.net(*data)
        return self.net(*data)

    def call_batch(self, data, grad: bool = True) -> torch.Tensor:
        """Encode a whole token sequence and return the last output, flattened.

        Args:
            data: Either a list of token tensors (concatenated first) or a
                single tensor; it is viewed as ``(len(data), 1, -1)``.
            grad: Whether gradients are tracked during the forward pass.
        """
        if type(data) == list:
            sequence = torch.cat(data)
        else:
            sequence = data
        sequence = sequence.view(len(data), 1, -1)

        def zero_state():
            # Fresh all-zero state for a batch of one sequence.
            return torch.zeros(LSTM_LAYERS, 1, LSTM_H_DIM).to(self.device)

        initial_state = (zero_state(), zero_state())
        out, _ = self(sequence, initial_state, grad=grad)
        return out[-1].flatten()

In [14]:
def update_sample(
    srm: SRModel,
    rnet: RNet,
    target: list[torch.Tensor],
    candidate: list[torch.Tensor],
    train_srm: bool = True,
    train_rnet: bool = True,
):
    """Predict the score of one (target, candidate) pair.

    Both sequences are encoded with ``srm.call_batch``; the two encodings are
    concatenated into a single row and passed to ``rnet``.

    Args:
        srm: Sequence encoder.
        rnet: Regression head applied to the concatenated encodings.
        target: Token tensors of the target sentence.
        candidate: Token tensors of the candidate sentence.
        train_srm: Passed to ``srm`` as its ``grad`` flag.
        train_rnet: Passed to ``rnet`` as its ``grad`` flag.

    Returns:
        The ``rnet`` output with its last dimension squeezed.
    """
    target_encoding = srm.call_batch(target, train_srm)
    candidate_encoding = srm.call_batch(candidate, train_srm)

    pair_features = torch.cat([target_encoding, candidate_encoding]).view(1, -1)
    prediction = rnet(pair_features, grad=train_rnet)

    return prediction.squeeze(-1)

## Environment

In [15]:
class Env:
    """Token-level retain/delete environment over (target, candidate) pairs.

    An episode walks through the target sentence and then the candidate
    sentence of one dataset sample, one token per `interact` call. Retained
    tokens are fed through the SRModel to update a per-sentence LSTM state;
    deleted tokens are only counted. At the terminal step the RNet prediction
    computed from both hidden states is compared with the sample's score to
    produce the reward.
    """

    def _get_state(self) -> torch.Tensor:
        """Return the observation for the current position.

        The observation concatenates the flattened hidden state and cell state
        of the current sentence with the vector of the current token.
        """
        token = self.data[self.sentence_idx][self.token_idx]
        return torch.cat(
            [
                self.hs[self.sentence_idx].flatten(),
                self.cs[self.sentence_idx].flatten(),
                token,
            ]
        ).to(DEVICE)

    def _get_reward(self) -> float:
        """Return the reward for the current position.

        Non-terminal positions yield 0.0. At the terminal position the reward
        is ``log(1 - mse + 1e-8) + gamma * deletions / total_words``, where
        ``mse`` is the RNet loss against the sample score. The statistics
        dictionary and ``self.loss`` are updated as a side effect.
        """
        if not self.is_terminal():
            return 0.0

        self.statistics_dict["Deletions ratio"] = (
            self.statistics_dict["Deletions"] / self.total_words
        )

        # Case when agent removes the entire sequence
        # NOTE(review): interact() never acts on the last candidate token (see
        # is_terminal), so "Deletions" appears to top out at total_words - 1 and
        # this branch looks unreachable - confirm.
        if self.statistics_dict["Deletions"] == self.total_words:
            return 0.0

        # Both sentence hidden states, flattened into a single row for RNet.
        rnet_out = self.rnet(self.hs.view(1, -1), grad=False)

        score_tensor = torch.FloatTensor([self.data[2]]).to(DEVICE)
        loss = self.rnet.loss_fn(rnet_out.squeeze(-1), score_tensor)
        self.loss = loss.item()

        # The epsilon keeps the logarithm finite when the loss equals 1.
        rnet_reward = np.log(1 - loss.item() + 1e-8)
        deletions_reward = (
            self.gamma * self.statistics_dict["Deletions"] / self.total_words
        )

        self.statistics_dict["RNet reward"] = rnet_reward
        self.statistics_dict["Deletions reward"] = deletions_reward

        return rnet_reward + deletions_reward

    def __init__(
        self,
        dataset: Dataset,
        srm: SRModel,
        rnet: RNet,
        gamma: float = ENV_GAMMA,
        random_sampling: bool = True,
    ) -> None:
        """Store the models and dataset, then start the first episode.

        Args:
            dataset: Source of samples; indexed with an integer in `reset`.
            srm: Sequence model used to update the LSTM state on retained tokens.
            rnet: Regressor used for the terminal reward.
            gamma: Weight of the deletions term in the reward. The default is
                the value ENV_GAMMA had when this class was defined.
            random_sampling: If true, every reset draws a random sample;
                otherwise samples are visited by index.
        """
        self.srm = srm
        self.rnet = rnet

        self.gamma = gamma

        self.dataset = dataset

        self.random_sampling = random_sampling
        # -1 so that the first sequential reset selects sample 0.
        self.idx = -1

        self.reset()

    def reset(self, idx: int = -1) -> torch.Tensor:
        """Start a new episode and return its first observation.

        Args:
            idx: Sample index to load when `random_sampling` is false and
                `idx` is non-negative. It is ignored when `random_sampling`
                is true; a negative value advances to the next sample,
                wrapping around at the end of the dataset.
        """
        self.loss = -1

        self.steps = 0

        # Position pointer: sentence 0 is the target, sentence 1 the candidate.
        self.sentence_idx = 0
        self.token_idx = 0

        if self.random_sampling:
            self.idx = np.random.randint(len(self.dataset))
        elif idx >= 0:
            self.idx = idx
        else:
            self.idx = (self.idx + 1) % len(self.dataset)

        self.data = self.dataset[self.idx]

        self.total_words = len(self.data[0]) + len(self.data[1])

        # One LSTM hidden/cell state per sentence (index 0: target, 1: candidate).
        self.hs = torch.zeros((2, LSTM_LAYERS, LSTM_H_DIM)).to(DEVICE)
        self.cs = torch.zeros((2, LSTM_LAYERS, LSTM_H_DIM)).to(DEVICE)

        # Retained token vectors per sentence, plus the sample score.
        self.used_tokens: tuple[list[torch.Tensor], list[torch.Tensor], float] = (
            [],
            [],
            self.data[2].item(),
        )

        # Per-sentence token strings; deleted positions hold the marker "DELETED".
        self.used_pure_tokens: tuple[list[str], list[str]] = ([], [])
        # Lower-cased deleted token -> number of times it was deleted.
        self.deleted_tokens_dict = {}

        self.statistics_dict = {
            "Initial Target length": len(self.data[0]),
            "Initial Candidate length": len(self.data[1]),
            "Processed Target length": 0,
            "Processed Candidate length": 0,
            "Deletions": 0,
            "RNet reward": 0.0,
            "Deletions reward": 0.0,
            "Deletions ratio": 0.0,
        }

        return self.get_state()

    def get_state(self) -> torch.Tensor:
        """Public accessor for the current observation."""
        return self._get_state()

    def is_terminal(self) -> bool:
        """Return True once the pointer sits on the last candidate token.

        Because `interact` returns early in a terminal state, that last
        candidate token is never retained or deleted.
        """
        return self.sentence_idx == 1 and self.token_idx == (len(self.data[1]) - 1)

    def interact(self, action: int) -> tuple[torch.Tensor, float, bool]:
        """Apply `action` to the current token and advance the pointer.

        Args:
            action: 0 to retain the token, 1 to delete it. Any other value
                advances the pointer without recording anything.

        Returns:
            ``(next observation, reward, terminal flag)``. In a terminal state
            nothing changes and the reward is 0.
        """
        # 0 - retain
        # 1 - delete

        if self.is_terminal():
            return self._get_state(), 0, self.is_terminal()

        if action == 1:
            self.statistics_dict["Deletions"] += 1
            # Items 3 and 4 of a sample hold the token strings of target/candidate.
            pure_token = self.data[self.sentence_idx + 3][self.token_idx].lower()
            self.deleted_tokens_dict[pure_token] = (
                self.deleted_tokens_dict.get(pure_token, 0) + 1
            )
            self.used_pure_tokens[self.sentence_idx].append("DELETED")

        elif action == 0:
            pure_token = self.data[self.sentence_idx + 3][self.token_idx].lower()
            self.used_pure_tokens[self.sentence_idx].append(pure_token)

            if self.sentence_idx == 0:
                self.statistics_dict["Processed Target length"] += 1
            else:
                self.statistics_dict["Processed Candidate length"] += 1

            token = self.data[self.sentence_idx][self.token_idx]
            self.used_tokens[self.sentence_idx].append(  # type: ignore
                token.clone().detach()
            )

            # Feed the retained token through the LSTM, starting from the
            # current state of this sentence.
            h_c = (
                self.hs[self.sentence_idx].clone().detach(),
                self.cs[self.sentence_idx].clone().detach(),
            )

            _, (h, c) = self.srm(token.view(1, -1), h_c, grad=False)
            self.hs[self.sentence_idx] = h.clone().detach()
            self.cs[self.sentence_idx] = c.clone().detach()

        self.steps += 1
        self.token_idx += 1
        # After the last target token, continue with the candidate sentence.
        if self.sentence_idx == 0 and self.token_idx >= len(self.data[0]):
            self.sentence_idx = 1
            self.token_idx = 0

        return self._get_state(), self._get_reward(), self.is_terminal()

    def get_used_tokens(self) -> tuple[list[torch.Tensor], list[torch.Tensor], float]:
        """Return the retained token vectors and the sample score.

        A sentence with no retained token gets one all-zero placeholder
        appended, so neither list is empty.
        """
        # NOTE(review): the placeholder has LSTM_H_DIM columns while real tokens
        # are embedding vectors; these agree only while EMBED_DIM == LSTM_H_DIM
        # - confirm.
        if len(self.used_tokens[0]) == 0:
            self.used_tokens[0].append(torch.zeros(LSTM_LAYERS, LSTM_H_DIM).to(DEVICE))
        if len(self.used_tokens[1]) == 0:
            self.used_tokens[1].append(torch.zeros(LSTM_LAYERS, LSTM_H_DIM).to(DEVICE))
        return self.used_tokens

    def get_observation_shape(self) -> int:
        """Return the observation length: hidden state + cell state + token."""
        return 2 * LSTM_LAYERS * LSTM_H_DIM + EMBED_DIM

    def get_actions_shape(self) -> int:
        """Return the number of actions (retain, delete)."""
        return 2

    @staticmethod
    def sample_action() -> int:
        """Return a uniformly random action, 0 or 1."""
        return np.random.choice([0, 1])

## PGN


In [16]:
class A2CNet(nn.Module):
    """Actor-critic network: a shared body feeding a policy head and a value head."""

    def __init__(self, input_dim: int, output_dim: int, hidden_dim: int = 16) -> None:
        super().__init__()

        # Shared feature extractor used by both heads.
        shared_layers = [nn.Linear(input_dim, hidden_dim), nn.ReLU()]
        self.body = nn.Sequential(*shared_layers)

        # Policy head: one logit per action.
        policy_layers = [
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        ]
        self.policy = nn.Sequential(*policy_layers)

        # Value head: a single scalar estimate (kept as a Sequential so the
        # state-dict key stays value.0.*).
        self.value = nn.Sequential(nn.Linear(hidden_dim, 1))

    def forward(self, x):
        """Return ``(action logits, state value)`` for the observation ``x``."""
        features = self.body(x)
        action_logits = self.policy(features)
        state_value = self.value(features)
        return action_logits, state_value

## Agent

In [17]:
class Agent:
    """Turns policy logits into an action index."""

    def choose_action(self, action_logits):
        """Sample an action index with probability ``softmax(action_logits)``."""
        probabilities = F.softmax(action_logits, dim=0)
        action_indices = range(len(action_logits))
        sampled = random.choices(action_indices, probabilities)
        return sampled[0]

    def choose_optimal_action(self, action_logits) -> int:
        """Return the index of the most probable action (greedy choice)."""
        probabilities = F.softmax(action_logits, dim=0).cpu()
        best_index = np.argmax(probabilities)
        return int(best_index.item())

## Evaluation

In [18]:
def evaluate(
    srm: SRModel,
    rnet: RNet,
    pgn: nn.Module,
    agent: Agent,
    eval_env: Env,
) -> tuple[list[list[str]], list[list[str]]]:
    """Roll the greedy policy over every sample of ``eval_env.dataset``.

    One episode is played per dataset entry, starting with a reset to index 0
    and advancing through ``eval_env.reset()`` after each episode. If the
    environment was built with ``random_sampling=True`` its reset ignores the
    index and draws samples at random instead.

    Side effects: ``srm.net``, ``rnet.net`` and ``pgn`` are switched to eval
    mode and left there; the environment is reset once more after the last
    episode.

    Args:
        srm: Sequence model (only switched to eval mode here; the environment
            holds its own reference).
        rnet: Regressor (only switched to eval mode here).
        pgn: Policy network; the first element of its output is used as logits.
        agent: Provides ``choose_optimal_action``.
        eval_env: Environment to roll out in.

    Returns:
        ``(targets, candidates)``: for every episode, the token strings of the
        target and candidate sentence as recorded by the environment, with
        deleted positions marked ``"DELETED"``.
    """
    srm.net.eval()
    rnet.net.eval()
    pgn.eval()

    total_len = len(eval_env.dataset)

    targets: list[list[str]] = []
    candidates: list[list[str]] = []

    state = eval_env.reset(0)

    with tqdm(total=total_len, desc="Evaluation") as loop:
        for _episode in range(total_len):
            done = False
            while not done:
                with torch.no_grad():
                    action_logits = pgn(state)[0]
                action = agent.choose_optimal_action(action_logits)
                next_state, _, done = eval_env.interact(action)
                state = next_state.clone().detach()

            # reset() rebinds used_pure_tokens to fresh lists, so the lists
            # collected here are not modified by later episodes.
            kept_target, kept_candidate = eval_env.used_pure_tokens
            targets.append(kept_target)
            candidates.append(kept_candidate)

            state = eval_env.reset()

            loop.update(1)

    return targets, candidates

## Load & prepare data

In [19]:
# The dataset already stores the tokenized sentences, so read them directly
# rather than iterating test_data, which would look up the vectors of every
# sample and move them to DEVICE only to discard them.
initial_targets, initial_candidates = test_data.targets, test_data.candidates

# Mean token count per sentence before the agent removes anything.
itl = np.mean([len(x) for x in initial_targets])
icl = np.mean([len(x) for x in initial_candidates])

print(f"Mean init target len = {itl:.2f}")
print(f"Mean init candidate len = {icl:.2f}")

Mean init target len = 38.65
Mean init candidate len = 40.93


In [37]:
# Directory in which the structure-analysis JSON files are written and read back.
ANALYSIS_PATH = "../generated/structure_analysis"


def define_analytical_data(name: str, sentences: list[str]):
    """Write ``sentences`` to ``<ANALYSIS_PATH>/<name>.json``.

    The file maps each sentence's position to ``{"initial": sentence}``; JSON
    stores the integer positions as string keys. An existing file is
    overwritten.

    Args:
        name: File name without the ``.json`` extension.
        sentences: Sentences to store, in order.
    """
    d = {i: {"initial": s} for i, s in enumerate(sentences)}
    # Create the output directory on first use instead of failing in open().
    os.makedirs(ANALYSIS_PATH, exist_ok=True)
    with open(os.path.join(ANALYSIS_PATH, f"{name}.json"), "w") as f:
        json.dump(d, f, indent=2)


def convert_csv(path: str, restrictions: tuple[int, int] = (0, 100000)) -> list[str]:
    """Flatten a CSV file into a list of its cell texts, filtered by length.

    Cells are visited row by row, left to right. Every cell is converted with
    ``str`` before its length is measured, so numeric cells are handled
    instead of raising ``TypeError``; missing cells (NaN) are skipped.

    Args:
        path: Path of the CSV file.
        restrictions: ``(min_len, max_len)``, both inclusive, applied to the
            text length of each cell.

    Returns:
        The texts of all cells whose length lies within ``restrictions``.
    """
    min_len, max_len = restrictions[0], restrictions[1]

    df = pd.read_csv(path)
    data = []
    for _, row in df.iterrows():
        for cell in row:
            # Empty CSV fields are read as NaN; they carry no text.
            if pd.isna(cell):
                continue
            text = str(cell)
            if min_len <= len(text) <= max_len:
                data.append(text)
    return data


def convert_csvs(restrictions: tuple[int, int]) -> list[str]:
    """Collect the length-filtered cell texts of the four fixed CSV files.

    The files are read in the order listed below and their results are
    concatenated in that order.
    """
    csv_paths = (
        "../generated/test1/6.csv",
        "../generated/test2/6.csv",
        "../generated/train1/119.csv",
        "../generated/train2/119.csv",
    )

    collected = []
    for csv_path in csv_paths:
        collected.extend(convert_csv(csv_path, restrictions=restrictions))
    return collected


# Keep only cells whose text length lies in [120, 122] and store them for analysis.
restrictions = (120, 122)
data = convert_csvs(restrictions=restrictions)
print(f"{len(data)=}")
# Reuse the list built above instead of reading and filtering all CSVs again.
define_analytical_data(f"{restrictions[0]}_{restrictions[1]}", data)

len(data)=17


## Analysis

In [21]:
# Rebind the hyperparameter globals from the saved experiment config.
# NOTE(review): this runs after the GloVe vectors, the test dataset and the
# default-argument values of the functions/classes above were created from the
# values in the hyperparameter cell, so a config that differs from that cell
# would not be reflected there - confirm the two agree.
load_from_experiments(EXPERIMENT_NAME)

rnet = RNet(lr=RNET_LR)
srm = SRModel(lr=SRM_LR)

# Sequential (non-random) sampling so the evaluation visits each sample in order.
env = Env(test_data, srm, rnet, random_sampling=False)
agent = Agent()

pgn = A2CNet(
    input_dim=env.get_observation_shape(), output_dim=env.get_actions_shape()
).to(DEVICE)


# Replace the freshly initialised weights with the saved checkpoints.
load_model("pgn", pgn)
load_model("rnet", rnet.net)
load_model("srm", srm.net)

{'RL_GAMMA': 1.0, 'REWARD_STEPS': 0, 'GLOVE_DIM': 300, 'TRAIN_SIZE': 'md', 'TEST_SIZE': 'sm', 'EMBED_DIM': 300, 'LSTM_LAYERS': 1, 'LSTM_H_DIM': 300, 'RNET_DROPOUT': 0.5, 'ENV_GAMMA': 0.1, 'PGN_LR': 0.005, 'RNET_LR': 0.001, 'SRM_LR': 0.001, 'PGN_CLIP_GRAD': 0.0, 'ENTROPY_BETA': 0.001, 'PRETRAIN_SRM_RNET_EPOCHS': 200, 'PRETRAIN_PGN_EPOCHS': 100, 'EPISODES_BATCH': 10, 'PRETRAIN_SRM_RNET_BATCH': 10, 'FEATURES': '', 'COMMENTS': ''}


In [22]:
# Greedy roll-out over the test set; each entry is a per-sample token list in
# which deleted positions are marked "DELETED".
targets, candidates = evaluate(srm, rnet, pgn, agent, env)

Evaluation: 100%|██████████| 147/147 [00:12<00:00, 11.77it/s]


In [23]:
# Mean number of retained (non-"DELETED") tokens per sentence after the agent's pass.
# NOTE(review): Env never acts on the last candidate token, so it is absent from
# `candidates`; `icl - cl` therefore counts it as deleted - confirm whether the
# candidate percentage should be corrected for this.
tl = np.mean([sum(1 for w in x if w != "DELETED") for x in targets])
cl = np.mean([sum(1 for w in x if w != "DELETED") for x in candidates])

print(f"Mean target len = {tl:.2f} | {itl-tl:.2f} ~ {100-tl/itl*100:.2f}% deleted")
print(f"Mean candidate len = {cl:.2f} | {icl-cl:.2f} ~ {100-cl/icl*100:.2f}% deleted")

Mean target len = 12.54 | 26.10 ~ 67.54% deleted
Mean candidate len = 12.63 | 28.30 ~ 69.14% deleted


## Custom data

In [38]:
def generate_and_save(name: str):
    """Add the agent's output to every sentence of ``<ANALYSIS_PATH>/<name>.json``.

    Each stored ``"initial"`` sentence is used as the target of a sample whose
    candidate is the single token ``"a"`` and whose score is 0. The greedy
    policy is rolled out over these samples and the resulting target tokens
    (deleted positions appear as ``"DELETED"``) are joined with spaces and
    stored under the key ``"a2c"``. The file is rewritten in place.

    Uses the module-level ``srm``, ``rnet``, ``pgn`` and ``agent``.

    Args:
        name: File name without the ``.json`` extension.
    """
    json_path = os.path.join(ANALYSIS_PATH, f"{name}.json")
    with open(json_path, "r") as f:
        data = json.load(f)

    # Nothing to annotate; an empty dataset cannot be turned into an Env.
    if not data:
        return

    entries = list(data.values())
    sentences = [x["initial"] for x in entries]

    custom_data = PlagiarismDataset(
        [(s, "a", 0) for s in sentences],
    )

    custom_env = Env(custom_data, srm, rnet, random_sampling=False)
    c_targets, _ = evaluate(srm, rnet, pgn, agent, custom_env)

    # entries and c_targets follow the same order, so pair them directly
    # instead of rebuilding the key list for every sentence.
    for entry, t in zip(entries, c_targets):
        entry["a2c"] = " ".join(t)

    with open(json_path, "w") as f:
        json.dump(data, f, indent=2)


# Annotate the sentences stored in 120_122.json with the agent's output.
generate_and_save("120_122")

Evaluation:   0%|          | 0/17 [00:00<?, ?it/s]

Evaluation: 100%|██████████| 17/17 [00:00<00:00, 31.25it/s]
