In [12]:
import os
import pickle
from pathlib import Path

import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from torch_geometric.data import Batch, Data
from transformers import AutoTokenizer

def get_df(data_split=None, small=None):
    """
    Read and returns a dataframe of the FactKG dataset.

    Args:
        data_split (str): Which datasplit to load, in `train`, `val` or `test`

    Raises:
        ValueError: If `data_split` is an unsuported string.

    Returns:
        pd.DataFrame: DataFrame of the dataset.
    """
    choices = ["train", "val", "test"]
    data_split='test'


    path ='E:\\factkg_test.pickle'
    df = pd.DataFrame.from_dict(pd.read_pickle(path), orient="index")
    df.reset_index(inplace=True)  # Fix so sentences are a column, not index
    df.rename(columns={"index": "Sentence"}, inplace=True)

    if small:
        df = df[:SMALL_SIZE]

    return df

def get_precomputed_embeddings():#just get the embedding before
    """
    Gets dict with precomputed embeddings, made with `make_subgraph_embeddings.py`.

    Raises:
        ValueError: If `data_split` is an unsuported string.

    Returns:
        dict: The dict of the subgraphs (as strings).
    """
    path = Path(DATA_PATH) / EMBEDDINGS_FILENAME
    embedding_dict = pickle.load(open(path, "rb"))
    return embedding_dict


class FactKGDataset(Dataset):
    def __init__(self, df, evidence=None):
        """
        Args:
            df (pd.DataFrame): Dataframe with claims ("Sentence") and labels ("Label").
            evidence (list, optional): List of the subgraph evidences to use, will be converted to string. `None`
                if no evidence should be used.
        """

        self.inputs = df["Sentence"]
        self.labels = [int(label[0]) for label in df["Label"]]
        self.length = len(df)

        if evidence is not None:
            self.inputs = [self.inputs[i] + " || " + str(evidence[i]) for i in range(self.length)]

    def __getitem__(self, idx):
        return self.inputs[idx], self.labels[idx]

    def __len__(self):
        return self.length


class CollateFunctor:
    def __init__(self, tokenizer, max_length):
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __call__(self, batch):
        inputs, labels = zip(*batch)
        labels = torch.tensor(labels)
        inputs = self.tokenizer(inputs, return_tensors="pt", padding=True, truncation=True, max_length=self.max_length)
        inputs["labels"] = torch.as_tensor(labels)
        return inputs


def get_dataloader(data_split, subgraph_df, model="bert-base-uncased",
                   max_length=512, batch_size=64, shuffle=True, drop_last=True):
    """
    Creates a dataloader for the desired data split and evidence (subgraph).

    Args:
        data_split (str): Which datasplit to load, in `train`, `val` or `test`
        subgraph_type (str): The type of subgraph to use. Must be either `direct` (only the direct entity
            neighbours), `direct_filled` (the direct entity neigbhours, but if it is empty, replace it with
            all of the entity edges if the entities) or `one_hop` (all of the entity edges).
        subgraph_to_use (str). In ["discovered", "connected", "walkable"]. "discovered" means that we use the string
            representation of what directly found with `subgraph_type`. "Connected" means that walk the nodes and
            relations found with `subgraph_type`, and use the connected graphs if found, and the walkable if not.
            "walkable" means we use both the connected graphs and the walkable graphs.
        model (str, optional): Name of model, in order to get tokenizer. Defaults to "bert-base-uncased".
        max_length (int, optional): Max tokenizer length. Defaults to 512.
        batch_size (int, optional): Batch size to dataloader. Defaults to 128.
        shuffle (bool, optional): Shuffle dataset. Defaults to True.
        drop_last (bool, optional): Drop last batch if it is less than `batch_size`. Defaults to True.

    Returns:
        DataLoader: The dataloader.
    """
    df = get_df()
    #if subgraph_type is not None:
      #  subgraphs = get_subgraphs(data_split, subgraph_type)
      #  choices = ["discovered", "connected", "walkable"]
      #  if subgraph_to_use not in choices:
      #      raise ValueError(f"Argument `subgraph_to_use` must be in {choices}. Was {subgraph_to_use}. ")
      #  if subgraph_to_use == "discovered":
       #     evidence = subgraphs["subgraph"]
    evidence = subgraphs_df["subgraph"]
    else:
        evidence = None

    dataset = FactKGDataset(df, evidence)
    tokenizer = AutoTokenizer.from_pretrained(model)
    collate_func = CollateFunctor(tokenizer, max_length)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last,
                            collate_fn=collate_func)
    return dataloader

BERT_LAST_LAYER_DIM=[768]
def get_embedding(text, embeddings_dict, tokenizer, model):

    if embeddings_dict.get(text) is not None:
        return torch.tensor(embeddings_dict[text])
    return torch.zeros(BERT_LAST_LAYER_DIM)


def convert_to_pyg_format(graph, online_embeddings, embedding_dict=None, tokenizer=None, model=None):
    """
    Convert graph on DBpedia dict format to torch_embedding.data format, so it can be run in GNN.

    Args:
        graph (dict): Dict of graph, gotten by calling `kg.search()` on each element in the graph.
        online_embeddings (bool): If True, will calculate embeddings for knowledge subgraph online, with a model
                that might be tuned during the training.
        embedding_dict (dict): Dict mapping words to embeddings, to be used as node and edge features.
            This should be precomputed.
        tokenizer (tokenizer): Tokenizer to `model` if `online_embeddings`.
        model (pytroch model): Model to compute embeddings if `online_embeddings`.

    Returns:
        torch_geometric.data: Graph data.
    """

    if graph == []:  # Dummy empty graph. Not actually empty because of vectorized computations.
        graph = [["none", "none", "none"]]
    node_to_index = {}  # Node text to int mapping
    edge_to_index = {}  # Same for edges
    node_features = []  # List of embeddings
    edge_features = []  # Same for edges
    edge_indices = []

    current_node_idx = 0
    current_edge_idx = 0
    for edge_list in graph:
        node1, edge, node2 = edge_list  # Graph consists of list on the format [node1, edge, node2]

        if node1 not in node_to_index:
            node_to_index[node1] = current_node_idx
            embedding = get_embedding(node1, embeddings_dict=embedding_dict,
                                      tokenizer=tokenizer, model=model)
            node_features.append(embedding)
            current_node_idx += 1

        if node2 not in node_to_index:
            node_to_index[node2] = current_node_idx
            embedding = get_embedding(node2, embeddings_dict=embedding_dict,
                                      tokenizer=tokenizer, model=model)
            node_features.append(embedding)
            current_node_idx += 1

        if edge not in edge_to_index:
            edge_to_index[edge] = current_edge_idx
            embedding = get_embedding(edge, embeddings_dict=embedding_dict,
                                      tokenizer=tokenizer, model=model)
            edge_features.append(embedding)
            current_edge_idx += 1

        edge_indices.append([node_to_index[node1], node_to_index[node2]])

    edge_index = torch.tensor(edge_indices).t().contiguous()  # Transpose and make memory contigious
    x = torch.stack(node_features)
    edge_attr = torch.stack(edge_features)

    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
    return data


class FactKGDatasetGraph(Dataset):
    def __init__(self, df, evidence, embedding_dict=None, tokenizer=None, model=None):
        """
        Initialize the dataset. This dataset will return tokenized claims, graphs for the subgraph, and labels.

        Args:
            df (pd.DataFrame): FactKG dataframe
            evidence (pd.DataFram): Dataframe with the subgraphs, found by `retrieve_subgraphs.py`.
            online_embeddings (bool): If True, will calculate embeddings for knowledge subgraph online, with a model
                that might be tuned during the training.
            embedding_dict (dict): Dict mapping the knowledge graph words to embeddings if not `online_embeddings`.
            tokenizer (tokenizer): Tokenizer to `model` if `online_embeddings`.
            model (pytroch model): Model to compute embeddings if `online_embeddings`.
            mix_graphs (bool, optional): If `True`, will use both the connected and the walkable graphs found in
                DBpedia. If `False`, will use connected if it is not empty, else walkable. Defaults to False.
        """
        self.inputs = df["Sentence"]
        self.labels = [int(label[0]) for label in df["Label"]]
        self.length = len(df)
        self.subgraphs = evidence
        self.online_embeddings = online_embeddings
        self.embedding_dict = embedding_dict
        self.tokenizer = tokenizer
        self.model = model
        self.mix_graphs = mix_graphs

    def __getitem__(self, idx):
        claims = self.inputs[idx]
        subgraph = self.subgraphs[idx]
        graph = convert_to_pyg_format(
            subgraph, online_embeddings=self.online_embeddings, embedding_dict=self.embedding_dict,
            tokenizer=self.tokenizer, model=self.model)

        label = self.labels[idx]
        return claims, graph, label

    def __len__(self):
        return self.length


class GraphCollateFunc:
    def __init__(self, tokenizer, max_length):
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __call__(self, batch):
        inputs, graph_batch, labels = zip(*batch)
        tokens = self.tokenizer(inputs, return_tensors="pt", padding=True, truncation=True,
                                max_length=self.max_length)
        graph_batch = Batch.from_data_list(graph_batch)
        labels = torch.tensor(labels).float()

        return tokens, graph_batch, labels
subgraph_path="E:\\test_combined_subgraphs.pkl"
with open(subgraph_path, 'rb') as file:
    subgraphs = pickle.load(file)
EMBEDDINGS_FILENAME = "embeddings_testset.pkl"
DATA_PATH = "/content/drive/My Drive/NLP_Lab/"
embedding_path= Path(DATA_PATH) / EMBEDDINGS_FILENAME
with open(embedding_path, 'rb') as file:
    embedding_dict = pickle.load(file)

def get_graph_dataloader(
        data_split, subgraphs, embedding_dict,model=None, bert_model_name="bert-base-uncased",
        max_length=512, batch_size=64, shuffle=True, drop_last=True, mix_graphs=False):
    """
    Creates a dataloader for dataset with subgraph representation and tokenized text.

    Args:
        data_split (str): Which datasplit to load, in `train`, `val` or `test`
        subgraph_type (str): The type of subgraph to use. Must be either `direct` (only the direct entity
            neighbours), `direct_filled` (the direct entity neigbhours, but if it is empty, replace it with
            all of the entity edges if the entities), `one_hop` (all of the entity edges) or `relevant` (direct plus
            edges that appears in claim).
        online_embeddings (bool): If True, will calculate embeddings for knowledge subgraph online, with a model
                that might be tuned during the training.
        bert_model_name (str, optional): Name of model, in order to get tokenizer. Defaults to "bert-base-uncased".
        max_length (int, optional): Max tokenizer length. Defaults to 512.
        batch_size (int, optional): Batch size to dataloader. Defaults to 128.
        shuffle (bool, optional): Shuffle dataset. Defaults to True.
        drop_last (bool, optional): Drop last batch if it is less than `batch_size`. Defaults to True.
        mix_graphs (bool, optional): If `True`, will use both the connected and the walkable graphs found in
                DBpedia. If `False`, will use connected if it is not empty, else walkable. Defaults to False.

    Returns:
        DataLoader: The dataloader.
    """
    df = get_df(data_split)

    tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
    graph_collate_func = GraphCollateFunc(tokenizer, max_length=max_length)
    embedding_dict = embedding_dict
    dataset = FactKGDatasetGraph(
            df, subgraphs, embedding_dict=embedding_dict, mix_graphs=mix_graphs)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last,
                            collate_fn=graph_collate_func)
    return dataloader


SyntaxError: invalid syntax (282684423.py, line 121)

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.functional import cosine_similarity
from torch_geometric.nn import GATv2Conv, global_mean_pool
from transformers import AutoModelForSequenceClassification, AutoModel



def get_bert_model(model_name="bert", include_classifier=True, num_labels=2, freeze_base_model=False,
                   freeze_up_to_pooler=True, dropout_rate=0, use_roberta=False):
    """
    Load a pretrained BERT model with desired configurations.

    Args:
        model_name (str, optional): Name of the model, will be saved as a class variable. Defaults to "bert".
        include_classifier (bool, optional): Include a classification layer. Defaults to True.
        num_labels (int, optional): Number of outputs if classification layer is included. Defaults to 2.
        freeze_base_model (bool, optional): Will freeze all layers up to the classification layer. Defaults to False.
        freeze_up_to_pooler (bool, optional): Will freeze all layers until the last layer in BERT, the pooler,
            with approximately 500k parameters. Defaults to True.
        dropout_rate (int, optional): Dropout rate for the classification layer. Defaults to 0.
        use_roberta (bool): If `True`, will use RoBERTa instead of BERT.

    Returns:
        transformer model: The loaded model.
    """
    if freeze_base_model and freeze_up_to_pooler:
        logger.warn("Both `freeze_base_model` and `freeze_up_to_pooler` is True. Freezing base model.")

    if use_roberta:
        model_name = "roberta-base"
    else:
        model_name = "bert-base-uncased"

    if include_classifier:
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name, cache_dir="./cache", trust_remote_code=True, num_labels=num_labels,
            output_hidden_states=True
        )
    else:
        model = AutoModel.from_pretrained(model_name)

    model.name = model_name
    if freeze_base_model:
        for params in model.base_model.parameters():
            params.requires_grad = False
    elif freeze_up_to_pooler:
        for name, params in model.base_model.named_parameters():
            if not name.startswith("pooler"):
                params.requires_grad = False

    if dropout_rate != 0:
        classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(model.classifier.in_features, model.classifier.out_features)
        )
        model.classifier = classifier

    return model


class QAGNN(nn.Module):
    """
    Implementation of Quastion Answer Graph Neural Network model
    """
    def __init__(self, model_name, n_gnn_layers=2, gnn_hidden_dim=256, gnn_out_features=256, lm_layer_features=None,
                 gnn_batch_norm=True, freeze_base_model=False, freeze_up_to_pooler=True, gnn_dropout=0.3,
                 classifier_dropout=0.2, lm_layer_dropout=0.4, use_roberta=False):
        """
        Args:
            model_name (str): Name of the model, will be saved as a class variable.
            n_gnn_layers (int, optional): Number of layers in the GNN. Defaults to 2.
            gnn_hidden_dim (int, optional): Number of nodes in GNN layers. Defaults to 256.
            gnn_out_features (int, optional): Number of output nodes from the GNN. Defaults to 256.
            lm_layer_features (int): If not `None`, will add a linear layer after the claim embedding that will be
                used in the classification layer, concatenated with the GNN output. The layer will have
                `lm_layer_features` nodes, and a dropout of `lm_layer_dropout`. If `None`, will use the lm (bert)
                embedding concatenated with the GNN output for the classification layer.
            gnn_batch_norm (bool, optional): Whether or not to apply batch norm between the GNN layers.
                Defaults to True.
            freeze_base_model (bool, optional): Freeze the base model of the Bert language model. Defaults to False.
            freeze_up_to_pooler (bool, optional): Freeze up to the last part of the Bert model. Defaults to True.
            gnn_dropout (float, optional): Dropout rate for the GNN layers. Defaults to 0.3.
            classifier_dropout (float, optional): Dropout rate for the last layer.
            lm_layer_dropout (float, optional): Dropout rate for the optional `lm_layer`.
            use_roberta (bool): If True, will use RoBERTa for the language model (the one that trains, not the
                one for the embeddings.)

        Raises:
            ValueError: If `n_gnn_layers` is less than 2.
        """
        if n_gnn_layers < 2:
            raise ValueError(f"Argument `n_gnn_layers` must be atleast 2. Was {n_gnn_layers}. ")
        super(QAGNN, self).__init__()

        self.name = model_name
        self.bert = get_bert_model("bert_" + model_name, include_classifier=False, freeze_base_model=freeze_base_model,
                                   freeze_up_to_pooler=freeze_up_to_pooler, use_roberta=use_roberta)

        self.n_gnn_layers = n_gnn_layers
        self.gnn_layers = nn.ModuleList()
        first_gnn_layer = GATConv(self.bert.config.hidden_size, gnn_hidden_dim, dropout=gnn_dropout)
        self.gnn_layers.append(first_gnn_layer)
        for i in range(n_gnn_layers - 2):
            gnn_layer = GATv2Conv(gnn_hidden_dim, gnn_hidden_dim, dropout=gnn_dropout)
            self.gnn_layers.append(gnn_layer)
        last_gnn_layer = GATv2Conv(gnn_hidden_dim, gnn_out_features, dropout=gnn_dropout)
        self.gnn_layers.append(last_gnn_layer)

        claim_dim = self.bert.config.hidden_size
        self.with_lm_layer = False
        if lm_layer_features is not None:
            self.lm_dropout = nn.Dropout(lm_layer_dropout)
            self.lm_layer = nn.Linear(self.bert.config.hidden_size, lm_layer_features)
            claim_dim = lm_layer_features
            self.with_lm_layer = True

        self.gnn_batch_norm = gnn_batch_norm
        if gnn_batch_norm:
            self.gnn_batch_norm_layers = nn.ModuleList()
            for i in range(n_gnn_layers - 1):
                batch_norm_layer = nn.BatchNorm1d(gnn_hidden_dim)
                self.gnn_batch_norm_layers.append(batch_norm_layer)

        self.classsifier_dropout_layer = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(gnn_out_features + claim_dim, 1)

    def forward(self, claim_tokens, data_graphs):
        claim_outputs = self.bert(**claim_tokens)
        claim_embeddings = claim_outputs.last_hidden_state[:, 0]  # Using the [CLS] token's embedding

        batch = data_graphs
        claim_embeddings_expanded = claim_embeddings[batch.batch]  # Expand to match batch size
        relevance_scores = F.cosine_similarity(claim_embeddings_expanded, batch.x, dim=-1).unsqueeze(-1)
        weighted_node_features = batch.x * relevance_scores

        x = weighted_node_features
        for i in range(self.n_gnn_layers):
            x = self.gnn_layers[i](x, batch.edge_index)
            if self.gnn_batch_norm and i < (self.n_gnn_layers - 1):
                x = self.gnn_batch_norm_layers[i](x)
            x = F.relu(x)

        # Pooling the node features
        pooled_gnn_output = global_mean_pool(x, batch.batch)  # Pool over all nodes in each graph

        if self.with_lm_layer:
            claim_embeddings = self.lm_dropout(claim_embeddings)
            claim_embeddings = self.lm_layer(claim_embeddings)

        combined_features = torch.cat((pooled_gnn_output, claim_embeddings), dim=1)

        combined_features = self.classsifier_dropout_layer(combined_features)
        out = self.classifier(combined_features)  # [batch_size, 1]

        return out.squeeze(1)


In [14]:

import numpy as np
import torch

N_EARLY_STOP_DEFAULT=3



def run_epoch_simple(train, dataloader, optimizer, model, device):
    total_loss = 0
    total_correct = 0
    if train:
        model.train()
    else:
        model.eval()

    for inputs in dataloader:
        batch = inputs.to(device)

        optimizer.zero_grad()
        outputs = model(**batch)
        loss = outputs.loss
        if train:
            loss.backward()
            optimizer.step()

        total_loss += loss.item() * batch["input_ids"].size(0)
        probabilities = torch.softmax(outputs.logits, dim=1)
        preds = torch.argmax(probabilities, dim=1)
        total_correct += (preds == batch["labels"]).sum().item()
    return total_loss, total_correct


def run_epoch_qa_gnn(train, dataloader, optimizer, model, criterion, device):
    total_loss = 0
    total_correct = 0
    if train:
        model.train()
    else:
        model.eval()

    for inputs, data_graph, labels in dataloader:
        batch = inputs.to(device)
        data_graph = data_graph.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(batch, data_graph)

        loss = criterion(outputs, labels)
        if train:
            loss.backward()
            optimizer.step()

        total_loss += loss.item() * batch["input_ids"].size(0)
        probabilities = torch.sigmoid(outputs)
        preds = (probabilities > 0.5).int()
        total_correct += (preds == labels).sum().item()
    return total_loss, total_correct


def train(model, criterion, optimizer, qa_gnn, train_loader, val_loader=None, n_epochs=10, scheduler=None,
          n_early_stop=None, save_models=True, device=None, non_blocking=False, verbose=1):
    """
    Trains a model and calculate training and valudation stats, given the model, loader, optimizer
    and some hyperparameters.

    Args:
        model (model): The model to train. Freeze layers ahead of calling this function.
        criterion (callable): Pytorch loss function.
        optimizer (optim): Pytorch Optimizer.
        qa_gnn (bool): Wether the model is a QA-GNN model with graphs (True), or a language model (False).
        train_loader (dataloader): Data loader for training set
        val_loader (dataloader, optional): Optinal validation data loader.
            If not None, will calculate validation loss and accuracy after each epoch.
        n_epochs (int, optional): Amount of epochs to run. Defaults to 10.
        scheduler (scheduler, optional): Optional learning rate scheduler.
        n_early_stop (int): The number of consecutive iterations without validation loss improvement that
            stops the training (early stopping). Will only work if `val_loader` is None.
            Set to `False` for deactivating it, and `None` for default value from `constants.py`.
        save_models (bool): If True and `val_loader` is not None, will save the best models state dicts.
        non_blocking (bool): If True, allows for asyncronous transfer between RAM and VRAM.
            This only works together with `pin_memory=True` to dataloader and GPU training.
        verbose (int): If 0, will not log anything. If not 0, will log last epoch with INFO and the others with DEBUG.

    Returns:
        dict: A dictionary of the training history. Will contain lists of training loss and accuracy over
            epochs, and for validation loss and accuracy if `val_loader` is not None.
        dict: A dictionary with trained model, and optional best-model state-dicts if `val_loader` is not None.
            On the form: {"final_model": model, "best_model_accuracy_state_dict": a, "best_model_loss_state_dict": b}
    """
    if n_early_stop is None:
        n_early_stop = N_EARLY_STOP_DEFAULT
    elif not n_early_stop:
        n_early_stop = n_epochs
    if device is None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device, non_blocking=non_blocking)
    train_class_loss_list = []  # Initialize training history variables
    train_class_accuracy_list = []
    val_class_loss_list = []  # These will remain empty if `val_loader` is None
    val_class_accuracy_list = []
    best_epoch_number = -1
    best_val_loss = np.inf
    best_val_accuracy = -1
    best_model = None  # This will only be saved if `val_loader` is not None
    n_stagnation = 0
    if verbose != 0:
        logger.info(f"Starting training with device {device}.")

    for epoch in range(n_epochs):  # Train
        if qa_gnn:
            train_loss, train_correct = run_epoch_qa_gnn(
                train=True, dataloader=train_loader, optimizer=optimizer, model=model,
                criterion=criterion, device=device)
        else:
            train_loss, train_correct = run_epoch_simple(
                train=True, dataloader=train_loader, optimizer=optimizer, model=model, device=device)
        average_train_loss = train_loss / len(train_loader.dataset)
        train_accuracy = 100 * train_correct / len(train_loader.dataset)
        train_class_loss_list.append(average_train_loss)
        train_class_accuracy_list.append(train_accuracy)

        if val_loader is not None:  # Eval
            with torch.no_grad():
                if qa_gnn:
                    val_loss, val_correct = run_epoch_qa_gnn(
                        train=False, dataloader=val_loader, optimizer=optimizer, model=model,
                        criterion=criterion, device=device)
                else:
                    val_loss, val_correct = run_epoch_simple(
                        train=False, dataloader=val_loader, optimizer=optimizer, model=model, device=device)

                average_val_loss = val_loss / len(val_loader.dataset)
                val_accuracy = 100 * val_correct / len(val_loader.dataset)
                val_class_loss_list.append(average_val_loss)
                val_class_accuracy_list.append(val_accuracy)

                if average_val_loss >= best_val_loss:  # Check for stagnation _before_ updating best_val_loss
                    n_stagnation += 1
                else:  # Better than best loss
                    n_stagnation = 0
                if n_stagnation == n_early_stop:  # Early stopping, abort training
                    if verbose == 0:  # No output
                        break
                    print(f"Epoch [{epoch + 1} / {n_epochs}]\n")
                    print(f"Train loss: {average_train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}%")
                    print(f"Validation loss: {average_val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}%\n")
                    print(f"Early stopping after {n_stagnation} rounds of no validation loss improvement.\n")
                    break

                if average_val_loss < best_val_loss:
                    best_val_loss = average_val_loss
                    best_epoch_number = epoch + 1
                    best_val_accuracy = val_accuracy
                    best_model = model.state_dict()

        if scheduler is not None:
            scheduler.step()

        if verbose == 0:  # Do not log
            continue

        message = f"Epoch [{epoch + 1} / {n_epochs}]\n"
        message += f"Train loss: {average_train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}%\n"
        if val_loader is not None:
            message += f"Validation loss: {average_val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}%"
        message += "\n"

        if epoch + 1 == n_epochs:  # Last epoch
            print(message)
        else:
            print(message)

    history = {"train_class_loss": train_class_loss_list, "train_class_accuracy": train_class_accuracy_list,
               "val_class_loss": val_class_loss_list, "val_class_accuracy": val_class_accuracy_list,
               "best_epoch": best_epoch_number, "best_val_accuracy": best_val_accuracy,
               "best_val_loss": best_val_loss, "model_name": model.name}

    models_dict = {"final_model": model}
    if val_loader is not None:
        models_dict["best_model_state_dict"] = best_model
        if save_models:
            save_model(best_model, model.name)

    return history, models_dict


In [15]:
import argparse

import torch
import torch.nn as nn
import transformers
from datasets import get_dataloader, get_graph_dataloader
from evaluate import evaluate_on_test_set
#from glocal_settings import SMALL, SMALL_SIZE
#from models import QAGNN, get_bert_model
#from train import train




seed_everything(57)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = QAGNN(
            args.model_name, n_gnn_layers=args.n_gnn_layers, gnn_hidden_dim=args.gnn_hidden_dim,
            gnn_out_features=args.gnn_out_features, lm_layer_features=args.lm_layer_features,
            gnn_batch_norm=args.gnn_batch_norm, freeze_base_model=args.freeze_base_model,
            freeze_up_to_pooler=args.freeze_up_to_pooler, gnn_dropout=args.gnn_dropout,
            classifier_dropout=args.classifier_dropout, lm_layer_dropout=args.lm_layer_dropout,
            use_roberta=args.use_roberta)
        
train_loader = get_graph_dataloader(
            "train", subgraph_type=args.subgraph_type, online_embeddings=args.online_embeddings,
            model=embedding_model, batch_size=args.batch_size, mix_graphs=args.mix_graphs)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=args.learning_rate)
lr_scheduler = transformers.get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=50, num_training_steps=len(train_loader) * args.n_epochs
        )

history, models_dict = train(
            model=model, criterion=criterion, optimizer=optimizer, qa_gnn=args.qa_gnn, train_loader=train_loader,
            val_loader=val_loader, n_epochs=args.n_epochs, scheduler=lr_scheduler)


IndentationError: unexpected indent (580355891.py, line 30)