In [1]:
import os
import pandas as pd
import numpy as np
import random
from tqdm import tqdm
import re
import os
import string

import torch
import torch.nn as nn
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool as gap
from torch.optim.lr_scheduler import _LRScheduler
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from transformers import AdamW
from arabert.preprocess import ArabertPreprocessor
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

import warnings

warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [9]:
# Directory holding the dataset CSV files; the test split is loaded from here further down.
DATA_PATH = "../../data"
# Bare last expression: the notebook shows the directory listing as this cell's output.
os.listdir(DATA_PATH)

['Mawqif_AllTargets_Test.csv',
 '.DS_Store',
 'Mawqif_AllTargets_Blind Test.csv',
 'Mawqif_AllTargets_Train.csv']

In [38]:
# Export the labelled test split as a tab-separated gold file:
#   ID<Tab>Target<Tab>Tweet<Tab>Stance
df_test = pd.read_csv(os.path.join(DATA_PATH, "Mawqif_AllTargets_Test.csv"))

# Missing stance values become the literal label "None". The result is assigned
# back to the column: calling replace(..., inplace=True) on df_test["stance"]
# operates on a temporary object under pandas Copy-on-Write and leaves df_test
# unchanged.
df_test["stance"] = df_test["stance"].fillna("None")

# Explicit UTF-8 so the output does not depend on the platform's default encoding.
# NOTE(review): fields are written verbatim; a tab or newline inside a tweet would
# break the one-record-per-line layout -- confirm the data never contains them.
gold_path = os.path.join(DATA_PATH, "gold.txt")
with open(gold_path, "w", encoding="utf-8") as f:
    for row_id, target, text, stance in zip(
        df_test["ID"], df_test["target"], df_test["text"], df_test["stance"]
    ):
        f.write(f"{row_id}\t{target}\t{text}\t{stance}\n")

In [2]:
def get_bert_models(indices):
    """Translate positions in the fixed model list into model identifier strings.

    Args:
        indices: Iterable of integer positions into the list below
            (negative positions count from the end, as with any list).

    Returns:
        list: The model identifiers, in the same order as ``indices``.

    Raises:
        IndexError: If a position is outside the list.
    """
    available_models = [
        "aubmindlab/bert-base-arabertv02-twitter",
        "aubmindlab/bert-base-arabertv02",
        "UBC-NLP/MARBERT",
        "CAMeL-Lab/bert-base-arabic-camelbert-da",
    ]
    selected = []
    for position in indices:
        selected.append(available_models[position])
    return selected

# Integer code assigned to each ensemble strategy name.
ensemble_settings = dict(
    ENSEMBLE_AVERAGE=0,
    ENSEMBLE_ATTENTION=1,
    ENSEMBLE_GRAPH=2,
)

# Integer code assigned to each weighting strategy name; codes follow the
# order of the names below, starting at 0.
# The spelling "HEIRARCHICAL" (sic) is kept byte-for-byte because the key is a
# runtime lookup string.
weighting_settings = {
    name: code
    for code, name in enumerate(
        (
            "EQUAL_WEIGHTING",
            "STATIC_WEIGHTING",
            "RELATIVE_WEIGHTING",
            "HEIRARCHICAL_WEIGHTING",
            "PRIORITIZE_HIGH_CONFIDENCE_WEIGHTING",
            "PRIORITIZE_LOW_CONFIDENCE_WEIGHTING",
            "META_WEIGHTING",
        )
    )
}

In [3]:
# Convert every prediction CSV in the results folder into a tab-separated guess
# file, written next to it, in the format:
#   ID<Tab>Target<Tab>Tweet<Tab>Stance
RES_PATH = "../../res"
preds = os.listdir(RES_PATH)

# NOTE(review): this counter is never incremented here (its increment was part
# of disabled code); it is kept only in case another cell reads it.
c = 0

# A previously commented-out section parsed the run settings (pooling, bi, gru,
# bert model indices, ensemble and weighting setting) out of each file name and
# printed them. It was disabled and is not needed to build the guess files, so
# it has been removed.
for pred in preds:
    # Only names containing "pool" are treated as prediction files. Names that
    # contain "txt" are skipped, which also excludes the guess files this loop
    # writes, so re-running the cell does not re-process its own output.
    if "pool" not in pred or "txt" in pred:
        continue

    df = pd.read_csv(os.path.join(RES_PATH, pred))

    # Assign the result back to the column: calling replace(..., inplace=True)
    # on df["pred"] operates on a temporary object under pandas Copy-on-Write
    # and leaves df unchanged.
    # NOTE(review): NaN is written below as the text "nan", whereas the gold
    # file cell writes missing stances as "None" -- confirm which label the
    # scorer expects.
    df["pred"] = df["pred"].replace({"None": np.nan})

    # Explicit UTF-8 so the output does not depend on the platform's default encoding.
    guess_path = os.path.join(RES_PATH, f"guess_{pred}.txt")
    with open(guess_path, "w", encoding="utf-8") as f:
        for row_id, target, text, stance in zip(
            df["ID"], df["target"], df["text"], df["pred"]
        ):
            f.write(f"{row_id}\t{target}\t{text}\t{stance}\n")


