In [24]:
import numpy as np
import torch
import io
import pickle
import os

In [25]:
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that routes ``torch.storage._load_from_bytes`` through
    ``torch.load(..., map_location='cpu')``.

    Every other global is resolved by the stock ``pickle.Unpickler``.
    Presumably this exists so that files pickled on a GPU machine can be
    read on a CPU-only one -- confirm against how the files were produced.

    NOTE: this still unpickles the input, so only use it on trusted files.
    """

    def find_class(self, module, name):
        """Return the object that pickle should use for ``module.name``."""
        # Anything other than the torch storage loader: default lookup.
        if (module, name) != ('torch.storage', '_load_from_bytes'):
            return super().find_class(module, name)

        def _load_on_cpu(b):
            # Wrap the raw bytes in a file-like object and load them with
            # map_location='cpu'.
            return torch.load(io.BytesIO(b), map_location='cpu')

        return _load_on_cpu

In [26]:
# Load every pickled torch embedding file into a nested dict:
# embeddings[model][dataset][split]
embeddings = {}
for model in ["BERT", "BERTweet", "HateBERT"]:
    embeddings[model] = {}
    for dataset in ["OLID", "DynaHate", "Latent_Hatred"]:
        embeddings[model][dataset] = {}
        # File names use the dataset name with underscores removed
        # (e.g. "Latent_Hatred" -> "LatentHatred_<split>").
        name = dataset.replace("_", "")
        for split in ["Training", "Test", "Val"]:
            # Build the path with os.path.join rather than hard-coded "\":
            # "\{" is an invalid escape sequence and only resolves on Windows.
            path = os.path.join("Torch_Model_Embeddings", model, dataset, f"{name}_{split}")
            # NOTE: unpickling can execute arbitrary code; only load trusted files.
            # The with-block guarantees the file handle is closed after loading.
            with open(path, "rb") as f:
                embeddings[model][dataset][split] = CPU_Unpickler(f).load()

In [27]:
# Create a new dict with the same model/dataset/split keys as `embeddings`,
# holding the result of calling .numpy() on each loaded object.
embeddings_np = {}
for model in ["BERT", "BERTweet", "HateBERT"]:
    embeddings_np[model] = {}
    for dataset in ["OLID", "DynaHate", "Latent_Hatred"]:
        # Build the per-split mapping in one step; no placeholder value is
        # assigned first, since it would be overwritten immediately.
        embeddings_np[model][dataset] = {
            split: embeddings[model][dataset][split].numpy()
            for split in ["Training", "Test", "Val"]
        }

In [28]:
# Sanity check: show the shape of one converted entry
example = embeddings_np["BERT"]["DynaHate"]["Test"]
print(example.shape)

(4120, 1, 768)


In [30]:
# Save each converted entry as a pickle under
# Numpy_Model_Embeddings/<model>/<dataset>/<name>_<split>
for model in ["BERT", "BERTweet", "HateBERT"]:
    for dataset in ["OLID", "DynaHate", "Latent_Hatred"]:
        # File names use the dataset name with underscores removed.
        name = dataset.replace("_", "")
        # os.path.join avoids hard-coded "\" separators ("\{" is an invalid
        # escape sequence and only works as a path separator on Windows).
        out_dir = os.path.join("Numpy_Model_Embeddings", model, dataset)
        # Creates any missing parent directories too; exist_ok makes the
        # cell safe to re-run without separate existence checks.
        os.makedirs(out_dir, exist_ok=True)
        for split in ["Training", "Test", "Val"]:
            with open(os.path.join(out_dir, f"{name}_{split}"), "wb") as f:
                pickle.dump(embeddings_np[model][dataset][split], f)