In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torchtext; torchtext.disable_torchtext_deprecation_warning()
import torchtext.vocab as vocab
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer, util
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Prepare the dataset: load the raw tweets and carve out a held-out split
data = pd.read_csv('data.csv')

# Only the tweet text is taken from the CSV
X = data['tweet'].values

# Hold out 20% of the tweets; the fixed random_state keeps the split reproducible
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

# Wrap each split in its own single-column DataFrame
train_data, test_data = (pd.DataFrame({'tweet': split}) for split in (X_train, X_test))

In [3]:
# Display the training split (rich DataFrame repr as the cell output)
train_data

Unnamed: 0,tweet
0,RT @FunSizedYogi: @TheBlackVoice well how else...
1,Funny thing is....it's not just the people doi...
2,"RT @winkSOSA: ""@AintShitSweet__: ""@Rakwon_OGOD..."
3,@Jbrendaro30 @ZGabrail @ramsin1995 @GabeEli8 @...
4,S/o that real bitch
...,...
19821,The last at-bat at Yankee Stadium. Thanks for ...
19822,@_bradleey LMFAOOOO yooo I lost my elevator pa...
19823,"#porn,#android,#iphone,#ipad,#sex,#xxx, | #Ana..."
19824,RT @JennyJohnsonHi5: Just when I thought Justi...


In [4]:
# Display the held-out split (rich DataFrame repr as the cell output)
test_data

Unnamed: 0,tweet
0,934 8616\ni got a missed call from yo bitch
1,RT @KINGTUNCHI_: Fucking with a bad bitch you ...
2,RT @eanahS__: @1inkkofrosess lol my credit ain...
3,RT @Maxin_Betha Wipe the cum out of them faggo...
4,Niggas cheat on they bitch and don't expect no...
...,...
4952,@GrizzboAdams @wyattnuckels haha ight nig calm...
4953,When you see kids being bad &amp; their parent...
4954,This bitch done blew my high
4955,Fat Trel that niggah &#128076;


In [5]:
# Target Model

# Checkpoint of the model being queried. It is spelled once so that the
# tokenizer and the model are always loaded from the same checkpoint.
# Alternative tried earlier: "facebook/roberta-hate-speech-dynabench-r4-target".
# It said non-hate to a lot of hate speech as far as I have tried.
target_checkpoint = "Hate-speech-CNERG/dehatebert-mono-english"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(target_checkpoint)
target_model = AutoModelForSequenceClassification.from_pretrained(target_checkpoint)

In [6]:
# For validation w.r.t. the target model (but does it technically increase the number of queries?)
def get_label(input_text):
    """Return the target model's class probabilities for a single text.

    Args:
        input_text: The raw text to send to the target model.

    Returns:
        A list of floats: the softmax over the target model's logits for
        ``input_text``, one entry per output class.
    """
    # Cap the sequence at the model's position-embedding capacity so an
    # unusually long input is truncated instead of crashing the forward pass.
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=target_model.config.max_position_embeddings,
    )

    # Query the target model (inference only, so no gradients are tracked)
    with torch.no_grad():
        target_outputs = target_model(**inputs)

    # The batch holds exactly one text, so return its single row of probabilities
    target_labels = target_outputs.logits.softmax(dim=1).tolist()[0]
    return target_labels

# Attach the target model's class probabilities to every held-out tweet.
# get_label is invoked once per row, so this issues one target query per tweet.
test_data['label'] = test_data['tweet'].apply(get_label) # ~ 2/3 mins
test_data

Unnamed: 0,tweet,label
0,934 8616\ni got a missed call from yo bitch,"[0.5540313720703125, 0.4459685981273651]"
1,RT @KINGTUNCHI_: Fucking with a bad bitch you ...,"[0.3774803578853607, 0.6225196719169617]"
2,RT @eanahS__: @1inkkofrosess lol my credit ain...,"[0.9582731127738953, 0.04172681272029877]"
3,RT @Maxin_Betha Wipe the cum out of them faggo...,"[0.09527343511581421, 0.904726505279541]"
4,Niggas cheat on they bitch and don't expect no...,"[0.11824333667755127, 0.8817566633224487]"
...,...,...
4952,@GrizzboAdams @wyattnuckels haha ight nig calm...,"[0.09661737829446793, 0.9033826589584351]"
4953,When you see kids being bad &amp; their parent...,"[0.35169079899787903, 0.6483091711997986]"
4954,This bitch done blew my high,"[0.21178995072841644, 0.7882100343704224]"
4955,Fat Trel that niggah &#128076;,"[0.08707568049430847, 0.9129243493080139]"


In [7]:
# Pre-trained word embeddings: GloVe, '6B' release
embed_dim = 100  # Size of each word vector
glove = vocab.GloVe(dim=embed_dim, name='6B')

# Vocabulary of the pre-trained embeddings: dictionary mapping each word to its index
glove_vocab = glove.stoi

In [8]:
# Clone Model class
class HateSpeechGRU(nn.Module):
    """Single-layer bidirectional GRU classifier on top of frozen pre-trained embeddings.

    Args:
        pretrained_embeddings: 2-D float tensor ``[vocab size, embedding dim]``
            used to initialise the (frozen) embedding layer.
        hidden_size: Size of the GRU hidden state, per direction.
        output_dim: Number of output classes.
        dropout: Dropout probability applied to the concatenated final hidden states.
    """

    def __init__(self, pretrained_embeddings, hidden_size, output_dim, dropout):
        super().__init__()

        self.embedding = nn.Embedding.from_pretrained(pretrained_embeddings, freeze=True)
        # The GRU input width is read from the embedding layer itself, so the
        # class does not depend on a notebook-level `embed_dim` global and
        # stays correct for embeddings of any dimensionality.
        self.gru = nn.GRU(
            self.embedding.embedding_dim,
            hidden_size,
            num_layers=1,
            bidirectional=True,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size * 2, output_dim)  # * 2 for bidirectional
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        """Compute class logits for a batch of token-index sequences.

        Args:
            text: Integer tensor ``[batch size, sent len]`` of embedding indices.

        Returns:
            Tensor ``[batch size, output dim]`` of unnormalised logits.
        """
        embedded = self.embedding(text)  # [batch size, sent len, embedding dim]
        _, hidden = self.gru(embedded)  # hidden: [num directions, batch size, hidden_size]
        # Concatenate the final forward (index -2) and backward (index -1) hidden states
        hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        hidden = self.dropout(hidden)
        return self.fc(hidden)  # [batch size, output dim]

In [9]:
# Clone Model
# Hyperparameters
pretrained_embeddings = glove.vectors  # Matrix of pre-trained embeddings
hidden_size = 128  # Hidden-state size of the GRU
output_dim = 2  # Two output classes (binary classification)
dropout = 0.5  # Probability of dropout

clone_model = HateSpeechGRU(
    pretrained_embeddings=pretrained_embeddings,
    hidden_size=hidden_size,
    output_dim=output_dim,
    dropout=dropout,
)

In [10]:
# Pre-trained Sentence Transformer used to embed whole tweets
sent_transformer = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every training tweet (~ 30 secs) and store one vector per row as a plain list
tweet_texts = train_data['tweet'].tolist()
tweet_vectors = sent_transformer.encode(tweet_texts, convert_to_tensor=True)
train_data['embedding'] = tweet_vectors.tolist()

train_data

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Unnamed: 0,tweet,embedding
0,RT @FunSizedYogi: @TheBlackVoice well how else...,"[-0.045171692967414856, 0.1379709541797638, 0...."
1,Funny thing is....it's not just the people doi...,"[0.05024723708629608, 0.001578692113980651, 0...."
2,"RT @winkSOSA: ""@AintShitSweet__: ""@Rakwon_OGOD...","[-0.11634159088134766, 0.04812651500105858, 0...."
3,@Jbrendaro30 @ZGabrail @ramsin1995 @GabeEli8 @...,"[-0.15736790001392365, -0.020282935351133347, ..."
4,S/o that real bitch,"[-0.11650443822145462, -0.026171308010816574, ..."
...,...,...
19821,The last at-bat at Yankee Stadium. Thanks for ...,"[-0.027169330045580864, 0.11435815691947937, 0..."
19822,@_bradleey LMFAOOOO yooo I lost my elevator pa...,"[-0.028217757120728493, -0.05430905893445015, ..."
19823,"#porn,#android,#iphone,#ipad,#sex,#xxx, | #Ana...","[0.0208604633808136, -0.0533132366836071, 0.01..."
19824,RT @JennyJohnsonHi5: Just when I thought Justi...,"[0.009069054387509823, -0.011038430035114288, ..."


In [21]:
# Log of the previous queries (used later for wise query selection)
table = pd.DataFrame({column: [] for column in ('tweet', 'embedding', 't_out', 'c_out')})

# Weights of the two terms of the selection formula
alpha = 0.5  # 'dissimilarity with the previous queries' term
beta = 0.5  # 'similarity with the previous queries that had high disagreement' term

In [22]:
# Train the clone model
# Define loss function and optimizer
# NOTE: CrossEntropyLoss applies log-softmax itself, so it is always given the
# clone's raw logits below; the targets are the target model's class
# probabilities (soft labels).
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(clone_model.parameters(), lr=0.001)

batch_size = 512

# Define number of epochs
num_epochs = 16

loss_vals = []


def encode_with_glove(texts):
    """Turn texts into a padded LongTensor ``[len(texts), max len]`` of GloVe indices.

    Each text is lower-cased (the GloVe 6B vocabulary is uncased) and split on
    whitespace. Out-of-vocabulary words and padding positions both use the
    index of 'unk'.
    """
    unk_idx = glove_vocab['unk']
    token_ids = [
        [glove_vocab[word] if word in glove_vocab else unk_idx for word in text.lower().split()]
        for text in texts
    ]
    # Keep at least one position per row so that texts without any token still
    # produce a valid [batch, len] input for the GRU.
    max_len = max([len(ids) for ids in token_ids] + [1])
    padded = [ids + [unk_idx] * (max_len - len(ids)) for ids in token_ids]
    return torch.tensor(padded, dtype=torch.long)


# Loop-invariant tensors, built once instead of on every step / every epoch
train_unit = torch.nn.functional.normalize(torch.tensor(train_data['embedding'].tolist()), p=2, dim=1)
test_inputs = encode_with_glove(test_data['tweet'].tolist())
test_targets = torch.tensor(test_data['label'].tolist())
max_query_len = target_model.config.max_position_embeddings

# Running sums over all previous queries. With unit-normalised rows, the mean
# cosine similarity of a training row `a` to the queried rows `b_i` equals
# a . mean_i(b_i), and the disagreement-weighted mean equals a . mean_i(d_i * b_i).
# Keeping the two sums therefore yields the same scores as building the full
# [train x table] similarity matrix, without its growing time and memory cost.
num_queried = table.shape[0]
if num_queried > 0:
    # Resume from queries already recorded in the table (e.g. the cell is re-run)
    prev_unit = torch.nn.functional.normalize(torch.tensor(table['embedding'].tolist()), p=2, dim=1)
    prev_disagreement = torch.tensor(
        (table['c_out'].astype(float) - table['t_out'].astype(float)).abs().tolist()
    )
    sum_unit = prev_unit.sum(dim=0)
    sum_weighted = (prev_unit * prev_disagreement.unsqueeze(1)).sum(dim=0)
else:
    sum_unit = torch.zeros(train_unit.shape[1])
    sum_weighted = torch.zeros(train_unit.shape[1])

# Training loop
for epoch in range(num_epochs):
    clone_model.train()
    for _ in tqdm(range(len(train_data)//batch_size)):

        if num_queried == 0: # Cold start for the first query (can be random)
            idxs = torch.randperm(len(train_data))[:batch_size].tolist()
        else:
            # Wise query selection
            # 'dissimilarity with the previous queries':
            # average cosine similarity of each training row to all previous queries
            avg_similarities = train_unit @ (sum_unit / num_queried)

            # 'similarity with the previous queries that had high disagreement':
            # the same average, with each previous query weighted by |c_out - t_out|
            avg_wrt_disagreement = train_unit @ (sum_weighted / num_queried)

            # Calculate the formula for each training data
            formula = alpha * (-avg_similarities) + beta * avg_wrt_disagreement

            # Take the batch_size training rows with the highest formula score
            idxs = formula.argsort(descending=True)[:batch_size].tolist()

        batch = train_data.iloc[idxs]
        input_texts = batch['tweet']
        input_embeds = batch['embedding']

        # Truncate to the target's position-embedding capacity so an unusually
        # long text cannot crash the query.
        input_tokens = tokenizer(
            input_texts.tolist(),
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=max_query_len,
        )

        # Query the target model
        with torch.no_grad():
            target_logits = target_model(**input_tokens).logits

        # Target class probabilities, used as soft labels for the clone
        target_outputs = target_logits.softmax(dim=1)

        # Convert input texts to a tensor of GloVe indices
        input_glove = encode_with_glove(input_texts.tolist())

        # Forward pass through the clone model
        clone_logits = clone_model(input_glove)

        # Calculate loss on the raw logits (the criterion normalises them itself)
        loss = criterion(clone_logits, target_outputs)

        # Clone probabilities are needed only for bookkeeping, so detach them
        clone_outputs = torch.softmax(clone_logits.detach(), dim=-1)
        t_out = target_outputs[:, 0]
        c_out = clone_outputs[:, 0]

        # Add rows to table
        row = pd.DataFrame({'tweet': input_texts, 'embedding': input_embeds, 't_out': t_out.tolist(), 'c_out': c_out.tolist()})
        table = pd.concat([table, row], ignore_index=True)

        # Fold the new queries into the running sums used by the selection formula
        batch_unit = train_unit[idxs]
        batch_disagreement = (c_out - t_out).abs()
        sum_unit += batch_unit.sum(dim=0)
        sum_weighted += (batch_unit * batch_disagreement.unsqueeze(1)).sum(dim=0)
        num_queried += len(idxs)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluation
    clone_model.eval()

    with torch.inference_mode():
        test_pred = clone_model(test_inputs)
        test_loss = criterion(test_pred, test_targets)

    loss_vals.append(test_loss.item())
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {test_loss.item()}')


  5%|▌         | 2/38 [00:27<08:13, 13.71s/it]


KeyboardInterrupt: 