In [1]:
import transformers
import torch
import os
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Small hand-written probe set. Each entry pairs one sentence with its gold
# label; the pairs are then split column-wise into the two parallel lists.
# NOTE(review): the meaning of label 1 vs 0 is not defined in this notebook — confirm.
Sentence, label = (
    list(column)
    for column in zip(
        ("I hate Muslim terrorists and the psychotic Muslims in ISIS. Is that allowed in your politically correct version of reality?", 1),
        ("Apparently the emails state he buys meth from gay Islamic party boys who are transphobic hate blacks puppies and illegally voted for Hillary", 1),
        ("None of that would ever have happened if he werent black. What do you mean?", 0),
        ("1000000 women are just idiots? Your simpleminded generalization paints you as seeming to be one too", 0),
    )
)

In [None]:
# Name of the fine-tuned checkpoint to evaluate. It selects the
# "<task>_model/<model_name>/" directory that load_model reads from and is
# embedded in the per-task results file name.
model_name = "albert-xxlarge-v2"

In [4]:
def load_model(model_name, task_name):
    """Load a fine-tuned sequence classifier and its tokenizer from disk.

    Everything is read from the relative directory
    ``{task_name}_model/{model_name}``, which must contain ``config.json``,
    ``pytorch_model.bin`` and the tokenizer files.

    Args:
        model_name: Checkpoint sub-directory name (e.g. the base model name).
        task_name: Task prefix of the ``<task>_model`` directory.

    Returns:
        A ``(model, tokenizer)`` tuple. The weights are materialised on the
        CPU; the caller decides which device to move the model to.
    """
    model_dir = f"{task_name}_model/{model_name}"

    # Architecture / label configuration saved next to the weights.
    config = transformers.AutoConfig.from_pretrained(f"{model_dir}/config.json")

    # map_location="cpu" lets a checkpoint that was saved from a GPU be opened
    # on a machine without that GPU (or without CUDA at all).
    # weights_only=True restricts unpickling to tensors and plain containers,
    # so a tampered checkpoint cannot run arbitrary code while loading.
    state_dict = torch.load(
        f"{model_dir}/pytorch_model.bin",
        map_location="cpu",
        weights_only=True,
    )

    # With no name/path given, the model is built from `config` and filled
    # from the explicit state dict.
    model = transformers.AutoModelForSequenceClassification.from_pretrained(
        pretrained_model_name_or_path=None,
        config=config,
        state_dict=state_dict,
    )

    tokenizer = transformers.AutoTokenizer.from_pretrained(model_dir)

    return model, tokenizer


In [5]:
def load_test_data(csv_path):
    """Read the test split at ``csv_path``, echo it to stdout and return it.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    frame = pd.read_csv(csv_path)
    # Printed so the notebook shows which rows are about to be evaluated.
    print(frame)
    return frame

In [6]:
def evaluate_model(model, tokenizer, test_data, task_name, batch_size=8, device=None):
    """Run batched inference over ``test_data`` and score the predictions.

    Side effect: writes one row per sentence (text, true label, predicted
    label) to ``{task_name}_results.csv`` in the current working directory.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        tokenizer: Callable tokenizer returning a mapping of tensors.
        test_data: DataFrame with a ``Sentence`` column (text) and a
            ``labels`` column (gold class ids).
        task_name: Prefix of the per-sentence predictions CSV.
        batch_size: Number of sentences tokenized and scored per step.
        device: Optional torch device (string or ``torch.device``). When
            omitted, the second GPU is used if there is one, otherwise the
            first GPU, otherwise the CPU.

    Returns:
        ``(conf_matrix, class_report)``: the confusion matrix and the
        classification report as a dict (``output_dict=True``).

    Raises:
        ValueError: If ``test_data`` has no rows.
    """
    if len(test_data) == 0:
        raise ValueError("test_data is empty; nothing to evaluate")

    if device is None:
        if not torch.cuda.is_available():
            device = "cpu"
        elif torch.cuda.device_count() > 1:
            # Keep the notebook's original preference for the second GPU.
            device = "cuda:1"
        else:
            # Single-GPU host: "cuda:1" would be an invalid device ordinal.
            device = "cuda:0"
    device = torch.device(device)

    model.to(device)
    # Disable dropout etc. so predictions are deterministic even if the caller
    # hands over a model that is still in training mode.
    model.eval()

    sentences = test_data['Sentence'].tolist()
    true_list = test_data['labels'].tolist()
    pred_list = []

    for start in range(0, len(sentences), batch_size):
        batch_sentences = sentences[start:start + batch_size]
        inputs = tokenizer(batch_sentences, max_length=128, padding=True, truncation=True, return_tensors="pt")
        inputs = {key: val.to(device) for key, val in inputs.items()}

        with torch.no_grad():
            logits = model(**inputs).logits

        # Highest-scoring class per sentence, brought back as plain ints.
        pred_list.extend(torch.argmax(logits, dim=1).cpu().tolist())

    # Calculate evaluation metrics
    conf_matrix = confusion_matrix(true_list, pred_list)
    class_report = classification_report(true_list, pred_list, output_dict=True)

    # Persist per-sentence predictions for later error analysis.
    predictions = pd.DataFrame({
        "Sentence": sentences,
        "True": true_list,
        "Predicted": pred_list,
    })
    predictions.to_csv(f"{task_name}_results.csv", index=False)
    return conf_matrix, class_report

In [7]:
# Load the fine-tuned classifier and tokenizer for each task
bias_model, bias_tokenizer = load_model(model_name, "bias_2")
stereotype_model, stereotype_tokenizer = load_model(model_name, "stereotype")

# Test split used for each task
test_csv_paths = {
    "bias_2": "Dataset/BABE/test.csv",
    "stereotype": "Dataset/StereoSet/test.csv",
}

# to_csv does not create missing directories, so make sure the output
# directory exists before the first report is written.
results_dir = "./results"
os.makedirs(results_dir, exist_ok=True)

# Evaluate models
for task_name, model, tokenizer in [("bias_2", bias_model, bias_tokenizer), ("stereotype", stereotype_model, stereotype_tokenizer)]:
    # Load test data
    test_csv_path = test_csv_paths[task_name]
    test_data = load_test_data(test_csv_path)
    print(f"Evaluating {task_name} model...")
    conf_matrix, class_report = evaluate_model(model, tokenizer, test_data, task_name)

    print("---------------------------------Confusion Matrix------------------------------------")
    print(pd.DataFrame(conf_matrix))

    # One row per class / aggregate, one column per metric.
    report_table = pd.DataFrame(class_report).transpose()
    print("---------------------------------Evaluation Metrics------------------------------------")
    print(report_table)

    # Save results to CSV
    result_csv_path = f"{results_dir}/{task_name}_{model_name}_results.csv"
    report_table.to_csv(result_csv_path, index=True)
    print(f"Results saved to {result_csv_path}")

  state_dict=torch.load(model_path)
  state_dict=torch.load(model_path)


                                              Sentence  labels  \
0    As the Black Lives Matter movement grows, comp...       0   
1    The case of Rahaf Mohammed al-Qunun drawn new ...       0   
2    The Post said the talks on payroll taxes were ...       0   
3    Nearly 78 percent of Americans report experien...       0   
4    Colin P. Clarke has been teaching a course on ...       0   
..                                                 ...     ...   
995  Monday for his show’s opening monologue, Fox N...       1   
996  Under the changes, all military style semi-aut...       0   
997  The idea of imposing a wealth tax on the riche...       0   
998  The poisonings come more than five months into...       0   
999  So you have the New York Times, the Tax Policy...       1   

                 topic    id  
0    marriage-equality     1  
1               gender     2  
2                taxes     3  
3          environment     4  
4    white-nationalism     5  
..                 ..

In [8]:
# Show the kernel's working directory: the relative "<task>_model/", "Dataset/"
# and "./results/" paths used earlier in this notebook are resolved against it.
!pwd

/home/pushpak/aditya/New/MTL-E


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
