In [None]:
# Set working directory to be project path
import os
# Add to python path
import sys

# Location of the project checkout. Set the EFFICIENCYLANE_ROOT environment
# variable to run this notebook from another machine without editing the cell;
# when it is unset the original hard-coded location is used.
PROJECT_ROOT = os.environ.get(
    'EFFICIENCYLANE_ROOT',
    '/Users/datagero/Documents/local_repos/gatech/Deep Learning/Project/cs-7643-efficiencylane',
)
# Directory holding the importable project modules, derived from the root so
# the two paths cannot drift apart.
PACKAGE_DIR = os.path.join(PROJECT_ROOT, 'cs_7643_efficiencylane')

os.chdir(PROJECT_ROOT)

# Guard the append so re-running this cell in the same kernel does not stack
# duplicate entries on sys.path.
if PACKAGE_DIR not in sys.path:
    sys.path.append(PACKAGE_DIR)

In [None]:
# Initialize the model and Loader
from transformers import RobertaConfig, TextClassificationPipeline, RobertaForSequenceClassification
from data_loaders.citation_intent_data_loader import CSTasksDataLoader
from adapters import AutoAdapterModel, RobertaAdapterModel
import torch

# Base checkpoint name; reused for the tokenizer/loader, the config and the model
# so the three can never point at different checkpoints.
model_variant = "roberta-base"

dataset_name = "sciie"
adapter_path = "adapters/training_output/roberta-base_sciie_double_seq_bn_training_adapter_v01_best/trial_2/seed_9091"

print("Loading Dataset...")
loader = CSTasksDataLoader(model_name=model_variant,
                                dataset_name=dataset_name,
                                path=f"data/{dataset_name}/",
                                checkpoint_path=f"data/{dataset_name}/processed_dataset.pt")

# overwrite=False is passed through to the loader; presumably this reuses the
# processed checkpoint at `checkpoint_path` when present — TODO confirm in CSTasksDataLoader.
dataset = loader.load_dataset(overwrite=False)
num_labels = loader.num_labels
print("num_labels:", num_labels)

device = "cpu"#torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ======================================================
# Model & Adapter Config
# ======================================================
# Set up the Model and Adapter
config = RobertaConfig.from_pretrained(
    model_variant,
    num_labels=num_labels,
)

print("Initialising Model...")
model = RobertaAdapterModel.from_pretrained(model_variant, config=config)

print("Adding Adapter...")
adapter_name = model.load_adapter(adapter_path)
model.set_active_adapters(adapter_name)

# Move to the target device only after the adapter has been loaded, so every
# parameter (base model and adapter) ends up on the same device.
model.to(device)
# This notebook only runs inference/evaluation: make sure dropout and similar
# train-time layers are disabled, including in the freshly added adapter modules.
model.eval()



In [None]:
# Display the loader's label encoder; a later cell inverts it via `.items()` to decode class ids
loader.label_encoder

In [None]:
# Function to predict labels for a list of texts
def classify_texts(model, tokenizer, texts, batch_size=None):
    """Predict a class index for every text in `texts`.

    Args:
        model: Callable model exposing `.device`, `.training`, `.eval()` and
            `.train(mode)`, whose output carries a `.logits` tensor with the
            class scores on the last dimension.
        tokenizer: Callable accepting `(texts, padding=, truncation=,
            return_tensors=)` and returning a mapping with `'input_ids'` and
            `'attention_mask'` tensors.
        texts: Sequence of strings. A single string is treated as a
            one-element list.
        batch_size: Maximum number of texts per forward pass. `None` (the
            default) sends everything in one batch, as this function always
            did; pass a positive integer to bound memory on large inputs.

    Returns:
        1-D numpy array of predicted class indices, one per input text. An
        empty int64 array is returned for empty input.

    Raises:
        ValueError: If `batch_size` is given but is not a positive number.
    """
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    # Nothing to tokenize: return an empty result rather than letting the
    # tokenizer/model fail on a zero-length batch.
    if not texts:
        return torch.empty(0, dtype=torch.long).numpy()

    if batch_size is None:
        batch_size = len(texts)
    elif batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Inference must run in eval mode (no dropout); remember the caller's mode
    # so this helper does not silently flip a model that is being trained.
    was_training = model.training
    model.eval()

    batch_predictions = []
    try:
        with torch.no_grad():
            for start in range(0, len(texts), batch_size):
                chunk = texts[start:start + batch_size]

                # Prepare the model input
                encoded_inputs = tokenizer(chunk, padding=True, truncation=True, return_tensors="pt")
                input_ids = encoded_inputs['input_ids'].to(model.device)
                attention_mask = encoded_inputs['attention_mask'].to(model.device)

                # Predict
                outputs = model(input_ids, attention_mask=attention_mask)
                batch_predictions.append(torch.argmax(outputs.logits, dim=-1).cpu())
    finally:
        model.train(was_training)

    # Class indices only; map them back to label names at the call site if needed
    return torch.cat(batch_predictions).numpy()

In [None]:
import torch

# Read each test column once. Indexing `dataset['test'][...]` inside a loop can
# re-materialise the whole column on every access (depends on the dataset type
# returned by the loader), which made the original grouping needlessly slow.
test_split = dataset['test']
labels_tensor = torch.tensor(test_split['labels'])  # assumes one scalar label per example — TODO confirm
test_texts = test_split['text']

# Get all unique labels in the dataset (torch.unique returns them sorted)
unique_labels = torch.unique(labels_tensor)

# Extract corresponding texts in a single pass, keeping the original text order
# within each label and the sorted label order across keys
text_by_label = {label.item(): [] for label in unique_labels}
for label_value, text in zip(labels_tensor.tolist(), test_texts):
    text_by_label[label_value].append(text)

# Show how many texts fall under each label instead of dumping every test text
print({label_value: len(texts) for label_value, texts in text_by_label.items()})



In [None]:
# The data loader owns the tokenizer; reuse it for inference
tokenizer = loader.tokenizer

# One prediction array per true label; a label with no texts maps to an empty list
predictions_by_label = {
    label: classify_texts(model, tokenizer, texts) if texts else []
    for label, texts in text_by_label.items()
}

# Report the predicted class indices grouped under each true label
for label in predictions_by_label:
    print(f"Label {label} Predictions:", predictions_by_label[label])

In [None]:
import os

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Label decoding: invert the loader's mapping so class ids look up label names
# (assumes `loader.label_encoder` maps label name -> integer class id — TODO confirm)
label_decoder = {v: k for k, v in loader.label_encoder.items()}
class_ids = sorted(label_decoder.keys())

# Aggregate true labels and their predictions
true_labels = []
predicted_labels = []

for true_label, preds in predictions_by_label.items():
    true_labels.extend([true_label] * len(preds))
    predicted_labels.extend(preds)

# Compute the confusion matrix.
# `labels=class_ids` pins one row/column per known class. Without it, a class
# that is absent from both the true and the predicted labels is dropped from the
# matrix, which then no longer lines up with `axis_labels`.
cm = confusion_matrix(true_labels, predicted_labels, labels=class_ids)

# Generate label names using the label decoder, in the same order as the matrix
axis_labels = [label_decoder[i] for i in class_ids]

# Create a figure to plot
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='viridis', xticklabels=axis_labels, yticklabels=axis_labels, linewidths=.5, linecolor='white', ax=ax)

# Beautifying the plot
ax.set_title('Confusion Matrix', fontsize=20, fontweight='bold', pad=20)
ax.set_xlabel('Predicted Labels', fontsize=14, labelpad=10)
ax.set_ylabel('True Labels', fontsize=14, labelpad=10)
ax.set_xticklabels(axis_labels, rotation=45, horizontalalignment='right', fontsize=12)
ax.set_yticklabels(axis_labels, rotation=0, verticalalignment='center', fontsize=12)

# Draw the grid lines on the cell boundaries.
# seaborn's heatmap draws cell k over [k, k + 1] (major ticks sit at k + 0.5), so
# the boundaries are the integers 0..n. Ticks at k - 0.5 would run through the
# middle of the boxes and fall outside the plotted range.
ax.set_xticks(np.arange(cm.shape[1] + 1), minor=True)
ax.set_yticks(np.arange(cm.shape[0] + 1), minor=True)
ax.grid(which="minor", color="white", linestyle='-', linewidth=0.3)
ax.tick_params(which="minor", size=0)

# Save and show the plot; create the target directory first so savefig cannot
# fail on a fresh checkout
output_path = "project_report/resources/confusion_matrix_predictions_roberta-base_sciie_double_seq_bn.png"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
fig.savefig(output_path)
plt.show()



In [None]:
import numpy as np
from transformers import TrainingArguments, EvalPrediction
from adapters import AdapterTrainer
from sklearn.metrics import f1_score

# Settings handed to `TrainingArguments`, collected in one mapping so they can be
# inspected or tweaked before the arguments object is built.
training_kwargs = dict(
    output_dir="./training_output",
    overwrite_output_dir=True,
    learning_rate=0.0009529536457150203,
    num_train_epochs=9,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=200,
    # Important: keeps the dataset columns intact so the labels are properly
    # passed to the model
    remove_unused_columns=False,
)
training_args = TrainingArguments(**training_kwargs)

def macro_f1(p: EvalPrediction):
    """Compute the macro-averaged F1 score for a batch of evaluation predictions.

    Args:
        p: Evaluation output whose `predictions` hold per-class scores along
            axis 1 and whose `label_ids` hold the reference class ids.

    Returns:
        A dict with a single `"macro_f1"` entry.
    """
    # Highest-scoring class per example
    predicted_classes = np.argmax(p.predictions, axis=1)
    score = f1_score(p.label_ids, predicted_classes, average='macro')
    return {"macro_f1": score}

# Wire the adapter-equipped model, its arguments and the metric into a trainer.
# `dataset["dev"]` becomes the default split used by `trainer.evaluate()`.
trainer = AdapterTrainer(
    args=training_args,
    model=model,
    compute_metrics=macro_f1,
    train_dataset=dataset["train"],
    eval_dataset=dataset["dev"],
)

In [None]:
# Evaluate on the trainer's default eval set (`dataset["dev"]`, passed as `eval_dataset` when the trainer was built)
trainer.evaluate()

In [None]:
# Evaluate the same trainer on the test split instead of the default eval set
trainer.evaluate(dataset["test"])