# Fine-tune BERT-Base-Uncased on mocktrain.json

This notebook trains a BERT-based relation extraction model on `mocktrain.json` and evaluates on `mock.json`. It reports accuracy, precision, recall, and F1 score.

In [4]:
!pip install transformers torch scikit-learn


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from torch.optim import AdamW
from sklearn.metrics import classification_report, accuracy_score

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
# Load datasets
with open('/content/mocktrain.json', 'r') as f:
    train_data = json.load(f)
with open('/content/mockval.json', 'r') as f:
    val_data = json.load(f)

# Labels
label_list = ["no_relation", "org:alternate_names", "org:city_of_headquarters", "org:country_of_headquarters", "org:dissolved", "org:founded", "org:founded_by", "org:member_of", "org:members", "org:number_of_employees/members", "org:parents", "org:stateorprovince_of_headquarters", "org:subsidiaries", "org:top_members/employees", "org:website", "per:age", "per:alternate_names", "per:children", "per:cities_of_residence", "per:city_of_birth", "per:city_of_death", "per:countries_of_residence", "per:country_of_birth", "per:country_of_death", "per:date_of_birth", "per:date_of_death", "per:employee_of", "per:origin", "per:other_family", "per:parents", "per:religion", "per:schools_attended", "per:siblings", "per:spouse", "per:stateorprovince_of_birth", "per:stateorprovince_of_death", "per:stateorprovinces_of_residence", "per:title"]
rel2id = {label: i for i, label in enumerate(label_list)}


In [7]:
# Tokenizer and preprocessing
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def preprocess(data):
    samples = []
    for ex in data:
        sent = ex['sentence']
        e1, e2 = ex['entities']
        sent_marked = sent.replace(e1, 'SUBJ', 1).replace(e2, 'OBJ', 1)
        enc = tokenizer(
            f"[CLS] {sent_marked} [SEP] {e1} [SEP] {e2} [SEP]",
            padding='max_length', truncation=True, max_length=128, return_tensors='pt'
        )
        samples.append((enc['input_ids'].squeeze(0), enc['attention_mask'].squeeze(0), rel2id[ex['relation']]))
    return samples

train_samples = preprocess(train_data)
val_samples = preprocess(val_data)


In [8]:
# Dataset and DataLoader
class REDataset(Dataset):
    def __init__(self, samples):
        self.samples = samples
    def __len__(self):
        return len(self.samples)
    def __getitem__(self, idx):
        return self.samples[idx]

train_loader = DataLoader(REDataset(train_samples), batch_size=16, shuffle=True)
val_loader = DataLoader(REDataset(val_samples), batch_size=32)


In [9]:
# Model
class BertRE(nn.Module):
    def __init__(self, num_labels):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.classifier = nn.Sequential(
            nn.Linear(self.bert.config.hidden_size, 256), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(256, num_labels)
        )
    def forward(self, input_ids, attention_mask):
        out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_out = out.last_hidden_state[:, 0, :]
        return self.classifier(cls_out)

model = BertRE(num_labels=len(label_list)).to(device)
optimizer = AdamW(model.parameters(), lr=5e-5)
criterion = nn.CrossEntropyLoss()


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [10]:
# Training Loop
for epoch in range(5):
    model.train()
    total_loss = 0
    for input_ids, attention_mask, labels in train_loader:
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = model(input_ids, attention_mask)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Loss: {total_loss/len(train_loader):.4f}")


Epoch 1 Loss: 3.5066
Epoch 2 Loss: 2.4194
Epoch 3 Loss: 1.3226
Epoch 4 Loss: 0.7137
Epoch 5 Loss: 0.4126


In [14]:
import json
from sklearn.metrics import classification_report, accuracy_score

# === your existing code ===
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for input_ids, attention_mask, labels in val_loader:
        input_ids, attention_mask = input_ids.to(device), attention_mask.to(device)
        logits = model(input_ids, attention_mask)
        preds = torch.argmax(logits, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.numpy())

present_labels = sorted(set(all_labels + all_preds))
present_names  = [label_list[i] for i in present_labels]

# === generate report dict instead of plain text ===
report_dict = classification_report(
    all_labels,
    all_preds,
    labels=present_labels,
    target_names=present_names,
    zero_division=0,
    output_dict=True
)
accuracy = accuracy_score(all_labels, all_preds)

# === print to screen ===
print(classification_report(
    all_labels,
    all_preds,
    labels=present_labels,
    target_names=present_names,
    zero_division=0
))
print(f"Accuracy: {accuracy:.4f}")

# === save metrics ===
metrics = {
    "accuracy": accuracy,
    "classification_report": report_dict
}
with open("metrics.json", "w") as mf:
    json.dump(metrics, mf, indent=2)

# === save per-example predictions ===
# assuming you still have `val_data` loaded as a list of dicts
outputs = []
for sample, pred in zip(val_data, all_preds):
    outputs.append({
        "sentence": sample["sentence"],
        "entities":    sample["entities"],
        "true":        sample["relation"],
        "predicted":   label_list[pred]
    })

with open("predictions.json", "w") as pf:
    json.dump(outputs, pf, indent=2)

print("Saved metrics.json and predictions.json")


                             precision    recall  f1-score   support

                no_relation       1.00      0.40      0.57         5
   org:city_of_headquarters       0.00      0.00      0.00         1
org:country_of_headquarters       0.00      0.00      0.00         0
             org:founded_by       0.67      1.00      0.80         2
                org:website       0.00      0.00      0.00         0
          per:city_of_birth       0.00      0.00      0.00         1
            per:employee_of       0.00      0.00      0.00         1
                  per:title       0.00      0.00      0.00         0

                   accuracy                           0.40        10
                  macro avg       0.21      0.17      0.17        10
               weighted avg       0.63      0.40      0.45        10

Accuracy: 0.4000
Saved metrics.json and predictions.json


In [None]:
# Save Model
torch.save(model.state_dict(), 'bert_base_mocktacred.pt')
print('Model saved as bert_base_mocktacred.pt')