<a href="https://colab.research.google.com/github/its3alih/Thesis/blob/main/BiLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## FIRST

In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load Data
def load_excel_data(file_path):
    """Read a token/tag table and group its rows into sentences.

    The file is parsed with ``pd.read_csv`` when ``file_path`` ends in
    ``.csv`` and with ``pd.read_excel`` otherwise. Only the columns
    ``'Word i'`` and ``'Word i entity tag'`` are used, and rows with a
    missing value in either of them are dropped.

    A row whose stripped word is ``'.'`` or ``'؟'`` closes the current
    sentence; the terminator and its tag are not stored.

    Returns:
        ``(sentences, labels)``: two parallel lists. Each sentence is a list
        of stripped word strings and each label list holds the stripped tag
        strings of the same rows.
    """
    reader = pd.read_csv if file_path.endswith('.csv') else pd.read_excel
    frame = reader(file_path)[['Word i', 'Word i entity tag']].dropna()

    terminators = ('.', '؟')
    sentences, labels = [], []
    cur_words, cur_tags = [], []
    for raw_word, raw_tag in zip(frame['Word i'], frame['Word i entity tag']):
        token = str(raw_word).strip()
        if token not in terminators:
            cur_words.append(token)
            cur_tags.append(str(raw_tag).strip())
            continue
        # Terminator row: flush the sentence collected so far, if any.
        if cur_words:
            sentences.append(cur_words)
            labels.append(cur_tags)
            cur_words, cur_tags = [], []

    # Keep trailing tokens that were never followed by a terminator.
    if cur_words:
        sentences.append(cur_words)
        labels.append(cur_tags)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Assign an integer index to every distinct word and tag.

    Words are numbered in order of first appearance after the two reserved
    entries ``'<PAD>'`` (0) and ``'<UNK>'`` (1); tags are numbered from 0 in
    order of first appearance.

    Args:
        sentences: Iterable of word sequences.
        tags: Iterable of tag sequences.

    Returns:
        ``(word2idx, tag2idx)`` dictionaries.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    for sentence in sentences:
        for token in sentence:
            # The default is evaluated before insertion, so a new token
            # receives the current dictionary size as its index.
            word2idx.setdefault(token, len(word2idx))

    tag2idx = {}
    for sequence in tags:
        for name in sequence:
            tag2idx.setdefault(name, len(tag2idx))

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Fixed-length view over tokenised sentences and their tag sequences.

    Each item is a pair of tensors built from one sentence: its word indices
    and its tag indices, cut or padded according to ``max_len``.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        """Store the raw data, the two lookup tables and ``max_len``."""
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        """Return the number of sentences."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return ``(word_ids, tag_ids)`` tensors for sentence ``idx``.

        Words missing from ``word2idx`` map to the ``'<UNK>'`` index. Short
        sentences are right-padded with the ``'<PAD>'`` index and their tags
        with ``-100``; sentences of ``max_len`` or more are cut to ``max_len``.
        """
        sent, tag_seq = self.sentences[idx], self.tags[idx]

        word_ids = [
            self.word2idx.get(token, self.word2idx['<UNK>']) for token in sent
        ]
        tag_ids = [self.tag2idx[name] for name in tag_seq]

        missing = self.max_len - len(word_ids)
        if missing <= 0:
            # Already at or beyond the limit: keep the first max_len entries.
            word_ids = word_ids[:self.max_len]
            tag_ids = tag_ids[:self.max_len]
        else:
            word_ids.extend([self.word2idx['<PAD>']] * missing)
            # -100 is the label value used to mark padded positions.
            tag_ids.extend([-100] * missing)

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Embedding -> bidirectional LSTM -> linear layer giving per-token tag logits.

    Index 0 is the embedding's ``padding_idx``. For batched input,
    ``forward`` treats index-0 positions at the end of a row as padding and
    keeps them out of the recurrence.
    """

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            tagset_size: Number of output logits per token.
            embedding_dim: Size of each word embedding.
            hidden_dim: Hidden size of each LSTM direction; the linear layer
                receives ``hidden_dim * 2`` features.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, tagset_size)

    def forward(self, x):
        """Return tag logits for word indices ``x``.

        Args:
            x: Integer tensor of word indices. A 2-D ``(batch, seq_len)``
                tensor is expected to be right-padded with index 0.

        Returns:
            One score per tag for every position; for 2-D input the shape is
            ``(batch, seq_len, tagset_size)``.
        """
        emb = self.embedding(x)
        if x.dim() != 2:
            # Not a (batch, seq_len) tensor: no per-row padding to mask.
            lstm_out, _ = self.lstm(emb)
            return self.fc(lstm_out)

        # Without packing, the backward direction would consume the padding
        # before the real tokens, making the logits of a sentence depend on
        # how much padding follows it. Rows that are entirely padding are
        # given length 1 because packing rejects zero-length sequences.
        lengths = (x != self.embedding.padding_idx).sum(dim=1).clamp(min=1).cpu()
        packed = nn.utils.rnn.pack_padded_sequence(
            emb, lengths, batch_first=True, enforce_sorted=False
        )
        packed_out, _ = self.lstm(packed)
        # total_length restores the input's seq_len so callers see the same
        # output shape as before; padded positions hold zeros before the
        # linear layer.
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(
            packed_out, batch_first=True, total_length=x.size(1)
        )
        return self.fc(lstm_out)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called on each input batch; its output is flattened to
            ``(-1, num_tags)`` before being passed to ``criterion``.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. It does
            not need to implement ``__len__``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(flat_logits, flat_targets)``.
        device: Device the batches are moved to.

    Returns:
        The average of the per-batch loss values, or ``0.0`` when the loader
        yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for x_batch, y_batch in data_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        logits = model(x_batch)  # shape: (batch_size, seq_len, num_tags)

        # reshape (unlike view) also accepts non-contiguous tensors.
        flat_logits = logits.reshape(-1, logits.shape[-1])
        flat_targets = y_batch.reshape(-1)

        loss = criterion(flat_logits, flat_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Count the batches actually seen instead of calling len(data_loader):
    # this avoids a ZeroDivisionError on an empty loader and works for
    # iterables that have no length.
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

# 6. Evaluation function with detailed metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Score the model's token-level predictions on ``data_loader``.

    Positions whose target is ``-100`` are skipped; every other position
    contributes its gold tag name and the name of the arg-max predicted tag.

    Returns:
        ``(report, accuracy, precision, recall, f1)`` where ``report`` is the
        ``classification_report`` text (4 digits) and precision, recall and
        F1 use ``average='weighted'``.
    """
    model.eval()
    idx2tag = {index: name for name, index in tag2idx.items()}

    gold_tags, predicted_tags = [], []
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            best = model(inputs).argmax(dim=-1)

            # Boolean-mask selection keeps row-major order, so gold and
            # predicted entries stay aligned position by position.
            keep = targets != -100
            gold_tags.extend(idx2tag[i] for i in targets[keep].tolist())
            predicted_tags.extend(idx2tag[i] for i in best[keep].tolist())

    report = classification_report(gold_tags, predicted_tags, digits=4)
    accuracy = accuracy_score(gold_tags, predicted_tags)
    precision, recall, f1 = [
        metric(gold_tags, predicted_tags, average='weighted')
        for metric in (precision_score, recall_score, f1_score)
    ]

    return report, accuracy, precision, recall, f1

# 7. Main execution
if __name__ == "__main__":
    file_path = "/content/IO.xlsx"  # Your IO tagging dataset path
    sentences, tags = load_excel_data(file_path)
    # NOTE(review): the vocabulary is built from every sentence, including
    # those that end up in the test split below.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    # 80/20 train/test split over sentences.
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    # Seed the split: an unseeded random_split draws a different partition on
    # every run, so the reported test metrics could not be reproduced.
    split_generator = torch.Generator().manual_seed(42)
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size], generator=split_generator
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Padded label positions carry -100 and are excluded from the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.1485
Epoch 2/10, Loss: 0.0362
Epoch 3/10, Loss: 0.0186
Epoch 4/10, Loss: 0.0107
Epoch 5/10, Loss: 0.0059
Epoch 6/10, Loss: 0.0034
Epoch 7/10, Loss: 0.0018
Epoch 8/10, Loss: 0.0009
Epoch 9/10, Loss: 0.0007
Epoch 10/10, Loss: 0.0004

Evaluation on test data:
              precision    recall  f1-score   support

           I     0.9898    0.9327    0.9604       520
           O     0.9968    0.9995    0.9982     11010

    accuracy                         0.9965     11530
   macro avg     0.9933    0.9661    0.9793     11530
weighted avg     0.9965    0.9965    0.9965     11530

Accuracy: 0.9965
Precision: 0.9965
Recall: 0.9965
F1 Score: 0.9965


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load IE Data (adapted for IE tagging)
def load_excel_data_ie(file_path):
    """Load the IE-tagged token table and split it into sentences.

    ``file_path`` is read with ``pd.read_csv`` if it ends in ``.csv`` and
    with ``pd.read_excel`` otherwise. Rows lacking a value in ``'Word i'`` or
    ``'Word i entity tag'`` are discarded. A stripped word equal to ``'.'``
    or ``'؟'`` ends the running sentence and is itself left out.

    Returns:
        ``(sentences, labels)``: parallel lists of word lists and tag lists,
        all values converted to stripped strings.
    """
    word_col, tag_col = 'Word i', 'Word i entity tag'
    table = pd.read_csv(file_path) if file_path.endswith('.csv') else pd.read_excel(file_path)
    table = table[[word_col, tag_col]].dropna()

    sentences, labels = [], []
    pending = []  # (word, tag) pairs of the sentence being assembled
    for raw_word, raw_tag in zip(table[word_col], table[tag_col]):
        word = str(raw_word).strip()
        if word in ('.', '؟'):
            if pending:
                sentences.append([w for w, _ in pending])
                labels.append([t for _, t in pending])
                pending = []
        else:
            pending.append((word, str(raw_tag).strip()))

    # Tokens after the last terminator still form a sentence.
    if pending:
        sentences.append([w for w, _ in pending])
        labels.append([t for _, t in pending])

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Create the word-to-index and tag-to-index lookup tables.

    ``'<PAD>'`` and ``'<UNK>'`` always occupy word indices 0 and 1. Every
    other word, and every tag, gets the next free index the first time it is
    seen.

    Returns:
        ``(word2idx, tag2idx)``.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    all_words = (word for sent in sentences for word in sent)
    for word in all_words:
        if word in word2idx:
            continue
        word2idx[word] = len(word2idx)

    tag2idx = {}
    all_tags = (tag for tag_seq in tags for tag in tag_seq)
    for tag in all_tags:
        if tag in tag2idx:
            continue
        tag2idx[tag] = len(tag2idx)

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Dataset yielding index tensors of a fixed length for each sentence."""

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        """Remember the sentences, their tags, both vocabularies and ``max_len``."""
        self.sentences = sentences
        self.tags = tags
        self.word2idx = word2idx
        self.tag2idx = tag2idx
        self.max_len = max_len

    def __len__(self):
        """Number of sentences available."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Encode sentence ``idx`` as ``(word_ids, tag_ids)`` tensors.

        Unknown words use the ``'<UNK>'`` index. When the sentence is shorter
        than ``max_len`` the words are padded with the ``'<PAD>'`` index and
        the tags with ``-100``; otherwise both lists are cut to ``max_len``.
        """
        words, names = self.sentences[idx], self.tags[idx]

        encoded_words = [self.word2idx.get(w, self.word2idx['<UNK>']) for w in words]
        encoded_tags = [self.tag2idx[n] for n in names]

        shortfall = self.max_len - len(encoded_words)
        if shortfall > 0:
            pad_id = self.word2idx['<PAD>']
            encoded_words += [pad_id] * shortfall
            # -100 flags label positions that belong to padding.
            encoded_tags += [-100] * shortfall
        else:
            limit = self.max_len
            encoded_words, encoded_tags = encoded_words[:limit], encoded_tags[:limit]

        return torch.tensor(encoded_words), torch.tensor(encoded_tags)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Embedding -> bidirectional LSTM -> linear layer giving per-token tag logits.

    Index 0 is the embedding's ``padding_idx``. For batched input,
    ``forward`` treats index-0 positions at the end of a row as padding and
    keeps them out of the recurrence.
    """

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            tagset_size: Number of output logits per token.
            embedding_dim: Size of each word embedding.
            hidden_dim: Hidden size of each LSTM direction; the linear layer
                receives ``hidden_dim * 2`` features.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, tagset_size)

    def forward(self, x):
        """Return tag logits for word indices ``x``.

        Args:
            x: Integer tensor of word indices. A 2-D ``(batch, seq_len)``
                tensor is expected to be right-padded with index 0.

        Returns:
            One score per tag for every position; for 2-D input the shape is
            ``(batch, seq_len, tagset_size)``.
        """
        emb = self.embedding(x)
        if x.dim() != 2:
            # Not a (batch, seq_len) tensor: no per-row padding to mask.
            lstm_out, _ = self.lstm(emb)
            return self.fc(lstm_out)

        # Without packing, the backward direction would consume the padding
        # before the real tokens, making the logits of a sentence depend on
        # how much padding follows it. Rows that are entirely padding are
        # given length 1 because packing rejects zero-length sequences.
        lengths = (x != self.embedding.padding_idx).sum(dim=1).clamp(min=1).cpu()
        packed = nn.utils.rnn.pack_padded_sequence(
            emb, lengths, batch_first=True, enforce_sorted=False
        )
        packed_out, _ = self.lstm(packed)
        # total_length restores the input's seq_len so callers see the same
        # output shape as before; padded positions hold zeros before the
        # linear layer.
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(
            packed_out, batch_first=True, total_length=x.size(1)
        )
        return self.fc(lstm_out)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called on each input batch; its output is flattened to
            ``(-1, num_tags)`` before being passed to ``criterion``.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. It does
            not need to implement ``__len__``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(flat_logits, flat_targets)``.
        device: Device the batches are moved to.

    Returns:
        The average of the per-batch loss values, or ``0.0`` when the loader
        yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for x_batch, y_batch in data_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        logits = model(x_batch)  # shape: (batch_size, seq_len, num_tags)

        # reshape (unlike view) also accepts non-contiguous tensors.
        flat_logits = logits.reshape(-1, logits.shape[-1])
        flat_targets = y_batch.reshape(-1)

        loss = criterion(flat_logits, flat_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Count the batches actually seen instead of calling len(data_loader):
    # this avoids a ZeroDivisionError on an empty loader and works for
    # iterables that have no length.
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

# 6. Evaluation function with detailed metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Compute token-level classification metrics over ``data_loader``.

    The model is switched to eval mode and run without gradients. For each
    position whose target is not ``-100`` the gold tag name and the arg-max
    predicted tag name are collected.

    Returns:
        ``(report, accuracy, precision, recall, f1)``; ``report`` is the
        4-digit ``classification_report`` string and the last three values
        are weighted averages.
    """
    model.eval()
    idx2tag = dict((index, name) for name, index in tag2idx.items())

    gold, guessed = [], []
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            choices = torch.argmax(model(inputs), dim=-1)

            rows = zip(targets.cpu().tolist(), choices.cpu().tolist())
            for gold_row, guess_row in rows:
                for gold_id, guess_id in zip(gold_row, guess_row):
                    if gold_id == -100:
                        # Padding position: not scored.
                        continue
                    gold.append(idx2tag[gold_id])
                    guessed.append(idx2tag[guess_id])

    weighted = dict(average='weighted')
    return (
        classification_report(gold, guessed, digits=4),
        accuracy_score(gold, guessed),
        precision_score(gold, guessed, **weighted),
        recall_score(gold, guessed, **weighted),
        f1_score(gold, guessed, **weighted),
    )

# 7. Main execution
if __name__ == "__main__":
    file_path = "/content/IE.xlsx"  # Your IE tagging dataset path
    sentences, tags = load_excel_data_ie(file_path)
    # NOTE(review): the vocabulary is built from every sentence, including
    # those that end up in the test split below.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    # 80/20 train/test split over sentences.
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    # Seed the split: an unseeded random_split draws a different partition on
    # every run, so the reported test metrics could not be reproduced.
    split_generator = torch.Generator().manual_seed(42)
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size], generator=split_generator
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Padded label positions carry -100 and are excluded from the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.3592
Epoch 2/10, Loss: 0.0870
Epoch 3/10, Loss: 0.0413
Epoch 4/10, Loss: 0.0236
Epoch 5/10, Loss: 0.0141
Epoch 6/10, Loss: 0.0092
Epoch 7/10, Loss: 0.0058
Epoch 8/10, Loss: 0.0039
Epoch 9/10, Loss: 0.0026
Epoch 10/10, Loss: 0.0024

Evaluation on test data:
              precision    recall  f1-score   support

           E     0.9382    0.9247    0.9314       279
          EO     0.9662    0.9554    0.9607       269
           I     0.9573    0.9208    0.9387       341
          IO     0.9955    0.9973    0.9964     11068

    accuracy                         0.9925     11957
   macro avg     0.9643    0.9496    0.9568     11957
weighted avg     0.9924    0.9925    0.9924     11957

Accuracy: 0.9925
Precision: 0.9924
Recall: 0.9925
F1 Score: 0.9924


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load IOB Data
def load_excel_data_iob(file_path):
    """Read the IOB-tagged token table and group its rows into sentences.

    The file is parsed with ``pd.read_csv`` when ``file_path`` ends in
    ``.csv`` and with ``pd.read_excel`` otherwise. Only the columns
    ``'Word i'`` and ``'Word i entity tag'`` are used, and rows with a
    missing value in either of them are dropped.

    A row whose stripped word is ``'.'`` or ``'؟'`` closes the current
    sentence; the terminator and its tag are not stored.

    Returns:
        ``(sentences, labels)``: two parallel lists of stripped word strings
        and stripped tag strings, one inner list per sentence.
    """
    loader = pd.read_csv if file_path.endswith('.csv') else pd.read_excel
    rows = loader(file_path)[['Word i', 'Word i entity tag']].dropna()

    sentence_end = ('.', '؟')
    sentences, labels = [], []
    words_so_far, tags_so_far = [], []
    for raw_word, raw_tag in zip(rows['Word i'], rows['Word i entity tag']):
        text = str(raw_word).strip()
        if text not in sentence_end:
            words_so_far.append(text)
            tags_so_far.append(str(raw_tag).strip())
            continue
        # Sentence boundary: store what has been gathered, if anything.
        if words_so_far:
            sentences.append(words_so_far)
            labels.append(tags_so_far)
            words_so_far, tags_so_far = [], []

    # The final sentence may lack a closing terminator.
    if words_so_far:
        sentences.append(words_so_far)
        labels.append(tags_so_far)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Number the distinct words and tags in order of first occurrence.

    The word table starts with ``'<PAD>'`` at 0 and ``'<UNK>'`` at 1; the tag
    table starts empty.

    Returns:
        ``(word2idx, tag2idx)``.
    """
    # dict.fromkeys keeps the first occurrence of each key in insertion
    # order, which gives a de-duplicated, ordered list of symbols.
    ordered_words = dict.fromkeys(['<PAD>', '<UNK>'])
    ordered_words.update(dict.fromkeys(w for sent in sentences for w in sent))
    word2idx = {word: position for position, word in enumerate(ordered_words)}

    ordered_tags = dict.fromkeys(t for tag_seq in tags for t in tag_seq)
    tag2idx = {tag: position for position, tag in enumerate(ordered_tags)}

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Fixed-length view over tokenised sentences and their tag sequences.

    Each item is a pair of tensors built from one sentence: its word indices
    and its tag indices, cut or padded according to ``max_len``.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        """Store the raw data, the two lookup tables and ``max_len``."""
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        """Return the number of sentences."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return ``(word_ids, tag_ids)`` tensors for sentence ``idx``.

        Words missing from ``word2idx`` map to the ``'<UNK>'`` index. Short
        sentences are right-padded with the ``'<PAD>'`` index and their tags
        with ``-100``; sentences of ``max_len`` or more are cut to ``max_len``.
        """
        sent, tag_seq = self.sentences[idx], self.tags[idx]

        word_ids = [
            self.word2idx.get(token, self.word2idx['<UNK>']) for token in sent
        ]
        tag_ids = [self.tag2idx[name] for name in tag_seq]

        missing = self.max_len - len(word_ids)
        if missing <= 0:
            # Already at or beyond the limit: keep the first max_len entries.
            word_ids = word_ids[:self.max_len]
            tag_ids = tag_ids[:self.max_len]
        else:
            word_ids.extend([self.word2idx['<PAD>']] * missing)
            # -100 is the label value used to mark padded positions.
            tag_ids.extend([-100] * missing)

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Embedding -> bidirectional LSTM -> linear layer giving per-token tag logits.

    Index 0 is the embedding's ``padding_idx``. For batched input,
    ``forward`` treats index-0 positions at the end of a row as padding and
    keeps them out of the recurrence.
    """

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            tagset_size: Number of output logits per token.
            embedding_dim: Size of each word embedding.
            hidden_dim: Hidden size of each LSTM direction; the linear layer
                receives ``hidden_dim * 2`` features.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, tagset_size)

    def forward(self, x):
        """Return tag logits for word indices ``x``.

        Args:
            x: Integer tensor of word indices. A 2-D ``(batch, seq_len)``
                tensor is expected to be right-padded with index 0.

        Returns:
            One score per tag for every position; for 2-D input the shape is
            ``(batch, seq_len, tagset_size)``.
        """
        emb = self.embedding(x)
        if x.dim() != 2:
            # Not a (batch, seq_len) tensor: no per-row padding to mask.
            lstm_out, _ = self.lstm(emb)
            return self.fc(lstm_out)

        # Without packing, the backward direction would consume the padding
        # before the real tokens, making the logits of a sentence depend on
        # how much padding follows it. Rows that are entirely padding are
        # given length 1 because packing rejects zero-length sequences.
        lengths = (x != self.embedding.padding_idx).sum(dim=1).clamp(min=1).cpu()
        packed = nn.utils.rnn.pack_padded_sequence(
            emb, lengths, batch_first=True, enforce_sorted=False
        )
        packed_out, _ = self.lstm(packed)
        # total_length restores the input's seq_len so callers see the same
        # output shape as before; padded positions hold zeros before the
        # linear layer.
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(
            packed_out, batch_first=True, total_length=x.size(1)
        )
        return self.fc(lstm_out)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called on each input batch; its output is flattened to
            ``(-1, num_tags)`` before being passed to ``criterion``.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. It does
            not need to implement ``__len__``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(flat_logits, flat_targets)``.
        device: Device the batches are moved to.

    Returns:
        The average of the per-batch loss values, or ``0.0`` when the loader
        yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for x_batch, y_batch in data_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        logits = model(x_batch)  # (batch_size, seq_len, num_tags)

        # reshape (unlike view) also accepts non-contiguous tensors.
        flat_logits = logits.reshape(-1, logits.shape[-1])
        flat_targets = y_batch.reshape(-1)

        loss = criterion(flat_logits, flat_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Count the batches actually seen instead of calling len(data_loader):
    # this avoids a ZeroDivisionError on an empty loader and works for
    # iterables that have no length.
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

# 6. Evaluation function with classification report and metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Score the model's token-level predictions on ``data_loader``.

    Positions whose target is ``-100`` are skipped; every other position
    contributes its gold tag name and the name of the arg-max predicted tag.

    Returns:
        ``(report, accuracy, precision, recall, f1)`` where ``report`` is the
        ``classification_report`` text (4 digits) and precision, recall and
        F1 use ``average='weighted'``.
    """
    model.eval()
    idx2tag = {index: name for name, index in tag2idx.items()}

    gold_tags, predicted_tags = [], []
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            best = model(inputs).argmax(dim=-1)

            # Boolean-mask selection keeps row-major order, so gold and
            # predicted entries stay aligned position by position.
            keep = targets != -100
            gold_tags.extend(idx2tag[i] for i in targets[keep].tolist())
            predicted_tags.extend(idx2tag[i] for i in best[keep].tolist())

    report = classification_report(gold_tags, predicted_tags, digits=4)
    accuracy = accuracy_score(gold_tags, predicted_tags)
    precision, recall, f1 = [
        metric(gold_tags, predicted_tags, average='weighted')
        for metric in (precision_score, recall_score, f1_score)
    ]

    return report, accuracy, precision, recall, f1

# 7. Main execution
if __name__ == "__main__":
    file_path = "/content/IOB.xlsx"  # Your IOB tagging dataset path
    sentences, tags = load_excel_data_iob(file_path)
    # NOTE(review): the vocabulary is built from every sentence, including
    # those that end up in the test split below.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    # 80/20 train/test split over sentences.
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    # Seed the split: an unseeded random_split draws a different partition on
    # every run, so the reported test metrics could not be reproduced.
    split_generator = torch.Generator().manual_seed(42)
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size], generator=split_generator
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Padded label positions carry -100 and are excluded from the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.2174
Epoch 2/10, Loss: 0.0461
Epoch 3/10, Loss: 0.0222
Epoch 4/10, Loss: 0.0121
Epoch 5/10, Loss: 0.0073
Epoch 6/10, Loss: 0.0043
Epoch 7/10, Loss: 0.0025
Epoch 8/10, Loss: 0.0014
Epoch 9/10, Loss: 0.0010
Epoch 10/10, Loss: 0.0008

Evaluation on test data:
              precision    recall  f1-score   support

           B     0.9922    0.9200    0.9547       275
           I     0.9727    0.8934    0.9314       319
           O     0.9952    0.9992    0.9972     11299

    accuracy                         0.9945     11893
   macro avg     0.9867    0.9375    0.9611     11893
weighted avg     0.9945    0.9945    0.9944     11893

Accuracy: 0.9945
Precision: 0.9945
Recall: 0.9945
F1 Score: 0.9944


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load IOBES Data
def load_excel_data_iobes(file_path):
    """Load the IOBES-tagged token table and split it into sentences.

    ``file_path`` is read with ``pd.read_csv`` if it ends in ``.csv`` and
    with ``pd.read_excel`` otherwise. Rows lacking a value in ``'Word i'`` or
    ``'Word i entity tag'`` are discarded. A stripped word equal to ``'.'``
    or ``'؟'`` ends the running sentence and is itself left out.

    Returns:
        ``(sentences, labels)``: parallel lists of word lists and tag lists,
        all values converted to stripped strings.
    """
    word_col, tag_col = 'Word i', 'Word i entity tag'
    table = pd.read_csv(file_path) if file_path.endswith('.csv') else pd.read_excel(file_path)
    table = table[[word_col, tag_col]].dropna()

    sentences, labels = [], []
    pending = []  # (word, tag) pairs of the sentence being assembled
    for raw_word, raw_tag in zip(table[word_col], table[tag_col]):
        word = str(raw_word).strip()
        if word in ('.', '؟'):
            if pending:
                sentences.append([w for w, _ in pending])
                labels.append([t for _, t in pending])
                pending = []
        else:
            pending.append((word, str(raw_tag).strip()))

    # Tokens after the last terminator still form a sentence.
    if pending:
        sentences.append([w for w, _ in pending])
        labels.append([t for _, t in pending])

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Assign an integer index to every distinct word and tag.

    Words are numbered in order of first appearance after the two reserved
    entries ``'<PAD>'`` (0) and ``'<UNK>'`` (1); tags are numbered from 0 in
    order of first appearance.

    Args:
        sentences: Iterable of word sequences.
        tags: Iterable of tag sequences.

    Returns:
        ``(word2idx, tag2idx)`` dictionaries.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    for sentence in sentences:
        for token in sentence:
            # The default is evaluated before insertion, so a new token
            # receives the current dictionary size as its index.
            word2idx.setdefault(token, len(word2idx))

    tag2idx = {}
    for sequence in tags:
        for name in sequence:
            tag2idx.setdefault(name, len(tag2idx))

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Dataset yielding index tensors of a fixed length for each sentence."""

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        """Remember the sentences, their tags, both vocabularies and ``max_len``."""
        self.sentences = sentences
        self.tags = tags
        self.word2idx = word2idx
        self.tag2idx = tag2idx
        self.max_len = max_len

    def __len__(self):
        """Number of sentences available."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Encode sentence ``idx`` as ``(word_ids, tag_ids)`` tensors.

        Unknown words use the ``'<UNK>'`` index. When the sentence is shorter
        than ``max_len`` the words are padded with the ``'<PAD>'`` index and
        the tags with ``-100``; otherwise both lists are cut to ``max_len``.
        """
        words, names = self.sentences[idx], self.tags[idx]

        encoded_words = [self.word2idx.get(w, self.word2idx['<UNK>']) for w in words]
        encoded_tags = [self.tag2idx[n] for n in names]

        shortfall = self.max_len - len(encoded_words)
        if shortfall > 0:
            pad_id = self.word2idx['<PAD>']
            encoded_words += [pad_id] * shortfall
            # -100 flags label positions that belong to padding.
            encoded_tags += [-100] * shortfall
        else:
            limit = self.max_len
            encoded_words, encoded_tags = encoded_words[:limit], encoded_tags[:limit]

        return torch.tensor(encoded_words), torch.tensor(encoded_tags)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Embedding -> bidirectional LSTM -> linear layer giving per-token tag logits.

    Index 0 is the embedding's ``padding_idx``. For batched input,
    ``forward`` treats index-0 positions at the end of a row as padding and
    keeps them out of the recurrence.
    """

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            tagset_size: Number of output logits per token.
            embedding_dim: Size of each word embedding.
            hidden_dim: Hidden size of each LSTM direction; the linear layer
                receives ``hidden_dim * 2`` features.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, tagset_size)

    def forward(self, x):
        """Return tag logits for word indices ``x``.

        Args:
            x: Integer tensor of word indices. A 2-D ``(batch, seq_len)``
                tensor is expected to be right-padded with index 0.

        Returns:
            One score per tag for every position; for 2-D input the shape is
            ``(batch, seq_len, tagset_size)``.
        """
        emb = self.embedding(x)
        if x.dim() != 2:
            # Not a (batch, seq_len) tensor: no per-row padding to mask.
            lstm_out, _ = self.lstm(emb)
            return self.fc(lstm_out)

        # Without packing, the backward direction would consume the padding
        # before the real tokens, making the logits of a sentence depend on
        # how much padding follows it. Rows that are entirely padding are
        # given length 1 because packing rejects zero-length sequences.
        lengths = (x != self.embedding.padding_idx).sum(dim=1).clamp(min=1).cpu()
        packed = nn.utils.rnn.pack_padded_sequence(
            emb, lengths, batch_first=True, enforce_sorted=False
        )
        packed_out, _ = self.lstm(packed)
        # total_length restores the input's seq_len so callers see the same
        # output shape as before; padded positions hold zeros before the
        # linear layer.
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(
            packed_out, batch_first=True, total_length=x.size(1)
        )
        return self.fc(lstm_out)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called on each input batch; its output is flattened to
            ``(-1, num_tags)`` before being passed to ``criterion``.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. It does
            not need to implement ``__len__``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(flat_logits, flat_targets)``.
        device: Device the batches are moved to.

    Returns:
        The average of the per-batch loss values, or ``0.0`` when the loader
        yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for x_batch, y_batch in data_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        logits = model(x_batch)  # (batch_size, seq_len, num_tags)

        # reshape (unlike view) also accepts non-contiguous tensors.
        flat_logits = logits.reshape(-1, logits.shape[-1])
        flat_targets = y_batch.reshape(-1)

        loss = criterion(flat_logits, flat_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Count the batches actually seen instead of calling len(data_loader):
    # this avoids a ZeroDivisionError on an empty loader and works for
    # iterables that have no length.
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

# 6. Evaluation function with classification report and metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Compute token-level classification metrics over ``data_loader``.

    The model is switched to eval mode and run without gradients. For each
    position whose target is not ``-100`` the gold tag name and the arg-max
    predicted tag name are collected.

    Returns:
        ``(report, accuracy, precision, recall, f1)``; ``report`` is the
        4-digit ``classification_report`` string and the last three values
        are weighted averages.
    """
    model.eval()
    idx2tag = dict((index, name) for name, index in tag2idx.items())

    gold, guessed = [], []
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            choices = torch.argmax(model(inputs), dim=-1)

            rows = zip(targets.cpu().tolist(), choices.cpu().tolist())
            for gold_row, guess_row in rows:
                for gold_id, guess_id in zip(gold_row, guess_row):
                    if gold_id == -100:
                        # Padding position: not scored.
                        continue
                    gold.append(idx2tag[gold_id])
                    guessed.append(idx2tag[guess_id])

    weighted = dict(average='weighted')
    return (
        classification_report(gold, guessed, digits=4),
        accuracy_score(gold, guessed),
        precision_score(gold, guessed, **weighted),
        recall_score(gold, guessed, **weighted),
        f1_score(gold, guessed, **weighted),
    )

# 7. Main execution
if __name__ == "__main__":
    file_path = "/content/IOBES.xlsx"  # Your IOBES tagging dataset path
    sentences, tags = load_excel_data_iobes(file_path)
    # NOTE(review): the vocabulary is built from every sentence, including
    # those that end up in the test split below.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    # 80/20 train/test split over sentences.
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    # Seed the split: an unseeded random_split draws a different partition on
    # every run, so the reported test metrics could not be reproduced.
    split_generator = torch.Generator().manual_seed(42)
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size], generator=split_generator
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Padded label positions carry -100 and are excluded from the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.3229
Epoch 2/10, Loss: 0.0640
Epoch 3/10, Loss: 0.0310
Epoch 4/10, Loss: 0.0180
Epoch 5/10, Loss: 0.0114
Epoch 6/10, Loss: 0.0079
Epoch 7/10, Loss: 0.0053
Epoch 8/10, Loss: 0.0040
Epoch 9/10, Loss: 0.0030
Epoch 10/10, Loss: 0.0020

Evaluation on test data:
              precision    recall  f1-score   support

           B     0.9914    0.9274    0.9583       248
           E     0.9741    0.9113    0.9417       248
           I     0.9714    0.7234    0.8293        47
           O     0.9957    0.9996    0.9976     11488
           S     0.7500    0.6000    0.6667         5

    accuracy                         0.9950     12036
   macro avg     0.9365    0.8323    0.8787     12036
weighted avg     0.9949    0.9950    0.9949     12036

Accuracy: 0.9950
Precision: 0.9949
Recall: 0.9950
F1 Score: 0.9949


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load IOE Data
def load_excel_data_ioe(file_path):
    """Read the IOE-tagged token table and group its rows into sentences.

    The file is parsed with ``pd.read_csv`` when ``file_path`` ends in
    ``.csv`` and with ``pd.read_excel`` otherwise. Only the columns
    ``'Word i'`` and ``'Word i entity tag'`` are used, and rows with a
    missing value in either of them are dropped.

    A row whose stripped word is ``'.'`` or ``'؟'`` closes the current
    sentence; the terminator and its tag are not stored.

    Returns:
        ``(sentences, labels)``: two parallel lists of stripped word strings
        and stripped tag strings, one inner list per sentence.
    """
    parse = pd.read_csv if file_path.endswith('.csv') else pd.read_excel
    data = parse(file_path)[['Word i', 'Word i entity tag']].dropna()

    boundary_marks = ('.', '؟')
    sentences, labels = [], []
    open_words, open_tags = [], []
    for raw_word, raw_tag in zip(data['Word i'], data['Word i entity tag']):
        cleaned = str(raw_word).strip()
        if cleaned not in boundary_marks:
            open_words.append(cleaned)
            open_tags.append(str(raw_tag).strip())
            continue
        # Boundary row: emit the open sentence unless it is empty.
        if open_words:
            sentences.append(open_words)
            labels.append(open_tags)
            open_words, open_tags = [], []

    # Emit whatever remains after the last boundary.
    if open_words:
        sentences.append(open_words)
        labels.append(open_tags)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Create the word-to-index and tag-to-index lookup tables.

    ``'<PAD>'`` and ``'<UNK>'`` always occupy word indices 0 and 1. Every
    other word, and every tag, gets the next free index the first time it is
    seen.

    Returns:
        ``(word2idx, tag2idx)``.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    all_words = (word for sent in sentences for word in sent)
    for word in all_words:
        if word in word2idx:
            continue
        word2idx[word] = len(word2idx)

    tag2idx = {}
    all_tags = (tag for tag_seq in tags for tag in tag_seq)
    for tag in all_tags:
        if tag in tag2idx:
            continue
        tag2idx[tag] = len(tag2idx)

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Fixed-length view over tokenised sentences and their tag sequences.

    Each item is a pair of tensors built from one sentence: its word indices
    and its tag indices, cut or padded according to ``max_len``.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        """Store the raw data, the two lookup tables and ``max_len``."""
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        """Return the number of sentences."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return ``(word_ids, tag_ids)`` tensors for sentence ``idx``.

        Words missing from ``word2idx`` map to the ``'<UNK>'`` index. Short
        sentences are right-padded with the ``'<PAD>'`` index and their tags
        with ``-100``; sentences of ``max_len`` or more are cut to ``max_len``.
        """
        sent, tag_seq = self.sentences[idx], self.tags[idx]

        word_ids = [
            self.word2idx.get(token, self.word2idx['<UNK>']) for token in sent
        ]
        tag_ids = [self.tag2idx[name] for name in tag_seq]

        missing = self.max_len - len(word_ids)
        if missing <= 0:
            # Already at or beyond the limit: keep the first max_len entries.
            word_ids = word_ids[:self.max_len]
            tag_ids = tag_ids[:self.max_len]
        else:
            word_ids.extend([self.word2idx['<PAD>']] * missing)
            # -100 is the label value used to mark padded positions.
            tag_ids.extend([-100] * missing)

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Embedding -> bidirectional LSTM -> linear layer giving per-token tag logits.

    Index 0 is the embedding's ``padding_idx``. For batched input,
    ``forward`` treats index-0 positions at the end of a row as padding and
    keeps them out of the recurrence.
    """

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            tagset_size: Number of output logits per token.
            embedding_dim: Size of each word embedding.
            hidden_dim: Hidden size of each LSTM direction; the linear layer
                receives ``hidden_dim * 2`` features.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, tagset_size)

    def forward(self, x):
        """Return tag logits for word indices ``x``.

        Args:
            x: Integer tensor of word indices. A 2-D ``(batch, seq_len)``
                tensor is expected to be right-padded with index 0.

        Returns:
            One score per tag for every position; for 2-D input the shape is
            ``(batch, seq_len, tagset_size)``.
        """
        emb = self.embedding(x)
        if x.dim() != 2:
            # Not a (batch, seq_len) tensor: no per-row padding to mask.
            lstm_out, _ = self.lstm(emb)
            return self.fc(lstm_out)

        # Without packing, the backward direction would consume the padding
        # before the real tokens, making the logits of a sentence depend on
        # how much padding follows it. Rows that are entirely padding are
        # given length 1 because packing rejects zero-length sequences.
        lengths = (x != self.embedding.padding_idx).sum(dim=1).clamp(min=1).cpu()
        packed = nn.utils.rnn.pack_padded_sequence(
            emb, lengths, batch_first=True, enforce_sorted=False
        )
        packed_out, _ = self.lstm(packed)
        # total_length restores the input's seq_len so callers see the same
        # output shape as before; padded positions hold zeros before the
        # linear layer.
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(
            packed_out, batch_first=True, total_length=x.size(1)
        )
        return self.fc(lstm_out)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called on each input batch; its output is flattened to
            ``(-1, num_tags)`` before being passed to ``criterion``.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. It does
            not need to implement ``__len__``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(flat_logits, flat_targets)``.
        device: Device the batches are moved to.

    Returns:
        The average of the per-batch loss values, or ``0.0`` when the loader
        yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for x_batch, y_batch in data_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        logits = model(x_batch)  # (batch_size, seq_len, num_tags)

        # reshape (unlike view) also accepts non-contiguous tensors.
        flat_logits = logits.reshape(-1, logits.shape[-1])
        flat_targets = y_batch.reshape(-1)

        loss = criterion(flat_logits, flat_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Count the batches actually seen instead of calling len(data_loader):
    # this avoids a ZeroDivisionError on an empty loader and works for
    # iterables that have no length.
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

# 6. Evaluation function with classification report and metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Score the model token by token on ``data_loader``.

    Positions whose gold tag id is ``-100`` are excluded. The remaining gold
    and predicted ids are mapped back to tag strings and handed to the
    scikit-learn metric functions.

    Returns:
        ``(report, accuracy, precision, recall, f1)`` where ``report`` is the
        text of ``classification_report`` and precision/recall/F1 use
        ``average='weighted'``.
    """
    model.eval()
    idx2tag = {index: tag for tag, index in tag2idx.items()}
    true_tags, pred_tags = [], []

    with torch.no_grad():
        for x_batch, y_batch in data_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            preds = torch.argmax(model(x_batch), dim=-1)

            # Flatten row by row so gold and predicted ids stay aligned.
            gold_ids = y_batch.reshape(-1).tolist()
            guess_ids = preds.reshape(-1).tolist()
            for gold, guess in zip(gold_ids, guess_ids):
                if gold == -100:  # padding position
                    continue
                true_tags.append(idx2tag[gold])
                pred_tags.append(idx2tag[guess])

    report = classification_report(true_tags, pred_tags, digits=4)
    accuracy = accuracy_score(true_tags, pred_tags)
    weighted = {'average': 'weighted'}
    precision = precision_score(true_tags, pred_tags, **weighted)
    recall = recall_score(true_tags, pred_tags, **weighted)
    f1 = f1_score(true_tags, pred_tags, **weighted)

    return report, accuracy, precision, recall, f1

# 7. Main execution
# Script entry point: load the data, train the tagger, print test-set metrics.
if __name__ == "__main__":
    # Seed the global RNG so weight initialisation and batch shuffling repeat
    # across runs (GPU kernels may still introduce small differences).
    seed = 42
    torch.manual_seed(seed)

    file_path = "/content/IOE.xlsx"  # Your IOE tagging dataset path
    sentences, tags = load_excel_data_ioe(file_path)
    # NOTE(review): the vocabulary is built from all sentences before the
    # split, so test-set words also receive ids and '<UNK>' is never used.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    # A dedicated, seeded generator keeps the train/test split identical
    # between runs, so reported metrics are comparable.
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset,
        [train_size, test_size],
        generator=torch.Generator().manual_seed(seed),
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # -100 is the tag id NERDataset writes at padding positions.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.2369
Epoch 2/10, Loss: 0.0502
Epoch 3/10, Loss: 0.0249
Epoch 4/10, Loss: 0.0148
Epoch 5/10, Loss: 0.0091
Epoch 6/10, Loss: 0.0054
Epoch 7/10, Loss: 0.0033
Epoch 8/10, Loss: 0.0023
Epoch 9/10, Loss: 0.0015
Epoch 10/10, Loss: 0.0010

Evaluation on test data:
              precision    recall  f1-score   support

           E     0.9810    0.9314    0.9556       277
           I     0.9720    0.9343    0.9528       335
           O     0.9964    0.9989    0.9977     10903

    accuracy                         0.9954     11515
   macro avg     0.9832    0.9549    0.9687     11515
weighted avg     0.9954    0.9954    0.9953     11515

Accuracy: 0.9954
Precision: 0.9954
Recall: 0.9954
F1 Score: 0.9953


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load BI Data
def load_excel_data_bi(file_path):
    """Read a word/tag table and group its rows into sentences.

    The file is parsed with ``pandas.read_csv`` when its name ends in
    ``.csv`` (compared case-insensitively) and with ``pandas.read_excel``
    otherwise. Only the ``'Word i'`` and ``'Word i entity tag'`` columns are
    used, and rows with a missing value in either column are dropped.

    A row whose stripped word is ``'.'`` or ``'؟'`` closes the current
    sentence; that delimiter row (word and tag) is not kept.

    Intended for the BI-scheme dataset; the tag values themselves are not
    validated here.

    Args:
        file_path: Location of the CSV or Excel file (``str`` or path-like).

    Returns:
        ``(sentences, labels)``: two parallel lists in which each element is
        the list of stripped words / stripped tags of one sentence.
    """
    # Lower-case the name first so "data.CSV" is not handed to read_excel;
    # str() lets path-like objects through the suffix test as well.
    is_csv = str(file_path).lower().endswith('.csv')
    df = pd.read_csv(file_path) if is_csv else pd.read_excel(file_path)
    df = df[['Word i', 'Word i entity tag']].dropna()

    sentence_delimiters = {'.', '؟'}
    sentences, labels = [], []
    sentence, label = [], []
    for word, tag in zip(df['Word i'], df['Word i entity tag']):
        token = str(word).strip()
        if token in sentence_delimiters:
            # Consecutive delimiters must not produce empty sentences.
            if sentence:
                sentences.append(sentence)
                labels.append(label)
                sentence, label = [], []
        else:
            sentence.append(token)
            label.append(str(tag).strip())

    # Flush a trailing sentence that has no closing delimiter.
    if sentence:
        sentences.append(sentence)
        labels.append(label)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Assign an integer id to every distinct word and tag, in first-seen order.

    Word ids continue after the two reserved entries ``'<PAD>'`` (0) and
    ``'<UNK>'`` (1); tag ids start at 0.

    Returns:
        The ``(word2idx, tag2idx)`` dictionaries.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    for sentence in sentences:
        for token in sentence:
            # The next free id equals the current size; known tokens keep theirs.
            word2idx.setdefault(token, len(word2idx))

    tag2idx = {}
    for sequence in tags:
        for label in sequence:
            tag2idx.setdefault(label, len(tag2idx))

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Dataset yielding fixed-length ``(word_ids, tag_ids)`` tensor pairs.

    Each sentence is cut or right-padded to ``max_len`` positions. Padding
    positions hold the ``'<PAD>'`` word id and the tag id ``-100``.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        # sentences[i] and tags[i] are expected to describe the same sentence.
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        """Return how many sentences the dataset holds."""
        sentence_count = len(self.sentences)
        return sentence_count

    def __getitem__(self, idx):
        """Return sentence ``idx`` as a pair of id tensors.

        Words missing from ``word2idx`` are replaced by the ``'<UNK>'`` id.
        """
        tokens, labels = self.sentences[idx], self.tags[idx]

        # Look up the integer id of every word and of every tag.
        word_ids = []
        for token in tokens:
            word_ids.append(self.word2idx.get(token, self.word2idx['<UNK>']))
        tag_ids = []
        for label in labels:
            tag_ids.append(self.tag2idx[label])

        # The amount of padding is derived from the number of words.
        shortfall = self.max_len - len(word_ids)
        if shortfall <= 0:
            # Already long enough: keep only the first max_len positions.
            word_ids = word_ids[:self.max_len]
            tag_ids = tag_ids[:self.max_len]
        else:
            word_ids = word_ids + [self.word2idx['<PAD>']] * shortfall
            tag_ids = tag_ids + [-100] * shortfall  # -100 marks positions the loss should ignore

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Sequence tagger: embedding -> bidirectional LSTM -> linear tag scorer."""

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        super().__init__()
        # Index 0 is registered as the padding index of the embedding table.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
            padding_idx=0,
        )
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True,
        )
        # Both LSTM directions are concatenated, so the scorer reads 2 * hidden_dim features.
        self.fc = nn.Linear(in_features=hidden_dim * 2, out_features=tagset_size)

    def forward(self, x):
        """Return one vector of ``tagset_size`` logits per input position."""
        hidden_states, _ = self.lstm(self.embedding(x))
        return self.fc(hidden_states)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(x_batch)``; its output is flattened
            over every dimension except the last before the loss is computed.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. It does
            not need to implement ``__len__``.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: Loss callable taking ``(flattened_outputs, flattened_targets)``.
        device: Device both tensors of every batch are moved to.

    Returns:
        The arithmetic mean of the per-batch loss values as a ``float``, or
        ``0.0`` when ``data_loader`` yields no batch at all.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for x_batch, y_batch in data_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(x_batch)  # (batch_size, seq_len, num_tags)

        # reshape (unlike view) also works when a tensor is not contiguous.
        flat_outputs = outputs.reshape(-1, outputs.shape[-1])
        flat_targets = y_batch.reshape(-1)

        loss = criterion(flat_outputs, flat_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Count batches ourselves: avoids ZeroDivisionError on an empty loader and
    # does not require the loader to support len().
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

# 6. Evaluation function with classification report and metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Score the model token by token on ``data_loader``.

    Positions whose gold tag id is ``-100`` are excluded. The remaining gold
    and predicted ids are mapped back to tag strings and handed to the
    scikit-learn metric functions.

    Returns:
        ``(report, accuracy, precision, recall, f1)`` where ``report`` is the
        text of ``classification_report`` and precision/recall/F1 use
        ``average='weighted'``.
    """
    model.eval()
    idx2tag = {index: tag for tag, index in tag2idx.items()}
    true_tags, pred_tags = [], []

    with torch.no_grad():
        for x_batch, y_batch in data_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            preds = torch.argmax(model(x_batch), dim=-1)

            # Flatten row by row so gold and predicted ids stay aligned.
            gold_ids = y_batch.reshape(-1).tolist()
            guess_ids = preds.reshape(-1).tolist()
            for gold, guess in zip(gold_ids, guess_ids):
                if gold == -100:  # padding position
                    continue
                true_tags.append(idx2tag[gold])
                pred_tags.append(idx2tag[guess])

    report = classification_report(true_tags, pred_tags, digits=4)
    accuracy = accuracy_score(true_tags, pred_tags)
    weighted = {'average': 'weighted'}
    precision = precision_score(true_tags, pred_tags, **weighted)
    recall = recall_score(true_tags, pred_tags, **weighted)
    f1 = f1_score(true_tags, pred_tags, **weighted)

    return report, accuracy, precision, recall, f1

# 7. Main execution
# Script entry point: load the data, train the tagger, print test-set metrics.
if __name__ == "__main__":
    # Seed the global RNG so weight initialisation and batch shuffling repeat
    # across runs (GPU kernels may still introduce small differences).
    seed = 42
    torch.manual_seed(seed)

    file_path = "/content/BI.xlsx"  # Your BI tagging dataset path
    sentences, tags = load_excel_data_bi(file_path)
    # NOTE(review): the vocabulary is built from all sentences before the
    # split, so test-set words also receive ids and '<UNK>' is never used.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    # A dedicated, seeded generator keeps the train/test split identical
    # between runs, so reported metrics are comparable.
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset,
        [train_size, test_size],
        generator=torch.Generator().manual_seed(seed),
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # -100 is the tag id NERDataset writes at padding positions.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.3285
Epoch 2/10, Loss: 0.0725
Epoch 3/10, Loss: 0.0353
Epoch 4/10, Loss: 0.0204
Epoch 5/10, Loss: 0.0119
Epoch 6/10, Loss: 0.0078
Epoch 7/10, Loss: 0.0055
Epoch 8/10, Loss: 0.0034
Epoch 9/10, Loss: 0.0024
Epoch 10/10, Loss: 0.0017

Evaluation on test data:
              precision    recall  f1-score   support

           B     0.9577    0.9842    0.9708       253
          BO     0.9474    0.9340    0.9406       212
           I     0.9585    0.9685    0.9635       286
          IO     0.9983    0.9976    0.9979     10905

    accuracy                         0.9955     11656
   macro avg     0.9654    0.9711    0.9682     11656
weighted avg     0.9955    0.9955    0.9955     11656

Accuracy: 0.9955
Precision: 0.9955
Recall: 0.9955
F1 Score: 0.9955


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load BIES Data
def load_excel_data_bies(file_path):
    """Read a word/tag table and group its rows into sentences.

    The file is parsed with ``pandas.read_csv`` when its name ends in
    ``.csv`` (compared case-insensitively) and with ``pandas.read_excel``
    otherwise. Only the ``'Word i'`` and ``'Word i entity tag'`` columns are
    used, and rows with a missing value in either column are dropped.

    A row whose stripped word is ``'.'`` or ``'؟'`` closes the current
    sentence; that delimiter row (word and tag) is not kept.

    Intended for the BIES-scheme dataset; the tag values themselves are not
    validated here.

    Args:
        file_path: Location of the CSV or Excel file (``str`` or path-like).

    Returns:
        ``(sentences, labels)``: two parallel lists in which each element is
        the list of stripped words / stripped tags of one sentence.
    """
    # Lower-case the name first so "data.CSV" is not handed to read_excel;
    # str() lets path-like objects through the suffix test as well.
    is_csv = str(file_path).lower().endswith('.csv')
    df = pd.read_csv(file_path) if is_csv else pd.read_excel(file_path)
    df = df[['Word i', 'Word i entity tag']].dropna()

    sentence_delimiters = {'.', '؟'}
    sentences, labels = [], []
    sentence, label = [], []
    for word, tag in zip(df['Word i'], df['Word i entity tag']):
        token = str(word).strip()
        if token in sentence_delimiters:
            # Consecutive delimiters must not produce empty sentences.
            if sentence:
                sentences.append(sentence)
                labels.append(label)
                sentence, label = [], []
        else:
            sentence.append(token)
            label.append(str(tag).strip())

    # Flush a trailing sentence that has no closing delimiter.
    if sentence:
        sentences.append(sentence)
        labels.append(label)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Assign an integer id to every distinct word and tag, in first-seen order.

    Word ids continue after the two reserved entries ``'<PAD>'`` (0) and
    ``'<UNK>'`` (1); tag ids start at 0.

    Returns:
        The ``(word2idx, tag2idx)`` dictionaries.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    for sentence in sentences:
        for token in sentence:
            # The next free id equals the current size; known tokens keep theirs.
            word2idx.setdefault(token, len(word2idx))

    tag2idx = {}
    for sequence in tags:
        for label in sequence:
            tag2idx.setdefault(label, len(tag2idx))

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Dataset yielding fixed-length ``(word_ids, tag_ids)`` tensor pairs.

    Each sentence is cut or right-padded to ``max_len`` positions. Padding
    positions hold the ``'<PAD>'`` word id and the tag id ``-100``.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        # sentences[i] and tags[i] are expected to describe the same sentence.
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        """Return how many sentences the dataset holds."""
        sentence_count = len(self.sentences)
        return sentence_count

    def __getitem__(self, idx):
        """Return sentence ``idx`` as a pair of id tensors.

        Words missing from ``word2idx`` are replaced by the ``'<UNK>'`` id.
        """
        tokens, labels = self.sentences[idx], self.tags[idx]

        # Look up the integer id of every word and of every tag.
        word_ids = []
        for token in tokens:
            word_ids.append(self.word2idx.get(token, self.word2idx['<UNK>']))
        tag_ids = []
        for label in labels:
            tag_ids.append(self.tag2idx[label])

        # The amount of padding is derived from the number of words.
        shortfall = self.max_len - len(word_ids)
        if shortfall <= 0:
            # Already long enough: keep only the first max_len positions.
            word_ids = word_ids[:self.max_len]
            tag_ids = tag_ids[:self.max_len]
        else:
            word_ids = word_ids + [self.word2idx['<PAD>']] * shortfall
            tag_ids = tag_ids + [-100] * shortfall  # -100 marks positions the loss should ignore

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Sequence tagger: embedding -> bidirectional LSTM -> linear tag scorer."""

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        super().__init__()
        # Index 0 is registered as the padding index of the embedding table.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
            padding_idx=0,
        )
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True,
        )
        # Both LSTM directions are concatenated, so the scorer reads 2 * hidden_dim features.
        self.fc = nn.Linear(in_features=hidden_dim * 2, out_features=tagset_size)

    def forward(self, x):
        """Return one vector of ``tagset_size`` logits per input position."""
        hidden_states, _ = self.lstm(self.embedding(x))
        return self.fc(hidden_states)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(x_batch)``; its output is flattened
            over every dimension except the last before the loss is computed.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. It does
            not need to implement ``__len__``.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: Loss callable taking ``(flattened_outputs, flattened_targets)``.
        device: Device both tensors of every batch are moved to.

    Returns:
        The arithmetic mean of the per-batch loss values as a ``float``, or
        ``0.0`` when ``data_loader`` yields no batch at all.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for x_batch, y_batch in data_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(x_batch)  # (batch_size, seq_len, num_tags)

        # reshape (unlike view) also works when a tensor is not contiguous.
        flat_outputs = outputs.reshape(-1, outputs.shape[-1])
        flat_targets = y_batch.reshape(-1)

        loss = criterion(flat_outputs, flat_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Count batches ourselves: avoids ZeroDivisionError on an empty loader and
    # does not require the loader to support len().
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

# 6. Evaluation function with classification report and metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Score the model token by token on ``data_loader``.

    Positions whose gold tag id is ``-100`` are excluded. The remaining gold
    and predicted ids are mapped back to tag strings and handed to the
    scikit-learn metric functions.

    Returns:
        ``(report, accuracy, precision, recall, f1)`` where ``report`` is the
        text of ``classification_report`` and precision/recall/F1 use
        ``average='weighted'``.
    """
    model.eval()
    idx2tag = {index: tag for tag, index in tag2idx.items()}
    true_tags, pred_tags = [], []

    with torch.no_grad():
        for x_batch, y_batch in data_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            preds = torch.argmax(model(x_batch), dim=-1)

            # Flatten row by row so gold and predicted ids stay aligned.
            gold_ids = y_batch.reshape(-1).tolist()
            guess_ids = preds.reshape(-1).tolist()
            for gold, guess in zip(gold_ids, guess_ids):
                if gold == -100:  # padding position
                    continue
                true_tags.append(idx2tag[gold])
                pred_tags.append(idx2tag[guess])

    report = classification_report(true_tags, pred_tags, digits=4)
    accuracy = accuracy_score(true_tags, pred_tags)
    weighted = {'average': 'weighted'}
    precision = precision_score(true_tags, pred_tags, **weighted)
    recall = recall_score(true_tags, pred_tags, **weighted)
    f1 = f1_score(true_tags, pred_tags, **weighted)

    return report, accuracy, precision, recall, f1

# 7. Main execution
# Script entry point: load the data, train the tagger, print test-set metrics.
if __name__ == "__main__":
    # Seed the global RNG so weight initialisation and batch shuffling repeat
    # across runs (GPU kernels may still introduce small differences).
    seed = 42
    torch.manual_seed(seed)

    file_path = "/content/BIES.xlsx"  # Your BIES tagging dataset path
    sentences, tags = load_excel_data_bies(file_path)
    # NOTE(review): the vocabulary is built from all sentences before the
    # split, so test-set words also receive ids and '<UNK>' is never used.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    # A dedicated, seeded generator keeps the train/test split identical
    # between runs, so reported metrics are comparable.
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset,
        [train_size, test_size],
        generator=torch.Generator().manual_seed(seed),
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # -100 is the tag id NERDataset writes at padding positions.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.5207
Epoch 2/10, Loss: 0.1246
Epoch 3/10, Loss: 0.0596
Epoch 4/10, Loss: 0.0381
Epoch 5/10, Loss: 0.0254
Epoch 6/10, Loss: 0.0174
Epoch 7/10, Loss: 0.0118
Epoch 8/10, Loss: 0.0096
Epoch 9/10, Loss: 0.0067
Epoch 10/10, Loss: 0.0050

Evaluation on test data:
              precision    recall  f1-score   support

           B     0.9640    0.9683    0.9661       221
          BO     0.8989    0.9357    0.9169       171
           E     0.9286    0.9412    0.9348       221
          EO     0.9481    0.9710    0.9594       207
           I     0.9062    0.6905    0.7838        42
          IO     0.9967    0.9961    0.9964     10900
           S     0.6667    0.6667    0.6667         3
          SO     1.0000    1.0000    1.0000         4

    accuracy                         0.9921     11769
   macro avg     0.9136    0.8962    0.9030     11769
weighted avg     0.9921    0.9921    0.9920     11769

Accuracy: 0.9921
Precision: 0.9921
Recall: 0.9921
F1 Score: 0.9920


##SECOND

In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load Data
def load_excel_data(file_path):
    """Read a word/tag table and group its rows into sentences.

    The file is parsed with ``pandas.read_csv`` when its name ends in
    ``.csv`` (compared case-insensitively) and with ``pandas.read_excel``
    otherwise. Only the ``'Word i'`` and ``'Word i entity tag'`` columns are
    used, and rows with a missing value in either column are dropped.

    A row whose stripped word is ``'.'`` or ``'؟'`` closes the current
    sentence; that delimiter row (word and tag) is not kept.

    Args:
        file_path: Location of the CSV or Excel file (``str`` or path-like).

    Returns:
        ``(sentences, labels)``: two parallel lists in which each element is
        the list of stripped words / stripped tags of one sentence.
    """
    # Lower-case the name first so "data.CSV" is not handed to read_excel;
    # str() lets path-like objects through the suffix test as well.
    is_csv = str(file_path).lower().endswith('.csv')
    df = pd.read_csv(file_path) if is_csv else pd.read_excel(file_path)
    df = df[['Word i', 'Word i entity tag']].dropna()

    sentence_delimiters = {'.', '؟'}
    sentences, labels = [], []
    sentence, label = [], []
    for word, tag in zip(df['Word i'], df['Word i entity tag']):
        token = str(word).strip()
        if token in sentence_delimiters:
            # Consecutive delimiters must not produce empty sentences.
            if sentence:
                sentences.append(sentence)
                labels.append(label)
                sentence, label = [], []
        else:
            sentence.append(token)
            label.append(str(tag).strip())

    # Flush a trailing sentence that has no closing delimiter.
    if sentence:
        sentences.append(sentence)
        labels.append(label)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Assign an integer id to every distinct word and tag, in first-seen order.

    Word ids continue after the two reserved entries ``'<PAD>'`` (0) and
    ``'<UNK>'`` (1); tag ids start at 0.

    Returns:
        The ``(word2idx, tag2idx)`` dictionaries.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    for sentence in sentences:
        for token in sentence:
            # The next free id equals the current size; known tokens keep theirs.
            word2idx.setdefault(token, len(word2idx))

    tag2idx = {}
    for sequence in tags:
        for label in sequence:
            tag2idx.setdefault(label, len(tag2idx))

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Dataset yielding fixed-length ``(word_ids, tag_ids)`` tensor pairs.

    Each sentence is cut or right-padded to ``max_len`` positions. Padding
    positions hold the ``'<PAD>'`` word id and the tag id ``-100``.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        # sentences[i] and tags[i] are expected to describe the same sentence.
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        """Return how many sentences the dataset holds."""
        sentence_count = len(self.sentences)
        return sentence_count

    def __getitem__(self, idx):
        """Return sentence ``idx`` as a pair of id tensors.

        Words missing from ``word2idx`` are replaced by the ``'<UNK>'`` id.
        """
        tokens, labels = self.sentences[idx], self.tags[idx]

        # Look up the integer id of every word and of every tag.
        word_ids = []
        for token in tokens:
            word_ids.append(self.word2idx.get(token, self.word2idx['<UNK>']))
        tag_ids = []
        for label in labels:
            tag_ids.append(self.tag2idx[label])

        # The amount of padding is derived from the number of words.
        shortfall = self.max_len - len(word_ids)
        if shortfall <= 0:
            # Already long enough: keep only the first max_len positions.
            word_ids = word_ids[:self.max_len]
            tag_ids = tag_ids[:self.max_len]
        else:
            word_ids = word_ids + [self.word2idx['<PAD>']] * shortfall
            tag_ids = tag_ids + [-100] * shortfall  # -100 marks positions the loss should ignore

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Sequence tagger: embedding -> bidirectional LSTM -> linear tag scorer."""

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        super().__init__()
        # Index 0 is registered as the padding index of the embedding table.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
            padding_idx=0,
        )
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True,
        )
        # Both LSTM directions are concatenated, so the scorer reads 2 * hidden_dim features.
        self.fc = nn.Linear(in_features=hidden_dim * 2, out_features=tagset_size)

    def forward(self, x):
        """Return one vector of ``tagset_size`` logits per input position."""
        hidden_states, _ = self.lstm(self.embedding(x))
        return self.fc(hidden_states)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(x_batch)``; its output is flattened
            over every dimension except the last before the loss is computed.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. It does
            not need to implement ``__len__``.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: Loss callable taking ``(flattened_outputs, flattened_targets)``.
        device: Device both tensors of every batch are moved to.

    Returns:
        The arithmetic mean of the per-batch loss values as a ``float``, or
        ``0.0`` when ``data_loader`` yields no batch at all.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for x_batch, y_batch in data_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(x_batch)  # shape: (batch_size, seq_len, num_tags)

        # reshape (unlike view) also works when a tensor is not contiguous.
        flat_outputs = outputs.reshape(-1, outputs.shape[-1])
        flat_targets = y_batch.reshape(-1)

        loss = criterion(flat_outputs, flat_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Count batches ourselves: avoids ZeroDivisionError on an empty loader and
    # does not require the loader to support len().
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

# 6. Evaluation function with detailed metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Score the model token by token on ``data_loader``.

    Positions whose gold tag id is ``-100`` are excluded. The remaining gold
    and predicted ids are mapped back to tag strings and handed to the
    scikit-learn metric functions.

    Returns:
        ``(report, accuracy, precision, recall, f1)`` where ``report`` is the
        text of ``classification_report`` and precision/recall/F1 use
        ``average='weighted'``.
    """
    model.eval()
    idx2tag = {index: tag for tag, index in tag2idx.items()}
    true_tags, pred_tags = [], []

    with torch.no_grad():
        for x_batch, y_batch in data_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            preds = torch.argmax(model(x_batch), dim=-1)

            # Flatten row by row so gold and predicted ids stay aligned.
            gold_ids = y_batch.reshape(-1).tolist()
            guess_ids = preds.reshape(-1).tolist()
            for gold, guess in zip(gold_ids, guess_ids):
                if gold == -100:  # padding position
                    continue
                true_tags.append(idx2tag[gold])
                pred_tags.append(idx2tag[guess])

    report = classification_report(true_tags, pred_tags, digits=4)
    accuracy = accuracy_score(true_tags, pred_tags)
    weighted = {'average': 'weighted'}
    precision = precision_score(true_tags, pred_tags, **weighted)
    recall = recall_score(true_tags, pred_tags, **weighted)
    f1 = f1_score(true_tags, pred_tags, **weighted)

    return report, accuracy, precision, recall, f1

# 7. Main execution
# Script entry point: load the data, train the tagger, print test-set metrics.
if __name__ == "__main__":
    # Seed the global RNG so weight initialisation and batch shuffling repeat
    # across runs (GPU kernels may still introduce small differences).
    seed = 42
    torch.manual_seed(seed)

    file_path = "/content/IO.xlsx"  # Your IO tagging dataset path
    sentences, tags = load_excel_data(file_path)
    # NOTE(review): the vocabulary is built from all sentences before the
    # split, so test-set words also receive ids and '<UNK>' is never used.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    # A dedicated, seeded generator keeps the train/test split identical
    # between runs, so reported metrics are comparable.
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset,
        [train_size, test_size],
        generator=torch.Generator().manual_seed(seed),
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # -100 is the tag id NERDataset writes at padding positions.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.1560
Epoch 2/10, Loss: 0.0358
Epoch 3/10, Loss: 0.0183
Epoch 4/10, Loss: 0.0107
Epoch 5/10, Loss: 0.0059
Epoch 6/10, Loss: 0.0034
Epoch 7/10, Loss: 0.0018
Epoch 8/10, Loss: 0.0009
Epoch 9/10, Loss: 0.0006
Epoch 10/10, Loss: 0.0004

Evaluation on test data:
              precision    recall  f1-score   support

           I     0.9697    0.9428    0.9561       577
           O     0.9969    0.9984    0.9977     10789

    accuracy                         0.9956     11366
   macro avg     0.9833    0.9706    0.9769     11366
weighted avg     0.9956    0.9956    0.9956     11366

Accuracy: 0.9956
Precision: 0.9956
Recall: 0.9956
F1 Score: 0.9956


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load IE Data (adapted for IE tagging)
def load_excel_data_ie(file_path):
    """Read a word/tag table and group its rows into sentences.

    The file is parsed with ``pandas.read_csv`` when its name ends in
    ``.csv`` (compared case-insensitively) and with ``pandas.read_excel``
    otherwise. Only the ``'Word i'`` and ``'Word i entity tag'`` columns are
    used, and rows with a missing value in either column are dropped.

    A row whose stripped word is ``'.'`` or ``'؟'`` closes the current
    sentence; that delimiter row (word and tag) is not kept.

    Intended for the IE-scheme dataset, assumed to share the IO file layout;
    the tag values themselves are not validated here.

    Args:
        file_path: Location of the CSV or Excel file (``str`` or path-like).

    Returns:
        ``(sentences, labels)``: two parallel lists in which each element is
        the list of stripped words / stripped tags of one sentence.
    """
    # Lower-case the name first so "data.CSV" is not handed to read_excel;
    # str() lets path-like objects through the suffix test as well.
    is_csv = str(file_path).lower().endswith('.csv')
    df = pd.read_csv(file_path) if is_csv else pd.read_excel(file_path)
    df = df[['Word i', 'Word i entity tag']].dropna()

    sentence_delimiters = {'.', '؟'}
    sentences, labels = [], []
    sentence, label = [], []
    for word, tag in zip(df['Word i'], df['Word i entity tag']):
        token = str(word).strip()
        if token in sentence_delimiters:
            # Consecutive delimiters must not produce empty sentences.
            if sentence:
                sentences.append(sentence)
                labels.append(label)
                sentence, label = [], []
        else:
            sentence.append(token)
            label.append(str(tag).strip())

    # Flush a trailing sentence that has no closing delimiter.
    if sentence:
        sentences.append(sentence)
        labels.append(label)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Assign an integer id to every distinct word and tag, in first-seen order.

    Word ids continue after the two reserved entries ``'<PAD>'`` (0) and
    ``'<UNK>'`` (1); tag ids start at 0.

    Returns:
        The ``(word2idx, tag2idx)`` dictionaries.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    for sentence in sentences:
        for token in sentence:
            # The next free id equals the current size; known tokens keep theirs.
            word2idx.setdefault(token, len(word2idx))

    tag2idx = {}
    for sequence in tags:
        for label in sequence:
            tag2idx.setdefault(label, len(tag2idx))

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Dataset yielding fixed-length ``(word_ids, tag_ids)`` tensor pairs.

    Each sentence is cut or right-padded to ``max_len`` positions. Padding
    positions hold the ``'<PAD>'`` word id and the tag id ``-100``.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        # sentences[i] and tags[i] are expected to describe the same sentence.
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        """Return how many sentences the dataset holds."""
        sentence_count = len(self.sentences)
        return sentence_count

    def __getitem__(self, idx):
        """Return sentence ``idx`` as a pair of id tensors.

        Words missing from ``word2idx`` are replaced by the ``'<UNK>'`` id.
        """
        tokens, labels = self.sentences[idx], self.tags[idx]

        # Look up the integer id of every word and of every tag.
        word_ids = []
        for token in tokens:
            word_ids.append(self.word2idx.get(token, self.word2idx['<UNK>']))
        tag_ids = []
        for label in labels:
            tag_ids.append(self.tag2idx[label])

        # The amount of padding is derived from the number of words.
        shortfall = self.max_len - len(word_ids)
        if shortfall <= 0:
            # Already long enough: keep only the first max_len positions.
            word_ids = word_ids[:self.max_len]
            tag_ids = tag_ids[:self.max_len]
        else:
            word_ids = word_ids + [self.word2idx['<PAD>']] * shortfall
            tag_ids = tag_ids + [-100] * shortfall  # -100 marks positions the loss should ignore

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Sequence tagger: embedding -> bidirectional LSTM -> linear tag scorer."""

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        super().__init__()
        # Index 0 is registered as the padding index of the embedding table.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
            padding_idx=0,
        )
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True,
        )
        # Both LSTM directions are concatenated, so the scorer reads 2 * hidden_dim features.
        self.fc = nn.Linear(in_features=hidden_dim * 2, out_features=tagset_size)

    def forward(self, x):
        """Return one vector of ``tagset_size`` logits per input position."""
        hidden_states, _ = self.lstm(self.embedding(x))
        return self.fc(hidden_states)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(x_batch)``; its output is flattened
            over every dimension except the last before the loss is computed.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. It does
            not need to implement ``__len__``.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: Loss callable taking ``(flattened_outputs, flattened_targets)``.
        device: Device both tensors of every batch are moved to.

    Returns:
        The arithmetic mean of the per-batch loss values as a ``float``, or
        ``0.0`` when ``data_loader`` yields no batch at all.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for x_batch, y_batch in data_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(x_batch)  # shape: (batch_size, seq_len, num_tags)

        # reshape (unlike view) also works when a tensor is not contiguous.
        flat_outputs = outputs.reshape(-1, outputs.shape[-1])
        flat_targets = y_batch.reshape(-1)

        loss = criterion(flat_outputs, flat_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Count batches ourselves: avoids ZeroDivisionError on an empty loader and
    # does not require the loader to support len().
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

# 6. Evaluation function with detailed metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Score the model token by token on ``data_loader``.

    Positions whose gold tag id is ``-100`` are excluded. The remaining gold
    and predicted ids are mapped back to tag strings and handed to the
    scikit-learn metric functions.

    Returns:
        ``(report, accuracy, precision, recall, f1)`` where ``report`` is the
        text of ``classification_report`` and precision/recall/F1 use
        ``average='weighted'``.
    """
    model.eval()
    idx2tag = {index: tag for tag, index in tag2idx.items()}
    true_tags, pred_tags = [], []

    with torch.no_grad():
        for x_batch, y_batch in data_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            preds = torch.argmax(model(x_batch), dim=-1)

            # Flatten row by row so gold and predicted ids stay aligned.
            gold_ids = y_batch.reshape(-1).tolist()
            guess_ids = preds.reshape(-1).tolist()
            for gold, guess in zip(gold_ids, guess_ids):
                if gold == -100:  # padding position
                    continue
                true_tags.append(idx2tag[gold])
                pred_tags.append(idx2tag[guess])

    report = classification_report(true_tags, pred_tags, digits=4)
    accuracy = accuracy_score(true_tags, pred_tags)
    weighted = {'average': 'weighted'}
    precision = precision_score(true_tags, pred_tags, **weighted)
    recall = recall_score(true_tags, pred_tags, **weighted)
    f1 = f1_score(true_tags, pred_tags, **weighted)

    return report, accuracy, precision, recall, f1

# 7. Main execution
# Script entry point: load the data, train the tagger, print test-set metrics.
if __name__ == "__main__":
    # Seed the global RNG so weight initialisation and batch shuffling repeat
    # across runs (GPU kernels may still introduce small differences).
    seed = 42
    torch.manual_seed(seed)

    file_path = "/content/IE.xlsx"  # Your IE tagging dataset path
    sentences, tags = load_excel_data_ie(file_path)
    # NOTE(review): the vocabulary is built from all sentences before the
    # split, so test-set words also receive ids and '<UNK>' is never used.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    # A dedicated, seeded generator keeps the train/test split identical
    # between runs, so reported metrics are comparable.
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset,
        [train_size, test_size],
        generator=torch.Generator().manual_seed(seed),
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # -100 is the tag id NERDataset writes at padding positions.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.3476
Epoch 2/10, Loss: 0.0801
Epoch 3/10, Loss: 0.0409
Epoch 4/10, Loss: 0.0245
Epoch 5/10, Loss: 0.0142
Epoch 6/10, Loss: 0.0084
Epoch 7/10, Loss: 0.0054
Epoch 8/10, Loss: 0.0034
Epoch 9/10, Loss: 0.0021
Epoch 10/10, Loss: 0.0014

Evaluation on test data:
              precision    recall  f1-score   support

           E     0.9508    0.9280    0.9393       250
          EO     0.9664    0.9583    0.9623       240
           I     0.9582    0.9228    0.9402       298
          IO     0.9957    0.9975    0.9966     10773

    accuracy                         0.9933     11561
   macro avg     0.9678    0.9517    0.9596     11561
weighted avg     0.9932    0.9933    0.9932     11561

Accuracy: 0.9933
Precision: 0.9932
Recall: 0.9933
F1 Score: 0.9932


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load IOB Data
def load_excel_data_iob(file_path):
    """Read a token-per-row dataset and group its rows into sentences.

    Assumes the file has the columns 'Word i' and 'Word i entity tag' and that the
    tags follow the IOB scheme (the tags themselves are not validated here).

    Args:
        file_path: Path string. A path ending in '.csv' is read with
            ``pd.read_csv``; anything else is read with ``pd.read_excel``.

    Returns:
        ``(sentences, labels)``: two parallel lists of lists holding the stripped
        words and the stripped tags of each sentence.
    """
    reader = pd.read_csv if file_path.endswith('.csv') else pd.read_excel
    # Rows with a missing word or a missing tag are discarded.
    frame = reader(file_path)[['Word i', 'Word i entity tag']].dropna()

    sentences, labels = [], []
    cur_words, cur_tags = [], []
    for raw_word, raw_tag in zip(frame['Word i'], frame['Word i entity tag']):
        token = str(raw_word).strip()
        if token not in ('.', '؟'):
            cur_words.append(token)
            cur_tags.append(str(raw_tag).strip())
            continue
        # A boundary token closes the running sentence; the boundary token itself
        # is dropped, and consecutive boundaries never emit empty sentences.
        if cur_words:
            sentences.append(cur_words)
            labels.append(cur_tags)
            cur_words, cur_tags = [], []

    # Keep a trailing sentence that was not terminated by a boundary token.
    if cur_words:
        sentences.append(cur_words)
        labels.append(cur_tags)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Assign consecutive integer ids to words and tags in first-seen order.

    Args:
        sentences: Iterable of word sequences.
        tags: Iterable of tag sequences.

    Returns:
        ``(word2idx, tag2idx)``. ``word2idx`` always starts with '<PAD>' -> 0 and
        '<UNK>' -> 1; ``tag2idx`` starts empty and numbers tags from 0.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    for token in (w for sent in sentences for w in sent):
        # setdefault only inserts unseen keys, so each new word gets the next free id.
        word2idx.setdefault(token, len(word2idx))

    tag2idx = {}
    for label in (t for tag_seq in tags for t in tag_seq):
        tag2idx.setdefault(label, len(tag2idx))

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Dataset that turns tagged sentences into fixed-length index tensors.

    Every item is a pair ``(word_ids, tag_ids)``. Sequences shorter than
    ``max_len`` are right-padded (words with the '<PAD>' id, tags with -100);
    longer ones are cut to their first ``max_len`` entries.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        words, labels = self.sentences[idx], self.tags[idx]

        # Words missing from the vocabulary fall back to the '<UNK>' id;
        # an unknown tag raises KeyError.
        word_ids = [self.word2idx.get(tok, self.word2idx['<UNK>']) for tok in words]
        tag_ids = [self.tag2idx[lab] for lab in labels]

        missing = self.max_len - len(word_ids)
        if missing <= 0:
            # Already at or beyond the limit: keep only the first max_len entries.
            word_ids, tag_ids = word_ids[:self.max_len], tag_ids[:self.max_len]
        else:
            word_ids += [self.word2idx['<PAD>']] * missing
            tag_ids += [-100] * missing  # -100 marks positions to ignore (ignore_index for loss)

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Embedding -> bidirectional LSTM -> linear layer producing per-token tag scores.

    Args:
        vocab_size: Number of rows in the embedding table.
        tagset_size: Number of output scores per token.
        embedding_dim: Size of each word embedding.
        hidden_dim: Hidden size of each LSTM direction.
    """

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        super().__init__()
        # Index 0 is the padding index of the embedding table.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True,
        )
        # Both directions are concatenated, hence twice the hidden size.
        self.fc = nn.Linear(2 * hidden_dim, tagset_size)

    def forward(self, x):
        """Return tag logits for a batch of word-index sequences.

        The whole sequence, padded positions included, is run through the LSTM
        (no packing is applied).
        """
        hidden_states = self.lstm(self.embedding(x))[0]
        return self.fc(hidden_states)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(x_batch)``; its output's last dimension
            holds the per-tag scores.
        data_loader: Sized iterable of ``(x_batch, y_batch)`` tensor pairs.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called with flattened scores and flattened targets.
        device: Device both tensors are moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by ``len(data_loader)``
        (raises ZeroDivisionError if the loader has length 0).
    """
    model.train()
    running_loss = 0
    for inputs, targets in data_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        scores = model(inputs)  # (batch_size, seq_len, num_tags)

        # Collapse batch and sequence axes so every token is one sample for the loss.
        num_tags = scores.shape[-1]
        batch_loss = criterion(scores.view(-1, num_tags), targets.view(-1))

        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    return running_loss / len(data_loader)

# 6. Evaluation function with classification report and metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Evaluate a tagger token by token and summarise the results with scikit-learn.

    Args:
        model: Module called as ``model(x_batch)``; the arg-max over the last
            dimension of its output is taken as the predicted tag index.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. Positions
            whose label is -100 are treated as padding and excluded.
        tag2idx: Mapping from tag string to integer index; inverted here to turn
            indices back into tag strings.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(report, accuracy, precision, recall, f1)``: the ``classification_report``
        text, the token accuracy, and the 'weighted'-average precision, recall
        and F1 score.
    """
    model.eval()
    idx2tag = {idx: tag for tag, idx in tag2idx.items()}
    true_tags, pred_tags = [], []

    with torch.no_grad():
        for x_batch, y_batch in data_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            preds = torch.argmax(model(x_batch), dim=-1)

            # Pick every non-padding position of the batch with one boolean mask
            # instead of walking the tokens one by one in Python. Mask indexing
            # returns elements in row-major order, i.e. sentence by sentence.
            keep = y_batch != -100
            true_tags.extend(idx2tag[t] for t in y_batch[keep].tolist())
            pred_tags.extend(idx2tag[p] for p in preds[keep].tolist())

    report = classification_report(true_tags, pred_tags, digits=4)
    accuracy = accuracy_score(true_tags, pred_tags)
    precision = precision_score(true_tags, pred_tags, average='weighted')
    recall = recall_score(true_tags, pred_tags, average='weighted')
    f1 = f1_score(true_tags, pred_tags, average='weighted')

    return report, accuracy, precision, recall, f1

# 7. Main execution
if __name__ == "__main__":
    file_path = "/content/IOB.xlsx"  # Your IOB tagging dataset path
    sentences, tags = load_excel_data_iob(file_path)
    # NOTE(review): the vocabulary is built from all sentences, i.e. before the
    # train/test split below, so test-set words are already in word2idx.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    # 80/20 train/test split. The generator is seeded so the split is the same on
    # every run; an unseeded random_split makes the reported metrics irreproducible.
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset,
        [train_size, test_size],
        generator=torch.Generator().manual_seed(42),
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # -100 is the label NERDataset assigns to padded positions; the loss skips them.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.2283
Epoch 2/10, Loss: 0.0456
Epoch 3/10, Loss: 0.0237
Epoch 4/10, Loss: 0.0129
Epoch 5/10, Loss: 0.0067
Epoch 6/10, Loss: 0.0039
Epoch 7/10, Loss: 0.0023
Epoch 8/10, Loss: 0.0020
Epoch 9/10, Loss: 0.0011
Epoch 10/10, Loss: 0.0008

Evaluation on test data:
              precision    recall  f1-score   support

           B     0.9835    0.9370    0.9597       254
           I     0.9788    0.9264    0.9519       299
           O     0.9965    0.9991    0.9978     10744

    accuracy                         0.9958     11297
   macro avg     0.9862    0.9542    0.9698     11297
weighted avg     0.9957    0.9958    0.9957     11297

Accuracy: 0.9958
Precision: 0.9957
Recall: 0.9958
F1 Score: 0.9957


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load IOBES Data
def load_excel_data_iobes(file_path):
    """Read a token-per-row dataset and group its rows into sentences.

    Assumes the file has the columns 'Word i' and 'Word i entity tag' and that the
    tags follow the IOBES scheme (the tags themselves are not validated here).

    Args:
        file_path: Path string. A path ending in '.csv' is read with
            ``pd.read_csv``; anything else is read with ``pd.read_excel``.

    Returns:
        ``(sentences, labels)``: two parallel lists of lists holding the stripped
        words and the stripped tags of each sentence.
    """
    reader = pd.read_csv if file_path.endswith('.csv') else pd.read_excel
    # Rows with a missing word or a missing tag are discarded.
    frame = reader(file_path)[['Word i', 'Word i entity tag']].dropna()

    sentences, labels = [], []
    cur_words, cur_tags = [], []
    for raw_word, raw_tag in zip(frame['Word i'], frame['Word i entity tag']):
        token = str(raw_word).strip()
        if token not in ('.', '؟'):
            cur_words.append(token)
            cur_tags.append(str(raw_tag).strip())
            continue
        # A boundary token closes the running sentence; the boundary token itself
        # is dropped, and consecutive boundaries never emit empty sentences.
        if cur_words:
            sentences.append(cur_words)
            labels.append(cur_tags)
            cur_words, cur_tags = [], []

    # Keep a trailing sentence that was not terminated by a boundary token.
    if cur_words:
        sentences.append(cur_words)
        labels.append(cur_tags)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Assign consecutive integer ids to words and tags in first-seen order.

    Args:
        sentences: Iterable of word sequences.
        tags: Iterable of tag sequences.

    Returns:
        ``(word2idx, tag2idx)``. ``word2idx`` always starts with '<PAD>' -> 0 and
        '<UNK>' -> 1; ``tag2idx`` starts empty and numbers tags from 0.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    for token in (w for sent in sentences for w in sent):
        # setdefault only inserts unseen keys, so each new word gets the next free id.
        word2idx.setdefault(token, len(word2idx))

    tag2idx = {}
    for label in (t for tag_seq in tags for t in tag_seq):
        tag2idx.setdefault(label, len(tag2idx))

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Dataset that turns tagged sentences into fixed-length index tensors.

    Every item is a pair ``(word_ids, tag_ids)``. Sequences shorter than
    ``max_len`` are right-padded (words with the '<PAD>' id, tags with -100);
    longer ones are cut to their first ``max_len`` entries.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        words, labels = self.sentences[idx], self.tags[idx]

        # Words missing from the vocabulary fall back to the '<UNK>' id;
        # an unknown tag raises KeyError.
        word_ids = [self.word2idx.get(tok, self.word2idx['<UNK>']) for tok in words]
        tag_ids = [self.tag2idx[lab] for lab in labels]

        missing = self.max_len - len(word_ids)
        if missing <= 0:
            # Already at or beyond the limit: keep only the first max_len entries.
            word_ids, tag_ids = word_ids[:self.max_len], tag_ids[:self.max_len]
        else:
            word_ids += [self.word2idx['<PAD>']] * missing
            tag_ids += [-100] * missing  # -100 marks positions to ignore (ignore_index for loss)

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Embedding -> bidirectional LSTM -> linear layer producing per-token tag scores.

    Args:
        vocab_size: Number of rows in the embedding table.
        tagset_size: Number of output scores per token.
        embedding_dim: Size of each word embedding.
        hidden_dim: Hidden size of each LSTM direction.
    """

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        super().__init__()
        # Index 0 is the padding index of the embedding table.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True,
        )
        # Both directions are concatenated, hence twice the hidden size.
        self.fc = nn.Linear(2 * hidden_dim, tagset_size)

    def forward(self, x):
        """Return tag logits for a batch of word-index sequences.

        The whole sequence, padded positions included, is run through the LSTM
        (no packing is applied).
        """
        hidden_states = self.lstm(self.embedding(x))[0]
        return self.fc(hidden_states)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(x_batch)``; its output's last dimension
            holds the per-tag scores.
        data_loader: Sized iterable of ``(x_batch, y_batch)`` tensor pairs.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called with flattened scores and flattened targets.
        device: Device both tensors are moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by ``len(data_loader)``
        (raises ZeroDivisionError if the loader has length 0).
    """
    model.train()
    running_loss = 0
    for inputs, targets in data_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        scores = model(inputs)  # (batch_size, seq_len, num_tags)

        # Collapse batch and sequence axes so every token is one sample for the loss.
        num_tags = scores.shape[-1]
        batch_loss = criterion(scores.view(-1, num_tags), targets.view(-1))

        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    return running_loss / len(data_loader)

# 6. Evaluation function with classification report and metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Evaluate a tagger token by token and summarise the results with scikit-learn.

    Args:
        model: Module called as ``model(x_batch)``; the arg-max over the last
            dimension of its output is taken as the predicted tag index.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. Positions
            whose label is -100 are treated as padding and excluded.
        tag2idx: Mapping from tag string to integer index; inverted here to turn
            indices back into tag strings.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(report, accuracy, precision, recall, f1)``: the ``classification_report``
        text, the token accuracy, and the 'weighted'-average precision, recall
        and F1 score.
    """
    model.eval()
    idx2tag = {idx: tag for tag, idx in tag2idx.items()}
    true_tags, pred_tags = [], []

    with torch.no_grad():
        for x_batch, y_batch in data_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            preds = torch.argmax(model(x_batch), dim=-1)

            # Pick every non-padding position of the batch with one boolean mask
            # instead of walking the tokens one by one in Python. Mask indexing
            # returns elements in row-major order, i.e. sentence by sentence.
            keep = y_batch != -100
            true_tags.extend(idx2tag[t] for t in y_batch[keep].tolist())
            pred_tags.extend(idx2tag[p] for p in preds[keep].tolist())

    report = classification_report(true_tags, pred_tags, digits=4)
    accuracy = accuracy_score(true_tags, pred_tags)
    precision = precision_score(true_tags, pred_tags, average='weighted')
    recall = recall_score(true_tags, pred_tags, average='weighted')
    f1 = f1_score(true_tags, pred_tags, average='weighted')

    return report, accuracy, precision, recall, f1

# 7. Main execution
if __name__ == "__main__":
    file_path = "/content/IOBES.xlsx"  # Your IOBES tagging dataset path
    sentences, tags = load_excel_data_iobes(file_path)
    # NOTE(review): the vocabulary is built from all sentences, i.e. before the
    # train/test split below, so test-set words are already in word2idx.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    # 80/20 train/test split. The generator is seeded so the split is the same on
    # every run; an unseeded random_split makes the reported metrics irreproducible.
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset,
        [train_size, test_size],
        generator=torch.Generator().manual_seed(42),
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # -100 is the label NERDataset assigns to padded positions; the loss skips them.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.3241
Epoch 2/10, Loss: 0.0641
Epoch 3/10, Loss: 0.0300
Epoch 4/10, Loss: 0.0184
Epoch 5/10, Loss: 0.0128
Epoch 6/10, Loss: 0.0083
Epoch 7/10, Loss: 0.0054
Epoch 8/10, Loss: 0.0038
Epoch 9/10, Loss: 0.0026
Epoch 10/10, Loss: 0.0019

Evaluation on test data:
              precision    recall  f1-score   support

           B     0.9960    0.9691    0.9824       259
           E     0.9881    0.9614    0.9746       259
           I     1.0000    0.8824    0.9375        68
           O     0.9979    0.9998    0.9988     11210
           S     1.0000    1.0000    1.0000         2

    accuracy                         0.9976     11798
   macro avg     0.9964    0.9625    0.9787     11798
weighted avg     0.9976    0.9976    0.9976     11798

Accuracy: 0.9976
Precision: 0.9976
Recall: 0.9976
F1 Score: 0.9976


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load IOE Data
def load_excel_data_ioe(file_path):
    """Read a token-per-row dataset and group its rows into sentences.

    Assumes the file has the columns 'Word i' and 'Word i entity tag' and that the
    tags follow the IOE scheme (the tags themselves are not validated here).

    Args:
        file_path: Path string. A path ending in '.csv' is read with
            ``pd.read_csv``; anything else is read with ``pd.read_excel``.

    Returns:
        ``(sentences, labels)``: two parallel lists of lists holding the stripped
        words and the stripped tags of each sentence.
    """
    reader = pd.read_csv if file_path.endswith('.csv') else pd.read_excel
    # Rows with a missing word or a missing tag are discarded.
    frame = reader(file_path)[['Word i', 'Word i entity tag']].dropna()

    sentences, labels = [], []
    cur_words, cur_tags = [], []
    for raw_word, raw_tag in zip(frame['Word i'], frame['Word i entity tag']):
        token = str(raw_word).strip()
        if token not in ('.', '؟'):
            cur_words.append(token)
            cur_tags.append(str(raw_tag).strip())
            continue
        # A boundary token closes the running sentence; the boundary token itself
        # is dropped, and consecutive boundaries never emit empty sentences.
        if cur_words:
            sentences.append(cur_words)
            labels.append(cur_tags)
            cur_words, cur_tags = [], []

    # Keep a trailing sentence that was not terminated by a boundary token.
    if cur_words:
        sentences.append(cur_words)
        labels.append(cur_tags)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Assign consecutive integer ids to words and tags in first-seen order.

    Args:
        sentences: Iterable of word sequences.
        tags: Iterable of tag sequences.

    Returns:
        ``(word2idx, tag2idx)``. ``word2idx`` always starts with '<PAD>' -> 0 and
        '<UNK>' -> 1; ``tag2idx`` starts empty and numbers tags from 0.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    for token in (w for sent in sentences for w in sent):
        # setdefault only inserts unseen keys, so each new word gets the next free id.
        word2idx.setdefault(token, len(word2idx))

    tag2idx = {}
    for label in (t for tag_seq in tags for t in tag_seq):
        tag2idx.setdefault(label, len(tag2idx))

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Dataset that turns tagged sentences into fixed-length index tensors.

    Every item is a pair ``(word_ids, tag_ids)``. Sequences shorter than
    ``max_len`` are right-padded (words with the '<PAD>' id, tags with -100);
    longer ones are cut to their first ``max_len`` entries.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        words, labels = self.sentences[idx], self.tags[idx]

        # Words missing from the vocabulary fall back to the '<UNK>' id;
        # an unknown tag raises KeyError.
        word_ids = [self.word2idx.get(tok, self.word2idx['<UNK>']) for tok in words]
        tag_ids = [self.tag2idx[lab] for lab in labels]

        missing = self.max_len - len(word_ids)
        if missing <= 0:
            # Already at or beyond the limit: keep only the first max_len entries.
            word_ids, tag_ids = word_ids[:self.max_len], tag_ids[:self.max_len]
        else:
            word_ids += [self.word2idx['<PAD>']] * missing
            tag_ids += [-100] * missing  # -100 marks positions to ignore (ignore_index for loss)

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Embedding -> bidirectional LSTM -> linear layer producing per-token tag scores.

    Args:
        vocab_size: Number of rows in the embedding table.
        tagset_size: Number of output scores per token.
        embedding_dim: Size of each word embedding.
        hidden_dim: Hidden size of each LSTM direction.
    """

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        super().__init__()
        # Index 0 is the padding index of the embedding table.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True,
        )
        # Both directions are concatenated, hence twice the hidden size.
        self.fc = nn.Linear(2 * hidden_dim, tagset_size)

    def forward(self, x):
        """Return tag logits for a batch of word-index sequences.

        The whole sequence, padded positions included, is run through the LSTM
        (no packing is applied).
        """
        hidden_states = self.lstm(self.embedding(x))[0]
        return self.fc(hidden_states)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(x_batch)``; its output's last dimension
            holds the per-tag scores.
        data_loader: Sized iterable of ``(x_batch, y_batch)`` tensor pairs.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called with flattened scores and flattened targets.
        device: Device both tensors are moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by ``len(data_loader)``
        (raises ZeroDivisionError if the loader has length 0).
    """
    model.train()
    running_loss = 0
    for inputs, targets in data_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        scores = model(inputs)  # (batch_size, seq_len, num_tags)

        # Collapse batch and sequence axes so every token is one sample for the loss.
        num_tags = scores.shape[-1]
        batch_loss = criterion(scores.view(-1, num_tags), targets.view(-1))

        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    return running_loss / len(data_loader)

# 6. Evaluation function with classification report and metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Evaluate a tagger token by token and summarise the results with scikit-learn.

    Args:
        model: Module called as ``model(x_batch)``; the arg-max over the last
            dimension of its output is taken as the predicted tag index.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. Positions
            whose label is -100 are treated as padding and excluded.
        tag2idx: Mapping from tag string to integer index; inverted here to turn
            indices back into tag strings.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(report, accuracy, precision, recall, f1)``: the ``classification_report``
        text, the token accuracy, and the 'weighted'-average precision, recall
        and F1 score.
    """
    model.eval()
    idx2tag = {idx: tag for tag, idx in tag2idx.items()}
    true_tags, pred_tags = [], []

    with torch.no_grad():
        for x_batch, y_batch in data_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            preds = torch.argmax(model(x_batch), dim=-1)

            # Pick every non-padding position of the batch with one boolean mask
            # instead of walking the tokens one by one in Python. Mask indexing
            # returns elements in row-major order, i.e. sentence by sentence.
            keep = y_batch != -100
            true_tags.extend(idx2tag[t] for t in y_batch[keep].tolist())
            pred_tags.extend(idx2tag[p] for p in preds[keep].tolist())

    report = classification_report(true_tags, pred_tags, digits=4)
    accuracy = accuracy_score(true_tags, pred_tags)
    precision = precision_score(true_tags, pred_tags, average='weighted')
    recall = recall_score(true_tags, pred_tags, average='weighted')
    f1 = f1_score(true_tags, pred_tags, average='weighted')

    return report, accuracy, precision, recall, f1

# 7. Main execution
if __name__ == "__main__":
    file_path = "/content/IOE.xlsx"  # Your IOE tagging dataset path
    sentences, tags = load_excel_data_ioe(file_path)
    # NOTE(review): the vocabulary is built from all sentences, i.e. before the
    # train/test split below, so test-set words are already in word2idx.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    # 80/20 train/test split. The generator is seeded so the split is the same on
    # every run; an unseeded random_split makes the reported metrics irreproducible.
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset,
        [train_size, test_size],
        generator=torch.Generator().manual_seed(42),
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # -100 is the label NERDataset assigns to padded positions; the loss skips them.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.2238
Epoch 2/10, Loss: 0.0441
Epoch 3/10, Loss: 0.0244
Epoch 4/10, Loss: 0.0149
Epoch 5/10, Loss: 0.0090
Epoch 6/10, Loss: 0.0054
Epoch 7/10, Loss: 0.0036
Epoch 8/10, Loss: 0.0021
Epoch 9/10, Loss: 0.0012
Epoch 10/10, Loss: 0.0009

Evaluation on test data:
              precision    recall  f1-score   support

           E     0.9816    0.9175    0.9485       291
           I     0.9819    0.9420    0.9615       345
           O     0.9962    0.9991    0.9976     11196

    accuracy                         0.9954     11832
   macro avg     0.9866    0.9529    0.9692     11832
weighted avg     0.9954    0.9954    0.9954     11832

Accuracy: 0.9954
Precision: 0.9954
Recall: 0.9954
F1 Score: 0.9954


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load BI Data
def load_excel_data_bi(file_path):
    """Read a token-per-row dataset and group its rows into sentences.

    Assumes the file has the columns 'Word i' and 'Word i entity tag' and that the
    tags follow the BI scheme (the tags themselves are not validated here).

    Args:
        file_path: Path string. A path ending in '.csv' is read with
            ``pd.read_csv``; anything else is read with ``pd.read_excel``.

    Returns:
        ``(sentences, labels)``: two parallel lists of lists holding the stripped
        words and the stripped tags of each sentence.
    """
    reader = pd.read_csv if file_path.endswith('.csv') else pd.read_excel
    # Rows with a missing word or a missing tag are discarded.
    frame = reader(file_path)[['Word i', 'Word i entity tag']].dropna()

    sentences, labels = [], []
    cur_words, cur_tags = [], []
    for raw_word, raw_tag in zip(frame['Word i'], frame['Word i entity tag']):
        token = str(raw_word).strip()
        if token not in ('.', '؟'):
            cur_words.append(token)
            cur_tags.append(str(raw_tag).strip())
            continue
        # A boundary token closes the running sentence; the boundary token itself
        # is dropped, and consecutive boundaries never emit empty sentences.
        if cur_words:
            sentences.append(cur_words)
            labels.append(cur_tags)
            cur_words, cur_tags = [], []

    # Keep a trailing sentence that was not terminated by a boundary token.
    if cur_words:
        sentences.append(cur_words)
        labels.append(cur_tags)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Assign consecutive integer ids to words and tags in first-seen order.

    Args:
        sentences: Iterable of word sequences.
        tags: Iterable of tag sequences.

    Returns:
        ``(word2idx, tag2idx)``. ``word2idx`` always starts with '<PAD>' -> 0 and
        '<UNK>' -> 1; ``tag2idx`` starts empty and numbers tags from 0.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    for token in (w for sent in sentences for w in sent):
        # setdefault only inserts unseen keys, so each new word gets the next free id.
        word2idx.setdefault(token, len(word2idx))

    tag2idx = {}
    for label in (t for tag_seq in tags for t in tag_seq):
        tag2idx.setdefault(label, len(tag2idx))

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Dataset that turns tagged sentences into fixed-length index tensors.

    Every item is a pair ``(word_ids, tag_ids)``. Sequences shorter than
    ``max_len`` are right-padded (words with the '<PAD>' id, tags with -100);
    longer ones are cut to their first ``max_len`` entries.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        words, labels = self.sentences[idx], self.tags[idx]

        # Words missing from the vocabulary fall back to the '<UNK>' id;
        # an unknown tag raises KeyError.
        word_ids = [self.word2idx.get(tok, self.word2idx['<UNK>']) for tok in words]
        tag_ids = [self.tag2idx[lab] for lab in labels]

        missing = self.max_len - len(word_ids)
        if missing <= 0:
            # Already at or beyond the limit: keep only the first max_len entries.
            word_ids, tag_ids = word_ids[:self.max_len], tag_ids[:self.max_len]
        else:
            word_ids += [self.word2idx['<PAD>']] * missing
            tag_ids += [-100] * missing  # -100 marks positions to ignore (ignore_index for loss)

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Embedding -> bidirectional LSTM -> linear layer producing per-token tag scores.

    Args:
        vocab_size: Number of rows in the embedding table.
        tagset_size: Number of output scores per token.
        embedding_dim: Size of each word embedding.
        hidden_dim: Hidden size of each LSTM direction.
    """

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        super().__init__()
        # Index 0 is the padding index of the embedding table.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True,
        )
        # Both directions are concatenated, hence twice the hidden size.
        self.fc = nn.Linear(2 * hidden_dim, tagset_size)

    def forward(self, x):
        """Return tag logits for a batch of word-index sequences.

        The whole sequence, padded positions included, is run through the LSTM
        (no packing is applied).
        """
        hidden_states = self.lstm(self.embedding(x))[0]
        return self.fc(hidden_states)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(x_batch)``; its output's last dimension
            holds the per-tag scores.
        data_loader: Sized iterable of ``(x_batch, y_batch)`` tensor pairs.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called with flattened scores and flattened targets.
        device: Device both tensors are moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by ``len(data_loader)``
        (raises ZeroDivisionError if the loader has length 0).
    """
    model.train()
    running_loss = 0
    for inputs, targets in data_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        scores = model(inputs)  # (batch_size, seq_len, num_tags)

        # Collapse batch and sequence axes so every token is one sample for the loss.
        num_tags = scores.shape[-1]
        batch_loss = criterion(scores.view(-1, num_tags), targets.view(-1))

        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    return running_loss / len(data_loader)

# 6. Evaluation function with classification report and metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Evaluate a tagger token by token and summarise the results with scikit-learn.

    Args:
        model: Module called as ``model(x_batch)``; the arg-max over the last
            dimension of its output is taken as the predicted tag index.
        data_loader: Iterable of ``(x_batch, y_batch)`` tensor pairs. Positions
            whose label is -100 are treated as padding and excluded.
        tag2idx: Mapping from tag string to integer index; inverted here to turn
            indices back into tag strings.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(report, accuracy, precision, recall, f1)``: the ``classification_report``
        text, the token accuracy, and the 'weighted'-average precision, recall
        and F1 score.
    """
    model.eval()
    idx2tag = {idx: tag for tag, idx in tag2idx.items()}
    true_tags, pred_tags = [], []

    with torch.no_grad():
        for x_batch, y_batch in data_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            preds = torch.argmax(model(x_batch), dim=-1)

            # Pick every non-padding position of the batch with one boolean mask
            # instead of walking the tokens one by one in Python. Mask indexing
            # returns elements in row-major order, i.e. sentence by sentence.
            keep = y_batch != -100
            true_tags.extend(idx2tag[t] for t in y_batch[keep].tolist())
            pred_tags.extend(idx2tag[p] for p in preds[keep].tolist())

    report = classification_report(true_tags, pred_tags, digits=4)
    accuracy = accuracy_score(true_tags, pred_tags)
    precision = precision_score(true_tags, pred_tags, average='weighted')
    recall = recall_score(true_tags, pred_tags, average='weighted')
    f1 = f1_score(true_tags, pred_tags, average='weighted')

    return report, accuracy, precision, recall, f1

# 7. Main execution
if __name__ == "__main__":
    file_path = "/content/BI.xlsx"  # Your BI tagging dataset path
    sentences, tags = load_excel_data_bi(file_path)
    # NOTE(review): the vocabulary is built from all sentences, i.e. before the
    # train/test split below, so test-set words are already in word2idx.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    # 80/20 train/test split. The generator is seeded so the split is the same on
    # every run; an unseeded random_split makes the reported metrics irreproducible.
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset,
        [train_size, test_size],
        generator=torch.Generator().manual_seed(42),
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # -100 is the label NERDataset assigns to padded positions; the loss skips them.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.3261
Epoch 2/10, Loss: 0.0683
Epoch 3/10, Loss: 0.0319
Epoch 4/10, Loss: 0.0177
Epoch 5/10, Loss: 0.0116
Epoch 6/10, Loss: 0.0073
Epoch 7/10, Loss: 0.0050
Epoch 8/10, Loss: 0.0033
Epoch 9/10, Loss: 0.0025
Epoch 10/10, Loss: 0.0016

Evaluation on test data:
              precision    recall  f1-score   support

           B     0.9799    0.9644    0.9721       253
          BO     0.9686    0.9204    0.9439       201
           I     0.9815    0.9138    0.9464       290
          IO     0.9958    0.9989    0.9973     10882

    accuracy                         0.9947     11626
   macro avg     0.9814    0.9494    0.9649     11626
weighted avg     0.9946    0.9947    0.9946     11626

Accuracy: 0.9947
Precision: 0.9946
Recall: 0.9947
F1 Score: 0.9946


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# 1. Load BIES Data
def load_excel_data_bies(file_path):
    """Read a token-per-row dataset and group its rows into sentences.

    Assumes the file has the columns 'Word i' and 'Word i entity tag' and that the
    tags follow the BIES scheme (the tags themselves are not validated here).

    Args:
        file_path: Path string. A path ending in '.csv' is read with
            ``pd.read_csv``; anything else is read with ``pd.read_excel``.

    Returns:
        ``(sentences, labels)``: two parallel lists of lists holding the stripped
        words and the stripped tags of each sentence.
    """
    reader = pd.read_csv if file_path.endswith('.csv') else pd.read_excel
    # Rows with a missing word or a missing tag are discarded.
    frame = reader(file_path)[['Word i', 'Word i entity tag']].dropna()

    sentences, labels = [], []
    cur_words, cur_tags = [], []
    for raw_word, raw_tag in zip(frame['Word i'], frame['Word i entity tag']):
        token = str(raw_word).strip()
        if token not in ('.', '؟'):
            cur_words.append(token)
            cur_tags.append(str(raw_tag).strip())
            continue
        # A boundary token closes the running sentence; the boundary token itself
        # is dropped, and consecutive boundaries never emit empty sentences.
        if cur_words:
            sentences.append(cur_words)
            labels.append(cur_tags)
            cur_words, cur_tags = [], []

    # Keep a trailing sentence that was not terminated by a boundary token.
    if cur_words:
        sentences.append(cur_words)
        labels.append(cur_tags)

    return sentences, labels

# 2. Build vocabularies for words and tags
def build_vocab(sentences, tags):
    """Assign consecutive integer ids to words and tags in first-seen order.

    Args:
        sentences: Iterable of word sequences.
        tags: Iterable of tag sequences.

    Returns:
        ``(word2idx, tag2idx)``. ``word2idx`` always starts with '<PAD>' -> 0 and
        '<UNK>' -> 1; ``tag2idx`` starts empty and numbers tags from 0.
    """
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    for token in (w for sent in sentences for w in sent):
        # setdefault only inserts unseen keys, so each new word gets the next free id.
        word2idx.setdefault(token, len(word2idx))

    tag2idx = {}
    for label in (t for tag_seq in tags for t in tag_seq):
        tag2idx.setdefault(label, len(tag2idx))

    return word2idx, tag2idx

# 3. Dataset class
class NERDataset(Dataset):
    """Dataset that turns tagged sentences into fixed-length index tensors.

    Every item is a pair ``(word_ids, tag_ids)``. Sequences shorter than
    ``max_len`` are right-padded (words with the '<PAD>' id, tags with -100);
    longer ones are cut to their first ``max_len`` entries.
    """

    def __init__(self, sentences, tags, word2idx, tag2idx, max_len=50):
        self.sentences, self.tags = sentences, tags
        self.word2idx, self.tag2idx = word2idx, tag2idx
        self.max_len = max_len

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        words, labels = self.sentences[idx], self.tags[idx]

        # Words missing from the vocabulary fall back to the '<UNK>' id;
        # an unknown tag raises KeyError.
        word_ids = [self.word2idx.get(tok, self.word2idx['<UNK>']) for tok in words]
        tag_ids = [self.tag2idx[lab] for lab in labels]

        missing = self.max_len - len(word_ids)
        if missing <= 0:
            # Already at or beyond the limit: keep only the first max_len entries.
            word_ids, tag_ids = word_ids[:self.max_len], tag_ids[:self.max_len]
        else:
            word_ids += [self.word2idx['<PAD>']] * missing
            tag_ids += [-100] * missing  # -100 marks positions to ignore (ignore_index for loss)

        return torch.tensor(word_ids), torch.tensor(tag_ids)

# 4. BiLSTM model for sequence tagging
class BiLSTMTagger(nn.Module):
    """Embedding -> bidirectional LSTM -> linear layer producing per-token tag scores.

    Args:
        vocab_size: Number of rows in the embedding table.
        tagset_size: Number of output scores per token.
        embedding_dim: Size of each word embedding.
        hidden_dim: Hidden size of each LSTM direction.
    """

    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        super().__init__()
        # Index 0 is the padding index of the embedding table.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True,
        )
        # Both directions are concatenated, hence twice the hidden size.
        self.fc = nn.Linear(2 * hidden_dim, tagset_size)

    def forward(self, x):
        """Return tag logits for a batch of word-index sequences.

        The whole sequence, padded positions included, is run through the LSTM
        (no packing is applied).
        """
        hidden_states = self.lstm(self.embedding(x))[0]
        return self.fc(hidden_states)

# 5. Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(x_batch)``; its output's last dimension
            holds the per-tag scores.
        data_loader: Sized iterable of ``(x_batch, y_batch)`` tensor pairs.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called with flattened scores and flattened targets.
        device: Device both tensors are moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by ``len(data_loader)``
        (raises ZeroDivisionError if the loader has length 0).
    """
    model.train()
    running_loss = 0
    for inputs, targets in data_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        scores = model(inputs)  # (batch_size, seq_len, num_tags)

        # Collapse batch and sequence axes so every token is one sample for the loss.
        num_tags = scores.shape[-1]
        batch_loss = criterion(scores.view(-1, num_tags), targets.view(-1))

        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    return running_loss / len(data_loader)

# 6. Evaluation function with classification report and metrics
def eval_model_metrics(model, data_loader, tag2idx, device):
    """Evaluate ``model`` on ``data_loader`` and compute token-level tag metrics.

    Predictions are the argmax over the last dimension of the model output.
    Positions whose gold label is ``-100`` (padding) are excluded from every
    metric.

    Args:
        model: Module mapping a batch of inputs to per-position tag scores.
        data_loader: Iterable yielding ``(x_batch, y_batch)`` tensor pairs.
        tag2idx: Mapping from tag string to integer index; it is inverted
            here to turn indices back into tag strings.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A tuple ``(report, accuracy, precision, recall, f1)``: ``report`` is
        the text produced by ``classification_report`` with four digits, and
        precision, recall and F1 are computed with ``average='weighted'``.
    """
    model.eval()
    idx2tag = {index: tag for tag, index in tag2idx.items()}
    true_tags = []
    pred_tags = []

    with torch.no_grad():
        for x_batch, y_batch in data_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            preds = torch.argmax(model(x_batch), dim=-1)

            # One boolean mask per batch drops the padded positions; masked
            # selection keeps row-major order, so gold and predicted labels
            # stay aligned position by position.
            keep = y_batch != -100
            true_tags.extend(idx2tag[i] for i in y_batch[keep].tolist())
            pred_tags.extend(idx2tag[i] for i in preds[keep].tolist())

    # zero_division=0 yields the same scores as the default ("warn" scores an
    # undefined metric as 0) without emitting UndefinedMetricWarning for tags
    # that are never predicted or never present.
    report = classification_report(true_tags, pred_tags, digits=4, zero_division=0)
    accuracy = accuracy_score(true_tags, pred_tags)
    precision = precision_score(true_tags, pred_tags, average='weighted', zero_division=0)
    recall = recall_score(true_tags, pred_tags, average='weighted', zero_division=0)
    f1 = f1_score(true_tags, pred_tags, average='weighted', zero_division=0)

    return report, accuracy, precision, recall, f1

# 7. Main execution
# Script entry point: load the BIES-tagged data, train the BiLSTM tagger for a
# fixed number of epochs, then print evaluation metrics for a held-out split.
if __name__ == "__main__":
    file_path = "/content/BIES.xlsx"  # Path to the BIES tagging dataset; change to your own file
    # NOTE(review): load_excel_data_bies is not defined in this part of the
    # file; presumably it returns parallel lists of token and tag sequences
    # and is defined in an earlier cell. Confirm.
    sentences, tags = load_excel_data_bies(file_path)
    # The vocabularies are built from all sentences, i.e. before the
    # train/test split below.
    word2idx, tag2idx = build_vocab(sentences, tags)

    max_len = 50
    dataset = NERDataset(sentences, tags, word2idx, tag2idx, max_len=max_len)

    # 80% of the samples go to training, the remainder to testing. No seed is
    # set in this block, so the split may differ between runs unless the RNG
    # is seeded elsewhere.
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

    # Only the training loader shuffles its samples.
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BiLSTMTagger(len(word2idx), len(tag2idx)).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Targets equal to -100 (the value NERDataset pads tag sequences with) are
    # ignored by the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    epochs = 10
    for epoch in range(epochs):
        loss = train_epoch(model, train_loader, optimizer, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    # Final evaluation on the held-out split; all values are printed with
    # four decimals.
    print("\nEvaluation on test data:")
    report, accuracy, precision, recall, f1 = eval_model_metrics(model, test_loader, tag2idx, device)
    print(report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


Epoch 1/10, Loss: 0.5031
Epoch 2/10, Loss: 0.1137
Epoch 3/10, Loss: 0.0555
Epoch 4/10, Loss: 0.0336
Epoch 5/10, Loss: 0.0207
Epoch 6/10, Loss: 0.0137
Epoch 7/10, Loss: 0.0093
Epoch 8/10, Loss: 0.0063
Epoch 9/10, Loss: 0.0045
Epoch 10/10, Loss: 0.0034

Evaluation on test data:
              precision    recall  f1-score   support

           B     0.9922    0.9410    0.9659       271
          BO     0.9559    0.9330    0.9443       209
           E     0.9762    0.9077    0.9407       271
          EO     0.9801    0.9535    0.9666       258
           I     0.9565    0.7097    0.8148        62
          IO     0.9922    0.9986    0.9954     10290
           S     0.0000    0.0000    0.0000         2
          SO     1.0000    0.3333    0.5000         6

    accuracy                         0.9908     11369
   macro avg     0.8566    0.7221    0.7660     11369
weighted avg     0.9905    0.9908    0.9904     11369

Accuracy: 0.9908
Precision: 0.9905
Recall: 0.9908
F1 Score: 0.9904


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
