## Transformer-based model

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
from transformers import BertTokenizer, BertModel, AdamW
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

def load_data(file_path, is_test=False):
    """Read a CSV file and keep only the columns needed downstream.

    Args:
        file_path: Path of the CSV file to read.
        is_test: When true, return the ``id`` and ``Text`` columns (unlabelled
            data); otherwise return the ``Text`` and ``Class`` columns.

    Returns:
        A DataFrame restricted to the two selected columns.
    """
    wanted_columns = ['id', 'Text'] if is_test else ['Text', 'Class']
    frame = pd.read_csv(file_path)
    return frame[wanted_columns]

# Training frame: both corpora (AWM, AWT), train and dev splits stacked together.
# NOTE(review): the dev files are part of train_data, so metrics computed on
# dev_data are measured on examples the model was trained on -- confirm this
# is intended (e.g. a final fit on train+dev before scoring the test set).
train_data = pd.concat([
    load_data(csv_path)
    for csv_path in (
        'Data/AWM_train.csv',
        'Data/AWT_train.csv',
        'Data/AWM_dev.csv',
        'Data/AWT_dev.csv',
    )
])
dev_data = pd.concat([
    load_data(csv_path)
    for csv_path in ('Data/AWM_dev.csv', 'Data/AWT_dev.csv')
])

# Lower-case the labels so that case variants collapse into a single class.
dev_data['Class'] = dev_data['Class'].str.lower()
train_data['Class'] = train_data['Class'].str.lower()

# Fit the label -> integer encoding on the training labels, then apply it to
# both frames.
label_encoder = LabelEncoder().fit(train_data['Class'])
train_data['Class'] = label_encoder.transform(train_data['Class'])
dev_data['Class'] = label_encoder.transform(dev_data['Class'])

# Class name -> encoded integer id, printed for reference.
class_mapping = {
    class_name: class_id
    for class_name, class_id in zip(
        label_encoder.classes_,
        label_encoder.transform(label_encoder.classes_),
    )
}
print("Class Mapping:", class_mapping)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

class TextDataset(Dataset):
    """Dataset that tokenizes one text per item, optionally paired with a label.

    Every item is truncated/padded to ``max_len`` tokens. Items are
    ``(input_ids, attention_mask, label)`` when labels were supplied and
    ``(input_ids, attention_mask)`` otherwise.
    """

    def __init__(self, texts, labels=None, tokenizer=tokenizer, max_len=128):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_len = tokenizer, max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoded = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors="pt",
        )
        # squeeze(0) drops the leading dimension of the returned "pt" tensors
        # so the DataLoader can stack items into a batch itself.
        features = (
            encoded['input_ids'].squeeze(0),
            encoded['attention_mask'].squeeze(0),
        )
        if self.labels is None:
            return features
        target = torch.tensor(self.labels[idx], dtype=torch.long)
        return (*features, target)

# Wrap the text/label columns as datasets (default tokenizer and max_len).
train_dataset = TextDataset(
    texts=train_data['Text'].tolist(),
    labels=train_data['Class'].tolist(),
)
dev_dataset = TextDataset(
    texts=dev_data['Text'].tolist(),
    labels=dev_data['Class'].tolist(),
)

# Only the training loader shuffles; the dev loader keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
dev_loader = DataLoader(dataset=dev_dataset, batch_size=16)

class ComplexTransformerModel(nn.Module):
    """BERT encoder, extra Transformer encoder layers, then an MLP classifier.

    Args:
        num_classes: Number of output logits.
        num_transformer_layers: Number of stacked ``nn.TransformerEncoderLayer``
            copies applied on top of BERT's last hidden state.
        hidden_dim: Model width of the extra encoder and input width of the
            classifier head; it has to match the hidden size of
            ``bert-base-uncased`` since BERT's output is fed in directly.
        dropout: Dropout probability used inside the extra encoder layers and
            before each linear layer of the head.
    """

    def __init__(self, num_classes, num_transformer_layers=2, hidden_dim=768, dropout=0.3):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        # batch_first=True is required: forward() feeds BERT's last hidden
        # state, laid out (batch, seq, hidden), straight into the encoder. With
        # the PyTorch default (seq, batch, hidden) self-attention would mix
        # tokens of *different samples* instead of tokens of one sentence.
        # The layer stays an attribute so state_dict keys remain compatible
        # with previously saved checkpoints.
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=8,
            dim_feedforward=hidden_dim * 4,
            dropout=dropout,
            activation='relu',
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.encoder_layer, num_layers=num_transformer_layers
        )

        self.fc1 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.fc2 = nn.Linear(hidden_dim // 2, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids, attention_mask):
        """Return unnormalised class logits for a batch of tokenized texts.

        Args:
            input_ids: Token ids, one row per sample.
            attention_mask: Same shape as ``input_ids``; non-zero for real
                tokens and 0 for padding.

        Returns:
            Tensor of logits with one row per sample and ``num_classes``
            columns.
        """
        bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = bert_output.last_hidden_state

        # src_key_padding_mask uses the opposite convention of attention_mask:
        # True marks positions that attention must ignore (the padding).
        padding_mask = attention_mask == 0
        transformer_output = self.transformer_encoder(
            hidden_states, src_key_padding_mask=padding_mask
        )

        # Position 0 of every sequence is used as the sentence representation.
        cls_output = transformer_output[:, 0, :]

        x = self.fc1(self.dropout(cls_output))
        x = torch.relu(x)
        x = self.fc2(self.dropout(x))
        return x

# One output logit per encoded class.
num_classes = len(class_mapping)
model = ComplexTransformerModel(num_classes).to(device)

criterion = nn.CrossEntropyLoss()
# torch.optim.AdamW replaces the deprecated transformers.AdamW. eps and
# weight_decay are pinned to the values transformers.AdamW used by default
# (torch defaults are 1e-8 and 1e-2), so the update rule stays the same.
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-6, weight_decay=0.0)

def train(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(input_ids, attention_mask)``.
        loader: Iterable yielding ``(input_ids, attention_mask, labels)``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device every batch tensor is moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(loader)``.
    """
    model.train()
    running_loss = 0
    for batch in loader:
        ids, mask, targets = (tensor.to(device) for tensor in batch)
        optimizer.zero_grad()
        batch_loss = criterion(model(ids, mask), targets)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
    return running_loss / len(loader)

def evaluate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Module called as ``model(input_ids, attention_mask)``.
        loader: Iterable yielding ``(input_ids, attention_mask, labels)``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device every batch tensor is moved to.

    Returns:
        Tuple ``(mean_loss, report)``: the sum of per-batch losses divided by
        ``len(loader)`` and the text report from ``classification_report``
        with one row per class in the module-level ``class_mapping``.
    """
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for input_ids, attention_mask, labels in loader:
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    mean_loss = total_loss / len(loader)
    # Pass the label ids explicitly. Without `labels`, scikit-learn infers the
    # classes from the data and raises when a class is absent from both the
    # targets and the predictions, because target_names no longer lines up.
    report = classification_report(
        all_labels,
        all_preds,
        labels=list(class_mapping.values()),
        target_names=list(class_mapping.keys()),
    )
    return mean_loss, report

# Training loop: one optimisation pass plus one dev evaluation per epoch.
# NOTE(review): the dev CSVs are also concatenated into train_data earlier in
# this cell, so the dev metrics printed here are computed on data the model
# has been trained on -- confirm this is intended.
num_epochs = 15
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    dev_loss, dev_report = evaluate(model, dev_loader, criterion, device)
    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}, Dev Loss: {dev_loss:.4f}")
    print(dev_report)

# Persist the weights as they are after the last epoch (no best-epoch selection).
torch.save(model.state_dict(), 'complex_transformer_model.pth')

# Labelled test sets of both corpora.
awm_test_data = load_data('Data/AWM_test_with_labels.csv')
awt_test_data = load_data('Data/AWT_test_with_labels.csv')

# Same label preprocessing as for train/dev: lower-case, then encode with the
# encoder fitted on the training labels.
test_data_with_labels = pd.concat([awm_test_data, awt_test_data])
test_data_with_labels['Class'] = test_data_with_labels['Class'].str.lower()
test_data_with_labels['Class'] = label_encoder.transform(test_data_with_labels['Class'])

test_dataset_with_labels = TextDataset(test_data_with_labels['Text'].tolist(), test_data_with_labels['Class'].tolist())
test_loader_with_labels = DataLoader(test_dataset_with_labels, batch_size=16)

# Final evaluation on the held-out test data.
test_loss, test_report = evaluate(model, test_loader_with_labels, criterion, device)
print(f"Test Loss: {test_loss:.4f}")
print(test_report)


## Bi-LSTM Model

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from transformers import BertTokenizer, BertModel
import pandas as pd

# Use CUDA when present; everything below is moved to this device.
device = torch.device("cpu") if not torch.cuda.is_available() else torch.device("cuda")
print(f"Using device: {device}")

def load_data(file_path, is_test=False):
    """Load a CSV and return only the columns this notebook uses.

    Unlabelled test files (``is_test`` true) yield ``id`` and ``Text``; all
    other files yield ``Text`` and ``Class``.
    """
    columns = ('id', 'Text') if is_test else ('Text', 'Class')
    return pd.read_csv(file_path)[list(columns)]

# Training frame stacks the train and dev splits of both corpora.
# NOTE(review): dev_data below is a subset of train_data, so dev metrics are
# measured on examples seen during training -- confirm this is intended.
train_data = pd.concat(list(map(load_data, [
    'Data/AWM_train.csv',
    'Data/AWT_train.csv',
    'Data/AWM_dev.csv',
    'Data/AWT_dev.csv',
])))
dev_data = pd.concat(list(map(load_data, [
    'Data/AWM_dev.csv',
    'Data/AWT_dev.csv',
])))
# Unlabelled test files keep the id/Text columns only.
test_data = pd.concat([
    load_data(unlabelled_path, is_test=True)
    for unlabelled_path in (
        'Data/AWM_test_without_labels.csv',
        'Data/AWT_test_without_labels.csv',
    )
])

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

from sklearn.preprocessing import LabelEncoder

# Normalise label case before encoding.
dev_data['Class'] = dev_data['Class'].str.lower()
train_data['Class'] = train_data['Class'].str.lower()

# The encoder is fitted on the training labels and reused for dev.
label_encoder = LabelEncoder().fit(train_data['Class'])
train_data['Class'] = label_encoder.transform(train_data['Class'])
dev_data['Class'] = label_encoder.transform(dev_data['Class'])

# Class name -> encoded integer id.
class_mapping = {
    name: code
    for name, code in zip(
        label_encoder.classes_,
        label_encoder.transform(label_encoder.classes_),
    )
}
print("Class Mapping:", class_mapping)

class TextDataset(Dataset):
    """Tokenize-on-access dataset over a list of texts and optional labels.

    Each item is a tuple of ``input_ids`` and ``attention_mask`` (both
    truncated/padded to ``max_len``), followed by the label as a ``long``
    tensor when labels were provided.
    """

    def __init__(self, texts, labels=None, tokenizer=tokenizer, max_len=128):
        self.max_len = max_len
        self.tokenizer = tokenizer
        self.labels = labels
        self.texts = texts

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        batch_encoding = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors="pt",
        )
        # squeeze(0) removes the leading dimension of the "pt" tensors so each
        # item is a single sequence.
        item = [
            batch_encoding[field].squeeze(0)
            for field in ('input_ids', 'attention_mask')
        ]
        if self.labels is not None:
            item.append(torch.tensor(self.labels[idx], dtype=torch.long))
        return tuple(item)

# Datasets over the text/label columns (default tokenizer and max_len).
train_dataset = TextDataset(
    train_data['Text'].tolist(),
    labels=train_data['Class'].tolist(),
)
dev_dataset = TextDataset(
    dev_data['Text'].tolist(),
    labels=dev_data['Class'].tolist(),
)

# Shuffle only while training; dev batches keep dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
dev_loader = DataLoader(dev_dataset, batch_size=32)

class BiLSTMAttention(nn.Module):
    """Frozen-BERT features, a bidirectional LSTM, attention pooling, linear head.

    Args:
        embedding_dim: Input feature size of the LSTM; has to equal the hidden
            size of ``bert-base-uncased`` because BERT's output is fed in
            directly.
        hidden_dim: Hidden size of each LSTM direction (the LSTM output is
            ``hidden_dim * 2`` wide).
        output_dim: Number of output logits.
        dropout: Dropout probability applied to the pooled vector.
    """

    def __init__(self, embedding_dim, hidden_dim, output_dim, dropout=0.3):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=1, bidirectional=True, batch_first=True)
        self.attention = nn.Linear(hidden_dim * 2, 1)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids, attention_mask):
        """Return unnormalised class logits for a batch of tokenized texts.

        Args:
            input_ids: Token ids, one row per sample.
            attention_mask: Same shape as ``input_ids``; non-zero for real
                tokens and 0 for padding.

        Returns:
            Tensor of logits with one row per sample and ``output_dim`` columns.
        """
        # BERT runs under no_grad, so it receives no gradient and acts as a
        # fixed feature extractor.
        # NOTE(review): model.train() also puts BERT in train mode, so its
        # dropout stays active during training -- confirm this is intended.
        with torch.no_grad():
            bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        lstm_out, _ = self.lstm(bert_output.last_hidden_state)

        scores = self.attention(lstm_out).squeeze(-1)
        # Exclude padding from the attention pooling: without this, [PAD]
        # positions get a share of the softmax weight and dilute the context
        # vector. The dtype minimum (not -inf) keeps softmax finite even if a
        # row were fully masked.
        scores = scores.masked_fill(attention_mask == 0, torch.finfo(scores.dtype).min)
        attn_weights = torch.softmax(scores, dim=1)

        # Attention-weighted sum of the LSTM outputs over the sequence axis.
        context_vector = torch.sum(attn_weights.unsqueeze(-1) * lstm_out, dim=1)
        output = self.fc(self.dropout(context_vector))
        return output

# LSTM input width (must match BERT's hidden size) and per-direction LSTM width.
embedding_dim = 768
hidden_dim = 256
# One logit per distinct encoded class present in the training labels.
output_dim = train_data['Class'].nunique()

model = BiLSTMAttention(
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=2e-5)

def train(model, loader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``loader``; return the mean batch loss.

    Each batch is an ``(input_ids, attention_mask, labels)`` triple that is
    moved to ``device`` before the forward pass. The optimizer takes one step
    per batch.
    """
    model.train()
    batch_losses = []
    for ids, mask, targets in loader:
        ids = ids.to(device)
        mask = mask.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(ids, mask)
        step_loss = criterion(logits, targets)
        step_loss.backward()
        optimizer.step()

        batch_losses.append(step_loss.item())
    return sum(batch_losses) / len(loader)

def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` in eval mode with gradients disabled.

    Returns:
        Tuple ``(mean_loss, report)``: the summed per-batch loss divided by
        ``len(loader)`` and the text output of ``classification_report`` for
        the collected targets and argmax predictions.
    """
    model.eval()
    loss_sum = 0
    predictions, targets = [], []
    with torch.no_grad():
        for batch in loader:
            input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)
            logits = model(input_ids, attention_mask)
            loss_sum += criterion(logits, labels).item()
            # Predicted class = index of the largest logit in each row.
            predictions.extend(torch.argmax(logits, dim=1).cpu().numpy())
            targets.extend(labels.cpu().numpy())
    mean_loss = loss_sum / len(loader)
    return mean_loss, classification_report(targets, predictions)

# Training loop: one optimisation pass plus one dev evaluation per epoch.
# NOTE(review): the dev CSVs are also concatenated into train_data earlier in
# this cell, so the dev metrics printed here are computed on data the model
# has been trained on -- confirm this is intended.
num_epochs = 15
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    dev_loss, dev_report = evaluate(model, dev_loader, criterion, device)
    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}, Dev Loss: {dev_loss:.4f}")
    print(dev_report)

# Persist the weights as they are after the last epoch (no best-epoch selection).
torch.save(model.state_dict(), 'bilstm_attention_model.pth')

# Labelled test sets of both corpora.
awm_test_data = load_data('Data/AWM_test_with_labels.csv')
awt_test_data = load_data('Data/AWT_test_with_labels.csv')

# Same label preprocessing as for train/dev: lower-case, then encode with the
# encoder fitted on the training labels.
test_data_with_labels = pd.concat([awm_test_data, awt_test_data])
test_data_with_labels['Class'] = test_data_with_labels['Class'].str.lower()
test_data_with_labels['Class'] = label_encoder.transform(test_data_with_labels['Class'])

test_dataset_with_labels = TextDataset(
    test_data_with_labels['Text'].tolist(),
    test_data_with_labels['Class'].tolist()
)
# No shuffling: evaluation keeps dataset order.
test_loader_with_labels = DataLoader(test_dataset_with_labels, batch_size=32)

def test_model(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` and report per-class metrics by name.

    Args:
        model: Module called as ``model(input_ids, attention_mask)``.
        loader: Iterable yielding ``(input_ids, attention_mask, labels)``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device every batch tensor is moved to.

    Returns:
        Tuple ``(mean_loss, report)``: the sum of per-batch losses divided by
        ``len(loader)`` and the text report from ``classification_report``
        with one named row per class in the module-level ``class_mapping``.
    """
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for input_ids, attention_mask, labels in loader:
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    mean_loss = total_loss / len(loader)
    # Pass the label ids explicitly. Without `labels`, scikit-learn infers the
    # classes from the data and raises when a class is absent from both the
    # targets and the predictions, because target_names no longer lines up.
    report = classification_report(
        all_labels,
        all_preds,
        labels=list(class_mapping.values()),
        target_names=list(class_mapping.keys()),
    )
    return mean_loss, report

# Reload the saved weights before testing. map_location remaps the stored
# tensors onto the active device, so a checkpoint written on a GPU machine
# still loads when only a CPU is available.
model.load_state_dict(torch.load('bilstm_attention_model.pth', map_location=device))
model.to(device)

# Final evaluation on the labelled test data.
test_loss, test_report = test_model(model, test_loader_with_labels, criterion, device)
print(f"Test Loss: {test_loss:.4f}")
print(test_report)
