# Mounting Google Drive and Installing Packages

In [1]:
# Mount Google Drive into the Colab filesystem. Later cells read their CSV/JSON
# inputs from, and write the model and predictions to, /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): consider pinning the version used for the reported results.
%pip install -q transformers

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): consider pinning the version used for the reported results.
%pip install -q torch

# Training and Evaluating BERT Model

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertForSequenceClassification, BertTokenizerFast, AdamW
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

# Load the data
# Both splits are read from the mounted Drive. Later code in this cell reads a
# 'sentences' column (the text) and a 'classes' column (the label) from each frame.
train_data = pd.read_csv('/content/drive/MyDrive/train1.csv')
dev_data = pd.read_csv('/content/drive/MyDrive/dev1.csv')

# Create a PyTorch Dataset
class ArgumentDataset(Dataset):
    """Map-style dataset that tokenizes one text/label pair per index.

    Args:
        texts: Iterable of raw texts (e.g. a list or a pandas Series).
        labels: Iterable of integer class ids, aligned one-to-one with ``texts``.
        tokenizer: Object exposing ``encode_plus`` with the Hugging Face
            tokenizer keyword arguments used below.
        max_len: Fixed sequence length every example is padded or truncated to.

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length.
    """

    def __init__(self, texts, labels, tokenizer, max_len):
        # Materialise as lists so __getitem__ indexes by position. A DataLoader
        # asks for positions 0..len-1, which is not the same thing as a
        # label-based lookup on a Series whose index is not 0..n-1.
        self.texts = list(texts)
        self.labels = list(labels)
        if len(self.texts) != len(self.labels):
            raise ValueError(
                'texts and labels must have the same length, got {} and {}'.format(
                    len(self.texts), len(self.labels)
                )
            )
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, item):
        """Tokenize example ``item``.

        Returns:
            dict with the raw ``text``, 1-D ``input_ids`` and ``attention_mask``
            tensors, and the ``label`` as a ``torch.long`` scalar tensor.
        """
        text = str(self.texts[item])
        label = self.labels[item]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            # Explicit truncation: together with padding='max_length' this
            # guarantees every example has exactly max_len tokens, so examples
            # can always be stacked into a batch.
            truncation=True,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'text': text,
            # return_tensors='pt' yields a leading batch axis of size 1; drop it.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

# Set some parameters
SEED = 42         # seed for torch's global random number generator
max_len = 256     # number of tokens each example is padded/truncated to
batch_size = 8    # examples per optimisation step
epochs = 3        # full passes over the training set

# Seed before anything random happens: the classification head is newly
# initialised when the model is loaded, and the training loader shuffles.
# NOTE(review): GPU kernels may still be non-deterministic; this only fixes the RNG state.
torch.manual_seed(SEED)

# Load the BERT tokenizer and model
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Create the PyTorch DataLoaders
train_dataset = ArgumentDataset(train_data['sentences'], train_data['classes'], tokenizer, max_len)
dev_dataset = ArgumentDataset(dev_data['sentences'], dev_data['classes'], tokenizer, max_len)
# Only the training split is shuffled; the dev order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)

# Use a GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Start training
# torch.optim.AdamW replaces the AdamW class that transformers deprecated.
# eps and weight_decay are passed explicitly with the values the transformers
# class used by default, so the optimisation settings stay the same.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5, eps=1e-6, weight_decay=0.0)


def _collect_predictions(model, loader, device):
    """Run ``model`` over ``loader`` without gradients.

    Returns:
        (true, preds): two lists holding, per example, the gold label from the
        batch and the arg-max class of the model's logits.
    """
    model.eval()
    true, preds = [], []
    with torch.no_grad():
        for batch in loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            logits = model(input_ids, attention_mask=attention_mask).logits
            preds.extend(torch.argmax(logits, dim=1).cpu().numpy())
            true.extend(batch['label'].cpu().numpy())
    return true, preds


def _classification_metrics(true, preds):
    """Return (accuracy, precision, recall, f1, micro_f1, macro_f1).

    precision, recall and f1 use scikit-learn's default binary averaging;
    the last two are F1 with ``average='micro'`` and ``average='macro'``.
    """
    return (
        accuracy_score(true, preds),
        precision_score(true, preds),
        recall_score(true, preds),
        f1_score(true, preds),
        f1_score(true, preds, average='micro'),
        f1_score(true, preds, average='macro'),
    )


for epoch in range(epochs):
    total_loss = 0
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # Passing labels makes the model compute the classification loss itself.
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses for this epoch.
    avg_train_loss = total_loss / len(train_loader)
    print('Average training loss for epoch {}: '.format(epoch+1), avg_train_loss)

    # Evaluate on the dev set after each epoch
    true, preds = _collect_predictions(model, dev_loader, device)
    accuracy, precision, recall, f1, micro_f1, macro_f1 = _classification_metrics(true, preds)

    print('Accuracy on the dev set for epoch {}: {:.2%}'.format(epoch+1, accuracy))
    print('Precision on the dev set for epoch {}: {:.2%}'.format(epoch+1, precision))
    print('Recall on the dev set for epoch {}: {:.2%}'.format(epoch+1, recall))
    print('F1 Score on the dev set for epoch {}: {:.2%}'.format(epoch+1, f1))
    print('Micro-F1 Score on the dev set for epoch {}: {:.2%}'.format(epoch+1, micro_f1))
    print('Macro-F1 Score on the dev set for epoch {}: {:.2%}'.format(epoch+1, macro_f1))

# Evaluate on the dev set after all epochs
# (the model has not changed since the last per-epoch evaluation, so this
# re-computes the same predictions and reports them under "Final" labels)
true, preds = _collect_predictions(model, dev_loader, device)
accuracy, precision, recall, f1, micro_f1, macro_f1 = _classification_metrics(true, preds)
print('Final accuracy on the dev set: ', accuracy)


# After all epochs
print('Final precision on the dev set: {:.2%}'.format(precision))
print('Final recall on the dev set: {:.2%}'.format(recall))
print('Final F1 Score on the dev set: {:.2%}'.format(f1))
print('Final Micro-F1 Score on the dev set: {:.2%}'.format(micro_f1))
print('Final Macro-F1 Score on the dev set: {:.2%}'.format(macro_f1))

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Average training loss for epoch 1:  0.5507922134909433
Accuracy on the dev set for epoch 1: 74.82%
Precision on the dev set for epoch 1: 76.14%
Recall on the dev set for epoch 1: 68.55%
F1 Score on the dev set for epoch 1: 72.15%
Micro-F1 Score on the dev set for epoch 1: 74.82%
Macro-F1 Score on the dev set for epoch 1: 74.59%
Average training loss for epoch 2:  0.4395859113519954
Accuracy on the dev set for epoch 2: 74.10%
Precision on the dev set for epoch 2: 70.51%
Recall on the dev set for epoch 2: 78.31%
F1 Score on the dev set for epoch 2: 74.20%
Micro-F1 Score on the dev set for epoch 2: 74.10%
Macro-F1 Score on the dev set for epoch 2: 74.10%
Average training loss for epoch 3:  0.3153866097983933
Accuracy on the dev set for epoch 3: 75.44%
Precision on the dev set for epoch 3: 71.82%
Recall on the dev set for epoch 3: 79.61%
F1 Score on the dev set for epoch 3: 75.51%
Micro-F1 Score on the dev set for epoch 3: 75.44%
Macro-F1 Score on the dev set for epoch 3: 75.44%
Final accu

# Saving the Trained BERT Model

In [None]:
# Destination on the mounted Drive for the serialized model.
output_model_file = "/content/drive/MyDrive/BERT_saved_model.pth"
# Directory the tokenizer vocabulary is written to: the current working
# directory, not the Drive folder the model goes to.
# NOTE(review): if the vocabulary should be kept alongside the model, this
# probably ought to point at a Drive directory too — confirm.
output_vocab_file = './'

model_to_save = model
# torch.save is handed the model object itself, not model.state_dict(), so the
# whole module is serialized.
# NOTE(review): loading such a file generally needs the same class definitions
# importable; consider saving the state_dict or using save_pretrained — confirm
# how this file is consumed before changing the format.
torch.save(model_to_save, output_model_file)
tokenizer.save_vocabulary(output_vocab_file)

print('All files saved')

All files saved


# Testing the Model and Saving the Results

In [None]:
import torch.nn.functional as F

def predict(sentence):
    """Classify a single sentence with the module-level ``model``.

    The sentence is tokenized with the module-level ``tokenizer``, padded or
    truncated to ``max_len`` tokens, and run through the model on ``device``
    without gradient tracking.

    Args:
        sentence: Raw text to classify.

    Returns:
        The string "1" if the highest-scoring class is 1, otherwise "0".
    """
    model.eval()
    inputs = tokenizer.encode_plus(
        sentence,
        add_special_tokens=True,
        max_length=max_len,
        # Explicit truncation and padding replace the deprecated
        # pad_to_max_length flag and the implicit truncation fallback that
        # triggered the tokenizer warning.
        truncation=True,
        padding='max_length',
        return_token_type_ids=False,
        return_tensors='pt'
    )
    ids = inputs['input_ids'].to(device)
    mask = inputs['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(ids, attention_mask=mask).logits
        # Softmax does not change which class scores highest, so the arg-max
        # is taken on the logits directly.
        predicted = torch.argmax(logits, dim=-1)

    sentiment = "1" if predicted.item() == 1 else "0"
    #sentiment = "1(Claim)" if predicted.item() == 1 else "0(Premise)"
    return sentiment

# Hand-picked example sentences. Going by the variable name they are meant to
# be premises (class "0" in the commented-out mapping inside predict) — TODO confirm.
sentences_premise = [
    "It's a number that is incredibly competitive in our industry, and we want to continue to keep it that way.",
    "See, first of all, I'd say the opportunity for our shareholders when they think about Microsoft has never been better.",
    "APAC benefited from some of the product optimizations we did in the quarter."
]

# Print each sentence followed by its prediction. The printed label says
# "Sentiment", but the value is the "0"/"1" class string returned by predict.
for sentence in sentences_premise:
    sentiment = predict(sentence)
    print(f"Sentence: {sentence}")
    print(f"Sentiment: {sentiment}")
    print()


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Sentence: It's a number that is incredibly competitive in our industry, and we want to continue to keep it that way.
Sentiment: 1

Sentence: See, first of all, I'd say the opportunity for our shareholders when they think about Microsoft has never been better.
Sentiment: 1

Sentence: APAC benefited from some of the product optimizations we did in the quarter.
Sentiment: 1





In [None]:
import torch.nn.functional as F

def predict(sentence):
    """Classify a single sentence with the module-level ``model``.

    The sentence is tokenized with the module-level ``tokenizer``, padded or
    truncated to ``max_len`` tokens, and run through the model on ``device``
    without gradient tracking.

    Args:
        sentence: Raw text to classify.

    Returns:
        The string "1" if the highest-scoring class is 1, otherwise "0".
    """
    model.eval()
    inputs = tokenizer.encode_plus(
        sentence,
        add_special_tokens=True,
        max_length=max_len,
        # Explicit truncation and padding replace the deprecated
        # pad_to_max_length flag and the implicit truncation fallback that
        # triggered the tokenizer warning.
        truncation=True,
        padding='max_length',
        return_token_type_ids=False,
        return_tensors='pt'
    )
    ids = inputs['input_ids'].to(device)
    mask = inputs['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(ids, attention_mask=mask).logits
        # Softmax does not change which class scores highest, so the arg-max
        # is taken on the logits directly.
        predicted = torch.argmax(logits, dim=-1)

    sentiment = "1" if predicted.item() == 1 else "0"
    #sentiment = "1(Claim)" if predicted.item() == 1 else "0(Premise)"
    return sentiment

# Hand-picked example sentences. Going by the variable name they are meant to
# be claims (class "1" in the commented-out mapping inside predict) — TODO confirm.
sentences_claim = [
    "Yeah, for the quarter, so it was a strong quarter.",
    "Of course we started in Music with Prime Music, a little bit earlier in that space, and I think that's been a great way for Prime members to enjoy some of that catalog for free, and then as they enjoy that, be able to move into the Amazon Music Unlimited skill.",
    "Even while in International, we're continuing to invest in a lot of areas, we continue to frontload Prime benefits for the newer geographies, we continue to launch new countries as we launch Prime in Australia recently.",
    "First of all, I want to remind you that Q3 is typically a lower operating income quarter as we're preparing for the Q4 holiday peak."
]

# Print each sentence followed by its prediction. The printed label says
# "Sentiment", but the value is the "0"/"1" class string returned by predict.
for sentence_claim in sentences_claim:
    sentiment1 = predict(sentence_claim)
    print(f"Sentence: {sentence_claim}")
    print(f"Sentiment: {sentiment1}")
    print()


Sentence: Yeah, for the quarter, so it was a strong quarter.
Sentiment: 1

Sentence: Of course we started in Music with Prime Music, a little bit earlier in that space, and I think that's been a great way for Prime members to enjoy some of that catalog for free, and then as they enjoy that, be able to move into the Amazon Music Unlimited skill.
Sentiment: 0

Sentence: Even while in International, we're continuing to invest in a lot of areas, we continue to frontload Prime benefits for the newer geographies, we continue to launch new countries as we launch Prime in Australia recently.
Sentiment: 1

Sentence: First of all, I want to remind you that Q3 is typically a lower operating income quarter as we're preparing for the Q4 holiday peak.
Sentiment: 1



In [None]:
import json

# Read the test records; the encoding is given explicitly so the result does
# not depend on the platform's default text encoding.
with open('/content/drive/MyDrive/ECC_Argument_Classification_Test.json', 'r', encoding='utf-8') as f:
    test_data = json.load(f)

# Annotate every record in place with the predicted class string ("0" or "1").
for data in test_data:
    sentence = data['sentence']
    sentiment = predict(sentence)
    data['Prediction'] = sentiment

# Write the annotated records back to Drive under a new file name.
with open('/content/drive/MyDrive/BERT_test_predicted.json', 'w', encoding='utf-8') as f:
    json.dump(test_data, f)