In [9]:
import torch
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.metrics import classification_report
from torch.utils.data import DataLoader, TensorDataset

In [10]:
# Load train and test data
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [11]:
print(device)

cuda


In [None]:
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
model.to(device)

In [17]:
def encode_data(data):
    inputs = tokenizer(data['utterance1'].tolist(), 
                       data['utterance2'].tolist(), 
                       padding=True, 
                       truncation=True, 
                       return_tensors='pt')
    labels = torch.tensor(data['label'].tolist())
    return inputs.to(device), labels.to(device)

train_inputs, train_labels = encode_data(train_data)
train_dataset = TensorDataset(train_inputs['input_ids'], train_inputs['attention_mask'], train_labels)
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)

# Fine-tune BERT model
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        optimizer.zero_grad()
        input_ids, attention_mask, labels = batch
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    average_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Average Loss: {average_loss:.4f}")

Epoch 1/5, Average Loss: 0.2019
Epoch 2/5, Average Loss: 0.1713
Epoch 3/5, Average Loss: 0.1367
Epoch 4/5, Average Loss: 0.1111
Epoch 5/5, Average Loss: 0.0942


In [18]:
def evaluate_model(model, inputs, labels):
    outputs = model(**inputs, labels=labels)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=1)
    return predictions


In [19]:
# Encode test data
test_inputs, test_labels = encode_data(test_data)

# Perform classification
with torch.no_grad():
    model.eval()
    predictions = evaluate_model(model, test_inputs, test_labels)

predictions = predictions.tolist()

# Print classification report
print(classification_report(test_labels.tolist(), predictions))

              precision    recall  f1-score   support

           0       0.89      0.82      0.85       895
           1       0.66      0.76      0.70       395

    accuracy                           0.80      1290
   macro avg       0.77      0.79      0.78      1290
weighted avg       0.82      0.80      0.81      1290



In [1]:
!wget ftp://ftp.cs.rochester.edu/pub/papers/ai/93.tn2.prosody_and_discourse_structure_in_cooperative_dialogues_text.ps.Z

--2024-03-29 20:51:47--  ftp://ftp.cs.rochester.edu/pub/papers/ai/93.tn2.prosody_and_discourse_structure_in_cooperative_dialogues_text.ps.Z
           => ‘93.tn2.prosody_and_discourse_structure_in_cooperative_dialogues_text.ps.Z’
Resolving ftp.cs.rochester.edu (ftp.cs.rochester.edu)... 128.151.167.12
Connecting to ftp.cs.rochester.edu (ftp.cs.rochester.edu)|128.151.167.12|:21... failed: Connection timed out.
Retrying.

--2024-03-29 20:53:57--  ftp://ftp.cs.rochester.edu/pub/papers/ai/93.tn2.prosody_and_discourse_structure_in_cooperative_dialogues_text.ps.Z
  (try: 2) => ‘93.tn2.prosody_and_discourse_structure_in_cooperative_dialogues_text.ps.Z’
Connecting to ftp.cs.rochester.edu (ftp.cs.rochester.edu)|128.151.167.12|:21... failed: Connection timed out.
Retrying.

--2024-03-29 20:56:09--  ftp://ftp.cs.rochester.edu/pub/papers/ai/93.tn2.prosody_and_discourse_structure_in_cooperative_dialogues_text.ps.Z
  (try: 3) => ‘93.tn2.prosody_and_discourse_structure_in_cooperative_dialogues_text.ps