In [2]:
import numpy as np
import pandas as pd

import logging

# Hide logging messages.
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)
%env CATALYST_LOG_LEVEL = 15
#!pip install tensorflow-macos
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel


env: CATALYST_LOG_LEVEL=15


In [3]:
train_df = pd.read_csv('train.csv')
val_df = pd.read_csv('val.csv')
test_df = pd.read_csv('test.csv')
#set(val_df['author'].tolist()+train_df['author'].tolist())

In [10]:
from transformers import BertModel, BertTokenizer
import torch
from sklearn import metrics
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

# Load the pre-trained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertModel.from_pretrained(model_name)

for param in model.parameters():
    param.requires_grad = False

# Replace the classification layer with a max pooling layer
model.pooler = torch.nn.AdaptiveMaxPool1d(1)

# Replace the classification layer with a linear layer
model.classifier = torch.nn.Sequential(
    torch.nn.Linear(model.config.hidden_size, 47)
)

# Unfreeze the weights of the last 4 layers
for param in model.encoder.layer[-4:].parameters():
    param.requires_grad = True

# Fine-tune the model on your downstream task
train_dataset = train_df
dev_dataset = val_df
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
model.train()
model.to(device)
train_inputs = tokenizer(train_dataset['text'].tolist(),
                         padding=True,
                         truncation=True,
                         return_tensors='pt')
train_labels = torch.tensor(train_dataset.drop('text', axis=1).values)\
    .to(device, dtype = torch.float)
val_inputs = tokenizer(dev_dataset['text'].tolist(), padding=True, truncation=True, return_tensors='pt')
val_labels = torch.tensor(dev_dataset.drop('text', axis=1).values)\
    .to(device, dtype = torch.float)
for epoch in range(100):
    outputs = model(train_inputs['input_ids'].to(device, dtype = torch.long),
                    attention_mask=train_inputs['attention_mask'].to(device, dtype = torch.long),
                    token_type_ids=train_inputs['token_type_ids'].to(device, dtype = torch.long))
    pooled_output = outputs.last_hidden_state
    pooled_output = model.pooler(pooled_output.permute(0, 2, 1)).squeeze()
    logits = model.classifier(pooled_output)
    loss_fn = torch.nn.BCEWithLogitsLoss()
    loss = loss_fn(logits, train_labels)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    # Evaluate the model on the dev set
    with torch.no_grad():

        outputs = model(val_inputs['input_ids'].to(device, dtype = torch.long),
                        attention_mask=val_inputs['attention_mask'].to(device, dtype = torch.long),
                        token_type_ids=val_inputs['token_type_ids'].to(device, dtype = torch.long))
        pooled_output = outputs.last_hidden_state
        pooled_output = model.pooler(pooled_output.permute(0, 2, 1)).squeeze()
        logits = model.classifier(pooled_output)
        acc_outputs = np.array(torch.sigmoid(logits).cpu().detach().numpy().tolist()) >= 0.5
        acc = metrics.accuracy_score(val_labels.cpu().detach().numpy().tolist(),
                                     acc_outputs)
    print(f"Epoch {epoch + 1}:Loss:  {loss.item()}, Dev accuracy = {acc}")
model.eval()
# Use the fine-tuned model to make predictions on new data
test_dataset = test_df
with torch.no_grad():
    inputs = tokenizer(test_dataset['text'].tolist(), padding=True, truncation=True, return_tensors='pt')
    outputs = model(inputs['input_ids'].to(device, dtype = torch.long),
                    attention_mask=inputs['attention_mask'].to(device, dtype = torch.long),
                    token_type_ids=inputs['token_type_ids'].to(device, dtype = torch.long))
    pooled_output = outputs.last_hidden_state
    pooled_output = model.pooler(pooled_output.permute(0, 2, 1)).squeeze()
    logits = model.classifier(pooled_output)
    preds = torch.argmax(logits, dim=1)
    test_dataset['pred'] = preds.tolist()



Epoch 2:Loss:  0.7929664254188538, Dev accuracy = 0.0
Epoch 3:Loss:  0.7588768005371094, Dev accuracy = 0.0


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1:Loss:  0.7998303174972534, Dev accuracy = 0.0


KeyboardInterrupt: 