In [3]:
# Pull the tokenizer and the sequence-classification model from one checkpoint.
import torch
from transformers import BertForSequenceClassification, BertTokenizer

model_name = 'bert-base-uncased'

# The tokenizer and the model weights are both resolved from `model_name`.
tokenizer = BertTokenizer.from_pretrained(model_name)

# num_labels=1 asks for a head that emits a single value per input sequence.
model = BertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=1,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [4]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Moves the model's parameters to `device`; as the cell's last expression
# this also displays the module structure.
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [5]:
# Prepare the data: read the preprocessed CSV and split it into train/test lists.
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('preprocess.csv')

# Keep only rows that have both the text and the target. Missing cells load as
# NaN, which would otherwise end up in the text list as floats and in the
# target list as NaN. `df` itself is left untouched.
rows_complete = df.dropna(subset=['PRODUCT', 'PRODUCT_LENGTH'])

# Inputs are forced to str and targets to float so that downstream tensors do
# not depend on how the CSV parser happened to infer the column dtypes.
X = rows_complete['PRODUCT'].astype(str).tolist()
y = rows_complete['PRODUCT_LENGTH'].astype(float).tolist()

# A fixed random_state makes the 90/10 split (and therefore the reported
# metric) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

In [None]:
# Turn the raw strings into model-ready tensors.
def _encode(texts):
    """Tokenize ``texts`` with padding and truncation, returning PyTorch tensors."""
    return tokenizer(texts, padding=True, truncation=True, return_tensors='pt')


encoded_train = _encode(X_train)
encoded_test = _encode(X_test)

In [None]:
# Unpack the three tensors the model consumes from each tokenizer result.
input_ids_train, attention_masks_train, segment_ids_train = (
    encoded_train[field]
    for field in ('input_ids', 'attention_mask', 'token_type_ids')
)

input_ids_test, attention_masks_test, segment_ids_test = (
    encoded_test[field]
    for field in ('input_ids', 'attention_mask', 'token_type_ids')
)

In [None]:
# Train the model on mini-batches, then report the test-set mean squared error.
from sklearn.metrics import mean_squared_error
from torch.optim import AdamW  # transformers.AdamW is deprecated/removed upstream
from torch.utils.data import DataLoader, TensorDataset

# With num_labels=1 the model computes a mean-squared-error loss, which needs
# floating-point targets; unsqueeze(1) gives one target column per example.
y_train_tensor = torch.tensor(y_train, dtype=torch.float).unsqueeze(1)

# Feeding the whole dataset through BERT in a single forward pass does not fit
# in memory for realistic dataset sizes, so iterate over fixed-size batches.
batch_size = 32
train_loader = DataLoader(
    TensorDataset(input_ids_train, attention_masks_train, segment_ids_train, y_train_tensor),
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    TensorDataset(input_ids_test, attention_masks_test, segment_ids_test),
    batch_size=batch_size,  # no shuffle: predictions must stay aligned with y_test
)

# eps / weight_decay are set explicitly to the defaults of the old
# transformers.AdamW so that swapping the optimizer class does not silently
# change the update rule (torch's own defaults are 1e-8 and 0.01).
optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-6, weight_decay=0.0)
epochs = 10
for epoch in range(epochs):
    model.train()
    for input_ids, attention_mask, token_type_ids, labels in train_loader:
        optimizer.zero_grad()
        # The model was moved to `device` earlier; every batch tensor has to
        # be on that same device or the forward pass fails on CUDA.
        outputs = model(
            input_ids.to(device),
            attention_mask=attention_mask.to(device),
            token_type_ids=token_type_ids.to(device),
            labels=labels.to(device),
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()

# Evaluate the model
model.eval()
y_pred = []
with torch.no_grad():
    for input_ids, attention_mask, token_type_ids in test_loader:
        outputs = model(
            input_ids.to(device),
            attention_mask=attention_mask.to(device),
            token_type_ids=token_type_ids.to(device),
        )
        # logits are (batch, 1); flatten to a plain list of Python floats.
        y_pred.extend(outputs.logits.squeeze(1).tolist())
mse = mean_squared_error(y_test, y_pred)
print('Mean squared error:', mse)