In [2]:
import pandas as pd
from transformers import MarianTokenizer, MarianMTModel, AlbertForSequenceClassification, AlbertTokenizer
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader
from tqdm import tqdm
import networkx as nx
import torch

In [3]:
Dataset_file = "TV.csv"
df = pd.read_csv(Dataset_file)

In [4]:
label_encoder = LabelEncoder()
label_encoder.fit(df['Cảm xúc'])

In [5]:
model_name = 'albert-base-v2'  # You can choose a different variant
tokenizer = AlbertTokenizer.from_pretrained(model_name)
model = AlbertForSequenceClassification.from_pretrained(model_name, num_labels=4)

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.LayerNorm.weight', 'predictions.dense.bias', 'predictions.bias', 'predictions.decoder.bias', 'predictions.LayerNorm.bias', 'predictions.decoder.weight', 'predictions.dense.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You sho

In [6]:
class SentimentDataset(df):
    def __init__(self, dataframe, tokenizer, label_encoder, max_length):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.label_encoder = label_encoder
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        text = self.data.iloc[index]['Nhận xét đánh giá']
        sentiment = self.data.iloc[index]['Cảm xúc']

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
            truncation=True
        )

        input_ids = encoding['input_ids'].squeeze()
        attention_mask = encoding['attention_mask'].squeeze()

        label = self.label_encoder.transform([sentiment])[0]

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'label': label
        }

In [9]:
max_seq_length = 128
batch_size = 32
asdf = pd.read_csv(Dataset_file)

train_dataset = SentimentDataset(asdf, tokenizer, label_encoder, max_seq_length)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
model.to(device)

# Define optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
loss_fn = torch.nn.CrossEntropyLoss()

TypeError: 'DataFrame' object is not callable

In [None]:
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)

    for batch in progress_bar:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)  # Use the 'label' field from the dataset

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

        progress_bar.set_postfix({'Loss': loss.item(), 'GPU Memory': torch.cuda.memory_allocated(device=device)})

    average_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs} - Average Loss: {average_loss}")

# Save the trained model
model.save_pretrained("albert_sentiment_model")