In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.27.4-py3-none-any.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m83.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m110.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.3 tokenizers-0.13.2 transformers-4.27.4


In [3]:
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.metrics import accuracy_score


In [4]:
df_train = pd.read_csv("/content/drive/MyDrive/NEEWWWWW/DATA_NEW/PC/1000/train_800.csv")
df_test = pd.read_csv("/content/drive/MyDrive/NEEWWWWW/DATA_NEW/PC/1000/test_200.csv")

In [5]:
class CustomDataset(Dataset):
    def __init__(self, df, tokenizer, max_len):
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        text = str(self.df.iloc[idx]['text'])
        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )

        return {
            'input_ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'attention_mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
            'labels': torch.tensor(self.df.iloc[idx]['label'], dtype=torch.long)
        }

In [6]:
max_len = 128
batch_size = 32
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

train_dataset = CustomDataset(df_train, tokenizer, max_len)
test_dataset = CustomDataset(df_test, tokenizer, max_len)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [7]:
# class MultiChannelModel(torch.nn.Module):
#     def __init__(self):
#         super(MultiChannelModel, self).__init__()

#         self.bert = BertModel.from_pretrained('bert-base-uncased')
#         self.lstm = torch.nn.LSTM(input_size=768, hidden_size=128, num_layers=1, batch_first=True, bidirectional=True)
#         self.fc = torch.nn.Linear(256, 2)
    
#     def forward(self, input_ids, attention_mask):
#         bert_output = self.bert(input_ids, attention_mask=attention_mask)
#         lstm_output, _ = self.lstm(bert_output[0])
#         logits = self.fc(lstm_output[:, -1, :])

#         return logits

In [8]:
class MultiChannelModel(torch.nn.Module):
    def __init__(self):
        super(MultiChannelModel, self).__init__()

        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.lstm = torch.nn.LSTM(input_size=768, hidden_size=128, num_layers=2, batch_first=True, bidirectional=True)
        self.lstm2 = torch.nn.LSTM(input_size=256, hidden_size=64, num_layers=2, batch_first=True, bidirectional=True)
        self.fc = torch.nn.Linear(128, 2)
    
    def forward(self, input_ids, attention_mask):
        bert_output = self.bert(input_ids, attention_mask=attention_mask)
        lstm_output, _ = self.lstm(bert_output[0])
        lstm_output2, _ = self.lstm2(lstm_output)
        logits = self.fc(lstm_output2[:, -1, :])

        return logits

In [9]:
# class MultiChannelModel(torch.nn.Module):
#     def __init__(self):
#         super(MultiChannelModel, self).__init__()

#         self.bert1 = BertModel.from_pretrained('bert-base-uncased')
#         self.bert2 = BertModel.from_pretrained('bert-base-cased')
#         self.lstm = torch.nn.LSTM(input_size=1536, hidden_size=128, num_layers=1, batch_first=True, bidirectional=True)
#         self.fc = torch.nn.Linear(256, 2)
    
#     def forward(self, input_ids1, attention_mask1, input_ids2=None, attention_mask2=None):
#         bert_output1 = self.bert1(input_ids1, attention_mask=attention_mask1)[0]
        
#         if input_ids2 is not None and attention_mask2 is not None:
#             bert_output2 = self.bert2(input_ids2, attention_mask=attention_mask2)[0]
#             # concatenate the output of the two BERT models along the feature dimension
#             bert_output = torch.cat((bert_output1, bert_output2), dim=-1)
#         else:
#             bert_output = bert_output1
        
#         lstm_output, _ = self.lstm(bert_output)
#         logits = self.fc(lstm_output[:, -1, :])

#         return logits

In [10]:
# # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')
# model = MultiChannelModel().to(device)
# criterion = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

# num_epochs = 10

# for epoch in range(num_epochs):
#     train_loss = 0
#     train_correct = 0
#     train_total = 0
#     model.train()

#     for batch in train_dataloader:
#         input_ids1 = batch['input_ids1'].to(device)
#         attention_mask1 = batch['attention_mask1'].to(device)
#         input_ids2 = batch['input_ids2'].to(device)
#         attention_mask2 = batch['attention_mask2'].to(device)
#         labels = batch['labels'].to(device)

#         optimizer.zero_grad()
#         logits = model(input_ids1, attention_mask1, input_ids2, attention_mask2)
#         loss = criterion(logits, labels)
#         loss.backward()
#         optimizer.step()

#         train_loss += loss.item()
#         _, predicted = logits.max(1)
#         train_correct += predicted.eq(labels).sum().item()
#         train_total += labels.size(0)

#     train_acc = 100. * train_correct / train_total
#     train_loss /= len(train_dataloader)

#     test_loss = 0
#     test_correct = 0
#     test_total = 0
#     model.eval()
#     all_labels = []
#     all_preds = []

#     with torch.no_grad():
#         for batch in test_dataloader:
#             input_ids1 = batch['input_ids1'].to(device)
#             attention_mask1 = batch['attention_mask1'].to(device)
#             input_ids2 = batch['input_ids2'].to(device)
#             attention_mask2 = batch['attention_mask2'].to(device)
#             labels = batch['labels'].to(device)

#             logits = model(input_ids1, attention_mask1, input_ids2, attention_mask2)
#             loss = criterion(logits, labels)

#             test_loss += loss.item()
#             _, predicted = logits.max(1)
#             test_correct += predicted.eq(labels).sum().item()
#             test_total += labels.size(0)

#             all_labels.extend(labels.cpu().numpy().tolist())
#             all_preds.extend(predicted.cpu().numpy().tolist())

#     test_acc = 100. * test_correct / test_total
#     test_loss /= len(test_dataloader)

#     print(f"Epoch {epoch+1} - Train Loss: {train_loss:.4f} - Test Loss: {test_loss:.4f} - Train Acc: {train_acc:.4f} - Test Acc: {test_acc:.4f}")

# final_acc = accuracy_score(all_labels, all_preds)
# print(f"Final Accuracy: {final_acc:.4f}")


In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MultiChannelModel().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

num_epochs = 30

for epoch in range(num_epochs):
    train_loss = 0
    train_correct = 0
    train_total = 0
    model.train()

    for batch in train_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        logits = model(input_ids, attention_mask)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = logits.max(1)
        train_correct += predicted.eq(labels).sum().item()
        train_total += labels.size(0)

    train_acc = 100. * train_correct / train_total
    train_loss /= len(train_dataloader)

    test_loss = 0
    test_correct = 0
    test_total = 0
    model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for batch in test_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits = model(input_ids, attention_mask)
            loss = criterion(logits, labels)

            test_loss += loss.item()
            _, predicted = logits.max(1)
            test_correct += predicted.eq(labels).sum().item()
            test_total += labels.size(0)

            all_labels.extend(labels.cpu().numpy().tolist())
            all_preds.extend(predicted.cpu().numpy().tolist())

    test_acc = 100. * test_correct / test_total
    test_loss /= len(test_dataloader)

    print(f"Epoch {epoch+1} - Train Loss: {train_loss:.4f} - Test Loss: {test_loss:.4f} - Train Acc: {train_acc:.4f} - Test Acc: {test_acc:.4f}")

final_acc = accuracy_score(all_labels, all_preds)
print(f"Final Accuracy: {final_acc:.4f}")

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Epoch 1 - Train Loss: 0.6841 - Test Loss: 0.6633 - Train Acc: 50.0000 - Test Acc: 52.0000
Epoch 2 - Train Loss: 0.6153 - Test Loss: 0.6281 - Train Acc: 86.3750 - Test Acc: 78.5000
Epoch 3 - Train Loss: 0.5383 - Test Loss: 0.5554 - Train Acc: 93.5000 - Test Acc: 86.0000
Epoch 4 - Train Loss: 0.4612 - Test Loss: 0.5651 - Train Acc: 95.8750 - Test Acc: 77.5000
Epoch 5 - Train Loss: 0.3952 - Test Loss: 0.5345 - Train Acc: 96.3750 - Test Acc: 79.0000
Epoch 6 - Train Loss: 0.3367 - Test Loss: 0.4951 - Train Acc: 96.6250 - Test Acc: 83.0000
Epoch 7 - Train Loss: 0.3039 - Test Loss: 0.4380 - Train Acc: 95.7500 - Test Acc: 86.0000
Epoch 8 - Train Loss: 0.2473 - Test Loss: 0.4403 - Train Acc: 97.6250 - Test Acc: 86.5000
Epoch 9 - Train Loss: 0.2148 - Test Loss: 0.4615 - Train Acc: 97.8750 - Test Acc: 84.5000
Epoch 10 - Train Loss: 0.1938 - Test Loss: 0.4788 - Train Acc: 97.7500 - Test Acc: 83.5000
Epoch 11 - Train Loss: 0.1740 - Test Loss: 0.4440 - Train Acc: 97.8750 - Test Acc: 86.5000
Epoch 12

In [12]:
torch.save(model.state_dict(), "/content/drive/MyDrive/NEEWWWWW/DATA_NEW/PC/1000/multi_channel_model.pt")