In [1]:
import pandas as pd
import numpy as np

In [2]:
data_path = "C:\\Sowmya\\Personal\\PYTORCH\\Pytorch_stuffs\\BERT\\Twitter Sentiment Analysis\\twitter.csv"

col = ['Tweet ID', 'entity', 'sentiment', 'Tweet content']
df = pd.read_csv(data_path, names=col)

In [3]:
df.head()

Unnamed: 0,Tweet ID,entity,sentiment,Tweet content
0,3364,Facebook,Irrelevant,I mentioned on Facebook that I was struggling ...
1,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...
2,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it funct...
3,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking,..."
4,4433,Google,Neutral,Now the President is slapping Americans in the...


In [4]:
df.drop(['Tweet ID', 'entity'], axis=1, inplace=True)
df.head()

Unnamed: 0,sentiment,Tweet content
0,Irrelevant,I mentioned on Facebook that I was struggling ...
1,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...
2,Negative,@Microsoft Why do I pay for WORD when it funct...
3,Negative,"CSGO matchmaking is so full of closet hacking,..."
4,Neutral,Now the President is slapping Americans in the...


In [5]:
df.isnull().sum()

sentiment        0
Tweet content    0
dtype: int64

In [6]:
df.sentiment.unique()

array(['Irrelevant', 'Neutral', 'Negative', 'Positive'], dtype=object)

In [7]:
sentiment_mapping = {
    'Irrelevant': 0,
    'Neutral': 1,
    'Positive': 2,
    'Negative': 3
}

df['sentiment'] = df['sentiment'].map(sentiment_mapping)
df.head()

Unnamed: 0,sentiment,Tweet content
0,0,I mentioned on Facebook that I was struggling ...
1,1,BBC News - Amazon boss Jeff Bezos rejects clai...
2,3,@Microsoft Why do I pay for WORD when it funct...
3,3,"CSGO matchmaking is so full of closet hacking,..."
4,1,Now the President is slapping Americans in the...


In [8]:
df['sentiment'].value_counts()

sentiment
1    285
2    277
3    266
0    172
Name: count, dtype: int64

In [9]:
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(df, test_size=0.15, random_state=42)

In [10]:
len(train_data), len(test_data)

(850, 150)

In [11]:
df.columns

Index(['sentiment', 'Tweet content'], dtype='object')

In [12]:
MAX_LEN = 256
TRAIN_BATCH_SIZE = 16
TEST_BATCH_SIZE = 8

In [13]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer
from transformers import AdamW

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
class BERTDataset:
    def __init__(self, tweet, sentiment, tokenizer):
        self.tweet = tweet
        self.sentiment = sentiment 
        self.tokenizer = tokenizer
        self.max_len = MAX_LEN 

    def __len__(self):
        return len(self.tweet)
    
    def __getitem__(self, idx):
        tweet = str(self.tweet[idx])
        tweet = " ".join(tweet.split())

        sentiment = self.sentiment[idx]

        inputs = self.tokenizer.encode_plus(
            tweet, None, add_special_tokens=True, max_length=self.max_len, padding='max_length', truncation=True, return_tensors='pt'
        )

        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]
        token_type_ids = inputs["token_type_ids"]

        return {
            "ids": torch.tensor(ids, dtype=torch.long).flatten(),
            "mask": torch.tensor(mask, dtype=torch.long).flatten(),
            "token_type_ids": torch.tensor(token_type_ids, dtype=torch.long).flatten(),
            "sentiment": torch.tensor(sentiment, dtype=torch.float),
            # "sentiment": torch.FloatTensor(sentiment)
        }

In [15]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")



In [16]:
train_df = BERTDataset(train_data['Tweet content'].values, train_data['sentiment'].values, tokenizer)
valid_df = BERTDataset(test_data['Tweet content'].values, test_data['sentiment'].values, tokenizer)

In [17]:
len(train_df), len(valid_df)

(850, 150)

In [18]:
train_data_loader = torch.utils.data.DataLoader(
    train_df,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=0
)

valid_data_loader = torch.utils.data.DataLoader(
    valid_df,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_workers=0
)

In [19]:
len(train_data_loader), len(valid_data_loader)

(54, 19)

In [20]:
device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
device

'cpu'

In [21]:
num_labels = len(train_data['sentiment'].unique())
num_labels

4

In [22]:
class BERTClassifier(nn.Module):
    def __init__(self):
        super(BERTClassifier, self).__init__()
        self.bert_model = BertModel.from_pretrained("bert-base-uncased", return_dict=True)
        self.dropout = nn.Dropout(0.3)
        self.linear = nn.Linear(768, 4)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, ids, mask, token_type_ids):
        outputs = self.bert_model(ids, attention_mask=mask, token_type_ids=token_type_ids)
        output_dropout = self.dropout(outputs.pooler_output)
        output = self.linear(output_dropout)
        output = self.softmax(output)

        return output


In [23]:
model = BERTClassifier()
model.to(device)

BERTClassifier(
  (bert_model): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elem

In [24]:
# def loss_fn(outputs, targets):
#     return nn.CrossEntropyLoss()(outputs, targets)

loss_fn = nn.CrossEntropyLoss().to(device)


optimizer = AdamW(model.parameters(), lr=2e-5)

EPOCHS = 3
n_total_steps = len(train_data_loader)



In [25]:
val_targets = []
val_outputs = []

In [26]:
def train_fn(train_data_loader, valid_data_loader, epochs, model, optimizer, device):

    model.train()

    for epoch in range(1, epochs+1):

        train_loss = 0
        valid_loss = 0

        for i, data in enumerate(train_data_loader):
            ids = data["ids"].to(device, dtype=torch.long)
            mask = data["mask"].to(device, dtype=torch.long)
            token_type_ids = data["token_type_ids"].to(device, dtype=torch.long)
            sentiments = data["sentiment"].to(device, dtype=torch.float)

            outputs = model(ids, mask, token_type_ids)
            optimizer.zero_grad()

            _, preds = torch.max(outputs, dim=1)
            preds = preds.float().requires_grad_()

            loss = loss_fn(preds, sentiments)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss = train_loss + ((1/(i+1)) * (loss.item() - train_loss))

            if i % 10 == 0:
                print(f"Epoch {epoch}/ {epochs}, batch {i+1}/ {n_total_steps}, loss = {loss.item():.4f}")
        
        print(f"####### Epoch {epoch}: Training End ################")

        print(f'###### Epoch {epoch}: Validation Start ############')

        model.eval()

        with torch.no_grad():
            for i, data in enumerate(valid_data_loader):
                ids = data["ids"].to(device, dtype=torch.long)
                mask = data["mask"].to(device, dtype=torch.long)
                token_type_ids = data["token_type_ids"].to(device, dtype=torch.long)
                sentiments = data["sentiment"].to(device, dtype=torch.float)

                outputs = model(ids, mask, token_type_ids)

                _, preds = torch.max(outputs, dim=1)
                preds = preds.float()

                loss = loss_fn(preds, sentiments)

                valid_loss = valid_loss + ((1/(i+1)) * (loss.item() - train_loss))

                val_targets.append(sentiments.cpu().detach().numpy().tolist())
                val_outputs.append(torch.sigmoid(outputs).cpu().detach().numpy().tolist())

            print(f"####### Epoch {epoch}: Validation End ################")

            train_loss = train_loss/len(train_data_loader)
            valid_loss = valid_loss/len(valid_data_loader)

    print(f"Epoch: {epoch} \nAverage Training Loss: {train_loss:.6f} \tAverage Validation Loss: {valid_loss:.6f}")

    return model




In [27]:
trained_model = train_fn(train_data_loader=train_data_loader,
                         valid_data_loader=valid_data_loader,
                         epochs=EPOCHS,
                         model=model,
                         optimizer=optimizer,
                         device=device)

  "ids": torch.tensor(ids, dtype=torch.long).flatten(),
  "mask": torch.tensor(mask, dtype=torch.long).flatten(),
  "token_type_ids": torch.tensor(token_type_ids, dtype=torch.long).flatten(),


Epoch 1/ 3, batch 1/ 54, loss = 74.1608
Epoch 1/ 3, batch 11/ 54, loss = 67.6058
Epoch 1/ 3, batch 21/ 54, loss = 97.3934
Epoch 1/ 3, batch 31/ 54, loss = 70.1361
Epoch 1/ 3, batch 41/ 54, loss = 67.5592
Epoch 1/ 3, batch 51/ 54, loss = 69.2395
####### Epoch 1: Training End ################
###### Epoch 1: Validation Start ############
####### Epoch 1: Validation End ################
Epoch 2/ 3, batch 1/ 54, loss = 85.9502
Epoch 2/ 3, batch 11/ 54, loss = 80.4051
Epoch 2/ 3, batch 21/ 54, loss = 69.3147
Epoch 2/ 3, batch 31/ 54, loss = 94.2680
Epoch 2/ 3, batch 41/ 54, loss = 74.8599
Epoch 2/ 3, batch 51/ 54, loss = 85.9502
####### Epoch 2: Training End ################
###### Epoch 2: Validation Start ############
####### Epoch 2: Validation End ################
Epoch 3/ 3, batch 1/ 54, loss = 74.8599
Epoch 3/ 3, batch 11/ 54, loss = 60.9970
Epoch 3/ 3, batch 21/ 54, loss = 66.5421
Epoch 3/ 3, batch 31/ 54, loss = 61.1102
Epoch 3/ 3, batch 41/ 54, loss = 66.5421
Epoch 3/ 3, batch 51/ 

In [36]:
torch.save(trained_model.state_dict(), "C:\\Sowmya\\Personal\\PYTORCH\\Pytorch_stuffs\\BERT\\Twitter Sentiment Analysis\\twitter_model.pth")

In [38]:
example = df['Tweet content'][1]

encodings = tokenizer.encode_plus(
    example,
    None,
    add_special_tokens=True,
    max_length=MAX_LEN,
    padding='max_length',
    truncation=True,
    return_attention_mask=True,
    return_tensors='pt'
)

model.eval()

with torch.no_grad():
    input_ids = encodings['input_ids'].to(device, dtype=torch.long)
    attention_mask = encodings['attention_mask'].to(device, dtype=torch.long)
    token_type_ids = encodings['token_type_ids'].to(device, dtype=torch.long)

    output = model(input_ids, attention_mask, token_type_ids)

    final_output = np.argmax(output, axis=1).detach().cpu().numpy().tolist()
    print("Final Output: ", final_output)
    print("ANSWER: ", df['sentiment'][1])

AttributeError: 'collections.OrderedDict' object has no attribute 'eval'