In [4]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from functools import reduce
from sklearn.utils.class_weight import compute_class_weight
from transformers import DistilBertTokenizer, DistilBertModel, AutoModelForSequenceClassification
from transformers import BertTokenizer, BertModel
from collections import Counter
import xgboost

dataset = pd.read_csv("clean_COVIDSenti.csv")
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

def tokenize(tweet):
    tokenized = tokenizer(tweet, padding="max_length", max_length = 47)
    return tokenized

tweets, labels = dataset['tweet'], dataset['label'] + 1
tokenized_tweets = tweets.map(tokenize)

In [25]:
from torch.utils.data import DataLoader, WeightedRandomSampler, Dataset
#Turning into dataloaders
batch_size = 64
device = torch.device("mps")

class TweetDataset(Dataset):
    def __init__(self, tweets, labels):
        self.x = tweets
        self.y = labels
        
    def __getitem__(self, index):
        x = self.x.iloc[index]
        x = {key: torch.tensor(val) for key, val in x.items()}
        y = self.y.iloc[index]
        return (x, y)
    
    def __len__(self):
        return len(self.x)

#Dealing with imbalanced class weights for train dataset

#Sampling data and creating train/val/test sets
data = TweetDataset(tokenized_tweets, labels)
data_loader = DataLoader(data, batch_size=batch_size, shuffle = False)

model = DistilBertModel.from_pretrained('distilbert-base-uncased')
print(model.named_parameters)

full_output = None
for inputs, labels in data_loader:
        inputs['input_ids'], inputs['attention_mask'] = inputs['input_ids'].to(device), inputs['attention_mask'].to(device)
        labels = labels.to(device)
        output = model(**inputs)
        output = output['last_hidden_state']
        if full_output == None:
            full_output = output
        else:
            full_output = torch.concat((output, full_output))

print(full_output)

<bound method Module.named_parameters of DistilBertModel(
  (embeddings): Embeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (layer): ModuleList(
      (0-5): 6 x TransformerBlock(
        (attention): DistilBertSdpaAttention(
          (dropout): Dropout(p=0.1, inplace=False)
          (q_lin): Linear(in_features=768, out_features=768, bias=True)
          (k_lin): Linear(in_features=768, out_features=768, bias=True)
          (v_lin): Linear(in_features=768, out_features=768, bias=True)
          (out_lin): Linear(in_features=768, out_features=768, bias=True)
        )
        (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (ffn): FFN(
          (dropout): Dropout(p=0.1, inplace=False)
          (lin1): Linear(in_features=768, out_feat

AttributeError: 'Tensor' object has no attribute 'iloc'

In [None]:
class SelfAttention(nn.Module):
    def __init__(self, d_model):
        super().__init__()
        self.dim_sqrt = torch.sqrt(d_model)
        self.Wq = nn.Parameter(torch.rand((d_model, d_model)))
        self.Wv = nn.Parameter(torch.rand((d_model, d_model)))
        self.Wk = nn.Parameter(torch.rand((d_model, d_model)))
        
        self.softmax = nn.Softmax(dim = -1)

    def forward(self, x):
        Q = torch.matmul(x, self.Wq)
        V = torch.matmul(x, self.Wv)
        K = torch.matmul(x, self.Wk)
        
        output = torch.matmul(Q, K.transpose(-2, -1)) / self.dim_sqrt
        output = self.softmax(output)
        output = torch.matmul(output, V)
        
        return output
    
class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, heads):
        super().__init__()
        self.n_heads = heads
        self.dim_sqrt = torch.sqrt(d_model)
        self.matrices = []
        for _ in heads:
            Wq = nn.Parameter(torch.rand((d_model, d_model)))
            Wv = nn.Parameter(torch.rand((d_model, d_model)))
            Wk = nn.Parameter(torch.rand((d_model, d_model)))
            
            head_dict = {
                'Wq': Wq, 
                'Wv': Wv, 
                'Wk': Wk
            }
            
            self.matrices.append(head_dict)
            
    def forward(self, x):
        outputs = []
        for head in self.n_heads:
            head_dict = self.matrices[head]
            
            Q = torch.matmul(x, head_dict['Wq'])
            V = torch.matmul(x, head_dict['Wv'])
            K = torch.matmul(x, head_dict['Wk'])
            
            output = torch.matmul(Q, K.transpose(-2, -1)) / self.dim_sqrt
            output = self.softmax(output)
            output = torch.matmul(output, V)
            
            outputs.append(output)
        
        outputs = torch.concat(outputs)
        outputs = outputs.mean(dim = 0)
        
        return outputs
        