In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
import torch.nn.functional as F
from torchsummary import summary
from transformers import BertModel, BertTokenizer
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.utils import shuffle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
BERT_PATH = 'bert-base-uncased'
MAX_LEN = 64
DEVICE = "cuda"
BZ = 32
WD = 1e-6
LR = 1e-3
EPS = 100

In [3]:
merge = pd.read_csv("../data/merge.csv", index_col=0)
merge = shuffle(merge)
ds_size = merge.shape[0]

In [7]:
class Stock(Dataset):
    def __init__(self, df) -> None:
        self.df = df
        self.score = [float(score) for score in df["score"]]
        self.tokenizer = BertTokenizer.from_pretrained(BERT_PATH)
        self.content = [self.tokenizer(text,padding='max_length', 
                       max_length = MAX_LEN, 
                       truncation=True,
                       return_tensors="pt") for text in df["content"]]

    def __getitem__(self, idx):
        return self.score[idx], self.content[idx]

    def __len__(self):
        return self.df.shape[0]

In [8]:
t0, t1, t2 = np.split(merge.sample(frac=1, random_state=42), [int(.8*ds_size), int(.9*ds_size)])
dataset = {x: Stock(s) for x, s in [("train", t0), ("valid", t1), ("test", t2)]}
loader = {x: DataLoader(dataset[x], batch_size=BZ, num_workers=24, shuffle=True) 
                                            for x in ["train", "valid", "test"]}

In [9]:
for state in ["train", "valid", "test"]:
    sz = 0
    for num, t in loader[state]:
        sz += len(num)
    print(sz)

20222
2528
2528


In [10]:
class Bert4price(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.bert = BertModel.from_pretrained(BERT_PATH)
        self.fc = nn.Sequential(
            nn.Linear(768, 128),
            nn.ReLU(),
            nn.Linear(128, 16),
            nn.ReLU(),
            nn.Linear(16, 1)
        )
    def forward(self, text, mask):
        _, output = self.bert(input_ids=text, attention_mask=mask,return_dict=False)
        output = self.fc(output)
        return output

In [11]:
bert = Bert4price()
bert = bert.to(DEVICE)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [12]:
crierion = nn.MSELoss()
optimizer = torch.optim.Adam(bert.parameters(), lr = LR, weight_decay=WD)

In [13]:
for idx in range(EPS):
    for state in ["train", "valid"]:
        tqdm_bar = tqdm(loader[state])
        for value, content in tqdm_bar:
            text, mask = content["input_ids"].squeeze(1), content["attention_mask"]
            text, mask = text.to(DEVICE), mask.to(DEVICE)
            value = value.to(DEVICE)
            output = bert(text, mask)
            loss = crierion(output, value)
            if state == "train":
                optimizer.zero_grad() 
                loss.backward()
                optimizer.step()
            print(loss)
        

  return F.mse_loss(input, target, reduction=self.reduction)
  0%|          | 0/632 [00:01<?, ?it/s]


RuntimeError: Found dtype Double but expected Float

In [16]:
value, content = next(iter(loader["train"]))
print(value.dtype)

torch.float64


In [25]:
print(type(output), value.dtype)

<class 'torch.Tensor'> torch.float64


In [26]:
print(value.dtype, torch.Tensor(output, dtype=torch.float64))

TypeError: new() received an invalid combination of arguments - got (Tensor, dtype=torch.dtype), but expected one of:
 * (*, torch.device device)
      didn't match because some of the keywords were incorrect: dtype
 * (torch.Storage storage)
 * (Tensor other)
 * (tuple of ints size, *, torch.device device)
 * (object data, *, torch.device device)


In [9]:
num, con = next(iter(stock_loader))
mask = con["attention_mask"].to(DEVICE)
text = con["input_ids"].squeeze(1).to(DEVICE)

In [11]:
print(num)
print(text)

tensor([ -6.3071,   5.8796,  -7.9196,   0.0000,   2.0390,  -4.4554,   3.2570,
         -5.8994,   0.0000,   2.4213,   0.0000,   9.9534,   0.0000,  13.1478,
        -11.0043,  -2.5806, -11.3261, -10.1483,  -9.4243,  -3.9139,   3.1169,
          7.9885,   5.2445,   4.3514,   6.9842,   7.4411,  10.8778,   4.3455,
          6.2412,  12.6855,  13.0587,  -0.9754], dtype=torch.float64)
tensor([[  101, 10474,  2522,  ...,     0,     0,     0],
        [  101,  1996,  2991,  ...,     0,     0,     0],
        [  101, 18804,  4518,  ...,     0,     0,     0],
        ...,
        [  101,  2327,  1019,  ...,     0,     0,     0],
        [  101,  2327,  1019,  ...,     0,     0,     0],
        [  101,  2005,  9090,  ...,     0,     0,     0]], device='cuda:0')


In [22]:
output = bert(text, mask)