In [1]:
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import pandas as pd
from tqdm import tqdm
import numpy as np

# Prefer the GPU whenever CUDA reports itself as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# Fully connected network: 768 inputs -> 360 -> 124 -> 2 outputs,
# with an ELU activation after each of the first two linear layers.
model_1 = nn.Sequential(
    nn.Linear(in_features=768, out_features=360),
    nn.ELU(),
    nn.Linear(in_features=360, out_features=124),
    nn.ELU(),
    nn.Linear(in_features=124, out_features=2),
)
# Module.to() moves the parameters in place and returns the same module.
model_1 = model_1.to(device)

In [3]:
# Independent deep copy of model_1 (same layers, separate parameter tensors).
# NOTE(review): model_2 is not referenced again in the visible cells — confirm it is still needed.
model_2 = copy.deepcopy(model_1)

In [4]:
def rearray(arr_str):
    """Parse the string form of a numeric array back into a 1-D float array.

    Accepts text such as ``"[ 0.1 -0.2\\n  0.3]"`` (optionally wrapped in single
    quotes). Brackets and commas are treated as separators rather than being
    deleted, and newlines are left for ``str.split`` to handle, so neighbouring
    numbers can never be fused into one token.

    Args:
        arr_str: String containing whitespace- and/or comma-separated numbers,
            optionally surrounded by ``[``/``]`` and single quotes.

    Returns:
        ``numpy.ndarray`` of dtype ``float`` holding the parsed values in order
        (empty when the string contains no numbers).

    Raises:
        ValueError: If a token cannot be converted to ``float``.
    """
    cleaned = arr_str.strip("'")
    # Turn every structural character into a space instead of removing it:
    # removing it could glue two adjacent numbers together.
    for separator in '[],':
        cleaned = cleaned.replace(separator, ' ')
    # split() with no argument splits on any whitespace run, newlines included.
    return np.array(cleaned.split(), dtype=float)

In [5]:
class DfDataset(Dataset):
    """Dataset over a DataFrame with a stringified-vector column and a ``score`` column.

    Each item is a triple ``(features, cls_lbl, reg_lbl)``:

    * ``features`` - the value of column ``col`` parsed by ``rearray``.
    * ``cls_lbl``  - ``0`` when ``score <= cls_threshold``, otherwise ``1``.
    * ``reg_lbl``  - the raw score for class 0, or ``score / reg_scale`` for class 1.

    Args:
        df: DataFrame containing ``col`` and a ``'score'`` column.
        col: Name of the column holding the stringified feature vector.
        cls_threshold: Scores at or below this value are labelled class 0.
        reg_scale: Divisor applied to class-1 scores to form the regression target.

    Raises:
        ValueError: If ``reg_scale`` is zero.
    """

    def __init__(self, df, col, cls_threshold=1, reg_scale=2800):
        if reg_scale == 0:
            raise ValueError("reg_scale must be non-zero")
        self.df = df
        self.col = col
        self.cls_threshold = cls_threshold
        self.reg_scale = reg_scale

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(features, cls_lbl, reg_lbl)`` for the row at position ``idx``."""
        raw_vector = self.df[self.col].iloc[idx]
        score = self.df['score'].iloc[idx]
        if score <= self.cls_threshold:
            # Low scores keep their raw value as the regression target.
            cls_lbl, reg_lbl = 0, score
        else:
            # Higher scores are scaled down by reg_scale.
            cls_lbl, reg_lbl = 1, score / self.reg_scale
        return rearray(raw_vector), cls_lbl, reg_lbl

In [6]:
# Load the compiled dataset and shuffle its rows.
# A fixed random_state makes the shuffle (and therefore the train/validation
# split taken from it) identical on every Restart & Run All.
df_c = pd.read_csv('../data/compiled.csv')
df_c = df_c.sample(frac=1, random_state=42).reset_index(drop=True)

In [7]:
# The first 70,000 rows of df_c are used for training; everything after is validation.
train_df = df_c.iloc[:70000]
val_df = df_c.iloc[70000:]

# Per-feature views: each keeps one feature column alongside the score.
train_title_df, val_title_df = train_df[['title', 'score']], val_df[['title', 'score']]
train_url_df, val_url_df = train_df[['url', 'score']], val_df[['url', 'score']]

In [8]:
# Wrap the title frames so each item yields (features, cls_lbl, reg_lbl).
train_ds = DfDataset(train_title_df, col='title')
val_ds = DfDataset(val_title_df, col='title')

In [9]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_ds, batch_size=32,
                          num_workers=2, shuffle=True)
# Validation only measures the loss, so a fixed order is used: shuffling adds
# nothing here and makes the evaluation batches differ from run to run.
val_loader = DataLoader(val_ds, batch_size=32,
                        num_workers=2, shuffle=False)

In [16]:
# Number of full passes over train_loader.
epochs = 500
optimizer = optim.AdamW(model_1.parameters(), lr=1e-5)
# The schedule length is derived from `epochs` so the two cannot drift apart;
# the scheduler is stepped once per training batch.
# NOTE(review): OneCycleLR presumably replaces the optimizer's lr=1e-5 with its
# own schedule peaking at max_lr — confirm against the PyTorch docs.
lr_scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=0.01,
    epochs=epochs,
    steps_per_epoch=len(train_loader),
)
# Module-form losses, kept as module-level names.
mse_loss = nn.MSELoss()
bce_loss = nn.BCELoss()
def loss_fn(output, Y):
    """Combined classification and masked-regression loss.

    Args:
        output: 2-D tensor; column 0 is read as the classification logit and
            column 1 as the regression prediction.
        Y: Pair ``(cls_lbl, reg_lbl)`` of float tensors with one entry per row
            of ``output``. ``cls_lbl`` holds 0/1 class labels.

    Returns:
        Scalar tensor: binary cross-entropy on column 0 plus mean squared error
        on column 1.
    """
    cls_lbl, reg_lbl = Y[0], Y[1]
    cls_logit, reg_op = output[:, 0], output[:, 1]
    # Work on raw logits: the fused sigmoid + BCE is numerically stable and
    # keeps a useful gradient where a separate sigmoid would saturate.
    bce_l = F.binary_cross_entropy_with_logits(cls_logit, cls_lbl)
    # Multiplying both sides by cls_lbl zeroes the regression error of class-0
    # rows; the mean is still taken over every row in the batch.
    mse_l = F.mse_loss(reg_op * cls_lbl, reg_lbl * cls_lbl)
    return bce_l + mse_l

In [17]:
def _batch_to_device(batch):
    """Cast every tensor of a loader batch to float32 and move it to `device`."""
    return tuple(t.to(torch.float32).to(device) for t in batch)


# One pass over train_loader with optimisation, then one pass over val_loader
# without gradients; the mean per-batch losses are printed after every epoch.
for epoch in range(epochs):
    tr_loss_per_batch = []
    val_loss_per_batch = []

    # Explicit mode switches keep the loop correct if dropout or batch-norm
    # layers are ever added to the model.
    model_1.train()
    for sample in train_loader:
        X, Y_cls, Y_reg = _batch_to_device(sample)
        # `target` holds the model's predictions (name kept for compatibility).
        target = model_1(X)
        loss = loss_fn(target, [Y_cls, Y_reg])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        tr_loss_per_batch.append(loss.item())
        # The scheduler is advanced once per batch, after the optimizer step.
        lr_scheduler.step()

    model_1.eval()
    with torch.no_grad():
        for sample in val_loader:
            X, Y_cls, Y_reg = _batch_to_device(sample)
            target = model_1(X)
            loss = loss_fn(target, [Y_cls, Y_reg])
            val_loss_per_batch.append(loss.item())

    print(f"Epoch: {epoch+1}/{epochs}")
    print(f"Training loss: {np.mean(tr_loss_per_batch)} Validation Loss: {np.mean(val_loss_per_batch)}")

Epoch: 1/500
Training loss: 0.6594508046205998 Validation Loss: 0.6570080648215053
Epoch: 2/500
Training loss: 0.6410306334441062 Validation Loss: 0.6589330164388346
Epoch: 3/500
Training loss: 0.6364170515014222 Validation Loss: 0.6572224629191926
Epoch: 4/500
Training loss: 0.6313526404841072 Validation Loss: 0.6481342680347614
Epoch: 5/500
Training loss: 0.6268800272856595 Validation Loss: 0.6489515794923131
Epoch: 6/500
Training loss: 0.621579896488399 Validation Loss: 0.6510958737268234
Epoch: 7/500
Training loss: 0.615714778956809 Validation Loss: 0.6453538011438169
Epoch: 8/500
Training loss: 0.6103444621103119 Validation Loss: 0.6436298140131247
Epoch: 9/500
Training loss: 0.6029729639644814 Validation Loss: 0.6457765114764435
Epoch: 10/500
Training loss: 0.5959663734493788 Validation Loss: 0.6515623577677023
Epoch: 11/500
Training loss: 0.5877935127431775 Validation Loss: 0.6546416356921577
Epoch: 12/500
Training loss: 0.5791902054489422 Validation Loss: 0.6603284573402649
Epo

tensor([  2,   1,  49,   1,   1,   2,   7,   1,   2, 144,   3,   3,   2,   2,
          3,   1])