In [None]:
# drive mount. colab에 내 구글 드라이브 연결
from google.colab import drive
drive.mount('/content/drive')

!pip install transformers &> /dev/null
!pip install captum &> /dev/null

In [None]:
import torch
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

from transformers import BertTokenizer
from transformers import AutoTokenizer  # bert 모델에 따라 알맞은 tokenizer를 자동으로 로드
from transformers import BertForSequenceClassification
from transformers import get_linear_schedule_with_warmup

from captum.attr import visualization # XAI관련 라이브러리의 시각화 함수

from sklearn.metrics import accuracy_score

import os
import json
import pickle
import numpy as np
import random
from collections import OrderedDict

In [None]:
# Pick the GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

# Sanity check: print the torch version and place a small tensor on the
# selected device. Using .to(device) instead of .cuda() keeps this cell from
# crashing on a CPU-only runtime.
print(torch.__version__)
print(torch.tensor([1.0, 2.0]).to(device))

In [None]:
# Pass negative_regression=True to train on flipped ratings (6 - rating), i.e. to learn negative words.
def development(model_saving_name, negative_regression=False, epochs=4, batch_size=8,
                learning_rate=1e-4, max_grad_norm=1.0,
                data_dir="/content/drive/MyDrive/CS470_team_2in1/dataset",
                model_dir="/content/drive/MyDrive/CS470_team_2in1/colab/model"):
  """Fine-tune BERT as a single-output regressor, keep the best weights, and test them.

  The model is trained with a summed squared-error loss. After every epoch it is
  evaluated on the validation set, and the weights with the lowest validation loss
  are saved to ``model_dir/model_saving_name`` and used for the final test pass.

  Relies on the module-level ``device`` chosen earlier in the notebook.

  Args:
    model_saving_name: File name of the saved state dict inside ``model_dir``.
    negative_regression: If True, targets are replaced by ``6 - rating``.
    epochs: Number of training epochs (the BERT authors recommend between 2 and 4).
    batch_size: Batch size used by all three dataloaders.
    learning_rate: AdamW learning rate.
    max_grad_norm: Gradient-norm clipping threshold applied before each optimizer step.
    data_dir: Directory containing the ``amazon_book_{train,val,test}_rev.npy`` files.
    model_dir: Directory the best state dict is written to (created if missing).

  Returns:
    dict with per-epoch lists under ``'train_loss'`` and ``'val_loss'`` and the
    final float under ``'test_loss'`` (all are per-sample squared errors).
  """

  ######## generate dataloader ########
  def edit_data(path, shuffle=False):
    """Load one .npy split and return (dataloader, number of samples)."""
    # NOTE(review): allow_pickle=True can execute arbitrary code embedded in the
    # file; only load files you produced yourself.
    input_ids, attention_masks, ratings, _ = np.load(path, allow_pickle=True)
    if negative_regression:
      # Flip the target scale: -(rating - 6) == 6 - rating.
      # assumes ratings lie on a 1-5 scale so the flipped range is also 1-5 — TODO confirm
      ratings = 6 - np.asarray(ratings)
    # Shape (N,) -> (N, 1) so labels line up with the model's single output.
    ratings = np.expand_dims(ratings, axis=-1)

    # Convert to tensors and make dataset
    input_ids = torch.cat(input_ids.tolist(), dim=0)
    attention_masks = torch.cat(attention_masks.tolist(), dim=0)
    labels = torch.tensor(ratings.tolist(), dtype=torch.float32)
    dataset = TensorDataset(input_ids, attention_masks, labels)

    sampler = RandomSampler(dataset) if shuffle else SequentialSampler(dataset)
    dataloader = DataLoader(dataset, sampler=sampler, batch_size=batch_size)
    return dataloader, len(dataset)

  train_dataloader, train_size = edit_data(os.path.join(data_dir, "amazon_book_train_rev.npy"), shuffle=True)
  val_dataloader, val_size = edit_data(os.path.join(data_dir, "amazon_book_val_rev.npy"), shuffle=False)
  test_dataloader, test_size = edit_data(os.path.join(data_dir, "amazon_book_test_rev.npy"), shuffle=False)
  ##################################


  ######## prepare training ########
  model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1)
  # Follow the notebook-wide device choice instead of forcing CUDA.
  model.to(device)

  optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, eps=1e-8)

  total_steps = len(train_dataloader) * epochs
  scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

  # Summed squared error per batch; epoch totals are divided by the sample count below.
  criterion = torch.nn.MSELoss(reduction='sum')
  ##################################


  def batch_loss(batch):
    """Run the model on one batch and return its summed squared error."""
    b_input_ids, b_input_mask, b_labels = (t.to(device) for t in batch)
    preds = model(input_ids=b_input_ids, attention_mask=b_input_mask)[0]
    return criterion(preds, b_labels)

  def evaluate(dataloader, data_num):
    """Return the per-sample loss of the current model over `dataloader`."""
    model.eval()
    total = 0.0
    with torch.no_grad():
      for batch in dataloader:
        total += batch_loss(batch).item()
    return total / data_num

  def snapshot():
    """Copy the current weights to CPU, detached from the live parameters."""
    # .cpu() alone returns the same tensor when the model already lives on the
    # CPU, which would let later training overwrite the "best" weights.
    return OrderedDict((k, v.detach().cpu().clone()) for k, v in model.state_dict().items())


  ######## train/val/test #########
  results = {'train_loss': [], 'val_loss': [], 'test_loss': None}

  best_val_loss = float('inf')
  best_model_state_dict = None

  for epoch in range(epochs):

    print('\n======== Epoch {:} / {:} ========\n'.format(epoch + 1, epochs))

    # train
    e_train_loss = 0.0
    model.train()
    for batch in train_dataloader:
      loss = batch_loss(batch)
      e_train_loss += loss.item()

      optimizer.zero_grad()
      loss.backward()
      # Clip after backward() so the freshly computed gradients are the ones
      # being limited; this helps prevent the "exploding gradients" problem.
      torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
      optimizer.step()

      scheduler.step()
    e_train_loss /= train_size
    results['train_loss'].append(e_train_loss)

    # validation
    e_val_loss = evaluate(val_dataloader, val_size)
    results['val_loss'].append(e_val_loss)

    # save best model weights
    if best_val_loss > e_val_loss:
      best_val_loss = e_val_loss
      best_model_state_dict = snapshot()

    print('train loss:', e_train_loss)
    print('val loss:', e_val_loss)

  # No snapshot exists when epochs == 0 or every validation loss was NaN;
  # fall back to the current weights instead of crashing in load_state_dict.
  if best_model_state_dict is None:
    best_model_state_dict = snapshot()

  # load best validation loss model and save
  model.load_state_dict(best_model_state_dict)
  os.makedirs(model_dir, exist_ok=True)
  torch.save(best_model_state_dict, os.path.join(model_dir, model_saving_name))

  # test
  e_test_loss = evaluate(test_dataloader, test_size)
  results['test_loss'] = e_test_loss
  print('test loss:', e_test_loss)

  return results

In [None]:
# Train both regressors in turn: first on the original ratings, then on the
# flipped ratings (negative_regression=True).
for saving_name, flip_ratings in (("positive_regression.pt", False),
                                  ("negative_regression.pt", True)):
    development(model_saving_name=saving_name, negative_regression=flip_ratings)