In [None]:
import argparse, yaml
# Build the parser with notebook-friendly defaults; parsing an explicit empty
# argv keeps the Jupyter kernel's own command-line flags out of argparse.
parser = argparse.ArgumentParser()
parser.add_argument('--yaml', type=str, default='./configs/Books.yaml')
args = parser.parse_args([])
# Use a context manager so the config file handle is closed deterministically
# instead of being left to the garbage collector.
with open(args.yaml, 'r') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

In [None]:
import json
# Load the item-metadata dictionary; the context manager closes the file
# handle as soon as parsing finishes.
with open('/home/doyooni303/experiments/LLMRec/data/amazon/Books/Books_meta_name_dict.json', 'r') as meta_file:
    meta = json.load(meta_file)

In [None]:
# Peek at a single (key, value) pair to see how the metadata dict is laid out.
# Nothing is printed (and k/v stay unbound) when the dict is empty.
first_entry = next(iter(meta.items()), None)
if first_entry is not None:
    k, v = first_entry
    print(k, v)

In [None]:
import argparse
import os,sys
sys.path.append('/home/doyooni303/experiments/LLMRec/ReLLMRec')
os.chdir('/home/doyooni303/experiments/LLMRec/ReLLMRec')
import yaml

import torch
import torch.nn as nn
from torch.optim import AdamW
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import AdamW
from peft import get_peft_model, LoraConfig, TaskType

from src.dataset.dataset import AmazonDataset
# Build the parser with notebook-friendly defaults; parsing an explicit empty
# argv keeps the Jupyter kernel's own command-line flags out of argparse.
parser = argparse.ArgumentParser()
parser.add_argument('--yaml', type=str, default='./configs/Books.yaml')
args = parser.parse_args([])
# Context manager so the config file handle is closed right after parsing.
with open(args.yaml, 'r') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Fall back to CPU when CUDA is unavailable; the GPU index comes from the config.
device = torch.device(f'cuda:{config["gpu"]}' if torch.cuda.is_available() else 'cpu')
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
torch.random.manual_seed(config['seed'])
# Make the project root importable before pulling in project modules.
sys.path.append(os.getcwd())
from src.models.model import CandiRec

model = CandiRec(config).to(device=device, dtype=torch.float16)
# Deliberate re-import: `from transformers import AdamW` earlier in this cell
# shadows the torch.optim class, and the optimizer below must be torch's.
from torch.optim import AdamW
optimizer = AdamW(model.parameters(), lr=float(config['lr']))
validloader = DataLoader(AmazonDataset(config, 'valid'), batch_size=config['batch_size'], shuffle=False, num_workers=4)

# Grab one validation batch and move only the token tensors to the model's
# device; every other entry is left exactly as the DataLoader produced it.
batch = next(iter(validloader))
for k, v in batch.items():
    if ("input_ids" in k) or ('attention_mask' in k):
        batch[k] = v.to(device)

# Inference only: skip autograd bookkeeping so no graph is kept alive for the
# forward pass.
with torch.no_grad():
    top_k_indices, preds = model(batch, flag='valid')


In [None]:
# Convert the batch's 'target_item_id' entry to a NumPy array for the metric
# code (presumably the ground-truth item ids - confirm against AmazonDataset).
# The setup cell only moves input_ids/attention_mask entries to `device`, so
# this tensor is not transferred by that loop.
target_ids = batch['target_item_id'].numpy()

In [None]:
# Read the model's `max_k_indices` attribute (presumably populated by the
# forward pass - confirm in CandiRec), detach it from autograd, copy it to
# host memory and expose it as a NumPy array.
max_k_indices = model.max_k_indices.detach().cpu().numpy()


In [None]:
import numpy as np
from typing import Union


class Metric(object):
    """Stateless top-K ranking metrics for one-target-per-row evaluation.

    Every method is a static method taking the per-row target ids and a 2-D
    array of ranked candidate ids (one row per target, best candidate first).
    """

    def __init__(self):
        pass

    @staticmethod
    def hits(target_ids, top_k_ids):
        """Count the rows whose target id appears among that row's candidates.

        Args:
            target_ids: iterable of target ids, one per row.
            top_k_ids: array of candidate ids; a 1-D array is treated as a
                single row.

        Returns:
            int: number of rows with a hit. Rows are paired with ``zip``, so
            any surplus on either side is ignored.
        """
        if len(top_k_ids.shape) == 1:
            top_k_ids = top_k_ids.reshape(1, -1)
        return sum(1 for target_id, row in zip(target_ids, top_k_ids) if target_id in row)

    @staticmethod
    def hit_ratio(target_ids, top_k_ids):
        """Fraction of targets that were hit, rounded to 5 decimals.

        Returns 0.0 for an empty batch instead of dividing by zero.
        """
        total = len(target_ids)
        if total == 0:
            return 0.0
        return round(Metric.hits(target_ids, top_k_ids) / total, 5)

    @staticmethod
    def precision(taret_ids, top_k_ids, K):
        """Hit ratio divided by the cutoff ``K``, rounded to 5 decimals.

        The parameter name ``taret_ids`` (sic) is kept so keyword callers
        keep working. ``K`` must be non-zero.
        """
        hit_ratio = Metric.hit_ratio(taret_ids, top_k_ids)
        return round(hit_ratio / K, 5)

    @staticmethod
    def recall(taret_ids, top_k_ids):
        """Recall with a single target per row, i.e. the hit ratio."""
        return Metric.hit_ratio(taret_ids, top_k_ids)

    @staticmethod
    def F1(taret_ids, top_k_ids, K):
        """Harmonic mean of ``precision`` and ``recall`` (0.0 if both are 0)."""
        prec = Metric.precision(taret_ids, top_k_ids, K)
        recall = Metric.recall(taret_ids, top_k_ids)

        if (prec + recall) == 0:
            return 0.0
        return round(2 * prec * recall / (prec + recall), 5)

    @staticmethod
    def NDCG(target_ids, top_k_ids, K):
        """Per-row and mean NDCG@K with exactly one relevant item per row.

        Args:
            target_ids: target ids, one per row.
            top_k_ids: ranked candidate ids, shape (rows, width); a 1-D array
                is treated as a single row.
            K: cutoff (>= 1). Only the first ``K`` columns are scored; when
                fewer than ``K`` columns exist, all available columns are
                used instead of failing on a shape mismatch.

        Returns:
            tuple: ``(ndcg_per_row, mean_ndcg)``; the mean is 0.0 for an
            empty batch.

        Raises:
            ValueError: if ``K`` is smaller than 1.
        """
        if K < 1:
            raise ValueError(f"K must be >= 1, got {K}")

        targets = np.asarray(target_ids).reshape(-1, 1)
        candidates = np.asarray(top_k_ids)
        if candidates.ndim == 1:
            candidates = candidates.reshape(1, -1)
        # Score at most K positions, and never more than actually exist.
        candidates = candidates[:, :K]
        width = candidates.shape[1]

        hit_matrix = (candidates == targets).astype(np.float64)
        # Rank r (1-based) is discounted by 1 / log2(r + 1).
        discounts = 1 / np.log2(np.arange(1, width + 1) + 1)

        dcg = np.sum(hit_matrix * discounts, axis=1)
        # With one relevant item the ideal ranking puts it first: 1 / log2(2).
        idcg = 1 / np.log2(2)
        ndcg = dcg / idcg

        mean_ndcg = ndcg.mean() if ndcg.size else 0.0
        return ndcg, mean_ndcg


def ranking_evaluation(target_ids, max_k_ids, top_k_list: Union[list, int]):
    """Compute hit ratio and NDCG at one or more cutoffs.

    Args:
        target_ids: target ids, one per row of ``max_k_ids``.
        max_k_ids: 2-D array of ranked candidate ids; each cutoff scores the
            leading ``K`` columns.
        top_k_list: a single cutoff or a list of cutoffs. A bare integer is
            accepted, as the annotation promises, and treated as a
            one-element list.

    Returns:
        dict: ``{K: {"HR": ..., "NDCG": ...}}`` where "HR" is
        ``Metric.hit_ratio`` and "NDCG" is whatever ``Metric.NDCG`` returns
        for that cutoff. A cutoff larger than the number of columns is
        clamped, and the entry is stored under the clamped value.
    """
    if isinstance(top_k_list, (int, np.integer)):
        top_k_list = [top_k_list]

    available = max_k_ids.shape[1]
    measure = dict()
    for K in top_k_list:
        if K > available:
            K = available
            print(f"K is larger than the number of items. K is set to {K}")
        top_k_ids = max_k_ids[:, :K]
        measure[K] = {
            "HR": Metric.hit_ratio(target_ids, top_k_ids),
            "NDCG": Metric.NDCG(target_ids, top_k_ids, K),
        }

    return measure


In [None]:
# Scratch cell: copy the NumPy indices into a tensor, clone it, overwrite
# column 3 of the clone with the constant 3, and display both side by side.
m = torch.tensor(max_k_indices)
mm = m.clone()
mm[:, 3] = 3
m, mm

In [None]:
top_k_list = [10, 15, 20]

# For every cutoff, echo the candidate ids being scored and then report the
# hit ratio obtained on the sampled batch.
for K in top_k_list:
    top_k_ids = max_k_indices[:, :K]
    print(str(top_k_ids))
    hit_ratio_at_k = Metric.hit_ratio(target_ids, top_k_ids)
    print(f"{K}: {hit_ratio_at_k}")

In [None]:
from time import strftime, localtime, time
from tqdm import tqdm
from src.utils import Log
from src.evaluation import Metric

class Trainer(object):
    """Runs the train/validate loop for a model and checkpoints the best epoch.

    The best epoch is the one with the highest NDCG at the smallest cutoff in
    ``config['top_k_list']``, as reported by the metric object.
    """

    def __init__(self, model, optimizer, config):
        """Store the model/optimizer and build loaders, logger and metric.

        Args:
            model: callable returning ``(_, loss)`` for ``model(batch)`` and a
                pair for ``model(batch, flag='valid')``; it must also expose
                ``max_k_indices`` after a validation forward.
            optimizer: optimizer stepping the model's parameters.
            config: mapping read for 'batch_size', 'top_k_list', 'gpu',
                'save_path' and the epoch count ('epochs' or 'max_epochs').
        """
        self.config = config
        self.model = model
        self.optimizer = optimizer
        self.trainloader = DataLoader(AmazonDataset(config, 'train'), batch_size=config['batch_size'], shuffle=True, num_workers=4)
        self.validloader = DataLoader(AmazonDataset(config, 'valid'), batch_size=config['batch_size'], shuffle=False, num_workers=4)

        # Model selection is driven by NDCG at the smallest configured cutoff.
        self.min_k = min(self.config['top_k_list'])
        self.best_model = None
        self.best_results = None
        self.best_score = 0

        self.log = Log(self.config)
        self.device = torch.device(f'cuda:{config["gpu"]}' if torch.cuda.is_available() else 'cpu')
        self.metric = Metric()

    def _num_epochs(self, config=None):
        """Return the epoch count, accepting either 'epochs' or 'max_epochs'."""
        config = self.config if config is None else config
        if 'epochs' in config:
            return config['epochs']
        return config['max_epochs']

    def to_device(self, batch, device):
        """Move the token tensors of ``batch`` to ``device`` in place.

        Only entries whose key contains "input_ids" or "attention_mask" are
        transferred; all other entries are left untouched. Returns the same
        dict object.
        """
        for k, v in batch.items():
            if ("input_ids" in k) or ('attention_mask' in k):
                batch[k] = v.to(device)
        return batch

    def train_epoch(self, model, optimizer, loader, epoch):
        """Run one optimisation pass over ``loader`` and log every step.

        Returns:
            tuple: the same ``(model, optimizer)`` objects that were passed in.
        """
        # evaluate() leaves the model in eval mode, so re-enable training
        # behaviour at the start of every epoch.
        model.train()
        total_epochs = self._num_epochs()
        for i, batch in enumerate(loader):
            batch = self.to_device(batch, self.device)
            optimizer.zero_grad()
            _, loss = model(batch)
            loss.backward()
            optimizer.step()
            self.log.add(f"Epoch: {epoch}/{total_epochs} || Iter: {i}/{len(loader)} || Loss: {loss.item()}")

        return model, optimizer

    def evaluate(self, model, loader, metric):
        """Score ``model`` on ``loader`` and return ``metric.get_results()``.

        The model is switched to eval mode and the forward passes run without
        autograd. ``metric`` is reset first and then updated once per batch
        with the targets, ``model.max_k_indices`` and the configured cutoffs.
        """
        model.eval()
        metric.reset()

        with torch.no_grad():
            for batch in tqdm(loader, desc="Evaluating"):
                # Use the trainer's own device rather than a notebook global.
                batch = self.to_device(batch, self.device)

                top_k_indices, preds = model(batch, flag='valid')
                max_k_indices = model.max_k_indices
                target_ids = batch['target_item_id'].numpy()
                metric.update(target_ids, max_k_indices, self.config['top_k_list'])

        return metric.get_results()

    def save(self,):
        """Write the best model's state dict to ``<save_path>/best_model.pth``."""
        # Create the checkpoint directory on first use so torch.save cannot
        # fail on a missing folder.
        os.makedirs(self.config['save_path'], exist_ok=True)
        torch.save(self.best_model.state_dict(), f"{self.config['save_path']}/best_model.pth")
        self.log.add("Model is saved")

    def train_eval(self, trainloader, validloader, optimizer, config):
        """Train for the configured number of epochs, validating after each.

        NOTE(review): ``trainloader``, ``validloader`` and ``optimizer`` are
        accepted for interface compatibility, but the loop runs on the
        trainer's own ``self.trainloader`` / ``self.validloader`` /
        ``self.optimizer``; only ``config`` is read (for the epoch count).
        """
        self.log.add("Start training")
        self.model.train()
        total_epochs = self._num_epochs(config)
        for epoch in range(total_epochs):
            self.model, optimizer = self.train_epoch(self.model, self.optimizer, self.trainloader, epoch)
            results = self.evaluate(self.model, self.validloader, self.metric)
            self.log.add(f"Epoch: {epoch}/{total_epochs} || Results: {results}")

            score = results[self.min_k]['NDCG']
            if score > self.best_score:
                self.best_score = score
                # This is a reference to the live model, not a snapshot; the
                # best weights are persisted by the save() call below.
                self.best_model = self.model
                self.best_results = results
                self.save()

        self.log.add(f"Best results: {self.best_results}")


