In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random
import torch
import pickle


def fix_seeds(seed=101):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), and switches cuDNN to deterministic mode.

    Args:
        seed (int): Seed value applied to every generator. Defaults to 101.
    """
    # Exported for hash-seed reproducibility; the interpreter reads this variable
    # at start-up, so it matters for processes launched after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)

    random.seed(seed)
    np.random.seed(seed)

    # CPU generator, current CUDA device, then every CUDA device (multi-GPU).
    for seed_torch in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_torch(seed)

    # Trade cuDNN autotuning for run-to-run determinism.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def describe_interactions(df):
    """Print basic statistics of an interaction table.

    Args:
        df: Object exposing ``user_id`` and ``item_id`` columns (e.g. a pandas
            DataFrame with one row per interaction).

    Prints the number of distinct users and items, the number of rows and the
    largest user / item id, followed by a spacer line.
    """
    n_users = len(df.user_id.unique())
    print('number of users: ', n_users)

    n_items = len(df.item_id.unique())
    print('number of movie: ', n_items)

    n_rows = len(df)
    print('number of interactions: ', n_rows)

    largest_user = df.user_id.max()
    print('max user id', largest_user)

    largest_item = df.item_id.max()
    print('max movie id', largest_item)

    print(' ')

def int_to_user_dict(interaction):
    """Group (user, item) pairs by user.

    Args:
        interaction: Iterable of 2-element records ``(user_id, item_id)``, e.g.
            the rows of a two-column array.

    Returns:
        dict: ``{user_id: [item_id, ...]}`` with keys in ascending order and each
        item list in the order the pairs were encountered.
    """
    grouped = {}
    for user, item in interaction:
        grouped.setdefault(user, []).append(item)

    # Rebuild the dict so that iteration follows ascending user id.
    return {user: grouped[user] for user in sorted(grouped)}

def save_user_dict_to_txt(user_dict, base_path, filename):
    """Write a user -> items mapping as one space-separated line per user.

    Each line has the form ``<user> <item> <item> ...``; ids are cast to ``int``
    before being written.

    Args:
        user_dict (dict): Mapping ``user_id -> iterable of item ids``.
        base_path (str): Directory prefix; concatenated with ``filename`` as-is,
            so it must already end with a path separator.
        filename (str): Name of the file to create (overwritten if it exists).
    """
    target = base_path + filename
    with open(target, 'w') as out:
        for user, items in user_dict.items():
            fields = [str(int(user))]
            fields.extend(str(int(item)) for item in items)
            out.write(' '.join(fields) + '\n')

def ndcg_at_k(r, k=20):
    """Calculate Normalized Discounted Cumulative Gain (NDCG) at k.

    Args:
        r: 1-D sequence of relevance scores in ranked order (first element is
            the top-ranked item).
        k (int): Only the first ``k`` scores are considered. Defaults to 20.

    Returns:
        float: ``DCG(r[:k]) / DCG(sorted(r[:k], descending))``, or ``0.0`` when
        the ideal DCG is zero (no relevant item in the top ``k``, or empty input).
        Note that the ideal ordering is built from the truncated list only.
    """
    # np.asfarray was removed in NumPy 2.0; asarray with a float dtype is the
    # documented replacement and also works on older releases.
    r = np.asarray(r, dtype=float)[:k]

    def dcg(scores):
        """Calculate Discounted Cumulative Gain (DCG) with a log2(rank + 1) discount."""
        return np.sum(scores / np.log2(np.arange(2, scores.size + 2)))

    # Ideal ranking: the same scores in descending order.
    dcg_max = dcg(np.asarray(sorted(r, reverse=True), dtype=float))
    if not dcg_max:
        return 0.0

    return dcg(r) / dcg_max

#%%
# Fixed seed: seed every RNG once, before any other cell draws random numbers,
# so results are reproducible after a kernel restart.
seed = 101
fix_seeds(seed)

In [None]:
import openai
import pandas as pd
import time
import asyncio
import nest_asyncio
from concurrent.futures import ThreadPoolExecutor
import os.path as op


def helper_load_train(filename):
    """Parse a training interaction file.

    Each non-empty line has the form ``<user> <item> <item> ...`` with
    whitespace-separated integer ids. Blank lines and users without items are
    skipped.

    Args:
        filename (str): Path of the text file to read.

    Returns:
        tuple: ``(user_dict_list, item_dict, item_dict_list, trainUser, trainItem)``
            - user_dict_list (dict): ``user -> [items]``.
            - item_dict (set): all item ids seen.
            - item_dict_list (dict): ``item -> [users]``.
            - trainUser / trainItem (list): parallel lists with one entry per
              interaction (edge-list form, as used for LightGCN).

    Raises:
        ValueError: If a token is not an integer.
    """
    user_dict_list = {}
    item_dict = set()
    item_dict_list = {}
    trainUser, trainItem = [], []

    with open(filename) as f:
        for raw_line in f:
            # split() without a separator drops the newline, collapses repeated
            # or trailing spaces and returns [] for a blank line; split(' ')
            # returned [''] there, which crashed on int('').
            tokens = raw_line.split()
            if not tokens:
                continue
            user, *items = [int(token) for token in tokens]
            if not items:
                continue

            item_dict.update(items)
            # LightGCN edge list
            trainUser.extend([user] * len(items))
            trainItem.extend(items)
            user_dict_list[user] = items

            for item in items:
                item_dict_list.setdefault(item, []).append(user)

    return user_dict_list, item_dict, item_dict_list, trainUser, trainItem


# Load the raw tables from the working directory.
# NOTE(review): the variable names `music` and `movie_detail` do not match the
# book_* files they are loaded from — presumably left over from other datasets.
# Confirm which later cells use these names before renaming.
music = pd.read_csv('book_profiles.csv')
ratings = pd.read_csv('ratings.csv')
users = pd.read_csv('user_profiles.csv')
movie_detail = pd.read_csv('book_detail.csv')


In [None]:
import openai

# System-role instruction passed to get_completion as `sys_prompt`.
# NOTE(review): the wording "movie reading taste analyst" looks like a leftover
# from adapting the prompt between datasets; it is runtime text, so it is left
# untouched here — confirm the intended domain before editing.
sys_prompt = """
I want you to act as an agent. You will act as a movie reading taste analyst roleplaying the user using the first person pronoun "I".
"""

# User-prompt template asking for TASTE/REASON pairs plus a HIGH RATINGS /
# LOW RATINGS summary. <INPUT1>..<INPUT5> are placeholders for the titles rated
# 1..5; presumably they are substituted before the request is sent (the
# substitution code is not in this part of the notebook — verify).
# The "TASTE:", "REASON:", "HIGH RATINGS:" and "LOW RATINGS:" labels are the
# ones generate_init_info extracts with regular expressions, so keep them in sync.
prompt_modify = """
Given a user's rating history:

Here, the input <INPUT1>, <INPUT2>, <INPUT3>, <INPUT4>, <INPUT5> format must be: [movie titles]
user gives high ratings for following movie titles: <INPUT4> <INPUT5>

user gives a rating of 1 for following movie titles: <INPUT1>
user gives a rating of 2 for following movie titles: <INPUT2>
user gives a rating of 3 for following movie titles: <INPUT3>
user gives a rating of 4 for following movie titles: <INPUT4>
user gives a rating of 5 for following movie titles: <INPUT5>

My first request is "I need help creating movie watching taste for a user given the movie-rating history. (in no particular order)"  Generate as many TASTE-REASON pairs as possible, taste should focus on the movie titles.
Strictly follow the output format below:

TASTE: <-descriptive taste->
REASON: <-brief reason->

TASTE: <-descriptive taste->
REASON: <-brief reason->
.....

Secondly, analyze user tend to give what kinds of movies high ratings, and tend to give what kinds of movies low ratings.
Strictly follow the output format below:
HIGH RATINGS: <-conclusion of movies of high ratings(above 3)->
LOW RATINGS: <-conclusion of movies of low ratings(between 1 to 3)->
Answer should not be a combination of above two parts and not contain other words and should not contain movie titles.

"""



# User-prompt template that only uses the highly rated titles (<INPUT4>,
# <INPUT5>) and asks for exactly two genre-focused TASTE/REASON pairs.
# The placeholders are presumably substituted before the request is sent
# (substitution code not visible here — verify). The "TASTE:" / "REASON:"
# labels are the ones generate_init_info extracts, so keep them in sync.
prompt_information_house = """
Given a user's rating history:

Here, the input <INPUT4>, <INPUT5> format must be: [movie titles]

user gives high ratings for following movie titles: <INPUT4>, <INPUT5>

My first request is "I need help creating movie watching taste for a user given the movie-rating history. (in no particular order)"
Generate two specific and most inclusive TASTE-REASON pairs as possible, taste should focus on the movies' genres and don't use obcure words like "have diverse taste".
Don't conclude the taste using any time-related word like 90's or classic.
Strictly follow the output format below:

TASTE: <-descriptive taste->
REASON: <-brief reason->

TASTE: <-descriptive taste->
REASON: <-brief reason->

"""

def get_completion(prompt, sys_prompt, model="gpt-4o-mini", temperature=0, max_retries=None):
    """Request one chat completion, retrying with capped exponential back-off.

    Uses the ``openai.ChatCompletion.create`` interface with a 50 s request
    timeout and prints the token usage of the successful call.

    Args:
        prompt (str): Content of the user message.
        sys_prompt (str): Content of the system message.
        model (str): Model name forwarded to the API.
        temperature (float): Sampling temperature forwarded to the API.
        max_retries (int or None): Maximum number of retries after a failed
            request. ``None`` (default) keeps the historical behaviour of
            retrying forever.

    Returns:
        str: Text content of the first choice.

    Raises:
        Exception: The last API error, re-raised once ``max_retries`` is
            exhausted (never raised when ``max_retries`` is ``None``).
    """
    # The system instruction goes first so it frames the user message that
    # follows; the original list had the two entries in the opposite order.
    messages = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": prompt},
    ]
    response = None
    except_waiting_time = 0.1
    failures = 0
    while response is None:
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                temperature=temperature,
                request_timeout=50
            )
        except Exception:
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            time.sleep(except_waiting_time)
            # Double the wait after each failure until it has passed 2 seconds.
            if except_waiting_time < 2:
                except_waiting_time *= 2

    # Usage statistics are informational only: a response without them must
    # not prevent the completion from being returned.
    try:
        usage = response["usage"]
        print("Tokens used: {:.2f}k".format(usage["total_tokens"] / 1000))
        print("Prompt tokens: {:.2f}k".format(usage["prompt_tokens"] / 1000))
        print("Response tokens: {:.2f}k".format(usage["completion_tokens"] / 1000))
    except (KeyError, TypeError):
        pass

    return response.choices[0].message["content"]


async def polish_data(idx, prompt, sys_prompt, loop, executor, model="gpt-4o-mini", temperature=0):
    """Generate one persona description off the event loop and save it to disk.

    Runs the blocking ``get_completion`` call in ``executor`` via
    ``loop.run_in_executor``, prints the generated text and the elapsed time,
    and writes the text to ``like_persona_description_modify/persona_<idx>.txt``
    as UTF-8. The target directory must already exist.

    Args:
        idx: Identifier used in the output file name and in the timing log.
        prompt (str): User prompt forwarded to ``get_completion``.
        sys_prompt (str): System prompt forwarded to ``get_completion``.
        loop: Event loop providing ``run_in_executor``.
        executor: Executor the blocking call is submitted to.
        model (str): Model name forwarded to ``get_completion``.
        temperature (float): Sampling temperature forwarded to ``get_completion``.
    """
    started = time.time()
    polish_text = await loop.run_in_executor(
        executor, get_completion, prompt, sys_prompt, model, temperature
    )
    elapsed = time.time() - started

    print(polish_text)
    print(idx, elapsed)

    # To produce the "information_house" personas instead, point this at
    # "like_persona_description_information_house/".
    output_path = op.join("like_persona_description_modify/", "persona_{}.txt".format(idx))
    with open(output_path, 'w', encoding='utf-8') as out:
        out.write(polish_text)

In [None]:
import re
import pandas as pd
import os.path as op
import os

#%%
def generate_init_info(s):
    """Extract the labelled fields from one generated persona text.

    For each label (``TASTE:``, ``REASON:``, ``HIGH RATINGS:``, ``LOW RATINGS:``)
    every match of the rest of the line is collected and joined with ``"| "``.
    The captured text is not stripped, so a leading space after the colon is kept.

    Args:
        s (str): Raw persona text.

    Returns:
        tuple[str, str, str, str]: ``(taste, reason, high_rating, low_rating)``;
        a field is ``""`` when its label does not occur.
    """
    patterns = (
        r'TASTE:(.+)',
        r'REASON:(.+)',
        r'HIGH RATINGS:(.+)',
        r'LOW RATINGS:(.+)',
    )
    return tuple("| ".join(re.findall(pattern, s)) for pattern in patterns)


base_path = "like_persona_description_modify"

# Personas are addressed by index (persona_0.txt ... persona_{n-1}.txt) rather
# than by directory order, so row i of the CSV always belongs to persona i.
# This assumes the folder contains exactly those n files and nothing else.
len_file_names = len(os.listdir(base_path))
file_names = ["persona_"+str(i)+".txt" for i in range(len_file_names)]

df = pd.DataFrame(index=range(len(file_names)), columns=["taste", "reason", "high_rating", "low_rating"])

#%%
avatars_info = {}
for idx, file_name in enumerate(file_names):
    # polish_data writes these files with encoding='utf-8'; read them back the
    # same way instead of relying on the platform default encoding.
    with open(base_path + "/" + file_name, "r", encoding="utf-8") as f:
        persona = f.read()
    taste, reason, high_rating, low_rating = generate_init_info(persona)
    avatars_info[idx] = {
        "taste": taste,
        "reason": reason,
        "high_rating": high_rating,
        "low_rating": low_rating
    }
    print(idx)

    df.loc[idx] = [taste, reason, high_rating, low_rating]

#%%
df.to_csv("all_personas_description_modify.csv", index=False)

In [None]:
import re
import pandas as pd
import os.path as op
import os

#%%
def generate_init_info(s):
    """Extract the TASTE and REASON fields from one generated persona text.

    Every match of the rest of the line after ``TASTE:`` / ``REASON:`` is
    collected and joined with ``"| "``. The captured text is not stripped, so a
    leading space after the colon is kept.

    Args:
        s (str): Raw persona text.

    Returns:
        tuple[str, str]: ``(taste, reason)``; a field is ``""`` when its label
        does not occur.
    """
    patterns = (r'TASTE:(.+)', r'REASON:(.+)')
    return tuple("| ".join(re.findall(pattern, s)) for pattern in patterns)

base_path = "like_persona_description_information_house"

# Personas are addressed by index (persona_0.txt ... persona_{n-1}.txt) rather
# than by directory order, so row i of the CSV always belongs to persona i.
# This assumes the folder contains exactly those n files and nothing else.
len_file_names = len(os.listdir(base_path))
file_names = ["persona_"+str(i)+".txt" for i in range(len_file_names)]

df = pd.DataFrame(index=range(len(file_names)), columns=["taste", "reason"])

#%%
avatars_info = {}
for idx, file_name in enumerate(file_names):
    # polish_data writes persona files with encoding='utf-8'; read them back the
    # same way instead of relying on the platform default encoding.
    with open(base_path + "/" + file_name, "r", encoding="utf-8") as f:
        persona = f.read()
    taste, reason = generate_init_info(persona)
    avatars_info[idx] = {
        "taste": taste,
        "reason": reason,
    }
    print(idx)

    df.loc[idx] = [taste, reason]

#%%
df.to_csv("all_personas_like_information_house.csv", index=False)

In [None]:
import threading

# Module-level state shared between worker threads. `global` declarations are
# only meaningful inside a function body; at module scope plain assignment is
# all that is needed, so the redundant statements were dropped. Functions that
# rebind these names must still declare them `global` locally and should hold
# `lock` while updating them.
# NOTE(review): the meaning of each counter is inferred from its name only —
# confirm against the code that updates them.
global_k_tokens = 0
global_start_time = 0
global_steps = 0
global_last_tokens_record = 0
global_interval = 10
global_finished_users = 0
global_finished_pages = 0
global_error_cast = 0

lock = threading.Lock() # global lock for threads

In [None]:
import os
import random
import numpy as np
import torch
from termcolor import colored, cprint
import matplotlib.pyplot as plt


def fix_seeds(seed=101):
    """Make the run reproducible by seeding all random number generators.

    Covers Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU plus
    every CUDA device) and puts cuDNN into deterministic mode.

    Args:
        seed (int): Seed value shared by all generators. Defaults to 101.
    """
    seed_text = str(seed)
    # Read by the interpreter at start-up, so this matters for processes
    # launched after this call.
    os.environ['PYTHONHASHSEED'] = seed_text

    random.seed(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # covers the multi-GPU case

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

def get_accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_true`` equals ``y_pred``.

    Both arguments are expected to be NumPy arrays of the same shape so that
    ``==`` compares element-wise.
    """
    n_correct = np.sum(y_true == y_pred)
    n_total = len(y_true)
    return n_correct / n_total

def get_recall(y_true, y_pred):
    """Return recall: true positives divided by actual positives.

    Args:
        y_true: Boolean (or 0/1 integer) NumPy array of ground-truth labels.
        y_pred: Boolean (or 0/1 integer) NumPy array of predictions.

    Returns:
        float: ``sum(y_true & y_pred) / sum(y_true)``, or ``0.0`` when there is
        no positive label (previously this was a 0/0 division yielding NaN and
        a RuntimeWarning).
    """
    n_positive = np.sum(y_true)
    if n_positive == 0:
        return 0.0
    return np.sum(y_true & y_pred) / n_positive

def get_precision(y_true, y_pred):
    """Return precision: true positives divided by predicted positives.

    Args:
        y_true: Boolean (or 0/1 integer) NumPy array of ground-truth labels.
        y_pred: Boolean (or 0/1 integer) NumPy array of predictions.

    Returns:
        float: ``sum(y_true & y_pred) / sum(y_pred)``, or ``0.0`` when nothing
        was predicted positive (previously this was a 0/0 division yielding NaN
        and a RuntimeWarning).
    """
    n_predicted = np.sum(y_pred)
    if n_predicted == 0:
        return 0.0
    return np.sum(y_true & y_pred) / n_predicted

def get_f1(y_true, y_pred):
    """Return the F1 score (harmonic mean of precision and recall).

    Precision and recall come from ``get_precision`` / ``get_recall``. When
    their sum is zero the score is defined as ``0``.
    """
    precision = get_precision(y_true, y_pred)
    recall = get_recall(y_true, y_pred)
    total = precision + recall
    return 0 if total == 0 else 2 * precision * recall / total


In [None]:
import argparse


def parse_args(argv=None):
    """Parse the experiment's command-line options.

    Parsing happens in two passes: the common options are parsed first, then
    model-specific options are registered depending on ``--modeltype`` and the
    arguments are parsed again. Unknown options are ignored in both passes
    (``parse_known_args``), which also tolerates the extra arguments a Jupyter
    kernel puts in ``sys.argv``.

    Args:
        argv (list[str] or None): Argument strings to parse. ``None`` (default)
            parses ``sys.argv[1:]`` as before.

    Returns:
        tuple: ``(args_full, special_args)`` where ``args_full`` is the
        namespace with all options and ``special_args`` is the sorted list of
        option names that were added only because of the selected model type.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--vis', nargs='?', default=-1,
                        help='we only want test value.')
    parser.add_argument('--seed', type=int, default=101,
                        help='Random seed.')
    parser.add_argument('--clear_checkpoints', action="store_true",
                        help='Whether clear the earlier checkpoints.')
    parser.add_argument("--candidate", action="store_true",
                        help="whether using the candidate set")
    parser.add_argument('--test_only', action="store_true",
                        help='Whether to test only.')
    parser.add_argument('--data_path', nargs='?', default='Movielens-1M/',
                        help='Input data path.')
    parser.add_argument('--dataset', nargs='?', default='agent4rec',
                        help='Choose a dataset')
    parser.add_argument('--embed_size', type=int, default=64,
                        help='Embedding size.')
    parser.add_argument('--batch_size', type=int, default=2048,
                        help='Batch size.')
    parser.add_argument('--lr', type=float, default=5e-3,
                        help='Learning rate.')
    parser.add_argument('--regs', type=float, default=1e-5,
                        help='Regularization.')
    parser.add_argument('--epoch', type=int, default=2000,
                        help='Number of epoch.')
    parser.add_argument('--Ks', type = int, default= 20,
                        help='Evaluate on Ks optimal items.')
    parser.add_argument('--verbose', type=int, default=5,
                        help='Interval of evaluation.')
    parser.add_argument('--saveID', type=str, default="",
                        help='Specify model save path.')
    parser.add_argument('--patience', type=int, default=10,
                        help='Early stopping point.')
    parser.add_argument('--checkpoint', type=str, default='',
                        help='Specify model save path.')
    # The help text used to be a copy of the --checkpoint one.
    parser.add_argument('--modeltype', type=str, default= 'MF',
                        help='Specify the model type (e.g. MF, InfoNCE, MultVAE).')
    parser.add_argument('--cuda', type=int, default=0,
                        help='Specify which gpu to use.')
    parser.add_argument('--IPStype', type=str, default='cn',
                        help='Specify the mode of weighting')
    parser.add_argument('--n_layers', type=int, default=0,
                        help='Number of GCN layers')
    parser.add_argument('--max2keep', type=int, default=1,
                        help='max checkpoints to keep')
    parser.add_argument('--infonce', type=int, default=0,
                        help='whether to use infonce loss or not')
    parser.add_argument('--neg_sample',type=int,default=1)
    parser.add_argument('--num_workers', type=int, default=8,
                        help='number of workers in data loader')
    parser.add_argument("--train_norm", action="store_true",
                        help="train_norm")
    parser.add_argument("--pred_norm", action="store_true",
                        help="pred_norm")

    parser.add_argument("--nodrop", action="store_true",
                        help="whether to drop out the enhanced training dataset")
    parser.add_argument("--no_wandb", action="store_true",
                        help="whether to use wandb")

    # First pass: common options only, needed to find out the model type.
    args, _ = parser.parse_known_args(argv)

    # INFONCE
    if(args.modeltype == 'InfoNCE'):
        parser.add_argument('--tau', type=float, default=0.1,
                        help='temperature parameter')

    # MultVAE
    if(args.modeltype == 'MultVAE'):
        parser.add_argument('--total_anneal_steps', type=int, default=200000,
                        help='total anneal steps')
        parser.add_argument('--anneal_cap', type=float, default=0.2,
                        help='anneal cap')
        parser.add_argument('--p_dim0', type=int, default=200,
                        help='p_dim0')
        parser.add_argument('--p_dim1', type=int, default=600,
                        help='p_dim1')

    # Second pass: now includes the model-specific options.
    args_full, _ = parser.parse_known_args(argv)
    special_args = list(set(vars(args_full).keys()) - set(vars(args).keys()))
    special_args.sort()

    return args_full, special_args

In [None]:
# import tensorflow as tf
import numpy as np
from inspect import signature
from functools import wraps
import heapq
import itertools
import time
import os
from concurrent.futures import ThreadPoolExecutor

def ensureDir(dir_path):
    """Create the parent directory of ``dir_path`` if it does not exist yet.

    Note that the directory created is ``os.path.dirname(dir_path)``: pass a
    file path, or a directory path with a trailing separator.

    Args:
        dir_path (str): Path whose directory component should exist.
    """
    d = os.path.dirname(dir_path)
    # A bare file name has an empty dirname; there is nothing to create then
    # (os.makedirs('') would raise FileNotFoundError).
    if d and not os.path.exists(d):
        # exist_ok avoids a crash if another process creates it in between.
        os.makedirs(d, exist_ok=True)



def get_data_format(data_format):
    """Translate a data-format code into its list of column names.

    Args:
        data_format (str): One of ``"UIRT"``, ``"UIR"``, ``"UIT"`` or ``"UI"``.

    Returns:
        list[str]: Column names for the format (a new list on every call).

    Raises:
        ValueError: If ``data_format`` is not one of the supported codes.
    """
    known_formats = (
        ("UIRT", ("user", "item", "rating", "time")),
        ("UIR", ("user", "item", "rating")),
        ("UIT", ("user", "item", "time")),
        ("UI", ("user", "item")),
    )
    for code, columns in known_formats:
        if data_format == code:
            return list(columns)

    raise ValueError("please choose a correct data format. ")




def csr_to_user_dict(train_matrix):
    """Convert a ``scipy.sparse.csr_matrix`` into ``{row: [column indices]}``.

    Only rows that store at least one entry appear in the result; the value is
    a plain Python list of that row's stored column indices.
    """
    return {
        row_id: row.indices.copy().tolist()
        for row_id, row in enumerate(train_matrix)
        if len(row.indices)
    }


def csr_to_user_dict_bytime(time_matrix,train_matrix):
    """Like ``csr_to_user_dict``, but each user's items are ordered by time.

    Args:
        time_matrix: 2-D structure indexable as ``time_matrix[user, item]``
            giving the sort key (ascending) of each interaction.
        train_matrix: Sparse interaction matrix passed to ``csr_to_user_dict``.

    Returns:
        dict: ``{user: [items sorted by time_matrix[user, item]]}`` with items
        as Python ints.
    """
    ordered = {}
    for user, items in csr_to_user_dict(train_matrix).items():
        by_time = sorted(items, key=lambda item: time_matrix[user, item])
        ordered[user] = np.array(by_time, dtype=np.int32).tolist()
    return ordered



def noise_validator(noise, allowed_noises):
    '''Validates the noise provided.

    A noise specification is valid when it is one of ``allowed_noises`` or has
    the form ``"mask-<t>"`` with ``0.0 <= t <= 1.0``.

    Args:
        noise (str): Noise specification to check.
        allowed_noises: Container of accepted noise names.

    Returns:
        bool: ``True`` if valid, ``False`` otherwise. Inputs that cannot be
        parsed (wrong type, missing or non-numeric mask level) are invalid.
    '''
    try:
        if noise in allowed_noises:
            return True
        parts = noise.split('-')
        if parts[0] != 'mask':
            # Previously this path fell through and returned None.
            return False
        # The old truthiness test on float(...) rejected "mask-0" even though
        # the range check below accepts t == 0.0.
        t = float(parts[1])
        return 0.0 <= t <= 1.0
    except (AttributeError, IndexError, TypeError, ValueError):
        return False


def randint_choice(high, size=None, replace=True, p=None, exclusion=None):
    """Return random integers from `0` (inclusive) to `high` (exclusive).

    Args:
        high (int): Exclusive upper bound of the sampled integers.
        size: Output shape forwarded to ``np.random.choice``.
        replace (bool): Whether to sample with replacement.
        p: Optional sampling weights; they are normalised to sum to one.
        exclusion: Optional index (or indices) whose weight is forced to zero,
            so those integers are never drawn.

    Returns:
        The sample drawn with NumPy's global RNG via ``np.random.choice``.
    """
    population = np.arange(high)

    if exclusion is not None:
        # Start from uniform weights unless the caller supplied some, then
        # zero out the excluded entries on a private copy.
        weights = np.ones_like(population) if p is None else np.array(p, copy=True)
        weights = weights.flatten()
        weights[exclusion] = 0
        p = weights

    if p is not None:
        p = p / np.sum(p)

    return np.random.choice(population, size=size, replace=replace, p=p)


def batch_randint_choice(high, size, replace=True, p=None, exclusion=None):
    """Draw several independent samples of integers from `0` to `high` (exclusive).

    One call to ``randint_choice`` is made per entry of ``size``; the calls run
    in a thread pool and the results are returned in input order.

    :param high: integer
    :param size: 1-D array_like, number of integers to draw for each sample
    :param replace: bool
    :param p: optional weights; the same object is passed unchanged to every
        ``randint_choice`` call. Its length must equal ``len(size)`` or ``high``.
    :param exclusion: a list of 1-D array_like, one per entry of ``size``
    :return: a list of 1-D array_like sample
    :raises ValueError: if ``p`` or ``exclusion`` has an incompatible length
    """
    n_samples = len(size)

    if p is not None and len(p) != n_samples and len(p) != high:
        raise ValueError("The shape of 'p' is not compatible with the shapes of 'array' and 'size'!")

    if exclusion is not None and len(exclusion) != n_samples:
        raise ValueError("The shape of 'exclusion' is not compatible with the shape of 'size'!")

    def draw(idx):
        excluded = None if exclusion is None else exclusion[idx]
        return randint_choice(high, size[idx], replace=replace, p=p, exclusion=excluded)

    with ThreadPoolExecutor() as executor:
        samples = list(executor.map(draw, range(n_samples)))

    return samples


def typeassert(*type_args, **type_kwargs):
    """Decorator factory enforcing argument types at call time.

    The types are bound to the decorated function's parameters the same way
    arguments would be (positionally or by keyword). On every call, each
    explicitly supplied argument with a declared type is checked with
    ``isinstance``; parameters left at their default are not checked.

    Raises:
        TypeError: From the wrapped call when an argument has the wrong type.
    """
    def decorate(func):
        sig = signature(func)
        expected = sig.bind_partial(*type_args, **type_kwargs).arguments

        @wraps(func)
        def wrapper(*args, **kwargs):
            supplied = sig.bind(*args, **kwargs).arguments
            for name in supplied:
                if name in expected and not isinstance(supplied[name], expected[name]):
                    raise TypeError('Argument {} must be {}'.format(name, expected[name]))
            return func(*args, **kwargs)

        return wrapper

    return decorate


def max_top_k(a, top_k=50):
    """Return the ``top_k`` largest values of ``a`` in descending order.

    The result is an ``np.intc`` array, so non-integer values are cast to C int.
    """
    indexed = ((value, position) for position, value in enumerate(a))
    best = heapq.nlargest(top_k, indexed)
    return np.array([value for value, _ in best], dtype=np.intc)


def argmax_top_k(a, top_k=50):
    """Return the positions of the ``top_k`` largest values of ``a``.

    Positions are ordered by descending value; among equal values the larger
    position comes first. The result is an ``np.intc`` array.
    """
    indexed = ((value, position) for position, value in enumerate(a))
    best = heapq.nlargest(top_k, indexed)
    return np.array([position for _, position in best], dtype=np.intc)


def pad_sequences(sequences, value=0., max_len=None,
                  padding='post', truncating='post', dtype=np.int32):
    """Pad (and truncate) sequences to a common length.

    Args:
        sequences (list): A list of lists, where each element is a sequence.
        value (int or float): Fill value for padded positions. Defaults to `0.`.
        max_len (int or None): Target length; when `None` the length of the
            longest sequence is used.
        padding (str): `"pre"` or `"post"`: put the padding before or after
            each sequence. Defaults to `post`.
        truncating (str): `"pre"` or `"post"`: drop values from the beginning
            or from the end of sequences longer than `max_len`. Defaults to `post`.
        dtype (int or float): Type of the output array. Defaults to `np.int32`.

    Returns:
        np.ndarray: Array of shape `(len(sequences), max_len)`.

    Raises:
        ValueError: If `padding` or `truncating` is not understood (checked
            while processing the first non-empty sequence).
    """
    if max_len is None:
        max_len = np.max([len(x) for x in sequences])

    padded = np.full([len(sequences), max_len], value, dtype=dtype)
    for row, seq in enumerate(sequences):
        if len(seq) == 0:
            continue  # nothing to copy, the row stays all padding

        if truncating == 'post':
            kept = seq[:max_len]
        elif truncating == 'pre':
            kept = seq[-max_len:]
        else:
            raise ValueError('Truncating type "%s" not understood' % truncating)

        if padding == 'pre':
            padded[row, -len(kept):] = kept
        elif padding == 'post':
            padded[row, :len(kept)] = kept
        else:
            raise ValueError('Padding type "%s" not understood' % padding)
    return padded




def timer(func):
    """Decorator that prints the wall-clock duration of every call.

    The wrapped function's return value is passed through unchanged; its name
    and docstring are preserved via ``functools.wraps``.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        began = time.time()
        outcome = func(*args, **kwargs)
        elapsed = time.time() - began
        print("%s function cost: %fs" % (func.__name__, elapsed))
        return outcome

    return wrapper


In [None]:
import random as rd
import collections
from types import new_class
import numpy as np
import scipy.sparse as sp
from scipy.sparse import csr_matrix
import time
import torch
from copy import deepcopy
from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import os


# Helper function used when loading data from files
def helper_load(filename):
    """Parse an interaction file into a user -> items mapping.

    Each non-empty line has the form ``<user> <item> <item> ...`` with
    whitespace-separated integer ids. Blank lines and users without items are
    skipped.

    Args:
        filename (str): Path of the text file to read.

    Returns:
        tuple: ``(user_dict_list, item_dict)`` where ``user_dict_list`` maps
        ``user -> [items]`` and ``item_dict`` is the set of all item ids seen.

    Raises:
        ValueError: If a token is not an integer.
    """
    user_dict_list = {}
    item_dict = set()

    with open(filename) as f:
        for raw_line in f:
            # split() without a separator drops the newline, collapses repeated
            # or trailing spaces and returns [] for a blank line; split(' ')
            # returned [''] there, which crashed on int('').
            tokens = raw_line.split()
            if not tokens:
                continue
            user, *items = [int(token) for token in tokens]
            if not items:
                continue
            item_dict.update(items)
            user_dict_list[user] = items

    return user_dict_list, item_dict,

def helper_load_train(filename):
    """Parse a training interaction file.

    Each non-empty line has the form ``<user> <item> <item> ...`` with
    whitespace-separated integer ids. Blank lines and users without items are
    skipped.

    Args:
        filename (str): Path of the text file to read.

    Returns:
        tuple: ``(user_dict_list, item_dict, item_dict_list, trainUser, trainItem)``
            - user_dict_list (dict): ``user -> [items]``.
            - item_dict (set): all item ids seen.
            - item_dict_list (dict): ``item -> [users]``.
            - trainUser / trainItem (list): parallel lists with one entry per
              interaction (edge-list form, as used for LightGCN).

    Raises:
        ValueError: If a token is not an integer.
    """
    user_dict_list = {}
    item_dict = set()
    item_dict_list = {}
    trainUser, trainItem = [], []

    with open(filename) as f:
        for raw_line in f:
            # split() without a separator drops the newline, collapses repeated
            # or trailing spaces and returns [] for a blank line; split(' ')
            # returned [''] there, which crashed on int('').
            tokens = raw_line.split()
            if not tokens:
                continue
            user, *items = [int(token) for token in tokens]
            if not items:
                continue

            item_dict.update(items)
            # LightGCN edge list
            trainUser.extend([user] * len(items))
            trainItem.extend(items)
            user_dict_list[user] = items

            for item in items:
                item_dict_list.setdefault(item, []).append(user)

    return user_dict_list, item_dict, item_dict_list, trainUser, trainItem
# The Data class below loads the interaction files and creates the training DataLoader (train_loader).

class Data:
    """Load the train/valid/test interaction files of one dataset and build the training DataLoader.

    Construction reads the files under ``<data_path><dataset>/cf_data/``,
    derives user/item counts and popularity statistics, computes per-item
    weights (``get_weight``) and wraps everything in a ``TrainDataset`` served
    by a ``DataLoader``.
    """

    def __init__(self, args):
        """Copy the relevant settings from ``args`` and run the loading pipeline.

        ``args`` must provide: data_path, dataset, nodrop, candidate,
        batch_size, neg_sample, IPStype, cuda, modeltype, infonce, num_workers.
        """
        self.path = args.data_path + args.dataset + '/cf_data/'
        self.small_path=args.data_path + args.dataset+".mid"+"/"
        self.train_file = self.path + 'train.txt'
        self.valid_file = self.path + 'valid.txt'
        self.test_file = self.path + 'test.txt'

        if(args.nodrop):
            self.train_nodrop_file = self.path + 'train_nodrop.txt'
        self.nodrop = args.nodrop

        self.candidate = args.candidate
        if(args.candidate):
            self.test_neg_file = self.path + 'test_neg.txt'
        self.batch_size = args.batch_size
        self.neg_sample = args.neg_sample
        self.IPStype = args.IPStype
        # NOTE(review): `args.cuda` is used as a truth value here, so a value of 0
        # selects "cpu" even when CUDA is available — confirm whether 0 is meant
        # as "GPU index 0" by the caller.
        self.device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
        self.modeltype = args.modeltype

        self.user_pop_max = 0
        self.item_pop_max = 0
        self.infonce = args.infonce
        self.num_workers = args.num_workers
        self.dataset = args.dataset
        # (second assignment of self.candidate; same value as above)
        self.candidate = args.candidate

        # Number of total users and items
        self.n_users, self.n_items, self.n_observations = 0, 0, 0
        self.users = []
        self.items = []
        self.population_list = []
        self.weights = []

        # List of dictionaries of users and its observed items in corresponding dataset
        # {user1: [item1, item2, item3...], user2: [item1, item3, item4],...}
        # {item1: [user1, user2], item2: [user1, user3], ...}
        self.train_user_list = collections.defaultdict(list)
        self.valid_user_list = collections.defaultdict(list)
        # NOTE(review): for these two datasets only the test_ood_* containers are
        # created here, yet load_data always fills self.test_user_list and never
        # the test_ood_* ones — presumably a subclass or other code handles OOD
        # splits; verify.
        if(self.dataset == "tencent_synthetic" or self.dataset == "kuairec_ood"):
            self.test_ood_user_list_1 = collections.defaultdict(list)
            self.test_ood_user_list_2 = collections.defaultdict(list)
            self.test_ood_user_list_3 = collections.defaultdict(list)
        else:
            self.test_user_list = collections.defaultdict(list)

        # Used to track early stopping point
        self.best_valid_recall = -np.inf
        self.best_valid_epoch, self.patience = 0, 0

        self.train_item_list = collections.defaultdict(list)
        self.Graph = None
        self.trainUser, self.trainItem, self.UserItemNet = [], [], []
        self.n_interactions = 0
        if(self.dataset == "tencent_synthetic" or self.dataset == "kuairec_ood"):
            self.test_ood_item_list_1 = []
            self.test_ood_item_list_2 = []
            self.test_ood_item_list_3 = []
        else:
            self.test_item_list = []

        #Dataloader
        self.train_data = None
        self.train_loader = None

        self.load_data()
        # model-specific attributes
        self.add_special_model_attr(args)

        self.get_dataloader()

    def add_special_model_attr(self, args):
        """Hook for subclasses to add model-specific attributes; does nothing here."""
        pass

    # self.trainUser and self.trainItem are respectively the users and items in the training set, in the form of an interaction list.
    def load_data(self):
        """Read all interaction files and derive counts, popularity indices and weights.

        The containers created in ``__init__`` are replaced by the plain dicts /
        sets returned by ``helper_load_train`` and ``helper_load``.
        """
        self.train_user_list, train_item, self.train_item_list, self.trainUser, self.trainItem = helper_load_train(
            self.train_file)
        self.valid_user_list, valid_item = helper_load(self.valid_file)

        self.test_user_list, self.test_item_list = helper_load(self.test_file)

        if(self.nodrop):
            self.train_nodrop_user_list, self.train_nodrop_item_list = helper_load(self.train_nodrop_file)

        if(self.candidate):
            self.test_neg_user_list, self.test_neg_item_list = helper_load(self.test_neg_file)
        else:
            self.test_neg_user_list, self.test_neg_item_list = None, None
        self.pop_dict_list = []


        # Item universe = union of the item sets of the three splits.
        temp_lst = [train_item, valid_item, self.test_item_list]

        self.users = list(set(self.train_user_list.keys()))
        self.items = list(set().union(*temp_lst))
        self.items.sort()
        # print(self.items)
        self.n_users = len(self.users)
        self.n_items = len(self.items)


        print("n_users: ", self.n_users)
        print("n_items: ", self.n_items)

        # NOTE(review): indexing train_user_list (a plain dict at this point) with
        # range(n_users) assumes user ids are exactly 0..n_users-1; a gap in the
        # ids would raise KeyError here.
        for i in range(self.n_users):
            self.n_observations += len(self.train_user_list[i])
            self.n_interactions += len(self.train_user_list[i])
            if i in self.valid_user_list.keys():
                self.n_interactions += len(self.valid_user_list[i])
            if(self.dataset == "tencent_synthetic" or self.dataset == "kuairec_ood"):
                if i in self.test_ood_user_list_1.keys():
                    self.n_interactions += len(self.test_ood_user_list_1[i])
                if i in self.test_ood_user_list_2.keys():
                    self.n_interactions += len(self.test_ood_user_list_2[i])
                if i in self.test_ood_user_list_3.keys():
                    self.n_interactions += len(self.test_ood_user_list_3[i])
            else:
                if i in self.test_user_list.keys():
                    self.n_interactions += len(self.test_user_list[i])



        # Population matrix
        # Item popularity = number of training users + 1; items without training
        # interactions get 1. Like the user loop above, this iterates
        # range(n_items), i.e. it assumes item ids are 0..n_items-1.
        pop_dict = {}
        for item, users in self.train_item_list.items():
            pop_dict[item] = len(users) + 1
        for item in range(0, self.n_items):
            if item not in pop_dict.keys():
                pop_dict[item] = 1

            self.population_list.append(pop_dict[item])

        # Raw interaction counts per user / per item in the training set.
        pop_user = {key: len(value) for key, value in self.train_user_list.items()}
        pop_item = {key: len(value) for key, value in self.train_item_list.items()}
        self.pop_item = pop_item
        self.pop_user = pop_user
        # Convert to a unique value.
        sorted_pop_user = list(set(list(pop_user.values())))
        sorted_pop_item = list(set(list(pop_item.values())))
        sorted_pop_user.sort()
        sorted_pop_item.sort()
        self.n_user_pop = len(sorted_pop_user)
        self.n_item_pop = len(sorted_pop_item)

        # Map each distinct popularity count to its rank among the distinct counts.
        user_idx = {}
        item_idx = {}
        for i, item in enumerate(sorted_pop_user):
            user_idx[item] = i
        for i, item in enumerate(sorted_pop_item):
            item_idx[item] = i

        self.user_pop_idx = np.zeros(self.n_users, dtype=int)
        self.item_pop_idx = np.zeros(self.n_items, dtype=int)
        # Convert the originally sparse popularity into dense popularity.
        for key, value in pop_user.items():
            self.user_pop_idx[key] = user_idx[value]
        for key, value in pop_item.items():
            # print(key, value)
            self.item_pop_idx[key] = item_idx[value]

        user_pop_max = max(self.user_pop_idx)
        item_pop_max = max(self.item_pop_idx)

        self.user_pop_max = user_pop_max
        self.item_pop_max = item_pop_max

        self.weights = self.get_weight()
        self.weight_dict={i:self.weights[i] for i in range(len(self.weights))}
        # (item index, weight) pairs in ascending weight order.
        self.sorted_weight=sorted(self.weight_dict.items(),key=lambda x: x[1])

        self.sample_items = np.array(self.items, dtype=int)


    def get_dataloader(self):
        """Create ``self.train_data`` (TrainDataset) and ``self.train_loader`` (shuffled, drop_last)."""
        self.train_data = TrainDataset(self.modeltype, self.users, self.train_user_list, self.user_pop_idx, self.item_pop_idx, \
                                        self.neg_sample, self.n_observations, self.n_items, self.sample_items, self.weights, self.infonce, self.items)

        self.train_loader = DataLoader(self.train_data, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers, drop_last=True)

    def get_weight(self):
        """Compute per-item weights from ``self.population_list`` according to ``self.IPStype``.

        - If ``'s'`` is in IPStype: popularity clipped to >= 1 and divided by its maximum.
        - Otherwise: the inverse of the max-normalised popularity, optionally
          clipped to ``[1, median]`` (``'c'``) and rescaled by its maximum (``'n'``).
        """

        if 's' in self.IPStype:
            pop = self.population_list
            pop = np.clip(pop, 1, max(pop))
            pop = pop / max(pop)
            return pop


        pop = self.population_list
        pop = np.clip(pop, 1, max(pop))
        # The infinity norm of a vector is its largest absolute value.
        pop = pop / np.linalg.norm(pop, ord=np.inf)
        pop = 1 / pop

        if 'c' in self.IPStype:
            pop = np.clip(pop, 1, np.median(pop))
        if 'n' in self.IPStype:
            pop = pop / np.linalg.norm(pop, ord=np.inf)

        return pop

    def _convert_sp_mat_to_sp_tensor(self, X):
        """Convert a SciPy sparse matrix into a float32 torch sparse tensor (COO indices)."""
        coo = X.tocoo().astype(np.float32)
        row = torch.Tensor(coo.row).long()
        col = torch.Tensor(coo.col).long()
        index = torch.stack([row, col])
        data = torch.FloatTensor(coo.data)
        # NOTE(review): torch.sparse.FloatTensor is the legacy constructor —
        # check whether the installed PyTorch version still supports it.
        return torch.sparse.FloatTensor(index, data, torch.Size(coo.shape))

    def getSparseGraph(self):
        """Return the normalised user-item adjacency matrix as a coalesced torch sparse tensor.

        The matrix is loaded from ``s_pre_adj_mat.npz`` under ``self.path`` when
        possible; otherwise it is built from the training edge list, normalised
        as D^-1/2 A D^-1/2 and saved for later runs. The result is cached in
        ``self.Graph``.
        """

        if self.Graph is None:
            try:
                pre_adj_mat = sp.load_npz(self.path + '/s_pre_adj_mat.npz')
                print("finish loading adjacency matrix")
                norm_adj = pre_adj_mat
            # If there is no preprocessed adjacency matrix, generate one.
            # NOTE(review): the bare except also hides errors other than a missing
            # file (e.g. a corrupt cache) and catches KeyboardInterrupt.
            except:
                print("generating adjacency matrix")
                s = time.time()
                adj_mat = sp.dok_matrix((self.n_users + self.n_items, self.n_users + self.n_items), dtype=np.float32)
                adj_mat = adj_mat.tolil()
                self.trainItem = np.array(self.trainItem)
                self.trainUser = np.array(self.trainUser)
                self.UserItemNet = csr_matrix((np.ones(len(self.trainUser)), (self.trainUser, self.trainItem)),
                                                shape=(self.n_users, self.n_items))
                R = self.UserItemNet.tolil()
                # Bipartite layout: users occupy the first n_users rows/columns,
                # items the remaining ones.
                adj_mat[:self.n_users, self.n_users:] = R
                adj_mat[self.n_users:, :self.n_users] = R.T
                adj_mat = adj_mat.tocsr()
                sp.save_npz(self.path + '/adj_mat.npz', adj_mat)
                print("successfully saved adj_mat...")

                adj_mat = adj_mat.todok()

                rowsum = np.array(adj_mat.sum(axis=1))
                d_inv = np.power(rowsum, -0.5).flatten()
                # Rows with zero degree give inf; treat them as 0.
                d_inv[np.isinf(d_inv)] = 0.
                d_mat = sp.diags(d_inv)

                norm_adj = d_mat.dot(adj_mat)
                norm_adj = norm_adj.dot(d_mat)
                norm_adj = norm_adj.tocsr()
                end = time.time()
                print(f"costing {end - s}s, saved norm_mat...")
                sp.save_npz(self.path + '/s_pre_adj_mat.npz', norm_adj)
            self.Graph = self._convert_sp_mat_to_sp_tensor(norm_adj)
            self.Graph = self.Graph.coalesce()

        return self.Graph

    def get_not_candidate(self):
        """Return ``{user: [items]}`` read from ``not_candidate.txt``, or ``None`` when ``self.candidate`` is false.

        NOTE(review): the file is read from ``'data/<dataset>/'`` rather than
        from ``self.path`` like every other file — confirm this is intended.
        """
        if self.candidate:
            not_candidate_dict = {}
            with open('data/' + self.dataset + '/not_candidate.txt', 'r') as f:
                for line in f.readlines():
                    line = line.strip('\n').split(' ')
                    # NOTE(review): split(' ') never returns an empty list, so this
                    # guard cannot trigger; a blank line would fail in int('') below.
                    if len(line) == 0:
                        continue
                    line = [int(i) for i in line]
                    user = line[0]
                    items = line[1:]
                    not_candidate_dict[user] = items

            return not_candidate_dict
        else:
            return None

class TrainDataset(torch.utils.data.Dataset):
    """Dataset that draws one (user, positive item[, negatives]) sample per lookup.

    The dataset reports ``n_observations`` as its length; indices are wrapped
    onto ``users`` with a modulo, and the positive item is drawn at random
    from that user's training items on every access.

    Args:
        modeltype: Stored as-is; not read by this class.
        users: Indexable collection of user ids.
        train_user_list: Mapping/sequence from user id to that user's
            training items.
        user_pop_idx: Indexable by user id; value returned as ``user_pop``.
        item_pop_idx: Indexable by item id; value returned as the item's
            popularity index.
        neg_sample: Number of negatives per sample; ``-1`` means the caller
            handles negatives itself (only honoured when ``infonce == 1``).
        n_observations: Value returned by ``len()``.
        n_items: Number of items to sample negatives from.
        sample_items: Stored as-is; not read by this class.
        weights: Indexable by item id; value returned as ``pos_weight``.
        infonce: ``1`` selects the InfoNCE-style return layouts.
        items: Indexable collection of item ids used for single-negative
            rejection sampling.
    """

    def __init__(self, modeltype, users, train_user_list, user_pop_idx, item_pop_idx, neg_sample,
                 n_observations, n_items, sample_items, weights, infonce, items):
        self.modeltype = modeltype
        self.users = users
        self.train_user_list = train_user_list
        self.user_pop_idx = user_pop_idx
        self.item_pop_idx = item_pop_idx
        self.neg_sample = neg_sample
        self.n_observations = n_observations
        self.n_items = n_items
        self.sample_items = sample_items
        self.weights = weights
        self.infonce = infonce
        self.items = items

    def __getitem__(self, index):
        """Return one training sample.

        Returns:
            tuple: Depending on the configuration,
              * ``infonce == 1`` and ``neg_sample == -1``:
                ``(user, pos_item, user_pop, pos_item_pop, pos_weight)``;
              * ``infonce == 1`` and ``neg_sample != -1``: the five values
                above plus ``(neg_items LongTensor, neg_items_pop)``;
              * otherwise: the five values above plus
                ``(neg_item, neg_item_pop)`` for one rejection-sampled negative.
        """
        # len(self) is n_observations, so fold the index back onto the users.
        index = index % len(self.users)
        user = self.users[index]
        if len(self.train_user_list[user]) == 0:
            # Fallback item for users without training items. This used to be
            # assigned to a misspelled `pos_items`, which left `pos_item`
            # unbound and crashed a few lines below.
            pos_item = 0
        else:
            pos_item = rd.choice(self.train_user_list[user])

        user_pop = self.user_pop_idx[user]
        pos_item_pop = self.item_pop_idx[pos_item]
        pos_weight = self.weights[pos_item]

        if self.infonce == 1 and self.neg_sample == -1:

            return user, pos_item, user_pop, pos_item_pop, pos_weight

        elif self.infonce == 1 and self.neg_sample != -1:
            neg_items = randint_choice(self.n_items, size=self.neg_sample, exclusion=self.train_user_list[user])
            neg_items_pop = self.item_pop_idx[neg_items]

            return user, pos_item, user_pop, pos_item_pop, pos_weight, torch.tensor(neg_items).long(), neg_items_pop

        else:
            # Rejection sampling: redraw until the item is not a training item.
            while True:
                idx = rd.randint(0, self.n_items - 1)
                neg_item = self.items[idx]

                if neg_item not in self.train_user_list[user]:
                    break

            neg_item_pop = self.item_pop_idx[neg_item]
            return user, pos_item, user_pop, pos_item_pop, pos_weight, neg_item, neg_item_pop

    def __len__(self):
        """Return the configured number of observations."""
        return self.n_observations

In [None]:
import numpy as np


def _get_pairwise_all_likefism_data(dataset):
    user_input_pos, user_input_neg, num_idx_pos, num_idx_neg, item_input_pos, item_input_neg = [], [], [], [], [], []
    num_items = dataset.num_items
    num_users = dataset.num_users
    train_matrix = dataset.train_matrix
    for u in range(num_users):
        items_by_u = train_matrix[u].indices.copy().tolist()
        num_items_by_u = len(items_by_u)
        if num_items_by_u > 1:
            negative_items = randint_choice(num_items, num_items_by_u, replace=True, exclusion = items_by_u)

            for index, i in enumerate(items_by_u):
                j = negative_items[index]
                user_input_neg.append(items_by_u)
                num_idx_neg.append(num_items_by_u)
                item_input_neg.append(j)

                items_by_u.remove(i)
                user_input_pos.append(items_by_u)
                num_idx_pos.append(num_items_by_u-1)
                item_input_pos.append(i)

    return user_input_pos, user_input_neg, num_idx_pos, num_idx_neg, item_input_pos, item_input_neg

def _get_pointwise_all_likefism_data(dataset, num_negatives, train_dict):
    user_input,num_idx,item_input,labels = [],[],[],[]
    num_users = dataset.num_users
    num_items = dataset.num_items
    for u in range(num_users):
        items_by_user = train_dict[u].copy()
        items_set = set(items_by_user)
        size = len(items_by_user)
        for i in items_by_user:
            # negative instances
            for _ in range(num_negatives):
                j = np.random.randint(num_items)
                while j in items_set:
                    j = np.random.randint(num_items)
                user_input.append(items_by_user)
                item_input.append(j)
                num_idx.append(size)
                labels.append(0)
            items_by_user.remove(i)
            user_input.append(items_by_user)
            item_input.append(i)
            num_idx.append(size-1)
            labels.append(1)
    return user_input,num_idx,item_input,labels

def _get_pairwise_all_likefossil_data(dataset, high_order, train_dict):
    user_input_id,user_input_pos,user_input_neg, num_idx_pos, num_idx_neg, item_input_pos,item_input_neg,item_input_recents = [],[], [], [],[],[],[],[]
    for u in range(dataset.num_users):
        items_by_user = train_dict[u].copy()
        num_items_by_u = len(items_by_user)
        if  num_items_by_u > high_order:
            negative_items = randint_choice(dataset.num_items, num_items_by_u, replace=True, exclusion = items_by_user)
            for idx in range(high_order,len(train_dict[u])):
                i = train_dict[u][idx] # item id
                item_input_recent = []
                for t in range(1,high_order+1):
                    item_input_recent.append(train_dict[u][idx-t])
                item_input_recents.append(item_input_recent)
                j = negative_items[idx]
                user_input_neg.append(items_by_user)
                num_idx_neg.append(num_items_by_u)
                item_input_neg.append(j)

                items_by_user.remove(i)
                user_input_id.append(u)
                user_input_pos.append(items_by_user)
                num_idx_pos.append(num_items_by_u-1)
                item_input_pos.append(i)

    return user_input_id,user_input_pos,user_input_neg, num_idx_pos, num_idx_neg, item_input_pos,item_input_neg,item_input_recents

def _get_pointwise_all_likefossil_data(dataset, high_order, num_negatives, train_dict):
    user_input_id,user_input,num_idx,item_input,item_input_recents,labels = [],[],[],[],[],[]
    for u in range(dataset.num_users):
        items_by_user = train_dict[u].copy()
        items_set = set(items_by_user)
        size = len(items_by_user)
        for idx in range(high_order,len(train_dict[u])):
            i = train_dict[u][idx] # item id
            item_input_recent = []
            for t in range(1,high_order+1):
                item_input_recent.append(train_dict[u][idx-t])
            # negative instances
            for _ in range(num_negatives):
                j = np.random.randint(dataset.num_items)
                while j in items_set:
                    j = np.random.randint(dataset.num_items)
                user_input_id.append(u)
                user_input.append(items_by_user)
                item_input_recents.append(item_input_recent)
                item_input.append(j)
                num_idx.append(size)
                labels.append(0)
            items_by_user.remove(i)
            user_input.append(items_by_user)
            user_input_id.append(u)
            item_input_recents.append(item_input_recent)
            item_input.append(i)
            num_idx.append(size-1)
            labels.append(1)
    return user_input_id,user_input,num_idx,item_input,item_input_recents,labels

In [None]:
import numpy as np


class Sampler(object):
    """Abstract parent of every index sampler.

    Concrete samplers override ``__iter__`` to produce dataset indices and
    ``__len__`` to report how many indices one pass produces. Both raise
    ``NotImplementedError`` here.
    """

    def __init__(self):
        # Nothing to set up in the abstract base.
        return None

    def __iter__(self):
        raise NotImplementedError()

    def __len__(self):
        raise NotImplementedError()


class SequentialSampler(Sampler):
    """Yields indices ``0 .. len(data_source) - 1`` in ascending order."""

    def __init__(self, data_source):
        """Create a sampler over ``data_source``.

        Args:
            data_source (_Dataset): Dataset whose indices are produced.
        """
        super(SequentialSampler, self).__init__()
        self.data_source = data_source

    def __iter__(self):
        total = len(self.data_source)
        return iter(range(total))

    def __len__(self):
        total = len(self.data_source)
        return total


class RandomSampler(Sampler):
    """Yields every index of the dataset exactly once, in random order."""

    def __init__(self, data_source):
        """Create a `RandomSampler` over ``data_source``.

        Args:
            data_source (_Dataset): Dataset whose indices are shuffled.
        """
        super(RandomSampler, self).__init__()
        self.data_source = data_source

    def __iter__(self):
        # A new permutation is drawn from NumPy's global RNG on each iteration.
        total = len(self.data_source)
        shuffled = np.random.permutation(total)
        return iter(shuffled.tolist())

    def __len__(self):
        total = len(self.data_source)
        return total


class BatchSampler(Sampler):
    """Groups the indices produced by another sampler into mini-batches."""

    def __init__(self, sampler, batch_size, drop_last):
        """Validate the arguments and store them.

        Args:
            sampler (Sampler): Sampler that supplies the individual indices.
            batch_size (int): Number of indices per batch; must be a positive
                ``int`` (``bool`` is rejected).
            drop_last (bool): When ``True`` a trailing batch shorter than
                ``batch_size`` is not yielded.

        Raises:
            ValueError: If any argument has the wrong type or ``batch_size``
                is not positive.
        """
        super(BatchSampler, self).__init__()

        sampler_ok = isinstance(sampler, Sampler)
        if not sampler_ok:
            message = ("sampler should be an instance of torch.utils.data.Sampler, "
                       "but got sampler={}")
            raise ValueError(message.format(sampler))

        # bool is a subclass of int, hence the explicit exclusion.
        size_ok = isinstance(batch_size, int) and not isinstance(batch_size, bool) and batch_size > 0
        if not size_ok:
            message = "batch_size should be a positive integeral value, but got batch_size={}"
            raise ValueError(message.format(batch_size))

        flag_ok = isinstance(drop_last, bool)
        if not flag_ok:
            message = "drop_last should be a boolean value, but got drop_last={}"
            raise ValueError(message.format(drop_last))

        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last

    def __iter__(self):
        pending = []
        for idx in self.sampler:
            pending.append(idx)
            if len(pending) != self.batch_size:
                continue
            yield pending
            pending = []
        # Emit the short trailing batch unless the caller asked to drop it.
        if pending and not self.drop_last:
            yield pending

    def __len__(self):
        full_batches, remainder = divmod(len(self.sampler), self.batch_size)
        if self.drop_last or remainder == 0:
            return full_batches
        return full_batches + 1


class _Dataset(object):
    """Pack the given data to one dataset.

    Args:
        data (list or tuple): a list of 'data'.
    """

    def __init__(self, data):
        for d in data:
            if len(d) != len(data[0]):
                raise ValueError("The length of the given data are not equal!")
            # assert len(d) == len(data[0])
        self.data = data

    def __len__(self):
        return len(self.data[0])

    def __getitem__(self, idx):
        return [data[idx] for data in self.data]


class _DataLoaderIter(object):
    """Iterates once over the dataset, as specified by the sampler.
    """

    def __init__(self, loader):
        self.dataset = loader.dataset
        self.batch_sampler = loader.batch_sampler
        self.sample_iter = iter(self.batch_sampler)

    def __len__(self):
        return len(self.batch_sampler)

    def __next__(self):
        indices = next(self.sample_iter)  # may raise StopIteration
        batch = [self.dataset[i] for i in indices]

        transposed = [list(samples) for samples in zip(*batch)]
        if len(transposed) == 1:
            transposed = transposed[0]
        return transposed

    def __iter__(self):
        return self


class DataIterator(object):
    """Batch iterator over one or more parallel sequences.

    The given sequences are packed into a single dataset and served in
    mini-batches, one list per sequence (a single sequence yields a flat
    list). Example::

        users = list(range(10))
        items = list(range(10, 20))
        labels = list(range(20, 30))

        data_iter = DataIterator(users, items, labels, batch_size=4, shuffle=False)
        for bat_user, bat_item, bat_label in data_iter:
            print(bat_user, bat_item, bat_label)

        data_iter = DataIterator(users, items, batch_size=4, shuffle=True, drop_last=True)
        for bat_user, bat_item in data_iter:
            print(bat_user, bat_item)

    """

    def __init__(self, *data, batch_size=1, shuffle=False, drop_last=False):
        """
        Args:
            *data: The parallel sequences to iterate over.
            batch_size (int): Samples per batch. Defaults to `1`.
            shuffle (bool): `True` draws a new random order on every pass;
                `False` keeps the original order. Defaults to `False`.
            drop_last (bool): `True` discards a final batch that is smaller
                than `batch_size`; `False` yields it. Defaults to `False`.

        Raises:
            ValueError: If the given sequences differ in length.
        """
        self.dataset = _Dataset(list(data))
        self.batch_size = batch_size
        self.drop_last = drop_last

        sampler_cls = RandomSampler if shuffle else SequentialSampler
        index_sampler = sampler_cls(self.dataset)
        self.batch_sampler = BatchSampler(index_sampler, batch_size, drop_last)

    def __iter__(self):
        # Each call starts a fresh pass over the data.
        return _DataLoaderIter(self)

    def __len__(self):
        # Number of batches per pass.
        return len(self.batch_sampler)

In [None]:
from cmath import cos
import torch
import torch.nn as nn
import math
import numpy as np
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from scipy.special import lambertw
import random
# from data import Data
import scipy.sparse as sp

# based on LightGCN
# n_layers = 0: MF
class AbstractModel(nn.Module):
    """Base recommender with user/item embeddings and graph propagation.

    Embeddings are propagated ``n_layers`` times through ``data``'s sparse
    graph and the per-layer results are averaged; with ``n_layers == 0`` the
    raw embeddings are used unchanged. Subclasses must implement ``forward``.

    Args:
        args: Configuration object; reads ``modeltype``, ``cuda``,
            ``embed_size``, ``regs``, ``train_norm``, ``pred_norm``,
            ``n_layers`` and ``batch_size``.
        data: Dataset object; must provide ``n_users``, ``n_items`` and
            ``getSparseGraph()``.
    """

    def __init__(self, args, data):
        super(AbstractModel, self).__init__()
        print("AbstractModel")

        # basic information
        self.args = args
        self.name = args.modeltype
        self.device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
        # self.saveID = args.saveID
        self.data = data

        # graph
        self.Graph = data.getSparseGraph()

        # basic hyper-parameters
        self.emb_dim = args.embed_size
        self.decay = args.regs
        self.train_norm = args.train_norm
        self.pred_norm = args.pred_norm
        self.n_layers = args.n_layers
        self.modeltype = args.modeltype
        self.batch_size = args.batch_size

        self.init_embedding()

    def init_embedding(self):
        """Create the user and item embedding tables (Xavier-normal init)."""
        self.embed_user = nn.Embedding(self.data.n_users, self.emb_dim)
        self.embed_item = nn.Embedding(self.data.n_items, self.emb_dim)

        nn.init.xavier_normal_(self.embed_user.weight)
        nn.init.xavier_normal_(self.embed_item.weight)

    def compute(self):
        """Propagate embeddings through the graph and average all layers.

        Returns:
            tuple: ``(users, items)`` tensors with ``n_users`` and ``n_items``
            rows respectively.
        """
        users_emb = self.embed_user.weight
        items_emb = self.embed_item.weight
        all_emb = torch.cat([users_emb, items_emb])

        # Layer 0 is the raw embedding table; each pass adds one more layer.
        embs = [all_emb]
        g_droped = self.Graph

        for layer in range(self.n_layers):
            all_emb = torch.sparse.mm(g_droped, all_emb)
            embs.append(all_emb)
        embs = torch.stack(embs, dim=1)

        light_out = torch.mean(embs, dim=1)
        users, items = torch.split(light_out, [self.data.n_users, self.data.n_items])

        return users, items

    #! must be implemented
    def forward(self):
        """Training forward pass; must be provided by subclasses."""
        raise NotImplementedError

    # Prediction function used when evaluation
    def predict(self, users, items=None):
        """Score ``users`` against ``items`` for evaluation.

        Args:
            users: Indices of the users to score.
            items: Indices of the items to score; ``None`` means all items.

        Returns:
            numpy.ndarray: Score matrix with one row per user and one column
            per item.
        """
        if items is None:
            items = list(range(self.data.n_items))

        # The result leaves as a NumPy array, so no gradient can ever flow
        # through it; disabling autograd avoids building a graph for the whole
        # propagation on every evaluation batch.
        with torch.no_grad():
            all_users, all_items = self.compute()

            users = all_users[torch.as_tensor(users)]
            items = all_items[torch.as_tensor(items)]

            # Kept as an equality test so non-boolean config values behave
            # exactly as before.
            if self.pred_norm == True:
                users = F.normalize(users, dim=-1)
                items = F.normalize(items, dim=-1)

            rate_batch = torch.matmul(users, items.transpose(0, 1))  # user * item

        return rate_batch.cpu().numpy()

In [None]:
class AbstractEvaluator(object):
    """Interface shared by all evaluators.

    Both ``metrics_info`` and ``evaluate`` raise ``NotImplementedError``
    here and must be overridden.
    """

    def __init__(self):
        # The abstract base keeps no state.
        return None

    def metrics_info(self):
        """Describe the reported metrics.

        Returns:
            str: Header text naming every metric column, such as
            `"Precision@10    Precision@20    NDCG@10    NDCG@20"`.
        """
        raise NotImplementedError()

    def evaluate(self, model):
        """Evaluate `model`.

        Args:
            model: The model under evaluation. The evaluators in this file
                call its `predict` method to obtain ranking scores.

        Returns:
            str: The metric values as text, such as
            `"0.18663847    0.11239596    0.35824192    0.21479650"`.
        """
        raise NotImplementedError()

In [None]:
import numpy as np
import sys


def hit(rank, ground_truth):
    """Hit ratio at every cut-off for a single ground-truth item.

    Entry ``k`` is 1.0 when ``ground_truth`` appears among the first
    ``k + 1`` ranked items and 0.0 otherwise. With a leave-one-out split
    this equals Recall.
    """
    result = np.zeros(len(rank), dtype=np.float32)
    for position, item in enumerate(rank):
        if item == ground_truth:
            # From the first match onwards every cut-off counts as a hit.
            result[position:] = 1.0
            break
    return result


def precision(rank, ground_truth):
    """Precision at every cut-off ``1 .. len(rank)``.

    Entry ``k`` is the number of ranked items among the first ``k + 1`` that
    belong to ``ground_truth``, divided by ``k + 1``. Not meaningful for a
    leave-one-out split.
    """
    hit_flags = []
    for item in rank:
        hit_flags.append(1 if item in ground_truth else 0)
    cumulative_hits = np.cumsum(hit_flags, dtype=np.float32)
    cutoffs = np.arange(1, len(rank) + 1)
    return cumulative_hits / cutoffs


def recall(rank, ground_truth):
    """Recall at every cut-off ``1 .. len(rank)``.

    Entry ``k`` is the number of ranked items among the first ``k + 1`` that
    belong to ``ground_truth``, divided by ``len(ground_truth)``. With a
    leave-one-out split this equals the hit ratio.

    Args:
        rank: Ranked item ids, best first.
        ground_truth: Collection of relevant item ids.

    Returns:
        numpy.ndarray: float32 array of length ``len(rank)``. All zeros when
        ``ground_truth`` is empty.
    """
    hits = [1 if item in ground_truth else 0 for item in rank]
    cumulative_hits = np.cumsum(hits, dtype=np.float32)
    if len(ground_truth) == 0:
        # Nothing can be recalled. Return 0 (as `map` and `ndcg` do for an
        # empty ground truth) instead of dividing by zero, which produced NaN
        # and poisoned any mean taken over users.
        return np.zeros_like(cumulative_hits)
    return cumulative_hits / len(ground_truth)


def map(rank, ground_truth):
    """Average precision at every cut-off ``1 .. len(rank)``.

    At cut-off ``k`` the precisions at the positions of relevant items within
    the first ``k`` ranks are summed and divided by ``min(k, len(ground_truth))``;
    a zero divisor yields 0. With a leave-one-out split this equals MRR.

    Reference: https://blog.csdn.net/u010138758/article/details/69936041

    Returns:
        list: One value per cut-off.
    """
    precision_at = precision(rank, ground_truth)

    # Keep the precision only at positions holding a relevant item.
    gated = []
    for position, item in enumerate(rank):
        gated.append(precision_at[position] if item in ground_truth else 0)
    running_sum = np.cumsum(gated, dtype=np.float32)

    num_relevant = len(ground_truth)
    result = []
    for position, total in enumerate(running_sum):
        divisor = min(position + 1, num_relevant)
        result.append(total / divisor if divisor != 0 else 0)
    return result


def ndcg(rank, ground_truth):
    """NDCG at every cut-off ``1 .. len(rank)`` with binary relevance.

    The ideal DCG assumes the top ``min(len(ground_truth), len(rank))``
    positions are all relevant and stays constant beyond that point.
    """
    num_ranked = len(rank)

    # Ideal DCG: cumulative discounts, frozen once all relevant items are placed.
    discounts = 1.0 / np.log2(np.arange(2, num_ranked + 2))
    ideal = np.cumsum(discounts)
    ideal_len = min(len(ground_truth), num_ranked)
    ideal[ideal_len:] = ideal[ideal_len - 1]

    # Actual DCG: discount gained only where a relevant item is ranked.
    gains = []
    for position, item in enumerate(rank):
        gains.append(1.0 / np.log2(position + 2) if item in ground_truth else 0.0)
    actual = np.cumsum(gains)

    return actual / ideal


def mrr(rank, ground_truth):
    """Reciprocal rank at every cut-off ``1 .. len(rank)``.

    Entries before the first relevant item are 0; from that position on they
    hold ``1 / (position + 1)``. With a leave-one-out split this equals MAP.
    """
    result = np.zeros(len(rank), dtype=np.float32)
    first_hit = next(
        (position for position, item in enumerate(rank) if item in ground_truth),
        None,
    )
    if first_hit is not None:
        result[first_hit:] = 1.0 / (first_hit + 1)
    return result


# Registry mapping each configurable metric name to its implementation.
metric_dict = dict(
    Precision=precision,
    Recall=recall,
    MAP=map,
    NDCG=ndcg,
    MRR=mrr,
)

In [None]:
"""
@author: Zhongchuan Sun
"""
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm


class UniEvaluator(AbstractEvaluator):
    """Python implementation `UniEvaluator` for item ranking task.

    Evaluation metrics of `UniEvaluator` are configurable and can
    automatically fit both leave-one-out and fold-out data splitting
    without specific indication:

    * **First**, evaluation metrics of this class are configurable via the
      argument `metric`. Now there are five configurable metrics: `Precision`,
      `Recall`, `MAP`, `NDCG` and `MRR`.

    * **Second**, this class and its evaluation metrics can automatically fit
      both leave-one-out and fold-out data splitting without specific indication.
      In **leave-one-out** evaluation, 1) `Recall` is equal to `HitRatio`;
      2) The implementation of `NDCG` is compatible with fold-out; 3) `MAP` and
      `MRR` have same numeric values; 4) `Precision` is meaningless.
    """

    @typeassert(user_train_dict=dict, user_test_dict=(dict, None.__class__))
    def __init__(self, user_train_dict, user_test_dict, user_neg_test=None,
                 metric=None, top_k=50, batch_size=1024, num_thread=8):
        """Initializes a new `UniEvaluator` instance.

        Args:
            user_train_dict (dict): Each key is user ID and the corresponding
                value is the list of **training items**.
            user_test_dict (dict): Each key is user ID and the corresponding
                value is the list of **test items**.
            user_neg_test (dict or None): If given, maps each user ID to a
                list of negative test items; each user is then ranked only
                against its test items plus these negatives. Defaults to `None`.
            metric (None or list of str): If `metric == None`, metric will
                be set to `["Precision", "Recall", "MAP", "NDCG", "MRR"]`.
                Otherwise, `metric` must be one or a sublist of metrics
                mentioned above. Defaults to `None`.
            top_k (int or list of int): `top_k` controls the Top-K item ranking
                performance. If `top_k` is an integer, K ranges from `1` to
                `top_k`; If `top_k` is a list of integers, K are only assigned
                these values. Defaults to `50`.
            batch_size (int): An integer to control the test batch size.
                Defaults to `1024`.
            num_thread (int): An integer to control the test thread number.
                Defaults to `8`.

        Raises:
            TypeError: If `metric` is neither `None`, a string, nor a
                set/tuple/list.
            ValueError: If one element of `metric` is not a known metric name.
        """
        super(UniEvaluator, self).__init__()
        print("Check UniEva")
        if metric is None:
            metric = ["Precision", "Recall", "MAP", "NDCG", "MRR"]
            print(metric)
        elif isinstance(metric, str):
            metric = [metric]
        elif isinstance(metric, (set, tuple, list)):
            pass
        else:
            raise TypeError("The type of 'metric' (%s) is invalid!" % (metric.__class__.__name__))

        for m in metric:
            if m not in metric_dict:
                # Report the offending name. Formatting the whole `metric`
                # collection printed the wrong thing and raised TypeError
                # for multi-element tuples.
                raise ValueError("There is not the metric named '%s'!" % m)

        self.user_pos_train = user_train_dict
        self.user_pos_test = {user: set(items) for user, items in user_test_dict.items()}
        self.user_neg_test = user_neg_test
        self.metrics_num = len(metric)
        self.metrics = metric
        self.num_thread = num_thread
        self.batch_size = batch_size

        # max_top: deepest cut-off that must be computed;
        # top_show: the (1-based) cut-offs that are actually reported.
        self.max_top = top_k if isinstance(top_k, int) else max(top_k)
        if isinstance(top_k, int):
            self.top_show = np.arange(top_k) + 1
        else:
            self.top_show = np.sort(top_k)

    def metrics_info(self):
        """Get all metrics information.

        Returns:
            str: A string consist of all metrics information, such as
                `"Precision@10    Precision@20    NDCG@10    NDCG@20"`.
        """
        metrics_show = ['\t'.join([("%s@"%metric + str(k)).ljust(12) for k in self.top_show])
                        for metric in self.metrics]
        metric = '\t'.join(metrics_show)
        return "metrics:\t%s" % metric

    def evaluate(self, model, test_users=None):
        """Evaluate `model`.

        Args:
            model: The model need to be evaluated. This model must have
                a method `predict(users, items)`, where `users` is a list of
                users and `items` is `None` (score all items) or per-user
                candidate lists; the return is a 2-D array of ranking scores.
            test_users (list, tuple, set, numpy.ndarray or None): Users to
                evaluate. Defaults to every user in the test dict.

        Returns:
            str: A single-line string consist of all results, such as
                `"0.18663847    0.11239596    0.35824192    0.21479650"`.

        Raises:
            TypeError: If `test_users` has an unsupported type.
        """
        # B: batch size
        # N: the number of items
        test_users = test_users if test_users is not None else list(self.user_pos_test.keys())
        if not isinstance(test_users, (list, tuple, set, np.ndarray)):
            raise TypeError("'test_user' must be a list, tuple, set or numpy array!")

        test_users = DataIterator(test_users, batch_size=self.batch_size,
                                  shuffle=False, drop_last=False)
        batch_result = []
        for batch_users in tqdm(test_users):
            if self.user_neg_test is not None:
                # Candidates are [test items..., negatives...], so the test
                # items occupy the leading candidate positions 0..len-1.
                candidate_items = [list(self.user_pos_test[u]) + self.user_neg_test[u] for u in batch_users]
                test_items = [set(range(len(self.user_pos_test[u]))) for u in batch_users]

                ranking_score = model.predict(batch_users, candidate_items)  # (B,N)
                ranking_score = pad_sequences(ranking_score, value=-np.inf, dtype=np.float32)

                ranking_score = np.array(ranking_score)
            else:
                test_items = [self.user_pos_test[u] for u in batch_users]
                ranking_score = model.predict(batch_users, None)  # (B,N)
                ranking_score = np.array(ranking_score)

                # set the ranking scores of training items to -inf,
                # then the training items will be sorted at the end of the ranking list.
                for idx, user in enumerate(batch_users):
                    train_items = self.user_pos_train[user]
                    ranking_score[idx][train_items] = -np.inf

            result = self.eval_score_matrix(ranking_score, test_items, self.metrics,
                                            top_k=self.max_top, thread_num=self.num_thread)  # (B,k*metric_num)
            batch_result.append(result)

        # concatenate the batch results to a matrix
        all_user_result = np.concatenate(batch_result, axis=0)  # (num_users, metrics_num*max_top)
        final_result = np.mean(all_user_result, axis=0)  # (1, metrics_num*max_top)

        # The target shape is passed positionally: the `newshape` keyword is
        # deprecated in recent NumPy releases.
        final_result = np.reshape(final_result, [self.metrics_num, self.max_top])  # (metrics_num, max_top)
        final_result = final_result[:, self.top_show - 1]
        final_result = np.reshape(final_result, [-1])
        buf = '\t'.join([("%.8f" % x).ljust(12) for x in final_result])
        return buf

    @typeassert(score_matrix=np.ndarray, test_items=list)
    def eval_score_matrix(self, score_matrix, test_items, metric, top_k, thread_num):
        """Compute all metrics for every row of `score_matrix`.

        Args:
            score_matrix (numpy.ndarray): One row of scores per test user.
            test_items (list): Per-user collection of relevant item indices.
            metric: Names of the metrics to compute (keys of `metric_dict`).
            top_k (int): Length of the ranking evaluated for each user.
            thread_num (int): Number of worker threads.

        Returns:
            numpy.ndarray: One row per user holding the flattened
            per-metric, per-cut-off values.
        """
        def _eval_one_user(idx):
            scores = score_matrix[idx]  # all scores of the test user
            test_item = test_items[idx]

            ranking = argmax_top_k(scores, top_k)  # Top-K items
            result = [metric_dict[m](ranking, test_item) for m in metric]

            result = np.array(result, dtype=np.float32).flatten()
            return result

        with ThreadPoolExecutor(max_workers=thread_num) as executor:
            # executor.map preserves input order; materialize it right away.
            batch_result = list(executor.map(_eval_one_user, range(len(test_items))))

        return np.array(batch_result)  # list to ndarray

In [None]:
import numpy as np
from collections import OrderedDict
import pandas as pd


class GroupedEvaluator(AbstractEvaluator):
    """`GroupedEvaluator` evaluates models in user groups.

    This class evaluates the ranking performance of models in user groups,
    which are split according to the numbers of users' interactions in
    **training data**. This function can be activated by the argument
    `group_view`, which must be a list of integers.
    For example, if `group_view = [10,30,50,100]`, users will be split into
    four groups: `(0, 10]`, `(10, 30]`, `(30, 50]` and `(50, 100]`. And the
    users whose interacted items more than `100` will be discarded.
    """
    @typeassert(user_train_dict=dict, user_test_dict=dict, group_view=list)
    def __init__(self, user_train_dict, user_test_dict, user_neg_test=None,
                 metric=None, group_view=None, top_k=50, batch_size=1024, num_thread=8):
        """Initializes a new `GroupedEvaluator` instance.

        Args:
            user_train_dict (dict): Each key is user ID and the corresponding
                value is the list of **training items**.
            user_test_dict (dict): Each key is user ID and the corresponding
                value is the list of **test items**.
            user_neg_test (dict or None): Forwarded unchanged to the wrapped
                `UniEvaluator`. Defaults to `None`.
            metric (None or list of str): If `metric == None`, metric will
                be set to `["Precision", "Recall", "MAP", "NDCG", "MRR"]`.
                Otherwise, `metric` must be one or a sublist of metrics
                mentioned above. Defaults to `None`.
            group_view (list of int): A list of integers.
            top_k (int or list of int): `top_k` controls the Top-K item ranking
                performance. If `top_k` is an integer, K ranges from `1` to
                `top_k`; If `top_k` is a list of integers, K are only assigned
                these values. Defaults to `50`.
            batch_size (int): An integer to control the test batch size.
                Defaults to `1024`.
            num_thread (int): An integer to control the test thread number.
                Defaults to `8`.

        Raises:
             TypeError: If `group_view` is not a list.
             ValueError: If user splitting with `group_view` is not suitable.
        """
        super(GroupedEvaluator, self).__init__()

        if not isinstance(group_view, list):
            raise TypeError("The type of 'group_view' must be `list`!")

        self.evaluator = UniEvaluator(user_train_dict, user_test_dict, user_neg_test,
                                      metric=metric, top_k=top_k,
                                      batch_size=batch_size,
                                      num_thread=num_thread)
        self.user_pos_train = user_train_dict
        self.user_pos_test = user_test_dict

        group_list = [0] + group_view
        group_info = [("(%d,%d]:" % (g_l, g_h)).ljust(12)
                      for g_l, g_h in zip(group_list[:-1], group_list[1:])]

        all_test_user = list(self.user_pos_test.keys())
        num_interaction = [len(self.user_pos_train[u]) for u in all_test_user]
        # Index of the first upper bound that is >= the interaction count;
        # counts above the last bound get index len(group_view) and are dropped.
        group_idx = np.searchsorted(group_list[1:], num_interaction)
        user_group = pd.DataFrame(list(zip(all_test_user, group_idx)),
                                  columns=["user", "group"])
        # Group by the column *name*, not a one-element list: with a list,
        # pandas >= 2 yields 1-tuple keys and the integer comparison below
        # raises TypeError.
        grouped = user_group.groupby(by="group")

        self.grouped_user = OrderedDict()
        for idx, users in grouped:
            if idx < len(group_info):
                self.grouped_user[group_info[idx]] = users["user"].tolist()

        if not self.grouped_user:
            raise ValueError("The splitting of user groups is not suitable!")

    def metrics_info(self):
        """Get all metrics information.

        Returns:
            str: A string consist of all metrics information, such as
            `"Precision@10    Precision@20    NDCG@10    NDCG@20"`.
        """
        return self.evaluator.metrics_info()

    def evaluate(self, model):
        """Evaluate `model` in user groups.

        Args:
            model: The model need to be evaluated. It is passed to the wrapped
                `UniEvaluator`, which calls its `predict` method to obtain
                ranking scores.

        Returns:
            str: A multi-line string consist of all results of groups, such as:
                `"(0,10]:   0.00648002   0.00421617   0.00301847   0.00261693\n
                (10,30]:  0.00686600   0.00442968   0.00310077   0.00249169\n
                (30,50]:  0.00653595   0.00326797   0.00217865   0.00163399\n
                (50,100]: 0.00423729   0.00211864   0.00141243   0.00105932"`
        """
        result_to_show = ""
        for group, users in self.grouped_user.items():
            tmp_result = self.evaluator.evaluate(model, users)
            result_to_show = "%s\n%s\t%s" % (result_to_show, group, tmp_result)

        return result_to_show

In [None]:
class ProxyEvaluator(AbstractEvaluator):
    """Single entry point for ranking evaluation.

    The class owns no evaluation logic itself; it picks a backend at
    construction time and forwards `metrics_info` / `evaluate` to it:

    * With `group_view=None` the backend is a `UniEvaluator`, which reports
      one row of results for all test users.
    * With `group_view` set (a list of integers such as `[10, 30, 50, 100]`)
      the backend is a `GroupedEvaluator`, which reports results per user
      group. Per the original documentation the groups are the intervals
      `(0, 10]`, `(10, 30]`, `(30, 50]`, `(50, 100]` over the number of
      **training** interactions, and users above the last bound are dropped.

    Configurable metrics (per the original documentation): `Precision`,
    `Recall`, `MAP`, `NDCG` and `MRR`. In leave-one-out evaluation `Recall`
    equals `HitRatio`, `MAP` and `MRR` coincide, and `Precision` is
    meaningless.
    """

    @typeassert(user_train_dict=dict, user_test_dict=dict)
    def __init__(self, dataset, user_train_dict, user_test_dict, user_neg_test=None, metric=None,
                 group_view=None, top_k=50, batch_size=1024, num_thread=8,dump_dict=None,pop_mask=None):
        """Build the backend evaluator.

        Args:
            dataset: Accepted for call-site compatibility; not read here.
            user_train_dict (dict): Maps user ID to the list of
                **training items**.
            user_test_dict (dict): Maps user ID to the list of
                **test items**.
            user_neg_test: Forwarded positionally to the backend as its
                third argument. Defaults to `None`.
            metric (None or list of str): Forwarded to the backend. Per the
                original documentation `None` selects
                `["Precision", "Recall", "MAP", "NDCG", "MRR"]`.
            group_view (None or list of int): `None` selects the ungrouped
                backend; anything else selects the grouped backend and is
                forwarded to it.
            top_k (int or list of int): Forwarded to the backend. Per the
                original documentation an integer means K ranges over
                `1..top_k`, a list means exactly those K. Defaults to `50`.
            batch_size (int): Test batch size, forwarded. Defaults to `1024`.
            num_thread (int): Test thread count, forwarded. Defaults to `8`.
            dump_dict: Accepted for call-site compatibility; not read here.
            pop_mask: Accepted for call-site compatibility; not read here.

        TODO:
            * Check the validation of `num_thread` in cpp implementation.
        """
        super(ProxyEvaluator, self).__init__()
        # Options understood by both backends.
        shared_options = dict(metric=metric, top_k=top_k,
                              batch_size=batch_size, num_thread=num_thread)
        if group_view is None:
            self.evaluator = UniEvaluator(user_train_dict, user_test_dict, user_neg_test,
                                          **shared_options)
        else:
            print("Grouped Evaluator")
            self.evaluator = GroupedEvaluator(user_train_dict, user_test_dict, user_neg_test,
                                              group_view=group_view, **shared_options)

    def metrics_info(self):
        """Return the backend's metric header.

        Returns:
            str: A string naming all metrics, such as
                `"Precision@10    Precision@20    NDCG@10    NDCG@20"`.
        """
        return self.evaluator.metrics_info()

    def evaluate(self, model):
        """Evaluate `model` with the backend chosen at construction.

        Args:
            model: The model to evaluate; passed unchanged to the backend.
                Per the original documentation it must provide
                `predict_for_eval(self, users)` returning a 2-D array of
                scores over all items.

        Returns:
            str: The backend's result string, such as
                `"0.18663847    0.11239596    0.35824192    0.21479650"`.
        """
        return self.evaluator.evaluate(model)

In [None]:
%load_ext cython

In [None]:
%%cython
# distutils: language = c++
"""
Author: Zhongchuan Sun
"""
import numpy as np
cimport numpy as np

def get_float_type():
    """Return the NumPy float dtype whose width matches the C `float` type."""
    cdef int bit_width = sizeof(float) * 8
    matching_type = {32: np.float32, 64: np.float64}.get(bit_width)
    if matching_type is None:
        raise EnvironmentError(f"The size of 'float' is {bit_width}, but expected 32 or 64 bits.")
    return matching_type

def get_int_type():
    """Return the NumPy integer dtype whose width matches the C `int` type."""
    cdef int bit_width = sizeof(int) * 8
    matching_type = {16: np.int16, 32: np.int32}.get(bit_width)
    if matching_type is None:
        raise EnvironmentError(f"The size of 'int' is {bit_width}, but expected 16 or 32 bits.")
    return matching_type

# NumPy dtypes matching the C `float` and `int` widths of this build,
# resolved once when the cell is compiled and executed.
float_type = get_float_type()
int_type = get_int_type()

def is_ndarray(np.ndarray array, dtype):
    """Return True only if `array` passes all three checks below, else False.

    The checks are: it is a `np.ndarray` instance, its dtype equals `dtype`,
    and its `base` is NULL.
    """
    # NOTE(review): the parameter is already typed `np.ndarray`, so this check
    # presumably only matters for `None` — confirm against Cython's argument
    # type checking.
    if not isinstance(array, np.ndarray):
        return False
    if array.dtype != dtype:
        return False
    # NOTE(review): presumably rejects arrays that borrow their memory from
    # another object (e.g. views) — confirm that `base` compared with NULL
    # behaves this way with the Cython/NumPy versions in use.
    if array.base is not NULL:
        return False
    return True


In [None]:
import random
import re
from sys import get_coroutine_origin_tracking_depth
from sys import exit
random.seed(101)
import matplotlib.pyplot as plt
import math
import matplotlib.patches as mpatches
#from scipy.linalg import svd
import itertools
import torch
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
# from evaluator import ProxyEvaluator
import collections
import os

def merge_user_list(user_lists):
    """Concatenate several user->items mappings into one.

    Args:
        user_lists: Iterable of mappings from user to a list of items.

    Returns:
        collections.defaultdict: For every user seen, the items from all
        mappings concatenated in the order the mappings were given.
        Duplicates are kept and the input lists are not modified.
    """
    merged = collections.defaultdict(list)
    for mapping in user_lists:
        for user, items in mapping.items():
            # Build a new list so the caller's lists are never aliased.
            merged[user] = merged[user] + items
    return merged


def merge_user_list_no_dup(user_lists):
    """Merge several user->items mappings and drop duplicate items per user.

    Args:
        user_lists: Iterable of mappings from user to a list of items.

    Returns:
        collections.defaultdict: For every user seen, a list of the distinct
        items found across all mappings. The order of each list is whatever
        `set` iteration yields.
    """
    merged = collections.defaultdict(list)
    for mapping in user_lists:
        for user, items in mapping.items():
            merged[user] = merged[user] + items

    # Replacing values while iterating is safe: no key is added or removed.
    for user, items in merged.items():
        merged[user] = list(set(items))
    return merged


def save_checkpoint(model, epoch, checkpoint_dir, buffer, max_to_keep=10):
    """Write a checkpoint for `epoch` and evict the oldest one if needed.

    Args:
        model: Object providing `state_dict()`.
        epoch: Epoch number, stored in the checkpoint and in the file name.
        checkpoint_dir: Directory that receives
            `epoch=<epoch>.checkpoint.pth.tar`.
        buffer (list): Paths of checkpoints written so far, oldest first.
            Mutated in place.
        max_to_keep (int): Once `buffer` holds more entries than this, its
            first entry is deleted from disk and removed. Defaults to 10.

    Returns:
        list: The same `buffer` object.
    """
    checkpoint_path = os.path.join(checkpoint_dir, 'epoch={}.checkpoint.pth.tar'.format(epoch))
    torch.save({'epoch': epoch, 'state_dict': model.state_dict()}, checkpoint_path)

    buffer.append(checkpoint_path)
    if len(buffer) > max_to_keep:
        # Delete the file first so a failed removal leaves the buffer intact.
        os.remove(buffer[0])
        buffer.pop(0)

    return buffer


def restore_checkpoint(model, checkpoint_dir, device, force=False, pretrain=False):
    """Load a saved checkpoint from `checkpoint_dir` into `model`.

    Args:
        model: Model whose `load_state_dict` receives the stored weights.
        checkpoint_dir: Directory scanned for files named
            `epoch=<n>.checkpoint.pth.tar`.
        device: Converted with `str()` and used as `map_location`.
        force (bool): When False, the highest saved epoch is loaded without
            asking. When True, the epoch is read from standard input and must
            lie in `range(0, highest_epoch)`; a missing checkpoint is an error.
        pretrain (bool): When true, weights are loaded with `strict=False`.

    Returns:
        tuple: `(model, epoch)`. The epoch is 0 when nothing was loaded: no
        checkpoint exists and `force` is False, or the highest saved epoch
        is 0, in which case the stored checkpoints are also cleared.

    Raises:
        Exception: If `force` is set and no checkpoint exists, or the chosen
            epoch is out of range. Loading errors are re-raised after a
            message is printed.
    """
    cp_files = []
    for entry in os.listdir(checkpoint_dir):
        if entry.startswith('epoch=') and entry.endswith('.checkpoint.pth.tar'):
            cp_files.append(entry)

    if len(cp_files) == 0:
        print('No saved model parameters found')
        if not force:
            return model, 0
        raise Exception("Checkpoint not found")

    # The first run of digits in each file name is its epoch number.
    number_pattern = re.compile(r'\d+')
    epoch = max([int(digits) for digits in number_pattern.findall(name)][0]
                for name in cp_files)

    if force:
        print("Which epoch to load from? Choose in range [0, {}).".format(epoch))
        inp_epoch = int(input())
        if inp_epoch not in range(0, epoch):
            raise Exception("Invalid epoch number")
    else:
        print("Which epoch to load from? Choose in range [0, {})."
              .format(epoch), "Enter 0 to train from scratch.")
        print(">> ", end = '')
        # Non-interactive: always continue from the latest checkpoint.
        inp_epoch = epoch
        if inp_epoch not in range(epoch + 1):
            raise Exception("Invalid epoch number")
        if inp_epoch == 0:
            print("Checkpoint not loaded")
            clear_checkpoint(checkpoint_dir)
            return model, 0

    filename = os.path.join(checkpoint_dir,
                            'epoch={}.checkpoint.pth.tar'.format(inp_epoch))
    print("Loading from checkpoint {}?".format(filename))
    checkpoint = torch.load(filename, map_location = str(device))

    try:
        # pretrain tolerates missing/unexpected keys; otherwise keys must match.
        model.load_state_dict(checkpoint['state_dict'], strict=not pretrain)
        print("=> Successfully restored checkpoint (trained for {} epochs)"
              .format(checkpoint['epoch']))
    except:
        print("=> Checkpoint not successfully restored")
        raise

    return model, inp_epoch


def restore_best_checkpoint(epoch, model, checkpoint_dir, device):
    """Load the checkpoint saved for `epoch` into `model`.

    Args:
        epoch: Epoch number identifying the file
            `epoch=<epoch>.checkpoint.pth.tar`.
        model: Model whose `load_state_dict` receives the stored weights.
        checkpoint_dir: Directory containing the checkpoint file.
        device: Converted with `str()` and used as `map_location`.

    Returns:
        The same `model`, with the stored weights loaded.
    """
    # The directory used to be scanned here into a list that was never read;
    # the checkpoint path is fully determined by `epoch`.
    filename = os.path.join(checkpoint_dir,
                            'epoch={}.checkpoint.pth.tar'.format(epoch))

    print("Loading from checkpoint {}?".format(filename))

    checkpoint = torch.load(filename, map_location = str(device))

    model.load_state_dict(checkpoint['state_dict'])
    print("=> Successfully restored checkpoint (trained for {} epochs)"
          .format(checkpoint['epoch']))

    return model


def clear_checkpoint(checkpoint_dir):
    """Delete every file in `checkpoint_dir` whose name ends with `.pth.tar`."""
    stale_names = []
    for name in os.listdir(checkpoint_dir):
        if name.endswith(".pth.tar"):
            stale_names.append(name)

    for name in stale_names:
        os.remove(os.path.join(checkpoint_dir, name))

    print("Checkpoint successfully removed")


def evaluation(args, data, model, epoch, base_path, evaluator, name="valid"):
    """Run one evaluator, log its metrics and update early-stopping state.

    The evaluator's result string is split on whitespace and read
    positionally as precision, recall, map, ndcg, mrr. The metrics are
    printed and appended to `base_path + 'stats.txt'` as `<name>:<dict>`.

    Early-stopping state on `data` is only touched when `name == "test"`:
    a strictly higher recall records a new best epoch and resets
    `data.patience`; otherwise `data.patience` is incremented and early
    stopping is signalled once it reaches `args.patience`.

    Args:
        args: Object providing `patience`.
        data: Object holding `best_valid_recall`, `best_valid_epoch` and
            `patience`; mutated in place.
        model: Passed unchanged to `evaluator.evaluate`.
        epoch: Epoch number recorded when a new best is found.
        base_path: Prefix of the stats file path.
        evaluator: Object providing `evaluate(model)` that returns a string.
        name (str): Label written in front of the metrics.

    Returns:
        tuple: `(is_best, early_stop, n_ret)`, where `n_ret` is the metric
        dictionary.
    """
    scores = [float(token) for token in evaluator.evaluate(model).split()]

    # Key order is part of the on-disk log format; do not reorder.
    n_ret = {"recall": scores[1], "precision": scores[0], "ndcg": scores[3], "mrr":scores[4], "map":scores[2]}

    perf_str = name+':{}'.format(n_ret)
    print(perf_str)
    with open(base_path + 'stats.txt', 'a') as f:
        f.write(perf_str + "\n")

    # Only the evaluator named "test" drives best-epoch tracking.
    if name != "test":
        return False, False, n_ret

    if scores[1] > data.best_valid_recall:
        data.best_valid_epoch = epoch
        data.best_valid_recall = scores[1]
        data.patience = 0
        return True, False, n_ret

    data.patience += 1
    if data.patience >= args.patience:
        print_str = "The best performance epoch is % d " % data.best_valid_epoch
        print(print_str)
        return False, True, n_ret

    return False, False, n_ret


def Item_pop(args, data, model):
    """Report the overlap between recommendations and popular items.

    For K in 10, 20, ..., 50 an evaluator is built against
    `data.pop_dict_list[K // 10 - 1]`, the model is evaluated at that K, and
    the second reported value is printed and appended to `stats.txt` in the
    current working directory. The second value is the recall under the
    metric order `evaluation` reads.

    Args:
        args: Unused; kept for call-site compatibility.
        data: Object providing `train_user_list`, `valid_user_list` and
            `pop_dict_list` (indexable with 0..4).
        model: Passed unchanged to the evaluator.
    """
    # The merged dictionary is the same for every K, so build it once.
    seen_items = merge_user_list([data.train_user_list, data.valid_user_list])

    for K in range(5):
        top_k = (K + 1) * 10

        eval_pop = ProxyEvaluator(data, data.train_user_list, data.pop_dict_list[K], top_k=[top_k],
                                  dump_dict=seen_items)

        # The evaluator returns one whitespace-separated string, not a tuple,
        # so parse it the same way `evaluation` does before indexing.
        ret = [float(value) for value in eval_pop.evaluate(model).split()]

        print_str = "Overlap for K = % d is % f" % (top_k, ret[1])

        print(print_str)

        with open('stats.txt', 'a') as f:
            f.write(print_str + "\n")


def ensureDir(dir_path):
    """Create `dir_path` (including parents) unless the path already exists.

    Args:
        dir_path: Directory path to create.
    """
    if not os.path.exists(dir_path):
        # exist_ok covers the case where another process creates the
        # directory between the check above and this call.
        os.makedirs(dir_path, exist_ok=True)


def split_grp_view(data,grp_idx):
    """Split a user->items mapping into one mapping per item group.

    Args:
        data: Mapping from user to an iterable of items.
        grp_idx: Indexable by item; gives the group index of each item.

    Returns:
        list: One dict per group. Each dict maps a user to the items of that
        user that fall in the group; users with no item in a group are
        absent from that group's dict.

    NOTE(review): the number of groups comes from a module-level `grp_view`
    that is not a parameter — confirm it is defined before this is called.
    """
    buckets = [dict() for _ in range(len(grp_view))]

    for user, items in data.items():
        for item in items:
            buckets[grp_idx[item]].setdefault(user, []).append(item)
    return buckets


def checktensor(tensor):
    """Locate the first NaN in a tensor.

    Args:
        tensor: A torch tensor; it is detached and copied to the CPU.

    Returns:
        The index of the first NaN in the flattened tensor (as returned by
        `np.argmax`), or -1 when the tensor holds no NaN. An empty tensor
        also yields -1.
    """
    nan_mask = np.isnan(tensor.detach().cpu().numpy())
    # `.any()` is defined for empty arrays, unlike `np.max`.
    if nan_mask.any():
        return np.argmax(nan_mask)
    return -1

def get_rotation_matrix(axis, theta):
    """
    Build the 3x3 matrix for a counterclockwise rotation of `theta` radians
    about `axis`.
    Credit: http://stackoverflow.com/users/190597/unutbu

    Args:
        axis (list): rotation axis of the form [x, y, z]; it is normalised
            to unit length here
        theta (float): rotational angle in radians

    Returns:
        array. Rotation matrix.
    """
    direction = np.asarray(axis)
    half_angle = np.asarray(theta) / 2.0
    direction = direction / math.sqrt(np.dot(direction, direction))

    # (a, b, c, d) are the scalar and vector parts used by the formula below.
    a = math.cos(half_angle)
    b, c, d = -direction * math.sin(half_angle)

    aa, bb, cc, dd = a*a, b*b, c*c, d*d
    ab, ac, ad = a*b, a*c, a*d
    bc, bd, cd = b*c, b*d, c*d

    first_row = [aa+bb-cc-dd, 2*(bc+ad), 2*(bd-ac)]
    second_row = [2*(bc-ad), aa+cc-bb-dd, 2*(cd+ab)]
    third_row = [2*(bd+ac), 2*(cd-ab), aa+dd-bb-cc]
    return np.array([first_row, second_row, third_row])


# Gradients captured by hooks created with `save_grad`, keyed by name.
grads = {}
def save_grad(name):
    """Create a hook that records the gradient it receives in `grads[name]`.

    Args:
        name: Key under which the gradient is stored in the module-level
            `grads` dictionary.

    Returns:
        A one-argument callable `hook(grad)` that stores `grad` and returns
        None.
    """
    def hook(grad):
        # A `torch.clamp(grad, -1, 1)` call used to sit here, but its result
        # was discarded, so it never changed the stored or propagated
        # gradient. It is removed; the raw gradient is stored exactly as
        # before. To actually clip, return the clamped tensor from this hook.
        grads[name] = grad
    return hook


def fix_seeds(seed=101):
    """Seed every random number generator used here and make cuDNN deterministic.

    Args:
        seed (int): Seed applied to `random`, NumPy and torch (CPU and all
            CUDA devices). It is also exported as `PYTHONHASHSEED`.
    """
    # In order to disable hash randomization and make the experiment reproducible.
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # if you are using multi-GPU.
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def align_loss(x, y, alpha=2):
    """Mean of the row-wise L2 distance between `x` and `y` raised to `alpha`.

    The norm is taken over dimension 1 of `x - y`.
    """
    row_distance = (x - y).norm(p=2, dim=1)
    powered = row_distance.pow(alpha)
    return powered.mean()

def uniform_loss(x, t=2):
    """Log of the mean of `exp(-t * d^2)` over all pairwise L2 distances.

    The distances `d` are those returned by `torch.pdist(x, p=2)`.
    """
    squared_distance = torch.pdist(x, p=2).pow(2)
    kernel = squared_distance.mul(-t).exp()
    return kernel.mean().log()

def _stats_metric(tokens, metric):
    """Return the float logged for `metric` in a space-split stats line."""
    marker = "'" + metric + "':"
    for position, token in enumerate(tokens[:-1]):
        if token.endswith(marker):
            # The value token carries a trailing ',' or '}\n'.
            return float(tokens[position + 1].rstrip(',}\n'))
    raise ValueError("metric %r not found in stats line: %r" % (metric, ' '.join(tokens)))


def visualize_and_save_log(file_dir, dataset_name, show=False, eval_interval=5):
    """Plot the recall curves from a stats log and export them as CSV.

    Lines whose first space-separated token contains "valid" or "test" are
    read as metric lines of the form `<name>:{'recall': ..., 'ndcg': ...}`.
    Recall and NDCG are looked up by key, so the result does not depend on
    where the metrics sit in the logged dictionary.

    Outputs are written next to the log, using `file_dir` minus its last
    nine characters (the length of "stats.txt") as the directory:
    `train_log.png` with validation/test recall on twin y-axes, and
    `train_log.csv` with all parsed rows. The plot leaves out the last row.

    Args:
        file_dir (str): Path of the stats file.
        dataset_name (str): Nothing is done for "tencent_synthetic".
        show (bool): Also display the figure after saving it.
        eval_interval (int): Number of epochs between consecutive logged
            evaluations; used only to label the epoch axis/column.
            Defaults to 5.
    """
    if(dataset_name == "tencent_synthetic"):
        return

    valid_recall, valid_ndcg, test_recall, test_ndcg = [], [], [], []

    with open(file_dir, 'r') as f:
        for line in f:
            tokens = line.split(' ')
            if("valid" in tokens[0]):
                valid_recall.append(_stats_metric(tokens, 'recall'))
                valid_ndcg.append(_stats_metric(tokens, 'ndcg'))
            if("test" in tokens[0]):
                test_recall.append(_stats_metric(tokens, 'recall'))
                test_ndcg.append(_stats_metric(tokens, 'ndcg'))

    epochs = [i * eval_interval for i in range(len(valid_recall))]
    # Define table.
    result = pd.DataFrame({'epochs': epochs, 'valid_recall': valid_recall, 'test_recall': test_recall, 'valid_ndcg': valid_ndcg, 'test_ndcg': test_ndcg})
    # df is all rows except the last one.
    df = result.iloc[:-1, :]

    # Strip the trailing "stats.txt" (9 characters) to get the output prefix.
    base_path = file_dir[:-9]

    if df.empty:
        # min()/max() below would fail on an empty series; still export the CSV.
        print("Not enough evaluation records in {} to plot; skipping train_log.png".format(file_dir))
    else:
        fig=plt.figure()
        x = df.epochs
        y1 = df.valid_recall
        y2 = df.test_recall
        print(max(y1), max(y2), 1.1*max(y1), 1.1*max(y2))
        # ax1 displays y1, ax2 displays y2.
        ax1=fig.subplots()
        ax2=ax1.twinx()    # twin axis: shares the x-axis with ax1 but has its own y scale.
        ax1.plot(x,y1,'g-', label='valid_recall')
        ax2.plot(x,y2,'b--', label='test_recall')
        # Coordinate axis range: leave 15% headroom above each curve.
        ax1.set_ylim(min(y1), 1.15*(max(y1)-min(y1))+min(y1))
        ax2.set_ylim(min(y2), 1.15*(max(y2)-min(y2))+min(y2))

        ax1.set_xlabel('epochs')
        ax1.set_ylabel('valid_recall')
        ax2.set_ylabel('test_recall')
        # legend
        ax1.legend(loc='upper left')
        ax2.legend(loc='upper right')

        save_path = base_path + "/train_log.png"
        plt.savefig(save_path)
        if(show):
            plt.show()

    save_path = base_path + "/train_log.csv"
    result.to_csv(save_path, index=False)

In [None]:
import numpy as np
import torch
import torch.nn as nn
from concurrent.futures import ThreadPoolExecutor
import datetime
import json
import wandb

# define the abstract class for recommender system
class AbstractRS(nn.Module):
    """Base training/evaluation pipeline shared by the recommender models.

    The constructor reads the run configuration from `args`, loads the
    dataset, instantiates the model class named by `args.modeltype`, prepares
    the output directory and builds the validation/test evaluators.
    `execute()` then runs the whole pipeline. Subclasses must implement
    `train_one_epoch`.
    """

    def __init__(self, args, special_args) -> None:
        """Set up data, model, output directory and evaluators.

        Args:
            args: Parsed run configuration; the attributes read are visible
                in the body below.
            special_args: Names of model-specific attributes of `args`; they
                are appended to the save ID and to the results table.
        """
        super(AbstractRS, self).__init__()

        # basic information
        self.args = args
        self.special_args = special_args
        self.device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
        self.test_only = args.test_only
        self.candidate = args.candidate

        self.Ks = args.Ks
        self.patience = args.patience
        self.modeltype = args.modeltype
        self.neg_sample = args.neg_sample
        self.inbatch = self.args.infonce == 1 and self.args.neg_sample == -1

        # basic hyperparameters
        self.n_layers = args.n_layers
        self.lr = args.lr
        self.batch_size = args.batch_size
        self.max_epoch = args.epoch
        self.verbose = args.verbose

        # load the data
        self.dataset_name = args.dataset
        # NOTE(review): `exec`/`eval` run text built from `args.modeltype`;
        # only pass trusted values for that option.
        try:
            print('from models.'+ args.modeltype + ' import ' + args.modeltype + '_Data')
            exec('from models.'+ args.modeltype + ' import ' + args.modeltype + '_Data') # load special dataset
            self.data = eval(args.modeltype + '_Data(args)')
        except Exception:
            # Any failure above falls back to the generic dataset class;
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print("no special dataset")
            self.data = Data(args) # load data from the path

        self.n_users = self.data.n_users
        self.n_items = self.data.n_items
        self.train_user_list = self.data.train_user_list
        self.valid_user_list = self.data.valid_user_list
        self.user_pop = torch.tensor(self.data.user_pop_idx).type(torch.LongTensor)
        self.item_pop = torch.tensor(self.data.item_pop_idx).type(torch.LongTensor)
        self.user_pop_max = self.data.user_pop_max
        self.item_pop_max = self.data.item_pop_max

        # load the model
        self.running_model = args.modeltype + '_batch' if self.inbatch else args.modeltype
        exec(args.modeltype) # fails early if the model class name is not defined
        self.model = eval(self.running_model + '(args, self.data)') # initialize the model with the graph

        # preparing for saving
        self.preperation_for_saving(args, special_args)

        # preparing for evaluation
        self.evaluators, self.eval_names = self.get_evaluators(self.data) # load the evaluators


    # the whole pipeline of the training process
    def execute(self):
        """Run the full pipeline.

        Saves the arguments, restores the latest checkpoint, trains unless
        `test_only` is set, reloads the best checkpoint, runs every evaluator,
        writes the top-K recommendations and appends a row to the results CSV.
        """
        self.save_args() # save the args
        # write args
        perf_str = str(self.args)
        with open(self.base_path + 'stats.txt','a') as f:
            f.write(perf_str+"\n")

        self.model, self.start_epoch = self.restore_checkpoint(self.model, self.base_path, self.device) # restore the checkpoint

        start_time = time.time()
        # train the model if not test only
        if not self.test_only:
            print("start training")
            self.train()
            # test the model
            print("start testing")
            self.model = self.restore_best_checkpoint(self.data.best_valid_epoch, self.model, self.base_path, self.device)
        end_time = time.time()
        self.model.eval() # evaluate the best model
        print_str = "The best epoch is % d, total training cost is %.1f" % (max(self.data.best_valid_epoch, self.start_epoch), end_time - start_time)
        with open(self.base_path +'stats.txt', 'a') as f:
            f.write(print_str + "\n")

        n_rets = {}
        for i,evaluator in enumerate(self.evaluators[:]):
            _, __, n_ret = evaluation(self.args, self.data, self.model, self.data.best_valid_epoch, self.base_path, evaluator, self.eval_names[i])
            n_rets[self.eval_names[i]] = n_ret

        self.recommend_top_k()
        self.document_hyper_params_results(self.base_path, n_rets)


    def save_args(self):
        """Dump `self.args.__dict__` as JSON to `<base_path>/args.txt`."""
        with open(self.base_path + '/args.txt', 'w') as f:
            json.dump(self.args.__dict__, f, indent=2)

    # define the training process
    def train(self) -> None:
        """Train from `self.start_epoch` up to `self.max_epoch`.

        Every `self.verbose` epochs the evaluators run and may request early
        stopping. The training log is plotted once the loop ends.
        """
        self.set_optimizer() # get the optimizer
        self.flag = False
        for epoch in range(self.start_epoch, self.max_epoch):
            if self.flag: # early stop
                break
            # All models
            t1=time.time()
            losses = self.train_one_epoch(epoch) # train one epoch
            t2=time.time()
            self.document_running_loss(losses, epoch, t2-t1) # report the loss
            if (epoch + 1) % self.verbose == 0: # evaluate the model
                self.eval_and_check_early_stop(epoch)

        visualize_and_save_log(self.base_path +'stats.txt', self.dataset_name)

    #! must be implemented by the subclass
    def train_one_epoch(self, epoch):
        """Train for one epoch and return the list of average losses."""
        raise NotImplementedError

    def preperation_for_saving(self, args, special_args):
        """Derive the save ID and output directory, then create the directory.

        Sets `formatted_today`, `train_pred_mode`, `saveID`, `base_path` and
        an empty `checkpoint_buffer`.
        """
        self.formatted_today=datetime.date.today().strftime('%m%d') + '_'

        tn = '1' if args.train_norm else '0'
        pn = '1' if args.pred_norm else '0'
        self.train_pred_mode = 't' + tn + 'p' + pn

        # Training runs are prefixed with today's date; test-only runs reuse
        # the given save ID unchanged.
        if(self.test_only == False):
            prefix = self.formatted_today + args.saveID
        else:
            prefix = args.saveID
        self.saveID = prefix + '_' + self.train_pred_mode + "_Ks=" + str(args.Ks) + '_patience=' + str(args.patience)\
            + "_n_layers=" + str(args.n_layers) + "_batch_size=" + str(args.batch_size)\
                + "_neg_sample=" + str(args.neg_sample) + "_lr=" + str(args.lr)

        for arg in special_args:
            print(arg, getattr(args, arg))
            self.saveID += "_" + arg + "=" + str(getattr(args, arg))

        self.modify_saveID()

        if self.modeltype == 'LightGCN' and self.n_layers == 0:
            self.base_path = 'recommenders/weights/{}/MF/{}'.format(self.dataset_name, self.saveID)
        elif self.n_layers > 0 and self.modeltype != "LightGCN":
            self.base_path = 'recommenders/weights/{}/{}-LGN/{}'.format(self.dataset_name, self.running_model, self.saveID)
        else:
            self.base_path = 'recommenders/weights/{}/{}/{}'.format(self.dataset_name, self.running_model, self.saveID)
        self.checkpoint_buffer=[]
        ensureDir(self.base_path)

    def modify_saveID(self):
        """Hook for subclasses to adjust `self.saveID`; does nothing here."""
        pass

    def set_optimizer(self):
        """Create an Adam optimizer over the model's trainable parameters."""
        self.optimizer = torch.optim.Adam([param for param in self.model.parameters() if param.requires_grad == True], lr=self.lr)

    def document_running_loss(self, losses:list, epoch, t_one_epoch, prefix=""):
        """Append one line with the epoch, its duration and losses to the stats file."""
        loss_str = ', '.join(['%.5f']*len(losses)) % tuple(losses)
        perf_str = prefix + 'Epoch %d [%.1fs]: train==[' % (
                epoch, t_one_epoch) + loss_str + ']'
        with open(self.base_path + 'stats.txt','a') as f:
                f.write(perf_str+"\n")

    def document_hyper_params_results(self, base_path, n_rets):
        """Append this run's hyper-parameters and metrics to the results CSV.

        The CSV lives in the parent directory of `base_path`. It is created
        on first use; later runs add one row each.

        Args:
            base_path (str): Output directory of this run ('/'-separated).
            n_rets (dict): Maps evaluator name to its metric dictionary.
                Evaluators whose name contains 'test_id' are skipped; for the
                others recall and ndcg are stored rounded to 4 decimals.
        """
        overall_path = '/'.join(base_path.split('/')[:-1]) + '/'
        hyper_params_results_path = overall_path + self.formatted_today + self.dataset_name + '_' + self.modeltype + '_' + self.args.saveID + '_hyper_params_results.csv'

        results = {'notation': self.formatted_today, 'train_pred_mode':self.train_pred_mode, 'best_epoch': max(self.data.best_valid_epoch, self.start_epoch), 'max_epoch': self.max_epoch, 'Ks': self.Ks, 'n_layers': self.n_layers, 'batch_size': self.batch_size, 'neg_sample': self.neg_sample, 'lr': self.lr}
        for special_arg in self.special_args:
            results[special_arg] = getattr(self.args, special_arg)

        for k, v in n_rets.items():
            if('test_id' not in k):
                for metric in ['recall', 'ndcg']:
                    results[k + '_' + metric] = round(v[metric], 4)

        # One-row frame for this run; its columns follow the dict's key order.
        new_row = pd.DataFrame([results])
        if os.path.exists(hyper_params_results_path):
            # Public `pd.concat` replaces the private `DataFrame._append`.
            previous_rows = pd.read_csv(hyper_params_results_path)
            hyper_params_results = pd.concat([previous_rows, new_row], ignore_index=True)
        else:
            hyper_params_results = new_row

        # to csv
        hyper_params_results.to_csv(hyper_params_results_path, index=False, float_format='%.4f')

    def recommend_top_k(self):
        """Write the top-`Ks` recommended items of every test user to disk.

        Scores come from `self.model.predict(batch_users, None)`. When the
        dataset provides negative test candidates, only a user's candidate
        items (test items plus negatives) can be recommended; otherwise the
        user's train/valid items are masked out. Items that are also in the
        user's test list are never masked by the train/valid rule.

        Output: `<base_path>/recommend_top_k.txt`, one line per user in
        ascending user order: the user ID followed by `Ks` item IDs.
        """
        test_users = list(self.data.test_user_list.keys())
        # NOTE(review): `dump_dict` is only bound when `candidate` is False,
        # yet it is read whenever `test_neg_user_list` is None — confirm that
        # candidate mode always provides `test_neg_user_list`.
        if(self.candidate == False):
            dump_dict = merge_user_list([self.data.train_user_list,self.data.valid_user_list])
        recommended_top_k = {}
        test_users = DataIterator(test_users, batch_size=self.batch_size, shuffle=False, drop_last=False)
        for batch_id, batch_users in enumerate(test_users):
            if self.data.test_neg_user_list is not None:
                candidate_items = {u:list(self.data.test_user_list[u]) + self.data.test_neg_user_list[u] if u in self.data.test_neg_user_list.keys() else list(self.data.test_user_list[u]) for u in batch_users}

                ranking_score = self.model.predict(batch_users, None)  # (B,N)
                if not is_ndarray(ranking_score, float_type):
                    ranking_score = np.array(ranking_score, dtype=float_type)

                all_items = set(range(ranking_score.shape[1]))
                for idx, user in enumerate(batch_users):
                    # Everything outside the user's candidate set is excluded.
                    not_user_candidates = list(all_items - set(candidate_items[user]))
                    ranking_score[idx,not_user_candidates] = -np.inf

                    pos_items = self.data.valid_user_list[user]
                    pos_items = [ x for x in pos_items if not x in self.data.test_user_list[user] ]
                    ranking_score[idx][pos_items] = -np.inf

                    recommended_top_k[user] = argmax_top_k(ranking_score[idx], self.Ks)
            else:
                ranking_score = self.model.predict(batch_users, None)  # (B,N)
                if not is_ndarray(ranking_score, float_type):
                    ranking_score = np.array(ranking_score, dtype=float_type)
                # set the ranking scores of training items to -inf,
                # then the training items will be sorted at the end of the ranking list.

                for idx, user in enumerate(batch_users):
                    dump_items = dump_dict[user]
                    dump_items = [ x for x in dump_items if not x in self.data.test_user_list[user] ]
                    ranking_score[idx][dump_items] = -np.inf

                    recommended_top_k[user] = argmax_top_k(ranking_score[idx], self.Ks)
            # Per-user score rows are deliberately not retained: they were
            # never read and kept every batch's score matrix alive.
            print('finish recommend one batch', batch_id)

        recommended_top_k = dict(sorted(recommended_top_k.items(), key=lambda x: x[0]))
        with open(self.base_path + '/recommend_top_k.txt', 'w') as f:
            for u, v in recommended_top_k.items():
                f.write(str(int(u)))
                for i in range(self.Ks):
                    f.write(' ' + str(int(v[i])))
                f.write('\n')
        print('finish recommend top k')



    # define the evaluation process
    def eval_and_check_early_stop(self, epoch):
        """Run every evaluator, log to wandb, checkpoint on a new best.

        Sets `self.flag` when any evaluator signals early stopping. The model
        is switched to eval mode for the duration and back to train mode
        afterwards.
        """
        self.model.eval()

        for i,evaluator in enumerate(self.evaluators):
            is_best, temp_flag, n_ret = evaluation(self.args, self.data, self.model, epoch, self.base_path, evaluator, self.eval_names[i])
            if(not self.args.no_wandb):
                wandb.log(
                    data = {f"Recall@{self.Ks}": n_ret['recall'],
                            f"Hit Ratio@{self.Ks}": n_ret['recall'],
                            f"Precision@{self.Ks}": n_ret['precision'],
                            f"NDCG@{self.Ks}": n_ret['ndcg']},
                    step = epoch
                )
            if is_best:
                # save_checkpoint updates self.checkpoint_buffer in place.
                save_checkpoint(self.model, epoch, self.base_path, self.checkpoint_buffer, self.args.max2keep)

            # early stop?
            if temp_flag:
                self.flag = True

        self.model.train()

    # load the checkpoint
    def restore_checkpoint(self, model, checkpoint_dir, device, force=False, pretrain=False):
        """
        If a checkpoint exists, restores the PyTorch model from the checkpoint.
        Returns the model and the current epoch.

        With `force=False` the highest saved epoch is loaded without asking,
        unless `args.clear_checkpoints` is set, in which case all checkpoints
        are deleted and epoch 0 is returned. With `force=True` the epoch is
        read from standard input. Weights are always mapped to the CPU;
        `device` is not used.
        """
        cp_files = [file_ for file_ in os.listdir(checkpoint_dir)
                    if file_.startswith('epoch=') and file_.endswith('.checkpoint.pth.tar')]

        if not cp_files:
            print('No saved model parameters found')
            if force:
                raise Exception("Checkpoint not found")
            else:
                return model, 0

        epoch_list = []

        regex = re.compile(r'\d+')

        # The first run of digits in each file name is its epoch number.
        for cp in cp_files:
            epoch_list.append([int(x) for x in regex.findall(cp)][0])

        epoch = max(epoch_list)


        if not force:
            print("Which epoch to load from? Choose in range [0, {})."
                .format(epoch), "Enter 0 to train from scratch.")
            print(">> ", end = '')

            if self.args.clear_checkpoints:
                print("Clear checkpoint")
                clear_checkpoint(checkpoint_dir)
                return model, 0

            # Non-interactive: always continue from the latest checkpoint.
            inp_epoch = epoch
            if inp_epoch not in range(epoch + 1):
                raise Exception("Invalid epoch number")
            if inp_epoch == 0:
                print("Checkpoint not loaded")
                clear_checkpoint(checkpoint_dir)
                return model, 0
        else:
            print("Which epoch to load from? Choose in range [0, {}).".format(epoch))
            inp_epoch = int(input())
            if inp_epoch not in range(0, epoch):
                raise Exception("Invalid epoch number")

        filename = os.path.join(checkpoint_dir,
                                'epoch={}.checkpoint.pth.tar'.format(inp_epoch))

        print("Loading from checkpoint {}?".format(filename))

        checkpoint = torch.load(filename, map_location=torch.device('cpu'))

        try:
            if pretrain:
                model.load_state_dict(checkpoint['state_dict'], strict=False)
            else:
                model.load_state_dict(checkpoint['state_dict'])
            print("=> Successfully restored checkpoint (trained for {} epochs)"
                .format(checkpoint['epoch']))
        except:
            print("=> Checkpoint not successfully restored")
            raise

        return model, inp_epoch

    def restore_best_checkpoint(self, epoch, model, checkpoint_dir, device):
        """
        Restore the best performance checkpoint

        Loads `epoch=<epoch>.checkpoint.pth.tar` from `checkpoint_dir` onto
        the CPU and returns `model`. `device` is not used.
        """
        filename = os.path.join(checkpoint_dir,
                                'epoch={}.checkpoint.pth.tar'.format(epoch))

        print("Loading from checkpoint {}?".format(filename))

        checkpoint = torch.load(filename, map_location=torch.device('cpu'))

        model.load_state_dict(checkpoint['state_dict'])
        print("=> Successfully restored checkpoint (trained for {} epochs)"
            .format(checkpoint['epoch']))

        return model

    def get_evaluators(self, data, pop_mask=None):
        """Build the validation and test evaluators.

        Args:
            data: Dataset object providing the user->items dictionaries.
            pop_mask: Unused; kept for call-site compatibility.

        Returns:
            tuple: `([eval_valid, eval_test], ["valid", "test"])`.
        """
        K_value = self.args.Ks
        if self.args.nodrop: # whether using the enhanced dataset
            eval_train_user_list = data.train_nodrop_user_list
        else:
            eval_train_user_list = data.train_user_list

        eval_valid = ProxyEvaluator(data,eval_train_user_list,data.valid_user_list,top_k=[K_value],
                                    dump_dict=merge_user_list([eval_train_user_list, data.test_user_list]))

        # Only candidate mode passes negative test candidates; None is the
        # evaluator's own default.
        user_neg_test = data.test_neg_user_list if self.args.candidate else None
        eval_test = ProxyEvaluator(data,eval_train_user_list,data.test_user_list,top_k=[K_value],
                                   dump_dict=merge_user_list([eval_train_user_list, data.valid_user_list]),
                                   user_neg_test=user_neg_test)

        evaluators=[eval_valid, eval_test]
        eval_names=["valid", "test"]

        return evaluators, eval_names

In [None]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from tqdm import tqdm

class MF_RS(AbstractRS):
    """Training driver for the MF model: one Adam step per batch on BPR loss + L2 term."""

    def __init__(self, args, special_args) -> None:
        super().__init__(args, special_args)

        # `self.model` is expected to have been created by AbstractRS.__init__.
        self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr)

    def train_one_epoch(self, epoch):
        """Run one pass over ``self.data.train_loader``.

        Args:
            epoch: index of the current epoch (not used by this trainer).

        Returns:
            ``[mean total loss, mean mf loss, mean reg loss]`` averaged over batches.

        Raises:
            ValueError: if the run is configured so that no sampled negatives are
                read from the batch; MF.forward cannot be called without them.
        """
        # The original code bound `neg_items` only under this condition but always
        # passed it to the model, which failed with UnboundLocalError otherwise.
        if not (self.args.infonce == 0 or self.args.neg_sample != -1):
            raise ValueError(
                "MF requires sampled negative items, which this trainer only reads from "
                "the batch when args.infonce == 0 or args.neg_sample != -1"
            )

        running_loss, running_mf_loss, running_reg_loss, num_batches = 0, 0, 0, 0

        pbar = tqdm(enumerate(self.data.train_loader), mininterval=2, total=len(self.data.train_loader))
        for batch_i, batch in pbar:
            # Batch fields used here: [0] users, [1] positive items, [5] sampled negatives.
            # Fields 2-4 and 6 are not needed by MF.
            users, pos_items, neg_items = batch[0], batch[1], batch[5]

            self.model.train()
            mf_loss, reg_loss = self.model(users, pos_items, neg_items)
            loss = mf_loss + reg_loss

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            running_loss += loss.detach().item()
            running_reg_loss += reg_loss.detach().item()
            running_mf_loss += mf_loss.detach().item()
            num_batches += 1
        return [running_loss/num_batches, running_mf_loss/num_batches, running_reg_loss/num_batches]


class MF(AbstractModel):
    """Matrix factorisation scored by a user/item dot product and trained with BPR."""

    def __init__(self, args, data):
        super().__init__(args, data)

    def forward(self, users, pos_items, neg_items):
        """Compute the BPR loss and the scaled L2 term for one batch.

        Args:
            users, pos_items, neg_items: index tensors of the same shape.

        Returns:
            ``(mf_loss, reg_loss)``.
        """
        user_table = self.embed_user.weight
        item_table = self.embed_item.weight

        # Embeddings used for scoring, gathered straight from the weight tables.
        u_vec = user_table[users]
        p_vec = item_table[pos_items]
        n_vec = item_table[neg_items]

        # The same rows fetched through the embedding modules; used only for the L2 term.
        u_raw = self.embed_user(users)
        p_raw = self.embed_item(pos_items)
        n_raw = self.embed_item(neg_items)

        pos_scores = (u_vec * p_vec).sum(dim=1)
        neg_scores = (u_vec * n_vec).sum(dim=1)

        l2_term = 0.5 * torch.norm(u_raw) ** 2 + 0.5 * torch.norm(p_raw) ** 2 + 0.5 * torch.norm(n_raw) ** 2
        l2_term = l2_term / self.batch_size

        # The 1e-10 keeps the log finite when the sigmoid underflows to zero.
        log_sig = torch.log(torch.sigmoid(pos_scores - neg_scores) + 1e-10)
        mf_loss = -log_sig.mean()
        reg_loss = self.decay * l2_term

        return mf_loss, reg_loss

    def predict(self, users, items=None):
        """Return a ``[len(users), len(items)]`` numpy score matrix.

        ``items`` defaults to every item id in ``range(self.data.n_items)``.
        """
        if items is None:
            items = list(range(self.data.n_items))

        user_vecs = self.embed_user.weight[torch.tensor(users)]
        item_vecs = self.embed_item.weight[torch.tensor(items)]

        # user x item score matrix
        scores = torch.matmul(user_vecs, item_vecs.transpose(0, 1))

        return scores.cpu().detach().numpy()

In [None]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm import tqdm

class InfoNCE_RS(AbstractRS):
    """Training driver for the InfoNCE models (in-batch or sampled negatives)."""

    def __init__(self, args, special_args) -> None:
        super().__init__(args, special_args)
        # neg_sample == -1 falls back to batch_size - 1 negatives.
        if args.neg_sample != -1:
            self.neg_sample = args.neg_sample
        else:
            self.neg_sample = self.batch_size - 1

        self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr)

    def train_one_epoch(self, epoch):
        """Run one pass over the training loader.

        Returns:
            ``[mean total loss, mean mf loss, mean reg loss]`` averaged over batches.
        """
        total_sum, mf_sum, reg_sum, n_steps = 0, 0, 0, 0

        loader = self.data.train_loader
        progress = tqdm(enumerate(loader), mininterval=2, total = len(loader))
        for _step, batch in progress:
            users = batch[0]
            pos_items = batch[1]
            # Fields 2-4 are unpacked for parity with the batch layout but not used.
            users_pop, pos_items_pop, pos_weights = batch[2], batch[3], batch[4]

            self.model.train()
            if self.inbatch:
                # In-batch mode: the model is called with users and positives only.
                mf_loss, reg_loss = self.model(users, pos_items)
            else:
                neg_items = batch[5]
                neg_items_pop = batch[6]
                mf_loss, reg_loss = self.model(users, pos_items, neg_items)

            loss = mf_loss + reg_loss

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            total_sum += loss.detach().item()
            reg_sum += reg_loss.detach().item()
            mf_sum += mf_loss.detach().item()
            n_steps += 1

        return [total_sum/n_steps, mf_sum/n_steps, reg_sum/n_steps]

class InfoNCE(AbstractModel):
    """InfoNCE (softmax) loss model supporting sampled or in-batch negatives."""

    def __init__(self, args, data) -> None:
        super().__init__(args, data)
        self.tau = args.tau      # softmax temperature
        self.decay = args.decay  # weight of the L2 term

    def forward(self, users, pos_items, neg_items=None):
        """Compute the InfoNCE loss and the scaled L2 term for one batch.

        Args:
            users: user ids (tensor or anything ``torch.tensor`` accepts).
            pos_items: positive item ids, one per user.
            neg_items: optional sampled negative ids. When omitted, the other
                positives in the batch serve as negatives.
                NOTE(review): the sampled branch appears to expect a 1-D tensor;
                confirm against the data loader.

        Returns:
            ``(ssm_loss, reg_loss)``.
        """
        users = users if isinstance(users, torch.Tensor) else torch.tensor(users)
        pos_items = pos_items if isinstance(pos_items, torch.Tensor) else torch.tensor(pos_items)
        if neg_items is not None:
            neg_items = neg_items if isinstance(neg_items, torch.Tensor) else torch.tensor(neg_items)

        all_users, all_items = self.compute()

        # Rows fetched through the embedding modules feed the L2 term only.
        userEmb0 = self.embed_user(users)
        posEmb0 = self.embed_item(pos_items)

        users_emb = all_users[users]
        pos_emb = all_items[pos_items]

        if neg_items is not None:
            negEmb0 = self.embed_item(neg_items)
            neg_emb = all_items[neg_items]

            if(self.train_norm):
                users_emb = F.normalize(users_emb, dim = -1)
                pos_emb = F.normalize(pos_emb, dim = -1)
                neg_emb = F.normalize(neg_emb, dim = -1)

            pos_ratings = torch.sum(users_emb*pos_emb, dim = -1)
            # Every user is scored against every sampled negative in the batch.
            neg_ratings = torch.matmul(users_emb, neg_emb.transpose(0, 1))
            numerator = torch.exp(pos_ratings / self.tau)
            denominator = numerator + torch.sum(torch.exp(neg_ratings / self.tau), dim = 1)
        else:
            # In-batch negatives: embeddings are always normalised in this branch and
            # the diagonal of the user x positive matrix holds the positive scores.
            users_emb = F.normalize(users_emb, dim=1)
            pos_emb = F.normalize(pos_emb, dim=1)
            ratings = torch.matmul(users_emb, torch.transpose(pos_emb, 0, 1))
            ratings_diag = torch.diag(ratings)
            numerator = torch.exp(ratings_diag / self.tau)
            denominator = torch.sum(torch.exp(ratings / self.tau), dim=1)

        # Computed once; the original evaluated this expression twice and
        # discarded the first result.
        ssm_loss = torch.mean(torch.negative(torch.log(numerator / denominator)))

        regularizer = 0.5 * torch.norm(userEmb0) ** 2 + 0.5 * torch.norm(posEmb0) ** 2
        if neg_items is not None:
            regularizer += 0.5 * torch.norm(negEmb0) ** 2
        reg_loss = self.decay * regularizer / self.batch_size

        return ssm_loss, reg_loss

class InfoNCE_batch(AbstractModel):
    """InfoNCE model that uses the other positives in the batch as negatives."""

    def __init__(self, args, data) -> None:
        super().__init__(args, data)
        self.tau = args.tau
        self.decay = args.decay

    def forward(self, users, pos_items):
        """Return ``(ssm_loss, reg_loss)`` for one batch of (user, positive item) pairs."""
        if not isinstance(users, torch.Tensor):
            users = torch.tensor(users)
        if not isinstance(pos_items, torch.Tensor):
            pos_items = torch.tensor(pos_items)

        all_users, all_items = self.compute()

        # Rows fetched through the embedding modules; used for the L2 term only.
        user_raw = self.embed_user(users)
        pos_raw = self.embed_item(pos_items)

        # Unit-length embeddings used for scoring.
        user_unit = F.normalize(all_users[users], dim=1)
        pos_unit = F.normalize(all_items[pos_items], dim=1)

        # sim[i, j] = score of user i against the positive item of row j;
        # the diagonal therefore holds each user's own positive score.
        sim = torch.matmul(user_unit, pos_unit.transpose(0, 1))
        own_score = torch.diagonal(sim)

        numerator = torch.exp(own_score / self.tau)
        denominator = torch.exp(sim / self.tau).sum(dim=1)
        ssm_loss = (-torch.log(numerator / denominator)).mean()

        l2_term = 0.5 * torch.norm(user_raw) ** 2 + 0.5 * torch.norm(pos_raw) ** 2
        reg_loss = self.decay * l2_term / self.batch_size

        return ssm_loss, reg_loss

In [None]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm import tqdm

class LightGCN_RS(AbstractRS):
    """Training driver for LightGCN: one optimizer step per batch on BPR loss + L2 term."""

    def __init__(self, args, special_args) -> None:
        super().__init__(args, special_args)

    def train_one_epoch(self, epoch):
        """Run one pass over ``self.data.train_loader``.

        Args:
            epoch: index of the current epoch (not used by this trainer).

        Returns:
            ``[mean total loss, mean mf loss, mean reg loss]`` averaged over batches.

        Raises:
            ValueError: if the run is configured so that no sampled negatives are
                read from the batch; LightGCN.forward cannot be called without them.
        """
        # The original code bound `neg_items` only under this condition but always
        # passed it to the model, which failed with UnboundLocalError otherwise.
        if not (self.args.infonce == 0 or self.args.neg_sample != -1):
            raise ValueError(
                "LightGCN requires sampled negative items, which this trainer only reads "
                "from the batch when args.infonce == 0 or args.neg_sample != -1"
            )

        running_loss, running_mf_loss, running_reg_loss, num_batches = 0, 0, 0, 0

        pbar = tqdm(enumerate(self.data.train_loader), mininterval=2, total = len(self.data.train_loader))
        for batch_i, batch in pbar:
            # Batch fields used here: [0] users, [1] positive items, [5] sampled negatives.
            # Fields 2-4 and 6 are not needed by LightGCN.
            users, pos_items, neg_items = batch[0], batch[1], batch[5]

            self.model.train()
            mf_loss, reg_loss = self.model(users, pos_items, neg_items)
            loss = mf_loss + reg_loss

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            running_loss += loss.detach().item()
            running_reg_loss += reg_loss.detach().item()
            running_mf_loss += mf_loss.detach().item()
            num_batches += 1
        return [running_loss/num_batches, running_mf_loss/num_batches, running_reg_loss/num_batches]


class LightGCN(AbstractModel):
    """BPR-trained model that scores with the embeddings returned by ``self.compute()``."""

    def __init__(self, args, data) -> None:
        super().__init__(args, data)

    def forward(self, users, pos_items, neg_items):
        """Return ``(mf_loss, reg_loss)`` for one batch of (user, positive, negative) ids."""
        user_table, item_table = self.compute()

        u_vec = user_table[users]
        p_vec = item_table[pos_items]
        n_vec = item_table[neg_items]

        # Rows fetched through the embedding modules; used for the L2 term only.
        u_raw = self.embed_user(users)
        p_raw = self.embed_item(pos_items)
        n_raw = self.embed_item(neg_items)

        if self.train_norm == True:
            u_vec, p_vec, n_vec = (
                F.normalize(u_vec, dim = -1),
                F.normalize(p_vec, dim = -1),
                F.normalize(n_vec, dim = -1),
            )

        # users, pos_items and neg_items share one shape, so scores are row-wise dot products.
        pos_scores = (u_vec * p_vec).sum(dim=1)
        neg_scores = (u_vec * n_vec).sum(dim=1)

        l2_term = 0.5 * torch.norm(u_raw) ** 2 + 0.5 * torch.norm(p_raw) ** 2 + 0.5 * torch.norm(n_raw) ** 2
        l2_term = l2_term / self.batch_size

        # The 1e-10 keeps the log finite when the sigmoid underflows to zero.
        log_sig = torch.log(torch.sigmoid(pos_scores - neg_scores) + 1e-10)
        mf_loss = -log_sig.mean()
        reg_loss = self.decay * l2_term

        return mf_loss, reg_loss

In [None]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm import tqdm

from torch.utils.data import DataLoader

from scipy.sparse import csr_matrix

# import random
import random as rd
import scipy.sparse as sp

def naive_sparse2tensor(data):
    """Densify a scipy sparse matrix and return it as a float32 torch tensor."""
    dense = data.toarray()
    return torch.FloatTensor(dense)

class MultVAE_RS(AbstractRS):
    """Training driver for MultVAE with KL annealing."""

    def __init__(self, args, special_args) -> None:
        super().__init__(args, special_args)
        self.data = MultVAE_Data(args)
        # KL weight ramps from 0 up to anneal_cap over total_anneal_steps updates.
        self.total_anneal_steps = args.total_anneal_steps
        self.anneal_cap = args.anneal_cap
        self.update_count = 0
        self.set_optimizer()

    def set_optimizer(self):
        """Create an Adam optimizer over the model's trainable parameters."""
        trainable = [p for p in self.model.parameters() if p.requires_grad]
        self.optimizer = torch.optim.Adam(trainable, lr=self.lr)

    def loss_function(self, recon_x, x, mu, logvar, anneal=1.0):
        """Multinomial log-likelihood term plus ``anneal`` times the KL divergence."""
        log_probs = F.log_softmax(recon_x, 1)
        neg_ll = -torch.mean(torch.sum(log_probs * x, -1))
        kl_per_row = torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1)
        kld = -0.5 * torch.mean(kl_per_row)

        return neg_ll + anneal * kld

    def train_one_epoch(self, epoch):
        """Train on every user row once, in shuffled order.

        Returns:
            A one-element list holding the mean batch loss.
        """
        loss_sum, n_steps = 0, 0

        n_users = self.data.n_users
        order = np.arange(n_users)
        np.random.shuffle(order)

        progress = tqdm(enumerate(range(0, n_users, self.batch_size)))
        for _step, lo in progress:
            hi = min(lo + self.batch_size, n_users)
            # Dense float tensor of the selected rows of the user-item matrix.
            batch = naive_sparse2tensor(self.data.ui_mat[order[lo:hi]])

            if self.total_anneal_steps > 0:
                anneal = min(self.anneal_cap,
                             1. * self.update_count / self.total_anneal_steps)
            else:
                anneal = self.anneal_cap

            self.optimizer.zero_grad()
            recon_batch, mu, logvar = self.model(batch)

            loss = self.loss_function(recon_batch, batch, mu, logvar, anneal)
            loss.backward()
            loss_sum += loss.detach().item()
            n_steps += 1
            self.optimizer.step()

            self.update_count += 1

        return [loss_sum/n_steps]


class MultVAE_Data(Data):
    """Data variant that also exposes the sparse user-item matrix ``ui_mat``."""

    def __init__(self, args):
        super().__init__(args)

    def add_special_model_attr(self, args):
        """Load ``ui_mat`` from ``<path>/ui_mat.npz``; build and save it if the file is missing."""
        cache_file = self.path + '/ui_mat.npz'
        try:
            self.ui_mat = sp.load_npz(cache_file)
            print("successfully loaded ui_mat...")
        except FileNotFoundError:
            # No cached matrix yet: build a CSR matrix with a 1 per training interaction.
            self.trainItem = np.array(self.trainItem)
            self.trainUser = np.array(self.trainUser)
            ones = np.ones(len(self.trainUser))
            coords = (self.trainUser, self.trainItem)
            self.ui_mat = csr_matrix((ones, coords), shape=(self.n_users, self.n_items))
            sp.save_npz(cache_file, self.ui_mat)
            print("successfully saved ui_mat...")


class MultVAE(AbstractModel):
    """Variational autoencoder over user interaction rows (encoder q, decoder p)."""

    def __init__(self, args, data) -> None:
        super().__init__(args, data)
        self.data = MultVAE_Data(args)
        # Decoder layer sizes end at n_items; the encoder mirrors them.
        self.p_dims = [args.p_dim0, args.p_dim1, data.n_items]
        self.q_dims = self.p_dims[::-1]

        # The last encoder layer outputs both mean and log-variance, hence twice the width.
        temp_q_dims = self.q_dims[:-1] + [self.q_dims[-1] * 2]
        self.q_layers = nn.ModuleList([nn.Linear(d_in, d_out) for
            d_in, d_out in zip(temp_q_dims[:-1], temp_q_dims[1:])])
        self.p_layers = nn.ModuleList([nn.Linear(d_in, d_out) for
            d_in, d_out in zip(self.p_dims[:-1], self.p_dims[1:])])

        self.drop = nn.Dropout(0.5)
        self.init_weights()

    def forward(self, input):
        """Return ``(reconstruction logits, mu, logvar)`` for a dense batch of user rows."""
        mu, logvar = self.encode(input)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

    def encode(self, input):
        """Normalise and drop out the input, then map it to ``(mu, logvar)``."""
        h = F.normalize(input)
        h = self.drop(h)

        last = len(self.q_layers) - 1
        for i, layer in enumerate(self.q_layers):
            h = layer(h)
            if i != last:
                h = torch.tanh(h)
            else:
                # Split the final layer's output into mean and log-variance halves.
                mu = h[:, :self.q_dims[-1]]
                logvar = h[:, self.q_dims[-1]:]
        return mu, logvar

    def reparameterize(self, mu, logvar):
        """Sample ``z = mu + eps * std`` while training; return ``mu`` otherwise."""
        if self.training:
            std = torch.exp(0.5 * logvar)
            eps = torch.randn_like(std)
            return eps.mul(std).add_(mu)
        else:
            return mu

    def decode(self, z):
        """Map a latent batch to item logits (tanh between layers, none after the last)."""
        h = z
        last = len(self.p_layers) - 1
        for i, layer in enumerate(self.p_layers):
            h = layer(h)
            if i != last:
                h = torch.tanh(h)
        return h

    def init_weights(self):
        """Initialise all encoder and decoder layers (encoder first, then decoder)."""
        for layer in list(self.q_layers) + list(self.p_layers):
            # Xavier-style normal initialisation for weights.
            fan_out, fan_in = layer.weight.size()
            std = np.sqrt(2.0/(fan_in + fan_out))
            layer.weight.data.normal_(0.0, std)

            # Small normal initialisation for biases.
            layer.bias.data.normal_(0.0, 0.001)

    def predict(self, users, items=None):
        """Return a ``[len(users), len(items)]`` numpy score matrix.

        Args:
            users: row indices into ``self.data.ui_mat``.
            items: optional item ids selecting the returned columns; all items
                (in id order) when omitted. The original computed a default for
                ``items`` but never used it, so a caller-supplied subset was
                ignored.
        """
        batch = naive_sparse2tensor(self.data.ui_mat[users])
        rate_batch, _, _ = self.forward(batch)

        if items is not None:
            item_idx = torch.as_tensor(items, dtype=torch.long, device=rate_batch.device)
            rate_batch = rate_batch[:, item_idx]

        return rate_batch.cpu().detach().numpy()

In [None]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm import tqdm



class Pop_RS(AbstractRS):
    """Driver for the popularity baseline; there is nothing to train."""

    def __init__(self, args, special_args) -> None:
        super().__init__(args, special_args)

    def train_one_epoch(self, epoch):
        """No-op training step; always returns ``None``."""
        return None

class Pop(AbstractModel):
    """Popularity baseline that marks random draws from ``self.data.pop_candidates``."""

    def __init__(self, args, data) -> None:
        super().__init__(args, data)

    def forward(self):
        """This baseline has no forward computation; always returns ``None``."""
        return None

    def predict(self, users, items=None):
        """Return a 0/1 matrix of shape ``[len(users), len(items)]``.

        For every user, ``2 * args.Ks`` distinct candidates are drawn and their
        columns set to 1. Columns are addressed by the drawn candidate values
        themselves, i.e. by raw item id.
        """
        if items is None:
            items = list(range(self.data.n_items))

        n_rows = len(users)
        scores = np.zeros((n_rows, len(items)))
        for row in range(n_rows):
            # Draw 2*Ks distinct popular items for this user.
            picked = np.random.choice(self.data.pop_candidates, 2*self.args.Ks, replace=False)
            scores[row, picked] = 1

        return scores

class Pop_Data(Data):
    """Data variant that precomputes the pool of most popular items."""

    def __init__(self, args):
        super().__init__(args)

    def add_special_model_attr(self, args):
        """Store the ``30 * args.Ks`` highest-valued keys of ``self.pop_item`` as ``pop_candidates``."""
        by_popularity = sorted(self.pop_item.items(), key=lambda pair: pair[1], reverse=True)
        pool_size = 30*args.Ks
        self.pop_candidates = [item for item, _count in by_popularity[:pool_size]]
        print("pop_candidates: ", sorted(self.pop_candidates))

In [None]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm import tqdm


class Random_RS(AbstractRS):
    """Driver for the random baseline; there is nothing to train."""

    def __init__(self, args, special_args) -> None:
        super().__init__(args, special_args)

    def train_one_epoch(self, epoch):
        """No-op training step; always returns ``None``."""
        return None

class Random(AbstractModel):
    """Baseline that scores every (user, item) pair with a uniform random number."""

    def __init__(self, args, data) -> None:
        super().__init__(args, data)

    def forward(self):
        """This baseline has no forward computation; always returns ``None``."""
        return None

    def predict(self, users, items=None):
        """Return uniform random scores of shape ``[len(users), len(items)]``.

        ``items`` defaults to all ``self.data.n_items`` items.
        """
        if items is None:
            items = list(range(self.data.n_items))

        n_rows, n_cols = len(users), len(items)
        return np.random.rand(n_rows, n_cols)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm


class FM_RS(AbstractRS):
    """Training driver for the FM model: one Adam step per batch."""

    def __init__(self, args, special_args) -> None:
        super().__init__(args, special_args)
        self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr)

    def train_one_epoch(self, epoch):
        """Run one pass over the training loader.

        Returns:
            ``[mean total loss, mean fm loss, mean reg loss]`` averaged over batches.
        """
        total_sum, fm_sum, reg_sum, n_steps = 0, 0, 0, 0
        loader = self.data.train_loader
        progress = tqdm(enumerate(loader), mininterval=2, total=len(loader))

        for _step, batch in progress:
            # Each batch must unpack into exactly seven fields; only users,
            # positives (fields 0-1) and negatives (field 5) are used.
            users, pos_items, _f2, _f3, _f4, neg_items, _f6 = batch

            self.model.train()
            fm_loss, reg_loss = self.model(users, pos_items, neg_items)
            loss = fm_loss + reg_loss

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            total_sum += loss.detach().item()
            fm_sum += fm_loss.detach().item()
            reg_sum += reg_loss.detach().item()
            n_steps += 1

        means = [total_sum / n_steps, fm_sum / n_steps, reg_sum / n_steps]
        return means


class FM(AbstractModel):
    """Factorization machine over one-hot (user, item) features, trained with BPR.

    Every sample has exactly two active features: the user (index ``u``) and the
    item (index ``n_users + i``). The FM score therefore reduces to
    ``w0 + w[u] + w[n_users + i] + <V[u], V[n_users + i]>``, which is computed
    here with direct row gathers instead of dense one-hot matrices.
    """

    def __init__(self, args, data):
        super().__init__(args, data)
        self.n_features = data.n_users + data.n_items
        self.k = args.embed_size

        # FM parameters: global bias, per-feature weights, per-feature factor vectors.
        self.w0 = nn.Parameter(torch.zeros(1))
        self.w = nn.Parameter(torch.zeros(self.n_features))
        self.V = nn.Parameter(torch.randn(self.n_features, self.k) * 0.01)

    def _build_x(self, user, item):
        """Create dense one-hot features for a batch of (user, item) pairs.

        No longer used by ``forward`` (it allocates batch x n_features floats);
        kept for any caller that still wants the explicit feature matrix.
        """
        batch_size = user.size(0)
        x = torch.zeros(batch_size, self.n_features, device=user.device)
        x[torch.arange(batch_size), user] = 1.0
        x[torch.arange(batch_size), self.data.n_users + item] = 1.0
        return x

    def _pair_scores(self, users, items):
        """FM score of each aligned (user, item) pair, shape ``[batch]``."""
        item_feats = self.data.n_users + items
        linear = self.w0 + self.w[users] + self.w[item_feats]
        # With only two active features the pairwise term is a single dot product.
        interaction = torch.sum(self.V[users] * self.V[item_feats], dim=1)
        return linear + interaction

    def forward(self, users, pos_items, neg_items):
        """Return ``(fm_loss, reg_loss)`` for one batch of (user, positive, negative) ids."""
        device = self.w.device
        users = torch.as_tensor(users, dtype=torch.long, device=device)
        pos_items = torch.as_tensor(pos_items, dtype=torch.long, device=device)
        neg_items = torch.as_tensor(neg_items, dtype=torch.long, device=device)

        pos_scores = self._pair_scores(users, pos_items)
        neg_scores = self._pair_scores(users, neg_items)

        # BPR loss; the 1e-10 keeps the log finite when the sigmoid underflows.
        maxi = torch.log(torch.sigmoid(pos_scores - neg_scores) + 1e-10)
        fm_loss = -torch.mean(maxi)

        # L2 term over all feature weights and factors, scaled as in the original.
        reg_loss = self.decay * (
            0.5 * torch.norm(self.w) ** 2 + 0.5 * torch.norm(self.V) ** 2
        ) / self.batch_size

        return fm_loss, reg_loss

    def predict(self, users, items=None):
        """Return a ``[len(users), len(items)]`` numpy score matrix.

        Args:
            users: user ids (tensor, list or array).
            items: optional item ids (tensor, list or array); all items when
                omitted. The original only worked when ``items`` was a tensor.
        """
        device = self.w.device
        users = torch.as_tensor(users, dtype=torch.long, device=device)
        if items is None:
            items = torch.arange(self.data.n_items, device=device)
        else:
            items = torch.as_tensor(items, dtype=torch.long, device=device)
        item_feats = self.data.n_users + items

        with torch.no_grad():
            # Broadcast the two linear terms to [batch, n_items] and get every
            # user/item dot product in one matmul, instead of expanding
            # batch * n_items explicit pairs.
            linear = self.w0 + self.w[users].unsqueeze(1) + self.w[item_feats].unsqueeze(0)
            interaction = torch.matmul(self.V[users], self.V[item_feats].transpose(0, 1))
            scores = linear + interaction

        return scores.detach().cpu().numpy()

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm


class SASRec_RS(AbstractRS):
    """Training driver for the SASRec model: one Adam step per batch."""

    def __init__(self, args, special_args) -> None:
        super().__init__(args, special_args)
        self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr)

    def train_one_epoch(self, epoch):
        """Run one pass over the training loader.

        Args:
            epoch: index of the current epoch (not used by this trainer).

        Returns:
            ``[mean total loss, mean rec loss, mean reg loss]`` averaged over batches.
        """
        running_loss, running_rec_loss, running_reg_loss, num_batches = 0, 0, 0, 0
        pbar = tqdm(enumerate(self.data.train_loader), mininterval=2, total=len(self.data.train_loader))

        for batch_i, batch in pbar:
            device = self.model.device
            # Batch fields used: [0] users, [1] positive items, [5] negatives when present.
            # torch.as_tensor reuses an existing tensor where it can; torch.tensor()
            # on a tensor copies it and emits a UserWarning on every batch.
            users = torch.as_tensor(batch[0], dtype=torch.long, device=device)
            pos_items = torch.as_tensor(batch[1], dtype=torch.long, device=device)
            if len(batch) > 5:
                neg_items = torch.as_tensor(batch[5], dtype=torch.long, device=device)
            else:
                # Fallback: draw one uniform random item per user as the negative.
                neg_items = torch.randint(0, self.data.n_items, (len(users),), device=device).long()

            self.model.train()
            rec_loss, reg_loss = self.model(users, pos_items, neg_items)
            loss = rec_loss + reg_loss

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item()
            running_rec_loss += rec_loss.item()
            running_reg_loss += reg_loss.item()
            num_batches += 1

        return [running_loss / num_batches, running_rec_loss / num_batches, running_reg_loss / num_batches]


class SASRec(AbstractModel):
    """A lightweight SASRec-like sequential recommender.

    Behavior:
      - Uses each user's last ``max_seq_len`` interactions from
        ``self.data.train_user_list``.
      - Sequences are LEFT-padded, so the most recent interaction always sits at
        the final position (index ``max_seq_len - 1``).
      - The sequence is encoded with TransformerEncoder layers and the output at
        the final position is taken as the user's sequential state.
      - Final score = dot(seq_repr, item_embedding).

    Forward signature mirrors MF/FM: forward(users, pos_items, neg_items)
    Returns: (bpr_loss, reg_loss)
    """

    def __init__(self, args, data):
        super().__init__(args, data)
        self.hidden_dim = args.embed_size
        self.num_blocks = args.num_blocks
        self.num_heads = args.num_heads
        self.max_seq_len = args.max_seq_len
        self.dropout = args.dropout

        # item embeddings (learnable); no dedicated padding row, pads reuse item id 0
        self.item_embedding = nn.Embedding(self.data.n_items, self.hidden_dim, padding_idx=None)
        # positional embeddings
        self.pos_embedding = nn.Embedding(self.max_seq_len, self.hidden_dim)

        # transformer encoder blocks
        encoder_layer = nn.TransformerEncoderLayer(d_model=self.hidden_dim, nhead=self.num_heads,
                                                   dim_feedforward=4 * self.hidden_dim,
                                                   dropout=self.dropout, activation='gelu', batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=self.num_blocks)

        # layernorm and dropout
        self.layer_norm = nn.LayerNorm(self.hidden_dim)
        self.dropout_layer = nn.Dropout(self.dropout)

        # init
        nn.init.xavier_uniform_(self.item_embedding.weight)
        nn.init.xavier_uniform_(self.pos_embedding.weight)

    def _build_user_seq(self, users):
        """Return ``(seq_tensor, seq_mask, seq_lengths)`` for a batch of users.

        seq_tensor: [B, L] long tensor of item ids, left-padded with 0.
        seq_mask: [B, L] bool tensor, True at padded positions.
        seq_lengths: [B] number of real items per row.

        A user with no history gets an all-zero row whose final position is left
        unmasked. Masking every key makes the attention softmax undefined (NaN in
        common PyTorch versions), which would poison the whole batch loss.
        """
        device = self.item_embedding.weight.device
        user_ids = users.tolist() if isinstance(users, torch.Tensor) else list(users)
        B = len(user_ids)
        L = self.max_seq_len

        seqs = torch.zeros((B, L), dtype=torch.long, device=device)
        mask = torch.ones((B, L), dtype=torch.bool, device=device)  # start fully padded
        lengths = torch.zeros(B, dtype=torch.long, device=device)

        for i, u in enumerate(user_ids):
            hist = self.data.train_user_list.get(int(u), [])
            seq = hist[-L:]
            seq_len = len(seq)
            lengths[i] = seq_len
            if seq_len == 0:
                mask[i, L - 1] = False  # keep one attendable key (see docstring)
                continue
            # pad left: real items occupy positions L - seq_len .. L - 1
            start = L - seq_len
            seqs[i, start:] = torch.tensor(seq, dtype=torch.long, device=device)
            mask[i, start:] = False
        return seqs, mask, lengths

    def _encode_users(self, users, apply_dropout):
        """Encode each user's history and return the final-position state, shape [B, D].

        ``apply_dropout`` controls the embedding-level dropout only (the training
        path applies it, the prediction path does not, as in the original code).
        """
        seqs, pad_mask, _ = self._build_user_seq(users)

        positions = torch.arange(self.max_seq_len, device=seqs.device).unsqueeze(0)  # [1, L]
        seq_emb = self.item_embedding(seqs) + self.pos_embedding(positions)          # [B, L, D]
        if apply_dropout:
            seq_emb = self.dropout_layer(seq_emb)

        # batch_first=True, so the encoder takes (batch, seq, feat); padded keys are masked out.
        encoded = self.transformer_encoder(seq_emb, src_key_padding_mask=pad_mask)
        encoded = self.layer_norm(encoded)

        # Left padding puts the most recent item at the last position. The original
        # gathered index `lengths - 1`, which for short histories is a pad or an
        # earlier position.
        return encoded[:, -1, :]

    def forward(self, users, pos_items, neg_items):
        """Train step: encode the users' sequences and compute the BPR loss.

        Args:
            users, pos_items, neg_items: 1-D id tensors (or lists of ids) of equal length.

        Returns:
            ``(bpr_loss, reg_loss)``.
        """
        device = self.item_embedding.weight.device
        users = torch.as_tensor(users, dtype=torch.long, device=device)
        pos_items = torch.as_tensor(pos_items, dtype=torch.long, device=device)
        neg_items = torch.as_tensor(neg_items, dtype=torch.long, device=device)

        seq_repr = self._encode_users(users, apply_dropout=True)  # [B, D]

        pos_embs = self.item_embedding(pos_items)
        neg_embs = self.item_embedding(neg_items)

        # Scores via dot product
        pos_scores = torch.sum(seq_repr * pos_embs, dim=1)
        neg_scores = torch.sum(seq_repr * neg_embs, dim=1)

        # BPR-like loss; the 1e-10 keeps the log finite when the sigmoid underflows.
        loss_term = -torch.mean(torch.log(torch.sigmoid(pos_scores - neg_scores) + 1e-10))

        # L2 term on the full item and positional embedding tables, scaled by
        # decay / batch size.
        reg = 0.0
        reg += 0.5 * torch.norm(self.item_embedding.weight) ** 2
        reg += 0.5 * torch.norm(self.pos_embedding.weight) ** 2
        reg = reg * (self.decay / max(1, users.size(0)))

        return loss_term, reg

    def predict(self, users, items=None):
        """Return a ``[len(users), len(items)]`` numpy score matrix.

        Args:
            users: user ids (list or tensor).
            items: optional item ids (list or tensor); all items when omitted.
        """
        device = self.item_embedding.weight.device
        if items is None:
            items = torch.arange(self.data.n_items, device=device)
        else:
            items = torch.as_tensor(items, dtype=torch.long, device=device)
        users = torch.as_tensor(users, dtype=torch.long, device=device)

        seq_repr = self._encode_users(users, apply_dropout=False)  # [B, D]
        item_embs = self.item_embedding(items)                     # [n_items, D]

        # score matrix = seq_repr @ item_embs.T
        scores = torch.matmul(seq_repr, item_embs.transpose(0, 1))
        return scores.detach().cpu().numpy()


In [None]:
class abstract_avatar:
    """Base class for simulated avatars.

    Stores the run arguments and the avatar id; the behavioural hooks below
    are placeholders that concrete avatars are expected to override.
    """

    def __init__(self, args, avatar_id):
        """Record the run configuration and identity of this avatar.

        Args:
            args: run configuration; only ``args.use_wandb`` is read here.
            avatar_id: identifier of the avatar.
        """
        super().__init__()
        self.avatar_id = avatar_id
        self.args = args
        self.use_wandb = args.use_wandb
        # No memory is attached at construction time.
        self.memory = None

    def _reaction(self):
        """Summarize the feelings of the avatar for recommended item list."""
        raise NotImplementedError()

    def reflection(self):
        """Reflect on the observation bank."""
        raise NotImplementedError()

    def up_date_taste(self):
        """Update the taste of the avatar."""
        raise NotImplementedError()


In [None]:
class abstract_memory:
    """Base class for avatar memory banks.

    Holds the run arguments and a size counter; every operation is a
    placeholder that raises ``NotImplementedError`` until overridden.
    """

    def __init__(self, args):
        """Store the run arguments and start with an empty memory bank."""
        super().__init__()
        self.memory_size = 0
        self.args = args

    def add_memory(self):
        """Add one new memory to the memory bank."""
        raise NotImplementedError()

    def time_weighting(self):
        """Weight the memory according to the time."""
        raise NotImplementedError()

    def importance_weighting(self):
        """Weight the importance of a memory.

        The weighting is meant to depend on the results of recommendation
        and the personal taste.
        """
        raise NotImplementedError()

    def reflect(self):
        """Generate a high level understanding of previous memories."""
        raise NotImplementedError()

In [None]:
import datetime
from copy import deepcopy
from typing import Any, Dict, List, Optional, Tuple

from pydantic import BaseModel, Field

from langchain.schema import BaseRetriever, Document
from langchain.vectorstores.base import VectorStore
import numpy as np


def _get_hours_passed(time: datetime.datetime, ref_time: datetime.datetime) -> float:
    """Get the hours passed between two datetime objects."""
    return (time - ref_time).total_seconds() / 3600

class AvatarRetriver(BaseModel):
    """Retriever combining embedding similarity with recency."""

    vectorstore: VectorStore
    """The vectorstore to store documents and determine salience."""

    search_kwargs: dict = Field(default_factory=lambda: dict(k=100))
    """Keyword arguments to pass to the vectorstore similarity search."""

    # TODO: abstract as a queue
    memory_stream: List[Document] = Field(default_factory=list)
    """The memory_stream of documents to search through."""

    decay_rate: float = Field(default=0.01)
    """The exponential decay factor used as (1.0-decay_rate)**(hrs_passed)."""

    k: int = 10
    """The maximum number of documents to retrieve in a given call."""

    other_score_keys: List[str] = []
    """Other keys in the metadata to factor into the score, e.g. 'importance'."""

    default_salience: Optional[float] = None
    """The salience to assign memories not retrieved from the vector store.

    None assigns no salience to documents not fetched from the vector store.
    """
    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    def _get_combined_score(
        self,
        document: Document,
        vector_relevance: Optional[float],
        current_time: datetime.datetime,
    ) -> float:
        """Return the combined (summed) score for a document.

        The score is the recency decay ``(1 - decay_rate) ** hours_passed`` plus
        every ``other_score_keys`` value present in the metadata plus the vector
        relevance, when one is given.
        """
        hours_passed = _get_hours_passed(
            current_time,
            document.metadata["last_accessed_at"],
        )
        score = (1.0 - self.decay_rate) ** hours_passed
        for key in self.other_score_keys:
            if key in document.metadata:
                score += document.metadata[key]
        if vector_relevance is not None:
            score += vector_relevance
        return score

    def _get_combined_score_list(
        self,
        document: Document,
        vector_relevance: Optional[float],
        current_time: datetime.datetime,
    ) -> List[float]:
        """Return the individual score components for a document.

        The list always has the same layout so that rows from different
        documents can be stacked into a rectangular array:

        ``[time score, <one entry per other_score_keys>, relevance term]``

        * The time score is the constant 1: recency decay is currently
          disabled, so ``current_time`` is accepted but not used here.
        * A metadata key that is missing from the document contributes 0.0.
        * The relevance term is ``1 - vector_relevance``, or 0.0 when no
          relevance is available (document not fetched from the vector store).
        """
        # Recency decay is switched off; the original formula is kept for reference:
        # score_time = (1.0 - self.decay_rate) ** hours_passed
        score_time = 1
        list_scores = [score_time]
        for key in self.other_score_keys:
            list_scores.append(document.metadata.get(key, 0.0))
        # NOTE(review): the term is stored as 1 - relevance while results are
        # later sorted in descending order - confirm this inversion is intended
        # for the relevance function configured on the vector store.
        if vector_relevance is not None:
            list_scores.append(1-vector_relevance)
        else:
            list_scores.append(0.0)
        return list_scores

    def get_salient_docs(self, query: str) -> Dict[int, Tuple[Document, float]]:
        """Return documents that are salient to the query.

        Runs one similarity search on the vector store and maps every hit that
        carries a ``buffer_idx`` to ``(memory_stream document, relevance)``.
        """
        docs_and_scores: List[Tuple[Document, float]]
        docs_and_scores = self.vectorstore.similarity_search_with_relevance_scores(
            query, **self.search_kwargs
        )
        results = {}
        for fetched_doc, relevance in docs_and_scores:
            print(fetched_doc)
            if "buffer_idx" in fetched_doc.metadata:
                buffer_idx = fetched_doc.metadata["buffer_idx"]
                doc = self.memory_stream[buffer_idx]
                results[buffer_idx] = (doc, relevance)
        return results

    def get_relevant_documents(self, query: str, current_time: Optional[Any] = None) -> List[Document]:
        """Return documents that are relevant to the query.

        Candidates are the last ``k`` documents of the memory stream plus every
        salient document returned by the vector store. Each score component is
        min-max normalised across the candidates, the first two components are
        scaled by 0.9, and the ``k`` candidates with the highest summed score
        are returned. Returned documents get ``last_accessed_at`` set to
        ``current_time`` (defaults to now).

        Returns an empty list when there are no candidates at all.
        """
        print(query)
        if current_time is None:
            current_time = datetime.datetime.now()
        docs_and_scores = {
            doc.metadata["buffer_idx"]: (doc, self.default_salience)
            for doc in self.memory_stream[-self.k :]
        }

        print("docs_and_scores", docs_and_scores)
        # Query the vector store exactly once and reuse the result for both the
        # debug output and the update below.
        salient_docs = self.get_salient_docs(query)
        print(salient_docs)
        # If a doc is considered salient, update the salience score
        docs_and_scores.update(salient_docs)

        # Nothing stored and nothing fetched: min/max below would fail on an
        # empty array, so answer directly.
        if not docs_and_scores:
            return []

        candidates = list(docs_and_scores.values())
        score_array_np = np.array(
            [
                self._get_combined_score_list(doc, relevance, current_time)
                for doc, relevance in candidates
            ],
            dtype=float,
        )
        column_min = score_array_np.min(axis=0)
        delta_np = score_array_np.max(axis=0) - column_min
        # Constant columns would divide by zero; dividing by 1 maps them to 0.
        delta_np = np.where(delta_np == 0, 1, delta_np)
        x_norm = (score_array_np - column_min) / delta_np
        # Down-weight the first two components (time score and the first
        # following component). Slicing keeps this safe for narrow arrays.
        x_norm[:, :2] *= 0.9
        x_norm_sum = x_norm.sum(axis=1)
        rescored_docs = [
            (doc, score)
            for (doc, _), score in zip(candidates, x_norm_sum)
        ]

        rescored_docs.sort(key=lambda x: x[1], reverse=True)
        result = []
        # Ensure frequently accessed memories aren't forgotten
        for doc, _ in rescored_docs[: self.k]:
            # TODO: Update vector store doc once `update` method is exposed.
            buffered_doc = self.memory_stream[doc.metadata["buffer_idx"]]
            buffered_doc.metadata["last_accessed_at"] = current_time
            result.append(buffered_doc)
        return result

    async def aget_relevant_documents(self, query: str) -> List[Document]:
        """Return documents that are relevant to the query."""
        raise NotImplementedError

    def _buffer_documents(
        self, documents: List[Document], current_time: Optional[Any]
    ) -> List[Document]:
        """Copy, timestamp and index ``documents`` and append them to the memory stream.

        Returns the stamped copies; the input documents are not mutated.
        """
        if current_time is None:
            current_time = datetime.datetime.now()
        # Avoid mutating input documents
        dup_docs = [deepcopy(d) for d in documents]
        for i, doc in enumerate(dup_docs):
            if "last_accessed_at" not in doc.metadata:
                doc.metadata["last_accessed_at"] = current_time
            if "created_at" not in doc.metadata:
                doc.metadata["created_at"] = current_time
            doc.metadata["buffer_idx"] = len(self.memory_stream) + i
        self.memory_stream.extend(dup_docs)
        return dup_docs

    def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
        """Add documents to the memory stream and the vectorstore.

        ``kwargs`` may carry ``current_time``; all kwargs are forwarded to the
        vectorstore unchanged.
        """
        dup_docs = self._buffer_documents(documents, kwargs.get("current_time"))
        return self.vectorstore.add_documents(dup_docs, **kwargs)

    async def aadd_documents(
        self, documents: List[Document], **kwargs: Any
    ) -> List[str]:
        """Async variant of ``add_documents``."""
        dup_docs = self._buffer_documents(documents, kwargs.get("current_time"))
        return await self.vectorstore.aadd_documents(dup_docs, **kwargs)

In [None]:
import datetime
from langchain.schema import BaseMemory, Document
import openai
import logging
import re
from langchain.schema import BaseMemory, Document
from langchain.utils import mock_now
import time
import datetime
from copy import deepcopy
from typing import Any, Dict, List, Optional, Tuple
from pydantic import BaseModel, Field
from langchain.schema import BaseRetriever, Document
from langchain.docstore import InMemoryDocstore
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.vectorstores.base import VectorStore
import numpy as np
from langchain.embeddings import HuggingFaceEmbeddings
import faiss
from langchain.chat_models import ChatOpenAI
from langchain.schema.language_model import BaseLanguageModel

import wandb

from termcolor import cprint


class AvatarMemory(BaseMemory):
    """Memory bank of an avatar: stores observations and reflects on them via an LLM."""

    llm: BaseLanguageModel
    """The core language model."""
    memory_retriever: AvatarRetriver
    """The retriever to fetch related memories."""
    reflection_threshold: Optional[float] = None
    """When aggregate_importance exceeds reflection_threshold, stop to reflect."""
    importance_weight: float = 0.15
    """How much weight to assign the memory importance."""
    aggregate_importance: float = 0.0  # : :meta private:
    """Track the sum of the 'importance' of recent memories.
    Triggers reflection when it reaches reflection_threshold."""
    reflecting: bool = False
    now_key: str = "now"
    max_tokens_limit: int = 1200  # : :meta private:

    user_k_tokens: float = 0.0
    use_wandb: bool = False


    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, Any]) -> None:
        """Save the context of this model run to memory."""
        # TODO

    @property
    def memory_variables(self) -> List[str]:
        """Input keys this memory class will load dynamically."""
        # TODO: nothing is exposed yet; return an empty list so the declared
        # return type holds for callers that iterate over it.
        return []

    def clear(self) -> None:
        """Clear memory contents."""
        # TODO

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        """Return key-value pairs given the text input to the chain."""
        # TODO: nothing is loaded yet; return an empty mapping so the declared
        # return type holds.
        return {}

    @staticmethod
    def _parse_list(text: str) -> List[str]:
        """Parse a newline-separated string into a list of strings.

        Blank lines are dropped and a leading ``"<number>. "`` enumeration
        prefix is stripped from every remaining line.
        """
        lines = re.split(r"\n", text.strip())
        lines = [line for line in lines if line.strip()]  # remove empty lines
        return [re.sub(r"^\s*\d+\.\s*", "", line).strip() for line in lines]

    def fetch_memories(
        self, observation: str, now: Optional[datetime.datetime] = None
    ) -> List[Document]:
        """Fetch related memories."""
        print("Observation:", observation)
        #print(now)
        return self.memory_retriever.get_relevant_documents(observation,now)

    def format_memories_detail(self, relevant_memories: List[Document]) -> str:
        """Format memories as a newline-separated list with a ``"- "`` prefix."""
        return "\n".join(
            self._format_memory_detail(mem, prefix="- ") for mem in relevant_memories
        )

    def _format_memory_detail(self, memory: Document, prefix: str = "") -> str:
        """Return ``prefix`` followed by the stripped content of ``memory``."""
        return f"{prefix}{memory.page_content.strip()}"

    def format_memories_simple(self, relevant_memories: List[Document]) -> str:
        """Join the raw memory contents with ``"; "``."""
        return "; ".join([f"{mem.page_content}" for mem in relevant_memories])

    def _get_memories_until_limit(self, consumed_tokens: int) -> str:
        """Return the newest memories that still fit under ``max_tokens_limit``.

        Walks the memory stream from newest to oldest, adding each document's
        token count (as reported by ``self.llm.get_num_tokens``) to
        ``consumed_tokens`` and keeping the document while the total stays
        below the limit.
        """
        result = []
        for doc in self.memory_retriever.memory_stream[::-1]:
            if consumed_tokens >= self.max_tokens_limit:
                break
            consumed_tokens += self.llm.get_num_tokens(doc.page_content)
            if consumed_tokens < self.max_tokens_limit:
                result.append(doc)
        return self.format_memories_simple(result)

    def get_completion(self, prompt, llm="gpt-4o-mini", temperature=0, max_retries=None):
        """Send ``prompt`` to the OpenAI chat completion API and return the reply text.

        The call is retried after any exception, sleeping between attempts
        (0.5 s at first, then a random whole number of seconds below a bound
        that doubles up to 16). Token usage is added to ``self.user_k_tokens``
        and to the module-level ``global_k_tokens`` counter. When
        ``self.use_wandb`` is set, traffic statistics are logged to wandb once
        per ``global_interval`` seconds, guarded by the module-level ``lock``.

        Relies on module-level globals (``global_k_tokens``, ``global_start_time``,
        ``global_steps``, ``global_last_tokens_record``, ``global_interval``,
        ``global_finished_users``, ``global_finished_pages``,
        ``global_error_cast``, ``lock``) that must be defined before the call.

        Args:
            prompt: user message to send.
            llm: model name passed to the API.
            temperature: sampling temperature passed to the API.
            max_retries: maximum number of failed attempts to tolerate before
                re-raising the last exception. ``None`` (default) retries
                forever, which is the historical behaviour.

        Returns:
            The content of the first choice of the response.
        """
        global global_k_tokens
        global global_start_time
        global global_steps
        global global_last_tokens_record
        global global_interval
        global global_finished_users
        global global_finished_pages
        global global_error_cast
        global lock


        messages = [{"role":"user", "content" : prompt}]
        response = ''
        except_waiting_time = 1
        max_waiting_time = 16
        current_sleep_time = 0.5
        failed_attempts = 0
        while response == '':
            try:
                if(self.use_wandb): # whether to use wandb
                    start_time = time.time()

                    if((start_time - global_start_time)//global_interval > global_steps):
                        # Non-blocking: if another caller is already logging, skip.
                        if(lock.acquire(False)):
                            try:
                                print("??", lock, (start_time - global_start_time)//global_interval, global_interval, global_steps)
                                print("\nMemory Start Identifier", start_time, global_start_time, (start_time - global_start_time), global_steps)
                                global_steps += 1
                                wandb.log(
                                    data = {"Real-time Traffic": global_k_tokens - global_last_tokens_record,
                                            "Total Traffic": global_k_tokens,
                                            "Finished Users": global_finished_users,
                                            "Finished Pages": global_finished_pages,
                                            "Error Cast": global_error_cast/1000
                                    },
                                    step = global_steps
                                )
                                global_last_tokens_record = global_k_tokens
                            finally:
                                # Always hand the lock back, even if wandb.log
                                # fails; otherwise no caller could log again.
                                lock.release()
                            print("\nMemory End Identifier", time.time(), global_start_time, (time.time() - global_start_time), global_steps)

                response = openai.ChatCompletion.create(
                    model=llm,
                    messages=messages,
                    temperature=temperature,
                    request_timeout = 20,
                    max_tokens=1000
                )

                print("===================================")
                print(f'{response["usage"]["total_tokens"]} = {response["usage"]["prompt_tokens"]} + {response["usage"]["completion_tokens"]} tokens counted by the OpenAI API.')
                k_tokens = response["usage"]["total_tokens"]/1000
                self.user_k_tokens += k_tokens
                global_k_tokens += k_tokens
                if(response["usage"]["prompt_tokens"] > 2000):
                    cprint(prompt, color="white")

            except Exception as e:
                global_error_cast += 1
                failed_attempts += 1
                # Surface the reason instead of retrying silently.
                logging.warning("get_completion attempt %d failed: %r", failed_attempts, e)
                if max_retries is not None and failed_attempts > max_retries:
                    raise
                time.sleep(current_sleep_time)
                if except_waiting_time < max_waiting_time:
                    except_waiting_time *= 2
                    current_sleep_time = np.random.randint(0, except_waiting_time-1)

        return response.choices[0].message["content"]

    def _user_taste_reflection(self, last_k: int = 10) -> str:
        """Ask the LLM to summarise the user's taste from the last ``last_k`` memories."""
        prompt = """
            The user has read following movie recently:
            <INPUT>\n\n
            Given only the information above, conclude the user's taste of movie using five adjective words, which should be conclusive, descriptive, and movie-genre related.
            The output format must be:
            user's recent taste are: <word1>,<word2>,<word3>,<word4>,<word5>.
            """

        observations = self.memory_retriever.memory_stream[-last_k:]
        observation_str = "\n".join(
            [self._format_memory_detail(o) for o in observations]
        )
        prompt_filled = prompt.replace("<INPUT>", observation_str)
        result = self.get_completion(prompt=prompt_filled, llm="gpt-4o-mini", temperature=0.2)
        return result
    # "gpt-3.5-turbo"
    def _user_satisfaction_reflection(self, last_k: int = 10) -> str:
        """Ask the LLM how satisfied the user is with the recommendations.

        The prompt is built from memories retrieved for a fixed question about
        the interaction history; ``last_k`` is accepted but not used.
        """
        prompt = """
            <INPUT>\n\n
            Given only the information above, describe your feeling of the recommendation result using a sentence.
            The output format must be:
            [unsatisfied/satisfied] with the recommendation result because [reason].
            """


        observations = "what's your interaction history with each page of recommender?"
        relevant_memories = self.fetch_memories(observations)
        observation_str = self.format_memories_detail(relevant_memories)
        prompt_filled = prompt.replace("<INPUT>", observation_str)
        result = self.get_completion(prompt=prompt_filled, llm="gpt-4o-mini", temperature=0.2)

        return result

    def _user_feeling_reflection(self, last_k: int = 10) -> str:
        """Ask the LLM to describe the user's feeling about the last ``last_k`` memories."""
        # NOTE(review): the persona below is hard-coded text and the "<Part 1>"
        # marker is never substituted (only "<INPUT 2>" is replaced) - confirm
        # whether a per-user persona was meant to be injected here.
        #user persona: <INPUT 1>
        prompt = """
            user persona: a 22-year-old woman working in a clerical/administrative role. She is intelligent, imaginative, and adventurous. With a passion for movies, Emily has a diverse taste and enjoys a wide range of genres. Her favorite movie's producer name include ""Led Zeppelin,"" ""Pink Floyd,"" ""Wu-Tang Clan,"" ""Mos Def,"" and movie's actor name include ""Hocus Pocus,"" ""10,000 Days (Wings, Part 2).""  Emily's movie preferences reflect a wide range of movie genres and artists, from classic rock bands to hip-hop artist. She appreciates complex compositions and innovative sounds, gravitating towards artists who push the boundaries of traditional movie."
            3,3,"Hocus Pocus; 10,000 Days (Wings, Part 2)", Classic-rock-bands; Hip-hop,Male,45-49,clerical/admin,55421,"<Part 1>

            This user has read following movies recently:
            <INPUT 2>\n\n
            Given only the information above, describe the user's feeling of each of the movie he/she watch recently.
            """

        observations = self.memory_retriever.memory_stream[-last_k:]
        observation_str = "\n".join(
            [self._format_memory_detail(o) for o in observations]
        )
        prompt_filled = prompt.replace("<INPUT 2>", observation_str)
        result = self.get_completion(prompt=prompt_filled, llm="gpt-4o-mini", temperature=0.2)
        return result

    def pause_to_reflect_taste(self, now: Optional[datetime.datetime] = None) -> str:
        """Reflect on the user's taste, store the result as a memory and return it."""
        taste = self._user_taste_reflection()
        self.add_memory(taste, now=now)
        return 'taste reflected:\n'+ taste

    def pause_to_reflect_satisfaction(self, now: Optional[datetime.datetime] = None) -> str:
        """Reflect on the user's satisfaction, store the result as a memory and return it."""
        satisfaction = self._user_satisfaction_reflection()
        self.add_memory(satisfaction, now=now)
        return 'satisfaction reflected:\n'+ satisfaction

    def pause_to_reflect_feeling(self, now: Optional[datetime.datetime] = None) -> str:
        """Reflect on the user's feeling, store the result as a memory and return it."""
        feeling = self._user_feeling_reflection()
        self.add_memory(feeling, now=now)
        return 'feeling reflected:\n'+ feeling

    def add_memory(
        self, memory_content: str, now: Optional[datetime.datetime] = None
    ) -> List[str]:
        """Add an observation or memory to the agent's memory bank.

        Every memory is stored with a fixed importance of 1. When
        ``reflection_threshold`` is set and the accumulated importance exceeds
        it, a reflection is triggered (unless one is already in progress).

        Returns:
            Whatever the retriever's ``add_documents`` returns for the new memory.
        """
        importance_score = 1
        self.aggregate_importance += importance_score
        document = Document(
            page_content=memory_content, metadata={"importance": importance_score}
        )
        result = self.memory_retriever.add_documents([document], current_time=now)

        # After an agent has processed a certain amount of memories (as measured by
        # aggregate importance), it is time to reflect on recent events to add
        # more synthesized memories to the agent's memory stream.
        if (
            self.reflection_threshold is not None
            and self.aggregate_importance > self.reflection_threshold
            and not self.reflecting
        ):
            self.reflecting = True
            try:
                self.reflect(now=now)
                # Hack to clear the importance from reflection
                self.aggregate_importance = 0.0
            finally:
                # Reset the guard even when reflection fails; otherwise no
                # later call could ever trigger a reflection again.
                self.reflecting = False
        return result

    def update_memory(self, reaction):
        """
        Update the memory bank with the reaction
        """
        return


    def time_weighting(self):
        """
        Weighting the memory according to the time
        """
        raise NotImplementedError

    def importance_weighting(self):
        """
        Weighting the importance of memory according to
        the results of recommendation and the personal taste
        """
        raise NotImplementedError

    def reflect(self, now: Optional[datetime.datetime] = None):
        """
        Generate a high level understanding of previous memories.

        Only the satisfaction reflection is currently enabled. Returns 0.
        """
        # self.pause_to_reflect_taste(now=now)
        # self.pause_to_reflect_feeling(now=now)
        self.pause_to_reflect_satisfaction(now=now)
        return 0

In [None]:
from termcolor import colored, cprint
import openai
import os

import re
import numpy as np
import faiss
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore import InMemoryDocstore
from langchain.chat_models import ChatOpenAI
import time
import datetime
import torch
from langchain.embeddings import OpenAIEmbeddings
import pandas as pd

import wandb


class Avatar(abstract_avatar):
    def __init__(self, args, avatar_id, init_property, init_statistic):
        """
        Set up one avatar.

        Forwards ``args`` and ``avatar_id`` to the base class, parses the
        property and statistic inputs, points ``self.log_file`` at this
        avatar's running log (deleting a log left at that path), and finally
        builds the memory via ``init_memory``.
        """
        super().__init__(args, avatar_id)

        self.parse_init_property(init_property)
        self.parse_init_statistic(init_statistic)

        log_path = f"storage/{args.dataset}/{args.modeltype}/{args.simulation_name}/running_logs/{avatar_id}.txt"
        self.log_file = log_path
        # Start from a clean log: write_log only ever appends.
        stale_log_present = os.path.exists(self.log_file)
        if stale_log_present:
            os.remove(self.log_file)
        self.init_memory()

    def parse_init_property(self, init_property):
        """
        Read the avatar's taste and rating tendency from ``init_property``.

        ``init_property["taste"]`` is split on ``"| "`` into ``self.taste``;
        ``init_property["high_rating"]`` is stored as-is in ``self.high_rating``.
        """
        raw_taste = init_property["taste"]
        self.taste = raw_taste.split("| ")
        self.high_rating = init_property["high_rating"]


    def parse_init_statistic(self, init_statistic):
        """
        Translate the avatar's trait groups into prompt descriptions.

        Reads the ``"conformity"``, ``"activity"`` and ``"diversity"`` entries
        of ``init_statistic`` (each expected to be 1, 2 or 3), stores them as
        ``self.<trait>_group`` and stores the matching description text as
        ``self.<trait>_dsc``. A group outside 1-3 raises ``KeyError``.
        """
        # Only the movie wording is active; the music and book variants are
        # kept as comments below each table.
        trait_descriptions = {}

        # activity_dict
        trait_descriptions["activity"] = {
            1:"An Incredibly Elusive Occasional Viewer, so seldom attracted by movie recommendations that it's almost a legendary event when you do watch a movie. Your movie-watching habits are extraordinarily infrequent. And you will exit the recommender system immediately even if you just feel little unsatisfied.",
            2:"An Occasional Viewer, seldom attracted by movie recommendations. Only curious about watching movies that strictly align the taste. The movie-watching habits are not very infrequent. And you tend to exit the recommender system if you have a few unsatisfied memories.",
            3:"A Movie Enthusiast with an insatiable appetite for films, willing to watch nearly every movie recommended to you. Movies are a central part of your life, and movie recommendations are integral to your existence. You are tolerant of recommender system, which means you are not easy to exit recommender system even if you have some unsatisfied memory.",
        }
        # activity_dict = {   1: "An Incredibly Elusive Occasional Listener, so seldom attracted by music recommendations that it's almost a legendary event when you do listen to a new song. Your music-listening habits are extraordinarily infrequent, and you will exit the recommender system immediately even if you feel a little unsatisfied.",
        #                     2: "An Occasional Listener, seldom attracted by music recommendations. Only curious about listening to music that strictly aligns with your taste. Your music-listening habits are not very infrequent, and you tend to exit the recommender system if you have a few unsatisfactory experiences.",
        #                     3: "A Music Enthusiast with an insatiable appetite for new tunes, willing to listen to nearly every song recommended to you. Music is a central part of your life, and music recommendations are integral to your existence. You are tolerant of the recommender system, which means you are not easy to exit even if you have some unsatisfactory experiences."}
        # activity_dict = {     1: "An Incredibly Elusive Occasional Reader, rarely drawn to book recommendations. It's almost a legendary event when you pick up a new book. Your reading habits are extraordinarily infrequent, and you tend to stop engaging with the recommendation system quickly if the books don't immediately capture your interest.",
        #                       2: "An Occasional Reader, rarely attracted by book recommendations. You are selective and only interested in books that strictly align with your tastes. Your reading habits are not very frequent, and you may stop using the recommendation system after encountering a few books that don't meet your expectations.",
        #                       3: "A Book Enthusiast with an insatiable appetite for new reads, eager to explore almost every book recommended to you. Books are a central part of your life, and recommendations are integral to your reading journey. You are patient with the recommendation system, and it takes more than a few unsatisfactory books to deter you."}

        # conformity_dict
        trait_descriptions["conformity"] = {
            1:"A Dedicated Follower who gives ratings heavily relies on movie historical ratings, rarely expressing independent opinions. Usually give ratings that are same as historical ratings. ",
            2:"A Balanced Evaluator who considers both historical ratings and personal preferences when giving ratings to movies. Sometimes give ratings that are different from historical rating.",
            3:"A Maverick Critic who completely ignores historical ratings and evaluates movies solely based on own taste. Usually give ratings that are a lot different from historical ratings.",
        }
        # conformity_dict = {   1: "A Dedicated Follower who gives ratings heavily relying on historical music ratings, rarely expressing independent opinions. Usually gives ratings that are the same as historical ratings.",
        #                       2: "A Balanced Evaluator who considers both historical ratings and personal preferences when giving ratings to music. Sometimes gives ratings that are different from historical ratings.",
        #                       3: "A Maverick Critic who completely ignores historical ratings and evaluates music solely based on personal taste. Usually gives ratings that are significantly different from historical ratings."}
        # conformity_dict = {   1: "A Dedicated Follower who heavily relies on popular reviews and historical ratings when rating books. You rarely express independent opinions and usually rate books similarly to the average ratings.",
        #                       2: "A Balanced Evaluator who considers both popular reviews and personal preferences when rating books. You sometimes give ratings that differ from the average ratings based on your own experience.",
        #                       3: "A Maverick Critic who completely ignores popular reviews and historical ratings, evaluating books solely based on personal taste. You usually give ratings that are significantly different from the average ratings."}

        # diversity_dict
        trait_descriptions["diversity"] = {
            1:"An Exceedingly Discerning Selective Viewer who watches movies with a level of selectivity that borders on exclusivity. The movie choices are meticulously curated to match personal taste, leaving no room for even a hint of variety.",
            2:"A Niche Explorer who occasionally explores different genres and mostly sticks to preferred movie types.",
            3:"A Cinematic Trailblazer, a relentless seeker of the unique and the obscure in the world of movies. The movie choices are so diverse and avant-garde that they defy categorization.",
        }
        # diversity_dict = {   1: "An Exceedingly Discerning Selective Listener who listens to music with a level of selectivity that borders on exclusivity. The music choices are meticulously curated to match personal taste, leaving no room for even a hint of variety.",
        #                      2: "A Niche Explorer who occasionally explores different genres but mostly sticks to preferred music types.",
        #                      3: "A Musical Trailblazer, a relentless seeker of the unique and the obscure in the world of music. The music choices are so diverse and avant-garde that they defy categorization."}
        # diversity_dict = {    1: "An Exceedingly Discerning Selective Reader who reads books with a level of selectivity that borders on exclusivity. Your book choices are meticulously curated to match your specific taste, leaving little room for variety.",
        #                       2: "A Niche Explorer who occasionally explores different genres but mostly sticks to preferred types of books.",
        #                       3: "A Literary Trailblazer, a relentless seeker of unique and obscure works in the world of literature. Your reading choices are so diverse and avant-garde that they defy categorization."}

        trait_order = ("conformity", "activity", "diversity")
        # First record every group id, then resolve the descriptions, so all
        # three *_group attributes exist even if a lookup below fails.
        for trait in trait_order:
            setattr(self, trait + "_group", init_statistic[trait])
        for trait in trait_order:
            group = getattr(self, trait + "_group")
            setattr(self, trait + "_dsc", trait_descriptions[trait][group])

    def init_memory(self):
        """
        Build the avatar's retriever and memory.

        Creates a FAISS vector store backed by ``OpenAIEmbeddings``, wraps it in
        an ``AvatarRetriver`` (``k=5``) stored as ``self.avatar_retriever`` and
        creates ``self.memory`` as an ``AvatarMemory`` using a ``ChatOpenAI``
        model. The elapsed wall-clock time is printed at the end.
        """
        started_at = time.time()

        def score_normalizer(val: float) -> float:
            # Maps a raw score into (0, 1); passed to FAISS as the relevance function.
            return 1 - 1 / (1 + np.exp(val))

        embedder = OpenAIEmbeddings(request_timeout = 20)
        # Dimension of the flat L2 index; presumably the embedding width - TODO confirm.
        embedding_size = 1536
        vectorstore = FAISS(
            embedder.embed_query,
            faiss.IndexFlatL2(embedding_size),
            InMemoryDocstore({}),
            {},
            relevance_score_fn=score_normalizer,
        )

        chat_model = ChatOpenAI(max_tokens=1000, temperature=0.3, request_timeout = 30)
        self.avatar_retriever = AvatarRetriver(vectorstore=vectorstore, k=5)
        self.memory = AvatarMemory(
            memory_retriever=self.avatar_retriever,
            llm=chat_model,
            reflection_threshold=3,
            use_wandb = self.use_wandb,
        )
        finished_at = time.time()

        banner_style = dict(color='green', attrs=['bold'])
        cprint(f"Avatar {self.avatar_id} is initialized with memory", **banner_style)
        cprint(f"Time cost: {finished_at-started_at}s", **banner_style)



    def _reaction(self, messages=None, timeout=30):
        """
        Send ``messages`` to the chat-completion endpoint and return the reply text.

        The request is repeated until a non-empty reply is obtained; there is no
        upper bound on the number of attempts. After a failed attempt the error
        is printed, ``global_error_cast`` is incremented and the loop sleeps
        before retrying.

        When ``self.use_wandb`` is set, traffic counters are reported to wandb
        once the elapsed time crosses the next ``global_interval`` boundary.
        The report is guarded by the module-level ``lock`` (non-blocking
        acquire), so at most one caller reports per boundary.

        Args:
            messages: chat messages forwarded unchanged to
                ``openai.ChatCompletion.create``.
            timeout: forwarded as ``request_timeout``.

        Returns:
            The ``content`` of the first choice of the completion.
        """
        global global_k_tokens
        global global_start_time
        global global_steps
        global global_last_tokens_record
        global global_interval
        global global_finished_users
        global global_finished_pages
        global global_error_cast
        global lock
        response = ''
        except_waiting_time = 1
        max_waiting_time = 16
        current_sleep_time = 0.5
        while response == '':
            try:
                start_time = time.time()
                time_local = time.localtime(start_time)
                l_start = time.strftime("%Y-%m-%d %H:%M:%S",time_local)

                if(self.use_wandb): # whether to use wandb
                    if((start_time - global_start_time)//global_interval > global_steps):
                        print("\nStart Identifier", start_time, global_start_time, (start_time - global_start_time), global_steps)
                        if(lock.acquire(False)):
                            # Release in ``finally``: if wandb.log raised, the lock used to
                            # stay held and no later call could ever report again.
                            try:
                                print("\nStart Identifier", start_time, global_start_time, (start_time - global_start_time), global_steps)
                                global_steps += 1
                                wandb.log(
                                    data = {"Real-time Traffic": global_k_tokens - global_last_tokens_record,
                                            "Total Traffic": global_k_tokens,
                                            "Finished Users": global_finished_users,
                                            "Finished Pages": global_finished_pages,
                                            "Error Cast": global_error_cast/1000,
                                    },
                                    step = global_steps
                                )
                                global_last_tokens_record = global_k_tokens
                            finally:
                                lock.release()
                            print("\nEnd Identifier", time.time(), global_start_time, (time.time() - global_start_time), global_steps)

                completion = openai.ChatCompletion.create(
                    model="gpt-4o-mini",
                    messages=messages,
                    temperature=0.2,
                    request_timeout = timeout,
                    max_tokens=1000
                    )

                l_end = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time()))
                # Usage is tracked in thousands of tokens, per avatar and globally.
                k_tokens = completion["usage"]["total_tokens"]/1000
                print(f"User {self.avatar_id} used {k_tokens} tokens from {l_start} to {l_end}")
                self.memory.user_k_tokens += k_tokens
                global_k_tokens += k_tokens
                response = completion["choices"][0]["message"]["content"]
            except Exception as e:
                print(e)
                global_error_cast += 1
                time.sleep(current_sleep_time)
                # Double the jitter window (1 -> 2 -> 4 -> 8 -> 16) and draw the
                # next sleep as a random whole number of seconds below it.
                if except_waiting_time < max_waiting_time:
                    except_waiting_time *= 2
                current_sleep_time = np.random.randint(0, except_waiting_time-1)

        return response


    def make_next_decision(self, remember=False, current_page=None):
        """
        Ask the model whether the avatar keeps browsing or leaves.

        Builds a system prompt from the activity trait and ``current_page``
        plus a fixed instruction prompt, logs both, sends them through
        ``_reaction`` and returns the raw reply (also printed and logged).
        ``remember`` is accepted but not used here.
        """
        # controlling the memory element
        sys_prompt = "".join((
            "You excel at role-playing. Picture yourself as a user exploring a movie recommendation system. You have the following social traits: ",
            f"\nYour activity trait is described as: {self.activity_dsc}",
            f"\nNow you are in Page {current_page}. You may get tired with the increase of the pages you have browsed. (above 2 pages is a little bit tired, above 3 pages is very tired)",
        ))

        prompt = "".join((
            "Firstly, generate an overall feeling based on your memory, in accordance with your activity trait and your satisfaction on recommender system.",
            "\nIf your overall feeling is positive, write: POSITIVE: [reason]",
            "\nIf it's negative, write: NEGATIVE: [reason]",
            "\nNow, note that you must have to interact with 5 pages then you can decide to exit or your overall positive feeling is fully satisfied.",
            "\nAgain, you will exit the recommender system if you get so tired.",
            "\nTo leave, write: [EXIT]; Reason: [brief reason]",
            "\nTo continue browsing, write: [NEXT]; Reason: [brief reason]",
        ))

        system_message = {"role": "system", "content": sys_prompt}
        user_message = {"role": "user", "content": prompt}

        for outgoing in (sys_prompt, prompt):
            self.write_log("\n" + outgoing, color="blue")
        response = self._reaction([system_message, user_message])
        print("Next Decision Response: ", response)
        self.write_log("\n" + response, color="white")

        return response

    def response_to_question(self, question, remember=False):
        relevant_memories = self.memory.memory_retriever.memory_stream
        formated_relevant_memories = self.memory.format_memories_detail(relevant_memories)
        sys_prompt = (f"You excel at role-playing. Picture yourself as user {self.avatar_id} who has just finished exploring a movie recommendation system. You have the following social traits:"
                +f"\nYour activity trait is described as: {self.activity_dsc}"
                +f"\nYour conformity trait is described as: {self.conformity_dsc}"
                +f"\nYour diversity trait is described as: {self.diversity_dsc}"
                +f"\nBeyond that, your movie tastes are: {'; '.join(self.taste).replace('I ','')}. "
                +"\nThe activity characteristic pertains to the frequency of your movie-watching habits. The conformity characteristic measures the degree to which your ratings are influenced by historical ratings. The diversity characteristic gauges your likelihood of watching movie that may not align with your usual taste."
                )
        prompt = f"""
        Relevant context from user {self.avatar_id}'s memory:
        {formated_relevant_memories}
        Act as user {self.avatar_id}, assume you are having a interview, reponse the following question:
        {question}
        """


        messages = [{"role": "system",
                    "content": sys_prompt},
                    {"role": "user",
                    "content": prompt}]

        self.write_log("\n" + sys_prompt, color="blue")
        self.write_log("\n" + prompt, color="blue")
        response = self._reaction(messages)
        self.write_log("\n" + response, color="blue")
        #
        if(remember):
            self.memory.add_memory(f"I was asked '{question}', and I responsed: '{response}'"
                                , now=datetime.datetime.now())
        return response

    def reaction_to_forced_items(self, recommended_items_str):
        """
        Ask the model to judge every item of a given list from taste alone.

        The system prompt carries only the avatar's tastes; the user prompt
        embeds ``recommended_items_str`` and asks for a WATCH decision, a
        reason and a 1-5 rating per item. Nothing is logged or stored.

        Returns:
            The raw reply from ``_reaction`` (called with ``timeout=20``).
        """
        sys_prompt = "".join((
            "Assume you are a user browsing movie recommendation system who has the following characteristics: ",
            f"\nYour movie tastes are: {'; '.join(self.taste).replace('I ','')}. ",
        ))
        prompt = "".join((
            "##recommended list## \n",
            recommended_items_str,
            "\nPlease choose movies in the ##recommended list## that you want to read and explain why. After reading the book, evaluate each book based on your characteristics, taste and historical ratings to give a rating from 1 to 5.",
            "\nYou only watch movies which aligh with your taste.",
            "\nUse this format: MOVIE: [movie title]; WATCH: [yes or no]; REASON: [brief reason]; RATING: [integer between 1-5];",
            "\nYou must judge all the movies. If you don't want to watch a movie, use WATCH: no; REASON: [brief reason]",
            "\nEach response should be on one line. Do not include any additional information or explanations and stay grounded in reality.",
        ))
        conversation = [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": prompt},
        ]

        return self._reaction(conversation, timeout=20)

    def reaction_to_recommended_items(self, recommended_items_str, current_page):
        """
        Let the avatar react to one page of recommendations.

        Steps:
          1. Build the role-play prompts from the avatar's traits, tastes and
             rating tendency, and send them through ``_reaction``.
          2. Parse the reply for ``MOVIE/ALIGN/REASON`` lines (every
             recommended movie) and ``MOVIE/RATING/FEELING`` lines (movies the
             avatar watched), and store a summary of the page in memory.
          3. Ask ``make_next_decision`` whether to continue. If the answer
             contains ``[EXIT]`` (any letter case) ``self.exit_flag`` is set
             and the exit is recorded in memory; otherwise the page turn is
             recorded.

        Args:
            recommended_items_str: text listing the items of this page.
            current_page: page number; used in the prompt and memory texts and
                incremented by one for the page-turn memory.

        Returns:
            The raw reply to the recommendation prompt.
        """
        # ``high_rating`` may not be a string (e.g. a missing value); in that
        # case the rating tendency is left blank.
        try:
            high_rating = self.high_rating.replace('You are','')
        except AttributeError:
            high_rating = ''

        sys_prompt = ("You excel at role-playing. Picture yourself as a user exploring a movie recommendation system. You have the following social traits:"
                +f"\nYour activity trait is described as: {self.activity_dsc}"
                +f"\nYour conformity trait is described as: {self.conformity_dsc}"
                +f"\nYour diversity trait is described as: {self.diversity_dsc}"
                +f"\nBeyond that, your movie tastes are: {'; '.join(self.taste).replace('I ','')}. "
                +f"\nAnd your rating tendency is {high_rating}"#+f"{low_rating}"
                +"\nThe activity characteristic pertains to the frequency of your movie-watching habits. The conformity characteristic measures the degree to which your ratings are influenced by historical ratings. The diversity characteristic gauges your likelihood of watching movies that may not align with your usual taste."
                )
        # uncomment this one to use the memory effect.
        # if self.memory.memory_retriever.memory_stream:
        #     observation = "What movies have you watch on the previous pages of the current recommender system?"
        #     relevant_memories = self.memory.fetch_memories(observation)
        #     formated_relevant_memories = self.memory.format_memories_detail(relevant_memories)
        #     sys_prompt = sys_prompt +f"\nRelevant context from your memory:{formated_relevant_memories}"

        # Book variant of the prompt (inactive):
        # prompt = (
        #         "#### Recommended List #### \n"
        #         + f"PAGE {current_page}\n"
        #         +recommended_items_str
        #         +"\nPlease respond to all the books in the ## Recommended List ## and provide explanations."
        #         +"\nFirstly, determine which books align with your taste and which do not, and provide reasons. You must respond to all the recommended books using this format:"
        #         +"\nBOOK: [book title]; ALIGN: [yes or no]; REASON: [brief reason]"
        #         +"\nSecondly, among the books that align with your tastes, decide the number of books you want to read based on your activity and diversity traits. Use this format:"
        #         +"\nNUM: [number of book you choose to read]; READ: [all book's title you choose to read]; REASON: [brief reason];"
        #         +"\nThirdly, assume it's your first time reading the books you've chosen, and rate them on a scale of 1-5 to reflect different degrees of liking, considering your feeling and conformity trait. Use this format:"
        #         +"\n BOOK: [book title you choose to read]; RATING: [integer between 1-5]; FEELING: [aftermath sentence]; "
        #         +"\n Do not include any additional information or explanations and stay grounded."
        # )
        prompt = (
                "#### Recommended List #### \n"
                + f"PAGE {current_page}\n"
                +recommended_items_str
                +"\nPlease respond to all the movies in the ## Recommended List ## and provide explanations."
                +"\nFirstly, determine which movies align with your taste and which do not, and provide reasons. You must respond to all the recommended movies using this format:"
                +"\nMOVIE: [movie title]; ALIGN: [yes or no]; REASON: [brief reason]"
                +"\nSecondly, among the movies that align with your tastes, chose only one movie you want to watch based on your activity and diversity traits. Use this format:"
                +"\nNUM: [one movie you choose to watch]; WATCH: [one movie's title you choose to watch]; REASON: [brief reason];"
                +"\nThirdly, assume it's your first time watching the movies you've chosen, and rate them on a scale of 1-5 to reflect different degrees of liking, considering your feeling and conformity trait. Use this format:"
                +"\n MOVIE: [movie title you choose to watch]; RATING: [integer between 1-5]; FEELING: [aftermath sentence]; "
                +"\n Do not include any additional information or explanations and stay grounded."
        )

        messages = [{"role": "system",
                    "content": sys_prompt},
                    {"role": "user",
                    "content": prompt}]

        self.write_log("\n" + sys_prompt, color="blue")
        self.write_log("\n" + prompt, color="blue")
        reaction = self._reaction(messages, timeout=10) # reaction
        self.write_log("\n" + reaction, color="yellow")

        # @ 2 Add user satisfaction information for this page.

        # =========================
        # Lines describing watched movies: title, integer rating, feeling.
        rated_pattern = re.compile(r'MOVIE: (.+?); RATING: (\d+); FEELING: (.*)')
        rated = rated_pattern.findall(reaction)
        # Lines judging every recommended movie: title, yes/no, reason.
        aligned_pattern = re.compile(r'MOVIE: (.+?); ALIGN: (.+?); REASON: (.*)')
        judged = aligned_pattern.findall(reaction)

        all_movies = ", ".join(f"{title}" for title, align, reason in judged)
        watched_movies = [f"{title}" for title, rating, feeling in rated]
        watched_movies_ratings = [rating.strip(';') for title, rating, feeling in rated]
        # Disliked = watched but rated below 3, plus everything judged "ALIGN: no".
        dislike_movies = [f"{title}" for title, rating, feeling in rated if (int(rating.strip(';')) < 3)]
        dislike_movies.extend([f"{title}" for title, align, reason in judged if align.strip(';').lower() == 'no'])
        self.memory.add_memory(f"The recommender recommended the following movies to me on page {current_page}: {all_movies}, among them, I watch {watched_movies} and rate them {watched_movies_ratings} respectively. I dislike the rest movies: {dislike_movies}."
            , now=datetime.datetime.now()
        )

        # User makes the next decision.
        next_decision = self.make_next_decision(current_page=current_page)
        # Match the exit marker regardless of letter case ("[EXIT]", "[exit]", "[Exit]").
        if '[exit]' in next_decision.lower():
            self.exit_flag = True
            self.memory.add_memory(f"After browsing {current_page} pages, I decided to leave the recommendation system."
                , now=datetime.datetime.now())

        else:
            self.memory.add_memory(f"Turn to page {current_page+1} of the recommendation."
                , now=datetime.datetime.now())
        #===========================

        return reaction

    def write_log(self, log, color=None, attrs=None, print=False):
        """
        Append ``log`` plus a newline to this avatar's log file.

        The file is written as UTF-8 so that non-ASCII text (model replies
        often contain it) cannot fail on platforms whose default encoding
        is narrower.

        Args:
            log: text to append.
            color: passed to ``cprint`` when echoing.
            attrs: passed to ``cprint`` when echoing.
            print: when true, also echo ``log`` to the console via ``cprint``.
                Note that this parameter shadows the builtin inside the method.
        """
        # Leaving the ``with`` block flushes and closes the file.
        with open(self.log_file, 'a', encoding='utf-8') as f:
            f.write(log + '\n')
        if(print):
            cprint(log, color=color, attrs=attrs)

In [None]:
from argparse import Namespace
import json
import sys
import os
import re
import torch
import random
import pandas as pd

# NOTE(review): this pair appends "<sys.path[0]>/recommenders" and then removes
# that same value again, so no new entry survives this cell. list.remove drops
# the first match, so if the entry was already present the only effect is that
# it moves to the end of sys.path. Looks like a leftover from an import that
# used to sit between the two lines - confirm before deleting.
sys.path.append(sys.path[0] + "/recommenders")
sys.path.remove(sys.path[0] + "/recommenders")

class abstract_arena:
    def __init__(self, args):
        super().__init__()
        self.args = args
        self.data_path = args.data_path
        self.dataset = args.dataset
        self.val_users = args.val_users
        self.val_ratio = args.val_ratio
        self.simulation_name = args.simulation_name
        self.device = torch.device('cpu')
        self.n_avatars = args.n_avatars
        self.modeltype = args.modeltype
        self.items_per_page = args.items_per_page
        self.execution_mode = args.execution_mode
        self.rec_gt = args.rec_gt
        self.model_path = "recommenders/weights/" + args.dataset + "/" + args.modeltype + "/" + args.model_path

        # self.model_path = "recommenders/weights/" + args.dataset + "/" + args.modeltype + "/" + args.model_path
        print("============================")
        print(self.model_path)

    def excute(self):
        """
        The whole process of the simulation
        """
        self.load_saved_args(self.model_path)
        self.prepare_dir()
        self.load_data()
        self.load_recommender()
        self.initialize_all_avatars()
        self.get_full_rankings()
        self.load_additional_info()
        if (self.val_users):
            self.validate_all_avatars()
        else:
            self.simulate_all_avatars()
            # self.save_results()

    def load_saved_args(self, model_path):
        """
        load the recommender args, which is saved when training the recommender
        """
        self.saved_args = Namespace()
        # If the path exists, read.
        if(os.path.exists(model_path + '/args.txt')):
            with open(model_path + '/args.txt', 'r') as f:
                self.saved_args.__dict__ = json.load(f)
        else:
            with open("recommenders/weights/default_args.txt", 'r') as f:
                self.saved_args.__dict__ = json.load(f)
        # View current directory.
        # self.saved_args.data_path = 'datasets/' # Modify the table of contents.
        self.saved_args.data_path = self.data_path
        self.saved_args.dataset = self.dataset
        self.saved_args.cuda = self.args.cuda
        self.saved_args.modeltype = self.modeltype
        # self.saved_args.nodrop = self.args.nodrop

    def prepare_dir(self):
        # make dir
        def ensureDir(dir_path):
            if not os.path.exists(dir_path):
                os.makedirs(dir_path)
        self.storage_base_path = f"storage/{self.dataset}/{self.modeltype}/" + self.simulation_name
        ensureDir(self.storage_base_path)
        # ensureDir(self.storage_base_path + "/avatars")
        ensureDir(self.storage_base_path + "/running_logs")
        ensureDir(self.storage_base_path + "/rankings")
        # ensureDir(self.storage_base_path + "/new_train")
        if os.path.exists(self.storage_base_path + "/system_log.txt"):
            os.remove(self.storage_base_path + "/system_log.txt")


    def load_data(self):
        """
        load the data for simulation
        """
        sys.path.append(sys.path[0] + "/recommenders")
        try:
            exec('from recommenders.models.'+ self.saved_args.modeltype + ' import ' + self.saved_args.modeltype + '_Data') # load special dataset
            print('from recommenders.models.'+ self.saved_args.modeltype + ' import ' + self.saved_args.modeltype + '_Data')
            self.data = eval(self.saved_args.modeltype + '_Data(self.saved_args)')
        except:
            print("no special dataset")
            self.data = Data(self.saved_args) # load data from the path
            print("finish loading data")
        sys.path.remove(sys.path[0] + "/recommenders")
        # import pickle
        # with open(f'datasets/{self.dataset}/simulation/movie_dict.pkl', 'rb') as f:
        #     self.movie_detail = pickle.load(f)
        # self.movie_detail = pd.read_csv(f'{self.dataset}/simulation/book_detail.csv')
        self.movie_detail = pd.read_csv(f'{self.data_path}/{self.dataset}/book_detail.csv')

    def load_recommender(self):
        """
        load the recommender for simulation
        """
        sys.path.append(sys.path[0] + "/recommenders")
        self.running_model = self.saved_args.modeltype
        # exec('from recommenders.models.'+ self.saved_args.modeltype + ' import ' + self.running_model) # import the model first
        exec(self.running_model) # import the model first
        self.model = eval(self.running_model + '(self.saved_args, self.data)') # initialize the model with the graph
        print("finish generating recommender")
        sys.path.remove(sys.path[0] + "/recommenders")

        # load the checkpoint
        def restore_checkpoint(model, checkpoint_dir, device):
            """
            If a checkpoint exists, restores the PyTorch model from the checkpoint.
            Returns the model and the current epoch.
            """
            cp_files = [file_ for file_ in os.listdir(checkpoint_dir)
                        if file_.startswith('epoch=') and file_.endswith('.checkpoint.pth.tar')]
            if not cp_files:
                print('No saved model parameters found')
            epoch_list = []
            regex = re.compile(r'\d+')
            for cp in cp_files:
                epoch_list.append([int(x) for x in regex.findall(cp)][0])
            loading_epoch = max(epoch_list)

            filename = os.path.join(checkpoint_dir,
                                    'epoch={}.checkpoint.pth.tar'.format(loading_epoch))
            # print("Loading from checkpoint {}?".format(filename))

            checkpoint = torch.load(filename, map_location = str(device))
            model.load_state_dict(checkpoint['state_dict'])
            print("=> Successfully restored checkpoint (trained for {} epochs)"
                    .format(checkpoint['epoch']))

            return model, loading_epoch

        if(self.args.modeltype != "Random" and self.args.modeltype != "Pop"):
            print("loading checkpoint")
            self.model, self.loading_epoch = restore_checkpoint(self.model, self.model_path, self.device) # restore the checkpoint
        # self.model, self.loading_epoch = restore_checkpoint(self.model, self.model_path, self.device) # restore the checkpoint

    def get_full_rankings(self, filename = "full_rankings", batch_size = 512):
        """
        document the full rankings of the items,
        according to a specific cf model
        """
        # if(os.path.exists(self.storage_base_path + '/{}_{}.npy'.format(filename, self.n_avatars))):
        #     print("loading full rankings from storage")
        #     self.full_rankings = np.load(self.storage_base_path + '/{}_{}.npy'.format(filename, self.n_avatars))
        #     print("finish loading full rankings")
        #     print(type(self.full_rankings))
        # else:
        # dump_dict = merge_user_list([self.data.train_user_list,self.data.valid_user_list])
        print("nodrop?", self.data.nodrop)
        # @ Use valid data for simulation.
        if(self.data.nodrop):
            dump_dict = merge_user_list([self.data.train_nodrop_user_list, self.data.test_user_list])
        else:
            dump_dict = merge_user_list([self.data.train_user_list, self.data.test_user_list])
        # dump_dict = merge_user_list([self.data.train_user_list, self.data.test_user_list])
        score_matrix = np.zeros((len(self.simulated_avatars_id), self.data.n_items))
        simulated_avatars_iter = DataIterator(self.simulated_avatars_id, batch_size=batch_size, shuffle=False, drop_last=False)
        for batch_id, batch_users in tqdm(enumerate(simulated_avatars_iter)):
            ranking_score = self.model.predict(batch_users, None)  # (B,N)
            if not is_ndarray(ranking_score, float_type):
                ranking_score = np.array(ranking_score, dtype=float_type)
            # set the ranking scores of training items to -inf,
            # then the training items will be sorted at the end of the ranking list.

            for idx, user in enumerate(batch_users):
                dump_items = dump_dict[user]
                # dump_items = [ x for x in dump_items if not x in self.data.test_user_list[user] ]
                ranking_score[idx][dump_items] = -np.inf

                score_matrix[batch_id*batch_size+idx] = ranking_score[idx]

            print('finish recommend one batch', batch_id)
            # break

        print('finish generating score matrix')
        self.full_rankings = np.argsort(-score_matrix, axis=1)
        if(self.rec_gt):
            # for user in self.simulated_avatars_id:
            #     for idx, item in enumerate(self.data.train_user_list[user]):
            #         self.full_rankings[user][idx] = item
            gt_dict = pd.read_pickle('scripts/user_ground_truth.pkl')
            for user in self.simulated_avatars_id:
                for idx, item in enumerate(gt_dict[user]):
                    self.full_rankings[user][idx] = item
        np.save(self.storage_base_path + '/rankings/' + '/{}_{}.npy'.format(filename, self.n_avatars), self.full_rankings)

        print('finish get full rankings')

    def initialize_all_avatars(self):
        """
        initialize all avatars
        """
        # all_avatars = sorted(list(self.data.test_user_list.keys()))
        # self.simulated_avatars_id = all_avatars[:self.n_avatars]
        self.simulated_avatars_id = list(range(self.n_avatars))
        # print('simulated avatars', self.simulated_avatars_id)
        # self.simulated_avatars_id = sorted(random.sample(all_avatars, self.n_avatars))

    def page_generator(self):
        """
        Produce one page of items for one avatar.

        Raises:
            NotImplementedError: always; not implemented in this class.
        """
        raise NotImplementedError()

    def validate_all_avatars(self):
        """
        Validate the users.

        Raises:
            NotImplementedError: always; not implemented in this class.
        """
        raise NotImplementedError()

    def simulate_all_avatars(self):
        """
        Run the simulation for all avatars.

        Raises:
            NotImplementedError: always; not implemented in this class.
        """
        raise NotImplementedError()

    def simulate_one_avatar(self):
        """
        Run the simulation for a single avatar.

        Raises:
            NotImplementedError: always; not implemented in this class.
        """
        raise NotImplementedError()

    def save_results(self):
        """
        Persist the results of the simulation.

        Raises:
            NotImplementedError: always; not implemented in this class.
        """
        raise NotImplementedError()

    def load_additional_info(self):
        """
        Hook for loading extra information needed by the simulation.

        Does nothing here and returns ``None``.
        """
        return None

In [None]:
import csv
import math
import random
from collections import Counter
from collections import defaultdict

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from scipy.stats import spearmanr
from tqdm import tqdm

def calculate_entropy(movie_types):
    """
    Shannon entropy, in bits, of the label distribution in ``movie_types``.

    Args:
        movie_types: sized iterable of hashable labels (e.g. genre strings).

    Returns:
        ``-sum(p * log2(p))`` over the distinct labels, where ``p`` is the
        relative frequency of a label; ``0`` for an empty input.
    """
    # Tally how often each label occurs (first-seen order is preserved).
    tally = {}
    for label in movie_types:
        tally[label] = tally.get(label, 0) + 1

    n_labels = len(movie_types)

    entropy = 0
    for occurrences in tally.values():
        share = occurrences / n_labels
        entropy -= share * math.log2(share)
    return entropy


def get_entropy(inters, data):
    """
    Genre entropy of a list of interacted items.

    Args:
        inters: item ids to look up.
        data: object exposing ``get_genres_by_id(item_ids)``.

    Returns:
        ``calculate_entropy`` applied to the genres returned for ``inters``.
    """
    return calculate_entropy(data.get_genres_by_id(inters))

class evaluator_data:
    """
    In-memory store of items, users and rated interactions read from local CSV files.

    Attributes populated by the constructor:
        items:            {item_id: {"name", "genre", "historical rating",
                           "inter_cnt", "mention_cnt"}}
        users:            {1-based row number: {"activity", "diversity", "conformity"}}
        interrating:      {user_id: [(item_id, rating), ...]}
        user_ratings:     {user_id: [rating, ...]}
        item_ratings:     {item_id: [rating, ...]}
        user_avg_rating:  {user_id: mean rating}
        item_avg_rating:  {item_id: mean rating}
    """

    # Default input files.  They can be overridden per instance through the
    # constructor / loader arguments instead of editing the class.
    ITEMS_PATH = "book_detail.csv"
    USERS_PATH = "user_statistic.csv"
    RATINGS_PATH = "/content/drive/MyDrive/S4065511/data/RecAgent/Movielens-1M/agent4rec/mapped_ratings.csv"

    def __init__(self, items_path=None, users_path=None, ratings_path=None):
        """
        Load items, users and rated interactions.

        Args:
            items_path: CSV of items; defaults to ``ITEMS_PATH``.
            users_path: CSV of user statistics; defaults to ``USERS_PATH``.
            ratings_path: CSV of (user, item, rating) rows; defaults to ``RATINGS_PATH``.
        """
        self.items = {}
        self.users = {}
        self.db = None
        self.tot_relationship_num = 0
        self.netwerk_density = 0.0
        self.role_id = -1
        self.interrating = {}
        self.user_ratings = {}
        self.item_ratings = {}
        self.load_items(items_path)
        self.load_users(users_path)
        self.load_interactions_rating(ratings_path)

    @staticmethod
    def _data_rows(file_path):
        """Yield the non-empty data rows of a CSV file, skipping its header line."""
        with open(file_path, "r", newline="") as file:
            reader = csv.reader(file)
            next(reader, None)  # header line; tolerate a completely empty file
            for row in reader:
                if row:  # csv.reader yields [] for blank lines
                    yield row

    def load_items(self, file_path=None):
        """
        Load items from a local CSV with columns (item_id, title, genre, rating).

        Args:
            file_path: CSV location; defaults to ``ITEMS_PATH``.
        """
        for item_id, title, genre, rating in self._data_rows(file_path or self.ITEMS_PATH):
            self.items[int(item_id)] = {
                "name": title.strip(),
                "genre": genre,
                "historical rating": float(rating),
                "inter_cnt": 0,
                "mention_cnt": 0,
            }

    def load_users(self, file_path=None):
        """
        Load users from a local CSV with columns (user_id, activity, diversity, conformity).

        Users are keyed by their 1-based row number, not by the ``user_id``
        column, and the three statistics are kept as the raw CSV strings.

        Args:
            file_path: CSV location; defaults to ``USERS_PATH``.
        """
        cnt = 1
        for _user_id, activity, diversity, conformity in self._data_rows(file_path or self.USERS_PATH):
            self.users[cnt] = {
                "activity": activity,
                "diversity": diversity,
                "conformity": conformity,
            }
            cnt += 1

    def load_interactions_rating(self, file_path=None):
        """
        Load user-item interactions (with rating) from a local CSV with
        columns (user_id, item_id, rating).

        Fills ``interrating`` ({user_id: [(item_id, rating), ...]}),
        ``user_ratings``, ``item_ratings`` and the two average-rating dicts.

        Args:
            file_path: CSV location; defaults to ``RATINGS_PATH``.

        Raises:
            ValueError: if an id is not an integer or a rating is not integral.
        """
        for user_id, item_id, rating in self._data_rows(file_path or self.RATINGS_PATH):
            user_id = int(user_id)
            item_id = int(item_id)
            rating = self._parse_rating(rating)
            self.interrating.setdefault(user_id, []).append((item_id, rating))
            self.user_ratings.setdefault(user_id, []).append(rating)
            self.item_ratings.setdefault(item_id, []).append(rating)

        # Average historical rating per user and per item.
        self.user_avg_rating = {
            uid: sum(ratings) / len(ratings) for uid, ratings in self.user_ratings.items() if ratings
        }
        self.item_avg_rating = {
            iid: sum(ratings) / len(ratings) for iid, ratings in self.item_ratings.items() if ratings
        }

    @staticmethod
    def _parse_rating(text):
        """Parse an integral rating; accepts "4" as well as "4.0", rejects "4.5"."""
        try:
            return int(text)
        except ValueError:
            value = float(text)  # still raises ValueError for non-numeric text
            if not value.is_integer():
                raise ValueError(f"rating must be integral, got {text!r}")
            return int(value)

    def get_full_items(self):
        """Return all item ids."""
        return list(self.items.keys())

    def _top_items_by(self, counter_key, n=3):
        """Names of the ``n`` items with the largest ``counter_key`` counter."""
        ids = sorted(
            self.items.keys(), key=lambda x: self.items[x][counter_key], reverse=True
        )[:n]
        return self.get_item_names(ids)

    def get_inter_popular_items(self):
        """
        Get the three most popular items based on the number of interactions.
        """
        return self._top_items_by("inter_cnt")

    def add_inter_cnt(self, item_names):
        """Increase the interaction counter of every item matched by ``item_names``."""
        item_ids = self.get_item_ids(item_names)
        print("item ids:", item_ids)
        for item_id in item_ids:
            self.items[item_id]["inter_cnt"] += 1

    def add_mention_cnt(self, item_names):
        """Increase the mention counter of every item matched by ``item_names``."""
        item_ids = self.get_item_ids(item_names)
        for item_id in item_ids:
            self.items[item_id]["mention_cnt"] += 1

    def get_mention_popular_items(self):
        """
        Get the three most popular items based on the number of mentions.
        """
        return self._top_items_by("mention_cnt")

    def get_item_names(self, item_ids):
        """Return the item names wrapped in angle brackets, e.g. ``<Title>``."""
        return ["<" + self.items[item_id]["name"] + ">" for item_id in item_ids]

    def get_item_ids(self, item_names):
        """
        Map free-text mentions to item ids by substring match.

        For each entry of ``item_names`` the first item (in load order) whose
        name occurs inside the entry is taken; entries without a match are
        dropped, so the result may be shorter than the input.
        """
        item_ids = []
        for item in item_names:
            for item_id, item_info in self.items.items():
                if item_info["name"] in item:
                    item_ids.append(item_id)
                    break
        return item_ids

    def get_item_ids_exact(self, item_names):
        """
        Map item names to item ids by exact equality.

        Unlike ``get_item_ids`` (substring match), an entry only matches an
        item whose name is identical to it.  When several items share a name
        the first one in load order wins; unmatched entries are dropped.
        """
        first_id_by_name = {}
        for item_id, item_info in self.items.items():
            first_id_by_name.setdefault(item_info["name"], item_id)
        return [first_id_by_name[name] for name in item_names if name in first_id_by_name]

    def get_full_users(self):
        """Return all user keys."""
        return list(self.users.keys())

    def get_user_names(self, user_ids):
        """
        Return the "name" field of the given users.

        NOTE(review): ``load_users`` stores no "name" field, so this raises
        KeyError for users loaded from the CSV -- confirm whether it is still needed.
        """
        return [self.users[user_id]["name"] for user_id in user_ids]

    def get_user_ids(self, user_names):
        """
        Return the ids of the users whose "name" equals one of ``user_names``.

        NOTE(review): same caveat as ``get_user_names`` -- users loaded from
        the CSV have no "name" field.
        """
        user_ids = []
        for user in user_names:
            for user_id, user_info in self.users.items():
                if user_info["name"] == user:
                    user_ids.append(user_id)
                    break
        return user_ids

    def get_user_num(self):
        """
        Return the number of users.
        """
        return len(self.users)

    def get_item_num(self):
        """
        Return the number of items.
        """
        return len(self.items)

    def search_items(self, item, k=50):
        """
        Search similar items from faiss db.
        Args:
            item: str, item name
            k: int, number of similar items to return

        ``self.db`` is initialised to ``None`` by the constructor and must be
        assigned by the caller before this method is used.
        """
        docs = self.db.similarity_search(item, k)
        return [doc.page_content for doc in docs]

    def get_genres_by_id(self, item_ids):
        """
        Get genre of items by item id.

        Multi-genre items ("a|b") contribute one entry per genre, so the
        result can be longer than ``item_ids``.
        """
        return [
            genre
            for item_id in item_ids
            for genre in self.items[item_id]["genre"].split('|')
        ]

    def hit_at_k(self, ground_truth, predicted, k):
        """Return 1 if any of the top-k predicted are relevant, else 0."""
        return int(bool(set(ground_truth) & set(predicted[:k])))

    def ndcg_at_k(self, ground_truth, predicted, k):
        """
        Compute binary-relevance NDCG@k for a single user.

        Args:
            ground_truth: iterable of relevant (hashable) item ids.
            predicted: ranked list of predicted item ids.
            k: cut-off rank.

        Returns:
            DCG of the top-k predictions divided by the ideal DCG, or 0.0 when
            the ideal DCG is zero (no relevant items or k <= 0).
        """
        def dcg(rel):
            # With 0/1 relevance the usual gain (2**r - 1) is just r.
            return np.sum([r / np.log2(i + 2) for i, r in enumerate(rel)])

        # A set gives O(1) membership tests, and counting distinct relevant
        # items keeps duplicated ground-truth entries from inflating the ideal DCG.
        relevant = set(ground_truth)
        rel = [1 if item in relevant else 0 for item in predicted[:k]]
        n_ideal_hits = min(len(relevant), k)
        dcg_score = dcg(rel)
        idcg_score = dcg([1] * n_ideal_hits)
        return dcg_score / idcg_score if idcg_score > 0 else 0.0

    def mse(self, ground_truth_ratings, predicted_ratings, items=None):
        """
        Compute MSE for ratings (on items in both sets).

        Args:
            ground_truth_ratings: {item: rating}
            predicted_ratings: {item: rating}
            items: optional iterable restricting the comparison further.

        Returns:
            Mean squared error, or NaN when no item is shared.
        """
        shared = set(ground_truth_ratings.keys()) & set(predicted_ratings.keys())
        if items is not None:
            shared &= set(items)
        if not shared:
            return np.nan
        errors = [(ground_truth_ratings[i] - predicted_ratings[i]) ** 2 for i in shared]
        return np.mean(errors)

    def rmse(self, ground_truth_ratings, predicted_ratings, items=None):
        """Compute RMSE for ratings (square root of ``mse``)."""
        return np.sqrt(self.mse(ground_truth_ratings, predicted_ratings, items))

    def safe_log(self, x):
        """Numerically safe log: the argument is clamped to at least 1e-15."""
        return math.log(max(x, 1e-15))

    def ordered_probit_probs(self, pred_int, K, taus=None):
        """
        Compute ordered probit class probabilities for a predicted integer rating.

        pred_int : int
            The predicted integer rating (e.g., 1..K).
        K : int
            Number of rating classes (e.g., 5 for 1-5 stars).
        taus : list or array, optional
            Thresholds separating the ordered categories.
            If None, uses equally spaced thresholds [1.5, 2.5, ..., K-0.5].

        Returns a length-K numpy array that sums to 1; each class probability
        is floored at 1e-15 before normalisation.
        """
        if taus is None:
            taus = np.array([1.5 + i for i in range(K - 1)])  # default thresholds

        assert len(taus) == K - 1

        def Phi(z):
            return 0.5 * (1.0 + math.erf(z / math.sqrt(2.0)))  # Normal CDF

        probs = []
        for k in range(1, K + 1):
            # Class k spans (taus[k-2], taus[k-1]]; the outer classes are
            # open-ended.  Checking the edges separately also covers K == 1,
            # where there are no thresholds at all.
            p_lower = 0.0 if k == 1 else Phi(taus[k - 2] - pred_int)
            p_upper = 1.0 if k == K else Phi(taus[k - 1] - pred_int)
            probs.append(max(p_upper - p_lower, 1e-15))

        probs = np.array(probs)
        probs /= probs.sum()  # normalize
        return probs


class EvaluatorRS:
    """
    Collects simulation logs and computes recommendation / rating metrics
    against the ground truth held by an ``evaluator_data`` instance.
    """

    def __init__(self, evaluator_data, config=None):
        """
        Args:
            evaluator_data: data object exposing ``get_full_users``,
                ``interrating``, ``get_genres_by_id``, ``ordered_probit_probs``
                and ``safe_log``.
            config: optional dict; ``config["interaction_path"]`` is where
                ``save_interaction`` writes its CSV.
        """
        self.data = evaluator_data
        self.config = config if config is not None else {}
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.record = {}
        self.round_record = {}
        self.positive = {}
        self.interaction_dict = {}
        self.inter_df = None
        self.inter_num = 0
        for user in self.data.get_full_users():
            self.record[user] = []
            self.positive[user] = []
            self.round_record[user] = []
        self.user_data = {
            "user": [],
            "N_expose": [],
            "N_view": [],
            "N_like": [],
            "N_exit": [],
            "S_sat": []
            }
        self.rating_feeling = {
            "User": [],
            "Rating": [],
            "Feelings": []
        }

        # Per-user log of page interactions, filled by add_interaction().
        self.user_directory = defaultdict(list)

    def add_interaction(self, user_id, page, recommended, rated):
        """Append one page log (page number, recommended ids, rated values) for ``user_id``."""
        self.user_directory[user_id].append({
            "page": page,
            "recommended_id": recommended,
            "rated_id": rated
        })

    def ordered_probit_loglik(self, y_true, y_pred_int, K=5, taus=None):
        """
        Compute log-likelihood for ordered probit model given integer predictions.

        y_true : list or array
           True ratings (1..K).
        y_pred_int : list or array
           Predicted integer ratings (1..K).
        K : int
           Number of rating categories (default 5).
        taus : list or array, optional
           Thresholds (default: equally spaced).

        Returns ``(total_loglik, mean_loglik)``; ``(0.0, nan)`` for empty input.

        Raises:
            IndexError: if a true rating lies outside 1..K.
        """
        assert len(y_true) == len(y_pred_int), "Mismatch in true vs predicted length"
        if len(y_true) == 0:
            return 0.0, float("nan")  # nothing to average over
        ll = 0.0
        for t, p in zip(y_true, y_pred_int):
            if not 1 <= t <= K:
                # Without this check a rating <= 0 would silently wrap around
                # to the end of the probability vector.
                raise IndexError(f"true rating {t!r} is outside 1..{K}")
            probs = self.data.ordered_probit_probs(p, K, taus)
            ll += self.data.safe_log(probs[t - 1])  # subtract 1 for 0-based index
        avg_ll = ll / len(y_true)
        return ll, avg_ll

    def update_user_interactions(self, user_id, new_items):
        """
        Updates the directory of user_id and interacted_items.
        - user_id: int or str
        - new_items: iterable of item ids (list, set, etc)

        After calling, interaction_dict[user_id] contains all unique interacted items.
        """
        self.interaction_dict.setdefault(user_id, set()).update(new_items)

    def save_interaction(self):
        """
        Build the interaction history (rating 1 for positive items, 0 for
        other recorded items), keep it in ``self.inter_df`` and, when
        ``config["interaction_path"]`` is set, write it to that csv file.
        """
        inters = []
        for user in self.data.get_full_users():
            liked = self.positive[user]
            inters.extend({"user_id": user, "item_id": item, "rating": 1} for item in liked)
            inters.extend(
                {"user_id": user, "item_id": item, "rating": 0}
                for item in self.record[user]
                if item not in liked
            )

        df = pd.DataFrame(inters)
        # getattr: instances created without __init__ may lack ``config``.
        interaction_path = (getattr(self, "config", None) or {}).get("interaction_path")
        if interaction_path:
            df.to_csv(interaction_path, index=False)

        self.inter_df = df

    def add_user(self, user_id, N_expose, N_view, N_like, N_exit, S_sat):
        """Append one row of satisfaction counters for ``user_id``."""
        self.user_data["user"].append(user_id)
        self.user_data["N_expose"].append(N_expose)
        self.user_data["N_view"].append(N_view)
        self.user_data["N_like"].append(N_like)
        self.user_data["N_exit"].append(N_exit)
        self.user_data["S_sat"].append(S_sat)

    def add_review(self, user_id, rating, feelings):
        """Record a free-text review (rating plus feelings) for ``user_id``."""
        self.rating_feeling["User"].append(user_id)
        self.rating_feeling["Rating"].append(rating)
        self.rating_feeling["Feelings"].append(feelings)

    def satisfaction_metrics(self):
        """
        Average the rows collected by ``add_user``.

        Returns:
            dict with P_view, N_like, P_like, N_exit and S_sat, or ``None``
            when no row has been recorded yet.
        """
        sm_df = pd.DataFrame(self.user_data)
        if len(sm_df) == 0:
            return None  # no data yet

        view_ratio = sm_df["N_view"] / sm_df["N_expose"]
        like_ratio = sm_df["N_like"] / sm_df["N_expose"]
        return {
            "P_view": view_ratio.mean(),
            "N_like": sm_df["N_like"].mean(),
            "P_like": like_ratio.mean(),
            "N_exit": sm_df["N_exit"].mean(),
            "S_sat": sm_df["S_sat"].mean(),
        }

    def get_entropy(
        self,
    ):
        """Mean genre entropy of the recorded items over all users (0.0 when there are none)."""
        if not self.record:
            return 0.0  # avoid dividing by zero when no user is registered
        tot_entropy = 0
        for inters in self.record.values():
            genres = self.data.get_genres_by_id(inters)
            tot_entropy += calculate_entropy(genres)

        return tot_entropy / len(self.record)

    def calculate_user_metrics(
        self, user_id, sim_recommended, all_items, threshold = 3):
        """
        Evaluate precision, recall, accuracy and F1 for a single user.

        Args:
            user_id: key into ``self.data.interrating``.
            sim_recommended: items the simulated user picked.
            all_items: candidate universe; ground-truth items outside it are ignored.
            threshold: minimum ground-truth rating that counts as relevant.

        Returns:
            tuple ``(precision, recall, accuracy, f1)``; all zeros when the
            user has no ground truth (the same shape as the normal case, so
            callers can always unpack it).
        """
        if user_id not in self.data.interrating:
            print("User not found in interrating")
            return 0.0, 0.0, 0.0, 0.0

        all_items = set(all_items)
        sim_recommended = set(sim_recommended)
        gt_relevant = {
            item
            for item, rating in self.data.interrating[user_id]
            if rating >= threshold and item in all_items
        }

        TP = len(gt_relevant & sim_recommended)
        FP = len(sim_recommended - gt_relevant)
        FN = len(gt_relevant - sim_recommended)
        TN = len(all_items - (gt_relevant | sim_recommended))

        precision = TP / (TP + FP) if (TP + FP) else 0.0
        recall = TP / (TP + FN) if (TP + FN) else 0.0
        f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0.0
        accuracy = (TP + TN) / len(all_items) if all_items else 0

        print("precision:", precision, "recall:", recall, "accuracy:", accuracy, "f1:", f1)
        return precision, recall, accuracy, f1

    def precisionandrecallk(
        self, user_id, recommended, k):
        """
        Precision@k and recall@k of ``recommended`` (de-duplicated, order kept)
        against the user's ground-truth items rated 3 or higher.

        Returns:
            tuple ``(precision_at_k, recall_at_k)``; ``(0.0, 0.0)`` when the
            user has no ground truth.
        """
        if user_id not in self.data.interrating:
            return 0.0, 0.0

        sim_recommended = list(dict.fromkeys(recommended))
        gt_relevant = set(item for item, rating in self.data.interrating[user_id] if rating >= 3)
        hits = sum(1 for item in sim_recommended[:k] if item in gt_relevant)
        precision_at_k = hits / k
        recall_at_k = hits / len(gt_relevant) if gt_relevant else 0
        return precision_at_k, recall_at_k

    def calculation_of_rating(self, user_id, item_id, book_rating):
        """
        Pair a simulated rating with the user's ground-truth rating.

        Returns:
            ``(ground_truth_rating, book_rating)``; the ground truth is 0 when
            the user or the item has no recorded rating.
        """
        for itm, rating in self.data.interrating.get(user_id, ()):
            if itm == item_id:
                return (rating, book_rating)

        # If not found
        return (0, book_rating)

    def calc_mse_rmse_rating_percentages(self, rating_pairs):
        """
        Compare ground-truth and predicted ratings.

        Args:
            rating_pairs: list of ``(ground_truth, predicted)`` pairs; pairs
                whose ground truth is 0 are ignored.

        Returns:
            ``(mse, rmse, gt_pct, pred_pct, loglik, avg_loglik, spearman_rho)``
            where the ``*_pct`` dicts give the share (in %) of each rating
            1..5; ``(None, None, {}, {}, None, None, None)`` when no pair is left.
        """
        print("Incoming rating_pairs:", rating_pairs[:20])  # show first 20 pairs
        print("Total pairs:", len(rating_pairs))

        # Remove pairs without a ground-truth rating (marked by 0).
        filtered_pairs = [(gt, pred) for gt, pred in rating_pairs
                          if int(gt) != 0]

        print("After filtering:", filtered_pairs[:20])
        print("Remaining pairs:", len(filtered_pairs))

        if not filtered_pairs:
            # No valid data after filtering
            return None, None, {}, {}, None, None, None

        # Convert ratings to int
        gt = [int(gt) for gt, pred in filtered_pairs]
        pred = [int(pred) for gt, pred in filtered_pairs]
        mse = np.mean([(g - p) ** 2 for g, p in zip(gt, pred)])
        rmse = np.sqrt(mse)
        loglike, ob_loglike = self.ordered_probit_loglik(gt, pred)
        rho, p_value = spearmanr(gt, pred)

        gt_count = Counter(gt)
        pred_count = Counter(pred)
        total = len(filtered_pairs)

        gt_pct = {r: gt_count.get(r, 0) / total * 100 for r in range(1, 6)}
        pred_pct = {r: pred_count.get(r, 0) / total * 100 for r in range(1, 6)}
        return mse, rmse, gt_pct, pred_pct, loglike, ob_loglike, rho


In [None]:
from termcolor import colored, cprint
import pandas as pd
import os
import os.path as op
import json
import asyncio

import time
import re
import numpy as np
import pickle
import nest_asyncio
from concurrent.futures import ThreadPoolExecutor
from sklearn.metrics import mean_squared_error, mean_absolute_error

nest_asyncio.apply()

class Arena(abstract_arena):
    def __init__(self, args):
        """
        Build the arena on top of the base arena and prepare the evaluation state.

        Args:
            args: run configuration; ``args.max_pages`` is read here, the rest
                is consumed by the base class constructor.
        """
        super().__init__(args)

        # Declarations only: nothing in this constructor assigns these
        # module-level names.
        global global_k_tokens, global_start_time, global_steps
        global global_last_tokens_record, global_interval
        global global_finished_users, global_finished_pages
        global global_error_cast, lock

        self.max_pages = args.max_pages
        self.finished_num = 0
        self.results_df = pd.DataFrame(columns=["User", "Predicted", "Ground Truth"])

        # Ground-truth data and the evaluator that scores against it.
        self.fulldata = evaluator_data()
        self.recommenderevaluator = EvaluatorRS(self.fulldata)
        self.pairs = []

        # One independent (initially empty) log list per avatar id.
        avatar_ids = range(self.n_avatars)
        self.user_history = {avatar: [] for avatar in avatar_ids}
        self.user_interactions = {avatar: [] for avatar in avatar_ids}
        self.user_item_list = {avatar: [] for avatar in avatar_ids}

        self.av_hits, self.av_ndcg = [], []
        self.hit, self.ndcg = [], []

    def load_additional_info(self):
        """
        Load the user-profile table and, when adverts are enabled, the advert pool.

        Always sets ``user_profile_csv``, ``add_advert`` and ``display_advert``.
        With ``args.add_advert`` truthy it additionally resets the advert
        counters and selects the advert segment named by ``args.advert_type``
        ('all', 'pop_high', 'pop_low', 'unpop_high' or 'unpop_low').
        """
        profile_csv = f'{self.data_path}/{self.dataset}/agg_top_25.csv'
        self.user_profile_csv = pd.read_csv(profile_csv)

        self.add_advert = self.args.add_advert
        self.display_advert = self.args.display_advert
        if not self.add_advert:
            return

        self.total_adverts = 0
        self.clicked_adverts = 0
        advert_pool = pd.read_pickle(f'{self.dataset}/simulation/advertisement_review.pkl')

        # The pickle stores each segment under "<segment>_rating".
        segments = {
            segment: advert_pool[f'{segment}_rating']
            for segment in ('pop_high', 'pop_low', 'unpop_high', 'unpop_low')
        }
        # 'all' is the union of the four segments; later segments win on key clashes.
        everything = {}
        for adverts in segments.values():
            everything.update(adverts)
        advert_dict = {'all': everything, **segments}

        self.advert = advert_dict[self.args.advert_type]
        self.advert_word = "The best movie you should not miss in your life! "

    def initialize_all_avatars(self):
        """
        Create one ``Avatar`` per simulated id and reset the per-avatar
        evaluation containers.

        Persona descriptions and user statistics are read from
        ``{data_path}/{dataset}``; both tables are indexed with the avatar id.
        """
        super().initialize_all_avatars()

        dataset_dir = f"{self.data_path}/{self.dataset}"
        self.persona_df = pd.read_csv(f"{dataset_dir}/all_personas_description_modify.csv")
        self.user_statistic = pd.read_csv(f"{dataset_dir}/user_statistic.csv", index_col=0)

        # Avatars and the indicators tracked for each of them.
        self.avatars, self.ratings, self.new_train_dict = {}, {}, {}
        self.exit_page, self.perf_per_page = {}, {}
        self.watch, self.n_likes = {}, {}
        self.remaining_users = list(range(self.n_avatars))

        for avatar_id in self.simulated_avatars_id:
            persona = self.persona_df.loc[avatar_id]
            statistic = self.user_statistic.loc[avatar_id]
            self.avatars[avatar_id] = Avatar(self.args, avatar_id, persona, statistic)
            self.new_train_dict[avatar_id] = self.data.train_user_list[avatar_id]
            self.exit_page[avatar_id] = 0
            # Separate empty lists, one per indicator.
            self.ratings[avatar_id] = []
            self.n_likes[avatar_id] = []
            self.watch[avatar_id] = []
            self.perf_per_page[avatar_id] = []

    def page_generator(self, avatar_id):
        """
        Yield the ranked items of ``avatar_id`` one page at a time.

        Each page is the next ``self.items_per_page`` entries of
        ``self.full_rankings[avatar_id]``.  A page is produced only while its
        end offset is strictly below ``self.data.n_items``.

        NOTE(review): because of the strict comparison the final page (partial,
        or full when n_items is an exact multiple of the page size) is never
        yielded -- confirm this is intended.
        """
        page_no = 0
        while True:
            page_size = self.items_per_page
            start, stop = page_no * page_size, (page_no + 1) * page_size
            if stop >= self.data.n_items:
                return
            yield self.full_rankings[avatar_id][start:stop]
            page_no += 1

    def _record_round(self, user, round_pages, round_recommended, round_rated):
        """Append one finished browsing round of ``user`` to the evaluator's satisfaction table."""
        self.recommenderevaluator.add_user(
            user,
            len(round_recommended),                  # N_expose: entries logged under "recommended_id"
            len(round_rated),                        # N_view: entries logged under "rated_id"
            len([r for r in round_rated if r > 3]),  # N_like: logged values above 3
            len(round_pages),                        # N_exit: pages logged in the round
            5,                                       # S_sat: constant placeholder
        )

    def validate_all_avatars(self):
        """
        Run the validation of every simulated avatar concurrently, then
        aggregate the collected logs into evaluation metrics.

        Steps:
            1. run ``async_validate_one_avatar`` for each avatar on a shared
               thread pool and report avatars whose validation raised;
            2. per user: precision / recall / accuracy / F1, genre entropy and
               precision@k / recall@k for k in (1, 3, 5, 10);
            3. rating error metrics over ``self.pairs``;
            4. satisfaction metrics from the per-page logs, one row per
               browsing round (a round ends when the page number stops increasing).

        Returns:
            dict with every aggregate computed above.
        """
        # NOTE(review): this binds a local name, not the module-level
        # ``global_start_time`` -- confirm whether the global was meant.
        global_start_time = time.time()
        print("global start time", global_start_time)

        evaluator = self.recommenderevaluator
        ks = (1, 3, 5, 10)
        precisions, recalls, accuracies, f1s = [], [], [], []
        read = []
        precision_scores = {k: [] for k in ks}
        recall_scores = {k: [] for k in ks}
        self.start_time = time.time()

        loop = asyncio.get_event_loop()
        t1 = time.time()
        # The context manager shuts the worker threads down once all avatars are done.
        with ThreadPoolExecutor(max_workers=100) as executor:
            async def _run_all():
                # gather() wraps the coroutines in tasks itself; asyncio.wait()
                # rejects bare coroutines on Python 3.11+.  return_exceptions
                # lets the remaining avatars finish when one of them fails.
                return await asyncio.gather(
                    *(self.async_validate_one_avatar(avatar_id, loop, executor)
                      for avatar_id in self.simulated_avatars_id),
                    return_exceptions=True,
                )
            outcomes = loop.run_until_complete(_run_all())
        t2 = time.time()
        print(f"Time cost: {t2-t1}s")
        for avatar_id, outcome in zip(self.simulated_avatars_id, outcomes):
            if isinstance(outcome, BaseException):
                print(f"Avatar {avatar_id} failed during validation: {outcome!r}")

        # --- per-user classification metrics -------------------------------
        for user_id, read_books in self.user_history.items():
            all_item = list(evaluator.interaction_dict.get(user_id, set()))
            precision, recall, accuracy, f1 = evaluator.calculate_user_metrics(user_id, read_books, all_item)
            precisions.append(precision)
            recalls.append(recall)
            accuracies.append(accuracy)
            f1s.append(f1)
            read.append(get_entropy(read_books, evaluator.data))
            for k in ks:
                precisonk, recallk = evaluator.precisionandrecallk(user_id, read_books, k)
                print(f"Precision@{k}: {precisonk}, Recall@{k}: {recallk}")
                precision_scores[k].append(precisonk)
                recall_scores[k].append(recallk)

        mse, rmse, gt_pct, pred_pct, loglike, ob_loglike, spearman = evaluator.calc_mse_rmse_rating_percentages(self.pairs)

        def _mean(values):
            return sum(values) / len(values) if values else 0

        avg_precision = _mean(precisions)
        avg_recall = _mean(recalls)
        avg_accuracy = _mean(accuracies)
        avg_f1 = _mean(f1s)

        # --- satisfaction metrics, one row per browsing round --------------
        for user, logs in evaluator.user_directory.items():
            prev_page = 0
            round_pages, round_recommended, round_rated = [], [], []

            for entry in logs:
                page = entry["page"]
                # A page number that does not increase marks the start of a new round.
                if page <= prev_page:
                    # Skip the flush when nothing was collected yet (first
                    # entry with page <= 0); an empty round would add an
                    # all-zero row and a 0/0 ratio.
                    if round_pages:
                        self._record_round(user, round_pages, round_recommended, round_rated)
                    round_pages, round_recommended, round_rated = [], [], []
                round_pages.append(page)
                round_recommended.extend(entry["recommended_id"])
                round_rated.extend(entry["rated_id"])
                prev_page = page
            # Save the last round after the loop.
            if round_pages:
                self._record_round(user, round_pages, round_recommended, round_rated)

        metrics = evaluator.satisfaction_metrics()
        print("Total Satisfaction Metrics (Pview, Nlike, Plike, Nexit, Ssat):", metrics)

        # --- averages of the @k metrics ------------------------------------
        precision_at_k, recall_at_k = {}, {}
        for k in ks:
            mavg_precision = float(np.mean(precision_scores[k])) if precision_scores[k] else 0.0
            mavg_recall = float(np.mean(recall_scores[k])) if recall_scores[k] else 0.0
            precision_at_k[k] = mavg_precision
            recall_at_k[k] = mavg_recall
            print(f"Average Precision@{k}: {mavg_precision:.4f}, Average Recall@{k}: {mavg_recall:.4f}")

        # Hand every aggregate back to the caller instead of discarding it.
        return {
            "time_cost": t2 - t1,
            "precision": avg_precision,
            "recall": avg_recall,
            "accuracy": avg_accuracy,
            "f1": avg_f1,
            "entropy": _mean(read),
            "precision_at_k": precision_at_k,
            "recall_at_k": recall_at_k,
            "mse": mse,
            "rmse": rmse,
            "gt_pct": gt_pct,
            "pred_pct": pred_pct,
            "loglik": loglike,
            "avg_loglik": ob_loglike,
            "spearman": spearman,
            "satisfaction": metrics,
        }

    async def async_validate_one_avatar(self, avatar_id, loop, executor):
        """
        async
        validate the effectiveness of the model for one avatar
        avatar_id: the id of the simulated avatar
        """
        avatar_ = self.avatars[avatar_id]
        train_list, val_list, test_list = self.data.train_user_list[avatar_id], self.data.valid_user_list[avatar_id], self.data.test_user_list[avatar_id]

        # Take the union for calculating precision.
        all_items = list(range(self.data.n_items))
        observed_items = list(set(train_list) | set(val_list) | set(test_list))
        selection_candidates = list(set(val_list) | set(test_list))
        unobserved_items = list(set(all_items) - set(observed_items))
        # Pick 5 randomly from the test_list.
        min_val = min(len(selection_candidates), 10//(self.val_ratio+1))


        test_observed_items = np.random.choice(selection_candidates, int(min_val), replace=False)
        print("x = ", len(test_observed_items))
        test_unobserved_items = np.random.choice(unobserved_items, int(min_val*self.val_ratio), replace=False)
        print("y = ", len(test_unobserved_items))

        forced_items_ids = np.concatenate((test_observed_items, test_unobserved_items))
        print("x + y = ", len(forced_items_ids))
        # Randomly shuffle.
        np.random.shuffle(forced_items_ids)

        forced_items = [self.movie_detail.loc[idx] for idx in forced_items_ids]

        truth_tmp = [self.movie_detail.loc[idx] for idx in test_observed_items]
        truth_list = ["<- Movie Title: " + item.title + " ->"
                            + " <- History ratings:" + str(round(item.rating, 2)) + " ->" + "\n"
                            for item in truth_tmp]
        truth_str = ''.join(truth_list)
        cprint(truth_str, color='white', attrs=['bold'])

        recommended_items = ["<- Movie Title: " + item.title + " ->"
                            + " <- History ratings:" + str(round(item.rating, 2)) + " ->" + "\n"
                            for item in forced_items]
        recommended_items_str = ''.join(recommended_items)

        response = await loop.run_in_executor(executor, avatar_.reaction_to_forced_items, recommended_items_str)
        cprint(response, color='yellow', attrs=None)

        # pattern = re.compile(r'BOOKT:\s*(.*?)\s* BOOKA:\s*(.*?)\s* BOOKP:\s*(.*?)\s* WATCH:\s*(.*?)\s* REASON:\s*(.*?)\s*')
        pattern = re.compile(r'''MOVIE:\s*(.*?)\s*;\s*WATCH:\s*(.*?)\s*;\s*REASON:\s*(.*?)(?:;\s*|$)(?:RATING:\s*(\d+)\s*;\s*)?''', re.VERBOSE | re.DOTALL)
        matches = re.findall(pattern, response)
        # pattern = re.compile(r'BOOK:\s*(.*?)\s* READ:\s*(.*?);\s*REASON:\s*(.*?)(?:\n|$)\s*RATING:\s*(.*?')
        # matches = re.findall(pattern, response)

        # print(matches[:len(forced_items)])
        # watched_movies = [(movie_title.strip(';')) for movie_title, watch, reason in matches if (watch.strip(';') == 'yes')]
        like_movies = [(idx, book.strip(';'), rating) for idx, (book, read, reason, rating) in enumerate(matches[:len(forced_items)]) if read.strip().lower() == 'yes']
        #..like_movies = [(idx, f"{book.strip(';')}") for idx, (book, read, reason) in enumerate(matches[:len(forced_items)]) if (read.strip(';') == 'yes' or read.strip(';') == 'Yes')]
        like_movies_ids = [(forced_items_ids[idx], rating) for idx, movie_title, rating in like_movies]
        ids_with_rat = [(movie_title, rating) for idx, movie_title, rating in like_movies]

        #... print("Like Movie IDs: ", like_movies_ids)
        #... print("IDs with Rating: ", ids_with_rat)
        # like_movies_ids = [forced_items_ids[idx] for idx, movie_title in like_movies]
        # Create DataFrame with list of tuples in the Predicted column
                    rating_ids = info_on_page["watch_id"]
            ratings_na = info_on_page["rating_id"]
            ratings = info_on_page["rating"]
            self.user_item_list[avatar_id].extend(valid_indices)
            self.recommenderevaluator.update_user_interactions(avatar_id, info_on_page['align_id'])
            if len(rating_ids) == len(ratings_na) == len(ratings):
               for idx, (rid, na, rt) in enumerate(zip(rating_ids, ratings_na, ratings)):
                   self.pairs.append(self.recommenderevaluator.calculation_of_rating(avatar_id, rid, rt))
                   self.user_interactions[avatar_id].extend([rid])
                   self.user_history[avatar_id].extend([rid])
            else:
               print("Rating IDs: ", rating_ids)
               print("Ratings NA: ", ratings_na)
               print("Ratings: ", ratings)
               print("Error: Lengths of rating_ids, ratings_na, and ratings do not match.")

               self.recommenderevaluator.add_interaction(agent_id, i, info_on_page['recommended_id'], info_on_page["rating"])

                       for user_id in self.user_interactions:
            # print(f"User {user_id} interactions: {self.user_interactions[user_id]}")
            self.hit.append([self.recommenderevaluator.data.hit_at_k(self.user_interactions[user_id], self.user_item_list[user_id], a) for a in range(1, 11)])
            self.ndcg.append([self.recommenderevaluator.data.ndcg_at_k(self.user_interactions[user_id], self.user_item_list[user_id], b) for b in range(1, 11)])
            mean_hits = np.mean(np.array(self.hit), axis=0)
            mean_ndcg = np.mean(np.array(self.ndcg), axis=0)
            self.av_hits.append(mean_hits)
            self.av_ndcg.append(mean_ndcg)
            print(" ".join([f"HIT@{k}: {mean_hits[k-1]:.4f}" for k in range(1, 11)]))
            print(" ".join([f"NDCG@{k}: {mean_ndcg[k-1]:.4f}" for k in range(1, 11)]))
            self.hit = []
            self.ndcg = []
        avatar_df = pd.DataFrame(columns=["User", "Predicted"])

        # Add avatar_id as the 'User' column and the list of tuples to the 'Predicted' column
        avatar_df["User"] = [avatar_id]
        avatar_df["Predicted"] = [like_movies_ids]

        # Append the new avatar's data to the results DataFrame
        self.results_df = pd.concat([self.results_df, avatar_df], ignore_index=True)


        pred = np.array([1 if idx in like_movies_ids else 0 for idx in forced_items_ids])
        true = np.array([1 if idx in test_observed_items else 0 for idx in forced_items_ids])

        # Calculate precision.
        precision = get_precision(true, pred)

        # Calculate recall.
        recall = get_recall(true, pred)

        accuracy = get_accuracy(true, pred)

        f1 = get_f1(true, pred)


        # RMSE
        rmse = np.sqrt(mean_squared_error(true, pred))


        # MAE
        mae = mean_absolute_error(true, pred)

        self.precision_list.append(precision)
        self.recall_list.append(recall)
        self.accuracy_list.append(accuracy)
        self.f1_list.append(f1)
        self.rmse_list.append(rmse)
        self.mae_list.append(mae)

        global_finished_users += 1

    def simulate_all_avatars(self):
        """
        Execute the simulation for all avatars and print the aggregate evaluation.

        In ``'serial'`` mode every id in ``self.simulated_avatars_id`` is passed
        to ``simulate_one_avatar`` in turn.

        In ``'parallel'`` mode one ``async_simulate_one_avatar`` coroutine is
        scheduled for every avatar id that is present in
        ``self.data.valid_user_list``.  When they have finished, per-user
        metrics, rating statistics and per-round satisfaction statistics are
        computed through ``self.recommenderevaluator`` and printed.

        Side effects: sets ``self.start_time``.  Returns ``None``.
        """
        precisions = []
        recalls = []
        accuracies = []
        f1s = []
        read = []

        # One list of per-user scores for each cut-off k.
        top_ks = (1, 3, 5, 10)
        precision_scores = {k: [] for k in top_ks}
        recall_scores = {k: [] for k in top_ks}

        global_start_time = time.time()
        print("global start time", global_start_time)
        self.start_time = time.time()
        if self.execution_mode == 'serial':
            t1 = time.time()
            for avatar_id in self.simulated_avatars_id:
                self.simulate_one_avatar(avatar_id)
            t2 = time.time()
            print(f"Time cost: {t2-t1}s")

        elif self.execution_mode == 'parallel':

            def run_all_avatars():
                """Drive every avatar coroutine to completion on a dedicated event loop."""
                loop = asyncio.new_event_loop()
                try:
                    # The context manager shuts the worker threads down when we are done.
                    with ThreadPoolExecutor(max_workers=500) as executor:
                        avatar_ids = [avatar_id for avatar_id in self.simulated_avatars_id
                                      if avatar_id in self.data.valid_user_list]
                        if not avatar_ids:
                            return
                        coroutines = [self.async_simulate_one_avatar(avatar_id, loop, executor)
                                      for avatar_id in avatar_ids]

                        async def run_batch():
                            # gather() must be called while `loop` is running so the tasks
                            # are bound to it.  return_exceptions keeps the run best-effort:
                            # one failing avatar does not cancel the others.
                            return await asyncio.gather(*coroutines, return_exceptions=True)

                        outcomes = loop.run_until_complete(run_batch())
                        for avatar_id, outcome in zip(avatar_ids, outcomes):
                            if isinstance(outcome, BaseException):
                                print(f"Avatar {avatar_id} failed: {outcome!r}")
                finally:
                    loop.close()

            try:
                asyncio.get_running_loop()
            except RuntimeError:
                # No loop is running in this thread: run the batch right here.
                run_all_avatars()
            else:
                # A loop is already running in this thread (e.g. inside a notebook).
                # run_until_complete() cannot be nested in it, so use a helper thread.
                with ThreadPoolExecutor(max_workers=1) as runner:
                    runner.submit(run_all_avatars).result()

            for user_id in self.user_history:
                read_books = self.user_history[user_id]
                item_set = self.recommenderevaluator.interaction_dict.get(user_id, set())
                all_item = list(item_set)
                precision, recall, accuracy, f1 = self.recommenderevaluator.calculate_user_metrics(user_id, read_books, all_item)
                precisions.append(precision)
                recalls.append(recall)
                accuracies.append(accuracy)
                read_list = self.user_history[user_id]
                read.append(get_entropy(read_list, self.recommenderevaluator.data))
                f1s.append(f1)
                for k in top_ks:
                    precisonk, recallk = self.recommenderevaluator.precisionandrecallk(user_id, self.user_history[user_id], k)
                    print(f"Precision@{k}: {precisonk}, Recall@{k}: {recallk}")
                    precision_scores[k].append(precisonk)
                    recall_scores[k].append(recallk)

            # NOTE(review): the values returned here and the averages below are
            # computed but never reported or stored by this method.
            mse, rmse, gt_pct, pred_pct, loglike, ob_loglike, spearman = self.recommenderevaluator.calc_mse_rmse_rating_percentages(self.pairs)

            avg_precision = sum(precisions) / len(precisions) if precisions else 0
            avg_recall = sum(recalls) / len(recalls) if recalls else 0
            avg_accuracy = sum(accuracies) / len(accuracies) if accuracies else 0
            avg_f1 = sum(f1s) / len(f1s) if f1s else 0

            def save_round(user, pages, recommended, rated):
                """Register the statistics of one finished browsing round for `user`."""
                N_expose = len(recommended)
                N_view = len(rated)
                # NOTE: rated_id may hold item IDs rather than ratings -> adjust if ratings are available
                N_like = len([r for r in rated if r > 3])
                N_exit = len(pages)
                S_sat = 5
                self.recommenderevaluator.add_user(user, N_expose, N_view, N_like, N_exit, S_sat)

            for user, logs in self.recommenderevaluator.user_directory.items():
                prev_page = 0
                round_pages = []
                round_recommended = []
                round_rated = []

                for entry in logs:
                    page = entry["page"]

                    # A page number that does not increase marks the start of a new round.
                    if page <= prev_page:
                        save_round(user, round_pages, round_recommended, round_rated)
                        round_pages = []
                        round_recommended = []
                        round_rated = []
                    # Accumulate round data
                    round_pages.append(page)
                    round_recommended.extend(entry["recommended_id"])
                    round_rated.extend(entry["rated_id"])
                    prev_page = page
                # Save the last round after the loop
                if round_pages:
                    save_round(user, round_pages, round_recommended, round_rated)

            metrics = self.recommenderevaluator.satisfaction_metrics()
            print("Total Satisfaction Metrics (Pview, Nlike, Plike, Nexit, Ssat):", metrics)
            # Averages over users; an empty list reports 0 instead of nan (matches avg_* above).
            for k in top_ks:
                mavg_precision = np.mean(precision_scores[k]) if precision_scores[k] else 0.0
                mavg_recall = np.mean(recall_scores[k]) if recall_scores[k] else 0.0
                print(f"Average Precision@{k}: {mavg_precision:.4f}, Average Recall@{k}: {mavg_recall:.4f}")


    async def async_simulate_one_avatar(self, avatar_id, loop, executor):
        """
        Asynchronously execute the page-by-page simulation for one avatar.

        avatar_id: the id of the simulated avatar
        loop:      event loop used to off-load the blocking avatar calls
        executor:  executor handed to ``loop.run_in_executor``

        For every page the avatar's textual response is parsed, the behaviour
        is recorded in the per-avatar bookkeeping attributes on ``self`` and
        forwarded to ``self.recommenderevaluator``.  After the last page the
        avatar is interviewed and the behaviour / interview dictionaries are
        pickled under ``self.storage_base_path``.  Returns ``None``.
        """
        global global_finished_users
        global global_finished_pages
        self.hit = []
        self.ndcg = []

        start_time = time.time()
        l_start = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(start_time))
        with open(self.storage_base_path + "/system_log.txt", 'a') as f:
            f.write(f"Start: {l_start}. User {avatar_id} starts simulation.\n")

        avatar_ = self.avatars[avatar_id]
        avatar_.write_log(f"Is simulating avatar {avatar_id}")
        avatar_.exit_flag = False
        page_generator = self.page_generator(avatar_id)
        i = 0
        user_behavior_dict = {}
        user_interview_dict = {}

        # Page-independent helpers are built once instead of on every page.
        title_id_dict = dict(zip(self.movie_detail['title'], self.movie_detail['item_id']))
        pattern1 = re.compile(r'MOVIE: (.+?); RATING: (\d+); FEELING: (.*)')
        pattern2 = re.compile(r'MOVIE: (.+?); ALIGN: (.+?); REASON: (.*)')

        def said_yes(answer):
            """True when a parsed answer field is 'yes' / 'Yes' (surrounding ';' ignored)."""
            return answer.strip(';') in ('yes', 'Yes')

        while not avatar_.exit_flag:
            if i == 0:
                # The first page is sampled from this avatar's entry in valid_user_list.
                val_list = self.data.valid_user_list[avatar_id]
                id_on_page = np.random.choice(val_list,
                                              min(len(val_list), 5), replace=False)
            else:
                id_on_page = next(page_generator, [])
            i += 1
            if len(id_on_page) == 0:
                break
            valid_indices = [idx for idx in id_on_page if 0 <= idx < len(self.movie_detail)]
            if not valid_indices:
                # Nothing on this page can be displayed; treat it like an empty page.
                break

            # Build the page from the filtered ids so that movies_on_page and
            # valid_indices always have the same length and order.
            movies_on_page = [self.movie_detail.iloc[idx] for idx in valid_indices]
            recommended_items = ["<- Movie Title: " + item.title + " ->"
                            + " <- Genres: " + (',').join(list(item.genre.split('|'))) + " ->"
                            + " <- History ratings:" + str(round(item.rating, 2)) + " ->" + "\n"
                            for item in movies_on_page]

            if self.add_advert:
                store_path = f"storage/{self.dataset}/{self.modeltype}/{self.simulation_name}/adver_id"
                os.makedirs(store_path, exist_ok=True)
                # The helpers put the advert into slot 0 and return the updated id list.
                if not self.display_advert:
                    recommended_items[0], valid_indices, movies_on_page = self.display_only_adver_item(store_path, avatar_id, i, valid_indices, movies_on_page)
                else:
                    recommended_items[0], valid_indices, movies_on_page = self.display_item_with_adver(store_path, avatar_id, i, valid_indices, movies_on_page)

            recommended_items_str = ''.join(recommended_items)

            # Write down the recommended information; ground-truth items are ticked.
            avatar_.write_log(f"\n=============    Recommendation Page {i}    =============")
            for item_id, movie in zip(valid_indices, movies_on_page):
                if item_id in self.data.valid_user_list[avatar_id]:
                    avatar_.write_log(f"== (√) Movie Title: {movie.title}, History ratings: {round(movie.rating,2)}", "blue", attrs=["bold"])
                else:
                    avatar_.write_log(f"== Movie Title: {movie.title}, History ratings: {round(movie.rating,2)}")
            avatar_.write_log(f"=============          End Page {i}        =============\n")

            avatar_.write_log(f"\n==============    Avatar {avatar_.avatar_id} Response {i}   =============")

            # @ most important: wait for the avatar's response without blocking the loop.
            response = await loop.run_in_executor(executor, avatar_.reaction_to_recommended_items, recommended_items_str, i)

            #==============================================
            # @ View user's favorite items
            match1 = pattern1.findall(response)
            match2 = pattern2.findall(response)

            # The advert occupies the first slot; count a click only if it was parsed at all.
            if self.add_advert and match2 and said_yes(match2[0][1]):
                self.clicked_adverts += 1

            watched_movies = [f"{book.strip(';')}" for book, rating, feeling in match1]
            # 5 points means the movie is liked by the user.
            like_movies = [(idx, f"{book.strip(';')}", feeling.strip(';')) for idx, (book, rating, feeling) in enumerate(match1[:self.items_per_page]) if int(rating.strip(';')) == 5]
            align_movies = [(idx, f"{book.strip(';')}", reason.strip(';')) for idx, (book, align, reason) in enumerate(match2[:self.items_per_page]) if said_yes(align)]

            ground_truth = [item_id for item_id, movie in zip(valid_indices, movies_on_page) if item_id in self.data.valid_user_list[avatar_id]]

            info_on_page = {}
            info_on_page['page'] = i
            info_on_page['ground_truth'] = ground_truth
            info_on_page['recommended_id'] = valid_indices
            info_on_page['recommended'] = [(self.movie_detail['title'][idx]) for idx in valid_indices]
            info_on_page['align_id'] = [title_id_dict[title] for id, title, reason in align_movies if title in title_id_dict]
            info_on_page['like_id'] = [title_id_dict[title] for id, title, reason in like_movies if title in title_id_dict]
            info_on_page['watch_id'] = [title_id_dict[title] for title in watched_movies if title in title_id_dict]
            info_on_page['watched'] = watched_movies
            info_on_page['rating_id'] = watched_movies
            info_on_page['rating'] = [int(rating.strip(';')) for book, rating, feeling in match1]
            info_on_page['feeling'] = [feeling.strip(';') for book, rating, feeling in match1]
            user_behavior_dict[i] = info_on_page
            print("\n=======================")
            agent_id = avatar_id + 1
            print("Agent ID:", avatar_id)
            print("Agent ID: ", agent_id)
            print("User behavior:", user_behavior_dict[i])
            print("Page: ", i)
            print("Recommended item ids: ", valid_indices)
            print("+++++++++++++==========\n")
            # Extract separately
            rating_ids = info_on_page["watch_id"]
            ratings_na = info_on_page["rating_id"]
            ratings = info_on_page["rating"]
            self.user_item_list[avatar_id].extend(valid_indices)
            self.recommenderevaluator.update_user_interactions(avatar_id, info_on_page['align_id'])
            # watch_id drops titles that are not in title_id_dict, so equal lengths
            # mean every rated title was resolved and the lists line up.
            if len(rating_ids) == len(ratings_na) == len(ratings):
                for rid, rt in zip(rating_ids, ratings):
                    self.pairs.append(self.recommenderevaluator.calculation_of_rating(avatar_id, rid, rt))
                    self.user_interactions[avatar_id].extend([rid])
                    self.user_history[avatar_id].extend([rid])
            else:
                print("Rating IDs: ", rating_ids)
                print("Ratings NA: ", ratings_na)
                print("Ratings: ", ratings)
                print("Error: Lengths of rating_ids, ratings_na, and ratings do not match.")

            # @ Add new training data.
            new_train = info_on_page['align_id']
            self.new_train_dict[avatar_id].extend(new_train)

            # @ Record the average number of likes.
            self.n_likes[avatar_id].append(len(new_train))
            page_ratings = re.findall(r'RATING: (\d+);', response)
            average_rating = sum([int(rating.strip(';')) for rating in page_ratings])/max(len(watched_movies), 1)
            # Add the average score of this page.
            self.ratings[avatar_id].append(average_rating)
            self.watch[avatar_id].extend([movie for movie in watched_movies])

            # @ Calculate the precision on this page and save it:
            # (hits, number of aligned items, number of ground-truth items).
            perf = (len(set(new_train) & set(ground_truth)), len(new_train), len(ground_truth))
            self.perf_per_page[avatar_id].append(perf)
            #==============================================

            global_finished_pages += 1
            self.recommenderevaluator.add_interaction(agent_id, i, info_on_page['recommended_id'], info_on_page["rating"])

            # @ Force exit if the number of pages exceeds the maximum limit.
            if i >= self.max_pages:
                avatar_.exit_flag = True

        interview_question = "Do you feel satisfied with the recommender system you have just interacted? Rate this recommender system from 1-10 and give explanation.\n Please use this respond format: RATING: [integer between 1 and 10]; REASON: [explanation]; In RATING part just give your rating and other reason and explanation should included in the REASON part."
        # The interview call is blocking as well, so it is run in the executor
        # instead of stalling every other avatar's coroutine.
        interview_response = await loop.run_in_executor(
            executor, lambda: avatar_.response_to_question(interview_question, remember=False))
        # Extract RATING and REASON using re.
        matches_interview = re.findall(r'(?<=RATING:|REASON:).*', interview_response)
        user_interview_dict['interview'] = matches_interview
        print(matches_interview)
        self.exit_page[avatar_id] = i
        self.finished_num += 1
        self.remaining_users.remove(avatar_id)
        remaining = ", ".join([str(u) for u in self.remaining_users])
        for user_id in self.user_interactions:
            self.hit.append([self.recommenderevaluator.data.hit_at_k(self.user_interactions[user_id], self.user_item_list[user_id], a) for a in range(1, 11)])
            self.ndcg.append([self.recommenderevaluator.data.ndcg_at_k(self.user_interactions[user_id], self.user_item_list[user_id], b) for b in range(1, 11)])
            # self.hit / self.ndcg hold a single row here because they are reset below.
            mean_hits = np.mean(np.array(self.hit), axis=0)
            mean_ndcg = np.mean(np.array(self.ndcg), axis=0)
            self.av_hits.append(mean_hits)
            self.av_ndcg.append(mean_ndcg)
            print(" ".join([f"HIT@{k}: {mean_hits[k-1]:.4f}" for k in range(1, 11)]))
            print(" ".join([f"NDCG@{k}: {mean_ndcg[k-1]:.4f}" for k in range(1, 11)]))
            self.hit = []
            self.ndcg = []

        end_time = time.time()
        l_end = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(end_time))
        global_finished_users += 1
        with open(self.storage_base_path + "/system_log.txt", 'a') as f:
            f.write(f"Start: {l_start} End: {l_end}. User {avatar_id} finished after {i} pages. [{self.finished_num} / {self.n_avatars}]. Total token cost: {round(self.avatars[avatar_id].memory.user_k_tokens, 2)}k. Taking {round(time.time() - start_time, 2)}s\n")
            f.write(f"Remaining users: {remaining}\n")

        # @ Save the behavior of each individual.
        behavior_path = self.storage_base_path+ "/behavior"
        os.makedirs(behavior_path, exist_ok=True)
        with open(behavior_path + f"/{avatar_id}.pkl", 'wb') as f:
            pickle.dump(user_behavior_dict, f)

        interview_path = self.storage_base_path+ "/interview"
        os.makedirs(interview_path, exist_ok=True)
        with open(interview_path + f"/{avatar_id}.pkl", 'wb') as f:
            pickle.dump(user_interview_dict, f)

    def simulate_one_avatar(self, avatar_id):
        """
        Execute the (serial) simulation for one avatar.

        avatar_id: the id of the simulated avatar

        Pages are pulled from ``self.page_generator(avatar_id)`` until the
        generator yields an empty page or the avatar sets ``exit_flag``.  Each
        page is logged (items found in ``self.data.valid_user_list[avatar_id]``
        are ticked), the titles are sent to the avatar and its response is
        logged.  Returns ``None``.
        """
        avatar_ = self.avatars[avatar_id]
        avatar_.write_log(f"Is simulating avatar {avatar_id}")
        avatar_.exit_flag = False
        page_generator = self.page_generator(avatar_id)
        current_page = 0
        while not avatar_.exit_flag:
            id_on_page = next(page_generator, []) # get the next page, a list of item ids
            current_page += 1
            if len(id_on_page) == 0:
                break

            # Ids outside the movie table are dropped before the lookup.
            valid_indices = [idx for idx in id_on_page if 0 <= idx < len(self.movie_detail)]
            movies_on_page = [self.movie_detail.iloc[idx] for idx in valid_indices]

            avatar_.write_log("=============    Recommendation Page    =============")
            # Pair each movie with the id it was looked up by (valid_indices, not
            # id_on_page), otherwise the tick lands on the wrong item as soon as
            # one id has been filtered out.
            for item_id, movie in zip(valid_indices, movies_on_page):
                if item_id in self.data.valid_user_list[avatar_id]:
                    avatar_.write_log(f"== {movie} (√)", "blue", attrs=["bold"])
                else:
                    avatar_.write_log(f"== {movie}")
            avatar_.write_log("=============          End Page         =============")
            avatar_.write_log("")
            movie_titles = [movie['title'] for movie in movies_on_page]
            movie_to_str = ';'.join(movie_titles)
            print(movie_to_str)

            #@ most important
            response = avatar_.reaction_to_recommended_items(movie_to_str, current_page)

            avatar_.write_log("")
            avatar_.write_log("=============    Avatar Response    =============")
            avatar_.write_log(response, color='yellow', attrs=None)


    def parse_response(self, response):
        """
        Parse an avatar response and count advert clicks.

        Every ``MOVIE: ... WATCH: ... REASON: ... RATING: ... FEELING: ...``
        record found on a line of ``response`` is printed.  When adverts are
        enabled and the first record was answered with "yes",
        ``self.clicked_adverts`` is incremented.

        response: raw text returned by the avatar
        returns:  ``response`` unchanged
        """
        # `\s**` in the previous pattern was a "multiple repeat" regex error, so the
        # method raised on every call.  The last group is greedy so that FEELING
        # captures the rest of the line instead of always being empty.
        pattern = re.compile(r'MOVIE:\s*(.*?)\s*WATCH:\s*(.*?)\s*REASON:\s*(.*?)\s*RATING:\s*(.*?)\s*FEELING:\s*(.*)')
        matches = re.findall(pattern, response)

        watched_movies = []

        for idx, (movie_title, watch, reason, rating, feeling) in enumerate(matches):
            # Accept "yes", "Yes", "yes;" ... like the other response parsers in this class.
            watched = watch.strip().strip(';').strip().lower() == 'yes'
            if self.add_advert and idx == 0 and watched: # The first item is the advertisement and the user clicked on it.
                self.clicked_adverts += 1
            if watched:
                watched_movies.append(movie_title.strip(';'))
            print(movie_title, watch, reason, rating, feeling)
        return response

    def display_only_adver_item(self, store_path, avatar_id, i, id_on_page, movies_on_page):
        """
        Replace the first item of the page with a previously recorded advert item.

        The advert id is read from ``avatar{avatar_id}_{i}.txt`` inside
        ``store_path``; when that record is missing or unreadable, the records
        of up to four earlier pages are tried, nearest page first.

        store_path:     directory holding the per-page advert records
        avatar_id:      id of the simulated avatar
        i:              current page number
        id_on_page:     item ids on the page; slot 0 is overwritten in place
        movies_on_page: movie rows on the page; slot 0 is overwritten in place

        returns: (text describing the advert item, id_on_page, movies_on_page)
        raises:  FileNotFoundError if none of the five records can be read
        """
        random_key = None
        # Always join against the directory.  The previous code re-used the file
        # path as the base, so none of the fall-back records could ever be found.
        for offset in range(5):
            record_path = op.join(store_path, f"avatar{avatar_id}_{i - offset}.txt")
            try:
                with open(record_path, 'r') as f:
                    random_key = int(f.read())
                break
            except (OSError, ValueError):
                # Missing or malformed record: fall back to the previous page.
                continue
        if random_key is None:
            raise FileNotFoundError(
                f"No advert record found for avatar {avatar_id} on pages {i - 4}..{i} in {store_path}")

        self.total_adverts += 1
        id_on_page[0] = random_key
        movies_on_page[0] = self.movie_detail.loc[random_key]
        adver_information = self.advert[random_key]

        return ( "<- " + adver_information['title'] + " ->"
                                + " <- History ratings:" + str(round(adver_information['rating'], 2)) + " ->"
                                + " <- Summary:" + adver_information['summary'] + " ->" + "\n"), id_on_page, movies_on_page

    def display_item_with_adver(self, store_path, avatar_id, i, id_on_page, movies_on_page):
        """
        Put a randomly drawn advert into the first slot of the page and record it.

        A key is drawn at random from ``self.advert``; slot 0 of ``id_on_page``
        and ``movies_on_page`` is overwritten in place, and the key is written
        to ``avatar{avatar_id}_{i}.txt`` inside ``store_path``.

        returns: (advert text prefixed with ``self.advert_word``, id_on_page,
                  movies_on_page)
        """
        record_file = op.join(store_path, f"avatar{avatar_id}_{i}.txt")
        advert_item_id = np.random.choice(list(self.advert.keys()))
        self.total_adverts += 1
        chosen_advert = self.advert[advert_item_id]
        id_on_page[0] = advert_item_id
        movies_on_page[0] = self.movie_detail.loc[advert_item_id]

        # Record the draw for this avatar and page.
        with open(record_file, 'w') as record:
            record.write(f"{advert_item_id}")

        advert_text = "".join([
            self.advert_word,
            "<- ", chosen_advert['title'], " ->",
            "<- ", chosen_advert['review'], " ->",
            " <- History ratings:", str(round(chosen_advert['rating'], 2)), " ->",
            " <- Summary:", chosen_advert['summary'], " ->", "\n",
        ])
        return advert_text, id_on_page, movies_on_page

    def save_results(self):
        """
        Print the overall simulation statistics and save them to disk.

        Writes the newly collected training interactions to ``train.txt`` under
        ``storage/<dataset>/<modeltype>/<simulation_name>/`` and a summary to
        ``metrics.txt`` under ``self.storage_base_path``.  Likes, ratings,
        click rate, exit pages, precision/recall and (when enabled) advert
        statistics are also printed with ``cprint``.
        """
        def save_user_dict_to_txt(user_dict, base_path, filename):
            """Write one line per user: the user id followed by the item ids."""
            with open(base_path + filename, 'w') as f:
                for u, v in user_dict.items():
                    f.write(str(int(u)))
                    for i in v:
                        f.write(' ' + str(int(i)))
                    f.write('\n')

        save_path = f"storage/{self.dataset}/{self.modeltype}/{self.simulation_name}/"
        save_user_dict_to_txt(self.new_train_dict, save_path, 'train.txt')

        # @ Save overall evaluation indicators.
        # Average number of likes per user
        cprint("Number of likes", color='green', attrs=['bold'])
        cprint(self.n_likes, color='green', attrs=['bold'])
        average_n_likes = {avatar_id:np.mean(n_likes) for avatar_id, n_likes in self.n_likes.items()}
        cprint(average_n_likes, color='green', attrs=['bold'])

        overall_n_likes = np.mean(list(average_n_likes.values()))
        cprint(f"\nOverall number of likes: {overall_n_likes}", color='green', attrs=['bold'])

        # Average rating per user
        cprint("\nRatings", color='green', attrs=['bold'])
        cprint(self.ratings, color='green', attrs=['bold'])
        average_ratings = {avatar_id:np.mean(ratings) for avatar_id, ratings in self.ratings.items()}
        cprint(average_ratings, color='green', attrs=['bold'])

        # @ Average click-through rate: watched items over the items that could have been shown.
        average_click_rate = {avatar_id:len(movies)/(self.max_pages*self.items_per_page) for avatar_id, movies in self.watch.items()}
        cprint(f"\nAverage click rate: {average_click_rate}", color='green', attrs=['bold'])
        overall_click_rate = np.mean(list(average_click_rate.values()))
        # The value is the click rate, so label it as such (it used to say "satisfaction").
        cprint(f"\nOverall click rate: {overall_click_rate}", color='green', attrs=['bold'])

        # Average exit page
        mean_exit_page = np.mean(list(self.exit_page.values()))
        cprint("\nExit pages", color='green', attrs=['bold'])
        cprint(self.exit_page, color='green', attrs=['bold'])
        cprint(f"Average exit page: {mean_exit_page}", color='green', attrs=['bold'])

        # Average precision and recall.
        # Each per-page entry is (hits, number of predicted items, number of ground-truth items).
        cprint("\nPrecision and recall", color='green', attrs=['bold'])
        cprint(self.perf_per_page, color="green", attrs=['bold'])
        total_perf = {avatar_id:[sum([i for i, j, k in perf_per_page]), sum([j for i, j, k in perf_per_page]), sum([k for i, j, k in perf_per_page])] for avatar_id, perf_per_page in self.perf_per_page.items()}
        # (precision, recall) per avatar; max(..., 1) avoids dividing by zero.
        total_precision_recall = {avatar_id:(perf[0]/max(perf[1], 1), perf[0]/max(perf[2], 1)) for avatar_id, perf in total_perf.items()}
        cprint(total_perf, color="green", attrs=['bold'])
        cprint(total_precision_recall, color="green", attrs=['bold'])
        average_precision = np.mean([metrics[0] for avatar_id, metrics in total_precision_recall.items()])
        average_recall = np.mean([metrics[1] for avatar_id, metrics in total_precision_recall.items()])
        cprint(f"Precision: {average_precision}  Recall: {average_recall}", color="green", attrs=['bold'])
        total_k_tokens = sum([self.avatars[i].memory.user_k_tokens for i in range(self.n_avatars)])

        # Effective advertising rate; stays 0.0 when no advert was shown instead of
        # raising ZeroDivisionError (which also left metrics.txt half written).
        advert_click_rate = 0.0
        if self.add_advert:
            if self.total_adverts:
                advert_click_rate = self.clicked_adverts / self.total_adverts
            cprint("\nAdvert", color='green', attrs=['bold'])
            cprint(f"Total advert: {self.total_adverts}", color='green', attrs=['bold'])
            cprint(f"Clicked advert: {self.clicked_adverts}", color='green', attrs=['bold'])
            cprint(f"Advert click rate: {advert_click_rate}", color='green', attrs=['bold'])

        end_time = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time()))
        with open(self.storage_base_path + "/metrics.txt", 'w') as f:
            f.write(f"Finished time: {end_time}\n")
            f.write(f"Total simulation time: {round(time.time() - self.start_time, 2)}s\n")
            f.write(f"n_avatars: {self.n_avatars}\n")
            f.write(f"Average recall: {average_recall}\n")
            # NOTE(review): "presion" is a typo for "precision"; kept as is in case
            # something parses metrics.txt by this label.
            f.write(f"Average presion: {average_precision}\n")
            f.write(f"Total k tokens: {round(total_k_tokens, 2)}k tokens\n")
            f.write(f"Total cost: {round(total_k_tokens*0.0018, 2)} \n")
            f.write(f"Maximum exit page: {self.max_pages}\n")
            f.write(f"Overall click rate: {overall_click_rate}\n")
            f.write(f"Average number of likes: {overall_n_likes}\n")
            f.write(f"Average exit page: {mean_exit_page}\n")
            if(self.add_advert):
                f.write(f"Total advert: {self.total_adverts}\n")
                f.write(f"Clicked advert: {self.clicked_adverts}\n")
                f.write(f"Advert click rate: {advert_click_rate}\n")