### user_history_item_feat

In [None]:
import numpy as np
import torch
import torch.nn.functional as F
import pandas as pd
from tqdm import tqdm

# Name of the dataset folder; every path below is resolved under ./<dataset>/.
dataset = 'baby'
inter_data = pd.read_csv('./{}/{}.inter'.format(dataset, dataset), sep='\t')

# Pre-extracted item features, one row per item (row index is used as itemID
# when these matrices are indexed by history lists later on).
text_feat = np.load('./{}/text_feat.npy'.format(dataset))
image_feat = np.load('./{}/image_feat.npy'.format(dataset))

# user id -> list of item ids from rows whose x_label is 0
# (presumably the training split -- confirm against the dataset's README).
# Every user that appears in the file gets an entry, even if the list stays
# empty because none of its rows have x_label == 0.
user_histories = {}

interaction_columns = zip(
    inter_data['userID'], inter_data['itemID'], inter_data['x_label']
)
for raw_user, raw_item, split_label in interaction_columns:
    items_of_user = user_histories.setdefault(int(raw_user), [])
    if split_label == 0:
        items_of_user.append(int(raw_item))

user_num = len(user_histories)
item_num = text_feat.shape[0]

# Row-wise L2 normalization of both modalities, done in float32 torch tensors.
image_tensor = torch.tensor(image_feat, dtype=torch.float32)
text_tensor = torch.tensor(text_feat, dtype=torch.float32)
norm_item_features_image = F.normalize(image_tensor, p=2, dim=1)
norm_item_features_text = F.normalize(text_tensor, p=2, dim=1)

In [3]:
# Build one averaged (L2-normalized) item-feature vector per user for each
# modality and save the two resulting (user_num, feat_dim) matrices.
user_aver_image_history = []
user_aver_text_history = []

# Placeholder rows for users without any history item. Appending a zero
# vector (instead of skipping the user) keeps row index == user id in the
# saved matrices, so later look-ups by user id stay aligned.
empty_image_feat = torch.zeros(norm_item_features_image.shape[1])
empty_text_feat = torch.zeros(norm_item_features_text.shape[1])

for i in range(user_num):
    history_items = user_histories[i]

    if not history_items:
        print(f'user {i} no history')
        user_aver_image_history.append(empty_image_feat)
        user_aver_text_history.append(empty_text_feat)
        continue

    # Mean over the user's history items -> one vector per modality.
    history_image_feats = norm_item_features_image[history_items]
    history_text_feats = norm_item_features_text[history_items]

    user_aver_image_history.append(torch.mean(history_image_feats, dim=0))
    user_aver_text_history.append(torch.mean(history_text_feats, dim=0))

user_aver_text_history = torch.stack(user_aver_text_history)
user_aver_image_history = torch.stack(user_aver_image_history)

# Save the stacked averages. The original cell passed the undefined names
# user_his_text_emb / user_his_image_emb here, which raised NameError.
np.save('./{}/{}_user_aver_history_item_text.npy'.format(dataset, dataset), user_aver_text_history.numpy())
np.save('./{}/{}_user_aver_history_item_image.npy'.format(dataset, dataset), user_aver_image_history.numpy())

### user_similarity_preference

In [7]:
import numpy as np
import pandas as pd
from collections import defaultdict
import matplotlib.pyplot as plt
import random
import os
from scipy import stats
from sklearn.metrics.pairwise import cosine_similarity
random.seed(999)

In [9]:
def average_similarity(similarity_matrix):
    """Return the mean of the off-diagonal entries of a similarity matrix.

    The diagonal (self-similarity) is excluded and NaN entries are ignored.
    The input is left untouched; integer matrices are accepted.

    Args:
        similarity_matrix: 2-D array-like of pairwise similarities.

    Returns:
        The mean off-diagonal similarity as a float, or NaN when there is no
        off-diagonal entry (fewer than two items).
    """
    # Float view/copy: never write into the caller's array, and allow integer
    # input (NaN cannot be stored in an integer matrix).
    sim = np.asarray(similarity_matrix, dtype=float)
    off_diagonal = ~np.eye(*sim.shape, dtype=bool)

    if not off_diagonal.any():
        # A single item has no pairwise similarity; return NaN directly
        # rather than letting nanmean warn about an empty slice.
        return np.float64(np.nan)

    return np.nanmean(sim[off_diagonal])

# Name of the dataset folder; every path below is resolved under ./<dataset>/.
dataset = 'baby'

text_feat = np.load('./{}/text_feat.npy'.format(dataset))
image_feat = np.load('./{}/image_feat.npy'.format(dataset))

# Row-wise L2 normalization. The norm is clamped from below so that an
# all-zero feature row stays all-zero instead of becoming NaN (0 / 0); this
# is the same guard torch.nn.functional.normalize applies with its default
# eps. Rows with a non-zero norm are divided by exactly that norm, as before.
norm_eps = 1e-12
image_norm = np.maximum(np.linalg.norm(image_feat, axis=1, keepdims=True), norm_eps)
text_norm = np.maximum(np.linalg.norm(text_feat, axis=1, keepdims=True), norm_eps)

image_feat = image_feat / image_norm
text_feat = text_feat / text_norm

all_data = pd.read_csv('./{}/{}.inter'.format(dataset, dataset),  sep="\t")
# Rows with x_label == 0 are the ones treated as training interactions here.
train_data = all_data[all_data['x_label']==0]

# user id -> list of item ids, in file order. A defaultdict is used so that
# looking up a user without any training row yields an empty list.
train_user_inter = defaultdict(list)

# Column-wise iteration avoids building a Series per row (DataFrame.iterrows).
for raw_user, raw_item in zip(train_data["userID"], train_data["itemID"]):
    train_user_inter[int(raw_user)].append(int(raw_item))

In [10]:
# Per-user "preference consistency": the average pairwise cosine similarity
# between the items a user interacted with in the training rows, computed
# separately for the image and the text modality, then saved as 1-D arrays.

# user_num was never assigned in this notebook section (NameError). Derive it
# from the interaction file; ids are used as 0-based positions below, so the
# count is max id + 1.
user_num = int(all_data['userID'].max()) + 1

# Feature rows of each user's training items, indexed by user id.
train_user_inter_image_feat = [image_feat[train_user_inter[i]] for i in range(user_num)]
train_user_inter_text_feat = [text_feat[train_user_inter[i]] for i in range(user_num)]

# user id -> average similarity (only users that have training history).
user_train_inter_sim_image = {}
user_train_inter_sim_text = {}

# Position i holds the value of user i. Users without history get NaN so the
# saved arrays stay aligned with user ids instead of silently shifting every
# later user by one position.
user_train_inter_sim_image_list = []
user_train_inter_sim_text_list = []

for i in range(user_num):
    if not train_user_inter[i]:
        print(f'user {i} no his')
        user_train_inter_sim_image_list.append(np.nan)
        user_train_inter_sim_text_list.append(np.nan)
        continue

    train_inter_per_user_image_feat = train_user_inter_image_feat[i] # (inter_num, modal_dim)
    train_inter_per_user_text_feat = train_user_inter_text_feat[i]

    # (inter_num, inter_num) pairwise similarities between the user's items.
    train_image_sim_matrix = cosine_similarity(train_inter_per_user_image_feat)
    train_text_sim_matrix = cosine_similarity(train_inter_per_user_text_feat)

    # Mean over item pairs, self-similarity excluded.
    aver_image_sim = average_similarity(train_image_sim_matrix)
    aver_text_sim = average_similarity(train_text_sim_matrix)

    user_train_inter_sim_image[i] = aver_image_sim
    user_train_inter_sim_text[i] = aver_text_sim

    user_train_inter_sim_image_list.append(aver_image_sim)
    user_train_inter_sim_text_list.append(aver_text_sim)

user_his_image_pre = np.array(user_train_inter_sim_image_list)
user_his_text_pre = np.array(user_train_inter_sim_text_list)

np.save('./{}/{}_user_history_image_pre.npy'.format(dataset,dataset), user_his_image_pre)
np.save('./{}/{}_user_history_text_pre.npy'.format(dataset,dataset), user_his_text_pre)