In [None]:
import collections
import pickle
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from matrix_factorization import MF, NCF
from utils import *
from sklearn.metrics import roc_auc_score
import time
# Seed NumPy's and PyTorch's global random number generators with one shared
# value so that the random draws made later in this notebook repeat across runs.
SEED = 2020
np.random.seed(SEED)
torch.manual_seed(SEED)

def mse_func(x, y):
    """Return the mean squared error between ``x`` and ``y``.

    The difference ``x - y`` is squared element-wise and averaged over all
    elements with ``np.mean``.
    """
    squared_error = (x - y) ** 2
    return np.mean(squared_error)
# Load the raw ratings file. A multi-character separator is only handled by
# pandas' Python parsing engine, so it is requested explicitly rather than
# relying on the implicit fallback (which emits a ParserWarning).
rdf = pd.read_csv(
    './ratings.dat',
    sep='::',
    names=["user_id", "item_id", "rating", "timestamp"],
    engine='python',
)
# Binarise the rating column: ratings above 3 become 1, all others become 0.
# The column is assigned in one step because the previous chained indexing
# (`rdf.iloc[:, 2][mask] = ...`) writes to an intermediate object, which
# triggers SettingWithCopyWarning and has no effect under pandas Copy-on-Write.
# The original column dtype is kept so the resulting array dtype is unchanged.
rdf["rating"] = (rdf["rating"] > 3).astype(rdf["rating"].dtype)
# Shift the user and item ids (first two columns) down by one.
rdf.iloc[:, :2] -= 1
# Keep only the first three columns (user, item, binarised rating) as an array
# and shuffle its rows in place with the global NumPy RNG.
rdf = np.array(rdf)[:, :3]
np.random.shuffle(rdf)

In [None]:
# Copy the first two columns of `rdf` as the model inputs and the third column
# as the labels.
x_train, y_train = np.copy(rdf[:, :2]), np.copy(rdf[:, 2])
# Sizes passed to the models: the largest value in each input column plus one.
num_user, num_item = x_train.max(axis=0) + 1

In [None]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, evaluated with NumPy.

    Works element-wise when ``x`` is an array.
    """
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator

In [None]:
# Build the NCF model with 64-dimensional embeddings and fit it on all of
# (x_train, y_train).
ncf = NCF(num_user, num_item, embedding_k=64)
# NOTE(review): presumably moves the model to the GPU (torch-style API);
# confirm in matrix_factorization. A CUDA device would then be required.
ncf.cuda()
ncf.fit(
    x_train,
    y_train,
    lr=0.01,
    batch_size=8192,
    lamb=1e-5,
    tol=1e-5,
    verbose=False,
)

# Score the same pairs the model was fitted on (despite the `test_` prefix)
# and print the mean, minimum and maximum prediction, one per line.
test_pred, _ = ncf.predict(x_train)
print(np.mean(test_pred), np.min(test_pred), np.max(test_pred), sep="\n")

# Mean squared error and ROC AUC of those predictions against the labels.
mse_ncf, auc_ncf = mse_func(y_train, test_pred), roc_auc_score(y_train, test_pred)

print(mse_ncf, auc_ncf)

In [None]:
# Build the MF model with 64-dimensional embeddings and fit it on all of
# (x_train, y_train); it is used later to draw treatment assignments.
mf_pretrain = MF(num_user, num_item, embedding_k=64)
# NOTE(review): presumably moves the model to the GPU (torch-style API);
# confirm in matrix_factorization. A CUDA device would then be required.
mf_pretrain.cuda()

mf_pretrain.fit(
    x_train,
    y_train,
    lr=0.01,
    batch_size=2048,
    lamb=1e-5,
    tol=1e-5,
    verbose=False,
)

# Score the same pairs the model was fitted on (despite the `test_` prefix)
# and print the mean, minimum and maximum prediction, one per line.
test_pred, _ = mf_pretrain.predict(x_train)
print(np.mean(test_pred), np.min(test_pred), np.max(test_pred), sep="\n")

# Mean squared error and ROC AUC of those predictions against the labels.
mse_mf, auc_mf = mse_func(y_train, test_pred), roc_auc_score(y_train, test_pred)

print(mse_mf, auc_mf)

In [None]:
# Class C: the first two columns of every row of `rdf` whose third column is 0.
class_C = rdf[rdf[:, 2] == 0][:, :2].copy()

In [None]:
# Number of rows of `rdf` whose third column is 1.
all_num = int(np.count_nonzero(rdf[:, 2] == 1))
# Fraction of the ranked rows that goes to class D; the rest goes to class E.
d_e_ratio = 0.5
# First two columns of those rows, scored with the fitted NCF model.
temp_ui = rdf[rdf[:, 2] == 1][:, :2].copy()
temp_rating, z_emb = ncf.predict(temp_ui)

# Sort the pairs by descending score, then cut the ranking at d_e_ratio:
# class D receives the higher-scored part, class E the remainder.
temp_rdf = temp_ui[np.argsort(-temp_rating)]
class_D, class_E = np.split(temp_rdf, [int(d_e_ratio * len(temp_rating))])

In [None]:
# Fraction of the ranked unlabeled pairs that goes to class A; the rest is B.
a_b_ratio = 0.5
# Number of unlabeled pairs to keep: 80% of the number of rows in `rdf`.
all_num = int(rdf.shape[0] * 0.8)

# Enumerate every (row, column) index pair of a dense num_user x num_item
# frame by stacking it, and keep only the two index columns.
# NOTE(review): this materialises num_user * num_item rows and, below, as many
# Python tuples; memory use grows with the full user-item grid.
all_data = (
    pd.DataFrame(np.zeros((num_user, num_item)))
    .stack()
    .reset_index()
    .values[:, :2]
)

# Pairs that do not appear in `rdf`, shuffled with the global NumPy RNG and
# truncated to `all_num` rows.
unlabeled_x = np.array(
    list(set(map(tuple, all_data)) - set(map(tuple, rdf[:, :2]))),
    dtype=int,
)
np.random.shuffle(unlabeled_x)
unlabeled_x = unlabeled_x[:all_num]

temp_rating, z_emb = ncf.predict(unlabeled_x)

# Sort the sampled pairs by descending NCF score, then cut the ranking at
# a_b_ratio: class A receives the higher-scored part, class B the remainder.
temp_rdf = unlabeled_x[np.argsort(-temp_rating)]
class_A, class_B = np.split(temp_rdf, [int(a_b_ratio * len(temp_rating))])

In [None]:
def _append_indicators(pairs, ones_at):
    """Return `pairs` with four extra columns that are 1 at `ones_at`, else 0.

    `ones_at` lists positions within the four appended columns (0-3); an empty
    list leaves all four columns at zero.
    """
    indicators = np.zeros((pairs.shape[0], 4))
    indicators[:, ones_at] = 1
    return np.c_[pairs, indicators]


# Each class gets its own fixed 0/1 pattern in the four appended columns.
class_new_A = _append_indicators(class_A, [])
class_new_B = _append_indicators(class_B, [2, 3])
class_new_C = _append_indicators(class_C, [1])
class_new_D = _append_indicators(class_D, [1, 3])
class_new_E = _append_indicators(class_E, [1, 2, 3])
# Stack the five classes row-wise in the order A, B, C, D, E.
constructed_data = np.concatenate(
    (class_new_A, class_new_B, class_new_C, class_new_D, class_new_E)
)

In [None]:
# Derive a label in 1..5 for every row from the values in columns 3, 4 and 5.
# Rows matching none of the four patterns below keep the default label 1.
# Vectorised boolean masks replace the former per-row Python loop; the patterns
# are mutually exclusive, so the order of the assignments does not matter.
z = np.ones(constructed_data.shape[0])
flags = constructed_data[:, 3:6]
for pattern, label in (
    ((0, 1, 1), 2),
    ((1, 0, 0), 3),
    ((1, 0, 1), 4),
    ((1, 1, 1), 5),
):
    z[(flags == pattern).all(axis=1)] = label
# Append the label as a new last column.
constructed_data = np.c_[constructed_data, z]

In [None]:
# Shuffle the rows in place with the global NumPy RNG, then split them:
# the first 70% of the rows form the training part, the remainder the test part.
np.random.shuffle(constructed_data)
constructed_data_train, constructed_data_test = np.split(
    constructed_data, [int(0.7 * constructed_data.shape[0])]
)

In [None]:
# Score the training pairs with the pretrained MF model and draw one Bernoulli
# sample per row, using the prediction as the success probability.
all_pred, z_emb = mf_pretrain.predict(constructed_data_train[:, :2])
all_pred_bi = np.random.binomial(1, all_pred)

# Rows whose draw was 1 and rows whose draw was 0.
T_1 = constructed_data_train[all_pred_bi == 1]
T_0 = constructed_data_train[all_pred_bi == 0]

# Reduce each group to five columns: the pair, the drawn value (1 or 0), and
# two columns taken from the original row -- columns 3 and 5 for draw 1,
# columns 2 and 4 for draw 0.
T_1 = np.column_stack((T_1[:, :2], np.ones(T_1.shape[0]), T_1[:, 3], T_1[:, 5]))
T_0 = np.column_stack((T_0[:, :2], np.zeros(T_0.shape[0]), T_0[:, 2], T_0[:, 4]))

# Training arrays, with the draw-0 rows first and the draw-1 rows after them.
x_tr = np.concatenate((T_0[:, :2], T_1[:, :2]))
t_tr = np.concatenate((np.zeros(T_0.shape[0]), np.ones(T_1.shape[0])))
c_tr = np.concatenate((T_0[:, 3], T_1[:, 3]))
y_tr = np.concatenate((T_0[:, 4], T_1[:, 4]))

In [None]:
# Write the six arrays as consecutive pickles into one file; reading them back
# takes six pickle.load calls on the same handle, in this order.
# The context manager closes the file even if one of the dumps raises, which
# the previous manual open()/close() pair did not guarantee.
with open("constructed_data", "wb") as file:
    for _array in (
        constructed_data_train,
        constructed_data_test,
        x_tr,
        t_tr,
        c_tr,
        y_tr,
    ):
        pickle.dump(_array, file)