In [2]:
# Standard library imports
import random
import time

# Third-party imports
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', None)

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch_geometric
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import degree
from torch_geometric.nn.models import lightgcn

from tqdm.notebook import tqdm
from sklearn import preprocessing as pp
from sklearn.model_selection import train_test_split


In [None]:
# Load the ml-100k ratings file: tab-separated, read without a header row,
# so the four column names are supplied explicitly.
columns_name=['user_id','item_id','rating','timestamp']
df = pd.read_csv("../data/ml-100k/u.data",sep="\t",names=columns_name)
# Show the row count and a small preview as a sanity check.
print(len(df))
display(df.head(5))

In [None]:
# Keep only interactions rated 3 or higher; lower-rated rows are dropped.
# NOTE: this rebinds `df`, so the unfiltered frame is no longer available
# after this cell runs.
df = df[df['rating']>=3]
print(len(df))

In [None]:
# 80/20 train/test split with a fixed random_state so the split is repeatable.
# `df.values` is a plain NumPy array, so each half is rebuilt as a DataFrame
# with the original column names (and a fresh default index).
train, test = train_test_split(df.values, test_size=0.2, random_state=16)
train_df = pd.DataFrame(train, columns=df.columns)
test_df = pd.DataFrame(test, columns=df.columns)

In [None]:
# Report how many interactions landed in each split.
print(f"Train Size  :  {len(train_df)}")
print(f"Test Size :  {len(test_df)}")

In [None]:
# Relabel raw user/item ids as contiguous 0-based indices.
# Both encoders are fitted on the training split only; the same fitted
# encoders are applied to the test split in a later cell.
le_user = pp.LabelEncoder()
le_item = pp.LabelEncoder()
train_df['user_id_idx'] = le_user.fit_transform(train_df['user_id'].values)
train_df['item_id_idx'] = le_item.fit_transform(train_df['item_id'].values)

In [None]:
# Drop test interactions whose user or item never appears in the training
# split: the label encoders were fitted on train only, so ids unseen there
# cannot be transformed.
train_user_ids = train_df['user_id'].unique()
train_item_ids = train_df['item_id'].unique()

# `.copy()` makes the filtered frame independent of the frame it was sliced
# from, so adding columns to it afterwards does not trigger pandas'
# SettingWithCopyWarning (or silently write to a temporary).
test_df = test_df[
    test_df['user_id'].isin(train_user_ids)
    & test_df['item_id'].isin(train_item_ids)
].copy()
print('Size of test set before/ after(remove user/item not in train set):', len(test), len(test_df))

In [None]:
# Encode the test split with the encoders fitted on the training split, so
# both splits share one index space.
test_df['user_id_idx'] = le_user.transform(test_df['user_id'].values)
test_df['item_id_idx'] = le_item.transform(test_df['item_id'].values)

In [None]:
# Number of distinct encoded users and items in the training split.
# These counts are used below to size the graph and to offset item node ids.
n_users = train_df['user_id_idx'].nunique()
n_items = train_df['item_id_idx'].nunique()
print("Number of Unique Users : ", n_users)
print("Number of unique Items : ", n_items)

In [None]:
# One row per training user, with the list of item indices that user
# interacted with (inspection only; `data_loader` rebuilds this itself).
interected_items_df = train_df.groupby('user_id_idx')['item_id_idx'].apply(list).reset_index()
interected_items_df

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

# data loader

In [None]:
def data_loader(data, batch_size, n_usr, n_itm):
    """Sample one batch of (user, positive item, negative item) triples.

    Users are drawn from ``range(n_usr)``: without replacement when
    ``batch_size <= n_usr``, with replacement otherwise. For every sampled
    user one interacted item is picked uniformly as the positive, and one
    item index from ``range(n_itm)`` that the user has not interacted with
    is drawn by rejection sampling as the negative.

    Args:
        data: DataFrame with ``user_id_idx`` and ``item_id_idx`` columns.
        batch_size: Number of triples to sample.
        n_usr: Number of users; also the offset added to item indices.
        n_itm: Number of items; negatives are drawn from ``range(n_itm)``.

    Returns:
        A tuple ``(users, pos_items, neg_items)`` of 1-D ``LongTensor`` objects
        placed on the notebook-level ``device``. ``users`` is sorted in
        ascending order; both item tensors are shifted by ``n_usr``.

    Raises:
        ValueError: If a sampled user has interacted with every item in
            ``range(n_itm)``, so that no negative exists.
    """

    def sample_neg(seen_items):
        # A set makes every rejection-sampling probe O(1) instead of O(len).
        seen = set(seen_items)
        # Without this guard the loop below would never terminate for a user
        # who has already interacted with every candidate item.
        if sum(1 for item in seen if 0 <= item < n_itm) >= n_itm:
            raise ValueError(
                "cannot sample a negative item: user has interacted with all items"
            )
        while True:
            neg_id = random.randint(0, n_itm - 1)
            if neg_id not in seen:
                return neg_id

    # Per-user list of interacted item indices.
    user_items = data.groupby('user_id_idx')['item_id_idx'].apply(list).reset_index()
    indices = range(n_usr)

    if n_usr < batch_size:
        # Not enough distinct users for the batch: sample with replacement.
        users = [random.choice(indices) for _ in range(batch_size)]
    else:
        users = random.sample(indices, batch_size)
    users.sort()
    users_df = pd.DataFrame(users, columns=['users'])

    # The right join keeps exactly one row per sampled user (duplicates
    # included), aligned with `users`.
    user_items = pd.merge(
        user_items, users_df, how='right', left_on='user_id_idx', right_on='users'
    )
    pos_items = user_items['item_id_idx'].apply(lambda items: random.choice(items)).values
    neg_items = user_items['item_id_idx'].apply(sample_neg).values

    # Build each tensor once. The previous version constructed and moved all
    # three tensors twice because of a leftover debug print.
    users_t = torch.LongTensor(list(users)).to(device)
    pos_t = torch.LongTensor(list(pos_items)).to(device) + n_usr
    neg_t = torch.LongTensor(list(neg_items)).to(device) + n_usr
    return users_t, pos_t, neg_t

# data_loader(train_df, 16, n_users, n_items)

# Edge Index

In [None]:
# Build the bidirectional edge index of the user-item training graph.
# Item nodes are shifted by n_users so their ids follow the user ids.
u_t = torch.LongTensor(train_df['user_id_idx'])
i_t = n_users + torch.LongTensor(train_df['item_id_idx'])

# Row 0 holds sources and row 1 targets: the first half of the columns is
# user -> item, the second half the reversed item -> user edges.
train_edge_index = torch.stack(
    [torch.cat((u_t, i_t)), torch.cat((i_t, u_t))],
    dim=0,
).to(device)
train_edge_index

# LGConv layer

In [None]:
# Toy 5-node graph for experimenting: identity-matrix node features and a
# hand-written edge index in which every edge appears in both directions.
test_x = torch.Tensor(np.eye(5))
test_edge_index = torch.LongTensor(np.array([
  [0, 0, 1, 1, 2, 3, 3, 4],
  [2, 3, 3, 4, 0, 0, 1, 1]
]))

In [None]:
from torch_geometric.nn import LGConv
# Apply a single LGConv layer to the toy features and edges to inspect its output.
LGConv()(test_x, test_edge_index)

# BPR Loss + Eval_Metrics

# Train_and_eval

In [None]:
# Smoke-test the sampler: draw one batch of 16 (user, positive item,
# negative item) triples. The batch-size argument was previously missing,
# which made this cell a SyntaxError.
users, pos_items, neg_items = data_loader(train_df, 16, n_users, n_items)

In [None]:
from torch_geometric.nn import LightGCN
from torch_geometric.loader import DataLoader
from torch_geometric.utils import negative_sampling

In [None]:
# Model hyperparameters (passed to LightGCN below).
latent_dim = 64  # embedding dimension
n_layers = 3  # number of LightGCN layers

# Training hyperparameters.
EPOCHS = 50
BATCH_SIZE = 10
LR = 0.005  # learning rate for the Adam optimizer
K = 20  # NOTE(review): presumably a top-K evaluation cutoff; unused in the visible cells

In [None]:
# Select the GPU when available, otherwise the CPU.
# NOTE: this rebinds the `device` already chosen in an earlier cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# LightGCN over the joint user + item node set, trained with Adam.
model = LightGCN(
    num_nodes=n_users + n_items,
    embedding_dim=latent_dim,
    num_layers=n_layers,
)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
print("Size of Learnable Embedding : ", [param.shape for param in model.parameters()])

In [None]:
# Notes on what the training loop needs:
#   - edge labels must distinguish positive edges from negative edges
#   - edge_index describes the graph used for message passing
# data = dataset[0].to(device)

# Toy edge list (same values as test_edge_index) used to try out batching.
index = [[0, 0, 1, 1, 2, 3, 3, 4], [2, 3, 3, 4, 0, 0, 1, 1]]

# NOTE(review): `index` has length 2 (its two rows), so the loader sees a
# 2-element dataset rather than 8 edges — confirm this is what was intended.
loader = DataLoader(index, batch_size=2, shuffle=True)
# loader = DataLoader(train_edge_index, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
from torch_geometric.datasets import LastFM

In [None]:
# Instantiate the PyG LastFM dataset, using ./LastFM as its root directory.
dataset = LastFM(root="./LastFM")

In [None]:
# NOTE: rebinds `loader`; from here on it iterates the LastFM dataset, not the
# toy edge list defined earlier.
loader = DataLoader(dataset, batch_size=2, shuffle=True)

In [None]:
dataset[0]

In [None]:
# Inspect what the loader yields by printing the edge count of every batch.
for batch in loader:
    print(batch.num_edges)

In [None]:
# Train LightGCN with its BPR-style recommendation loss on mini-batches of
# user -> item training edges.
model.train()

# train_edge_index was built as [user -> item | item -> user], so its first
# half holds exactly one supervision edge per training interaction. Message
# passing still uses the full bidirectional edge index.
n_pos_edges = train_edge_index.size(1) // 2
pos_edge_label_index = train_edge_index[:, :n_pos_edges]

# Mini-batches are batches of edge positions. torch's DataLoader is named
# explicitly because `DataLoader` is rebound to different classes elsewhere
# in this notebook.
train_loader = torch.utils.data.DataLoader(
    range(n_pos_edges), batch_size=BATCH_SIZE, shuffle=True
)

for epoch in range(EPOCHS):
    for batch in train_loader:
        optimizer.zero_grad()
        edge_index = train_edge_index

        # Positive (user, item) pairs of this batch.
        pos_label = pos_edge_label_index[:, batch.to(pos_edge_label_index.device)]

        # One negative per positive: same user, item node drawn uniformly from
        # the item id range [n_users, n_users + n_items). Uniform sampling may
        # occasionally hit a true positive.
        neg_label = torch.stack([
            pos_label[0],
            torch.randint(
                n_users, n_users + n_items,
                (pos_label.size(1),),
                device=pos_label.device,
            ),
        ])

        # Score positives and negatives in one forward pass; the first half of
        # the output belongs to the positives, the second half to the negatives.
        total_label = torch.cat([pos_label, neg_label], dim=1)
        out = model(edge_index, total_label)
        pos_rank, neg_rank = out.chunk(2)

        loss = model.recommendation_loss(pos_rank, neg_rank)
        loss.backward()
        optimizer.step()

In [None]:
# cc = LGCN.get_embedding(train_edge_index)
# cc

In [None]:
# cc.shape

In [None]:
# light_loss, light_bpr, light_reg, light_recall, light_precision = train_and_eval(lightgcn, optimizer, train_df)

In [None]:
# Switch the trained model to evaluation mode. The LightGCN instance in this
# notebook is bound to `model`; `LGCN` is not defined anywhere in it.
model.eval()

# pred = model(data).argmax(dim=1)
# correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
# acc = int(correct) / int(data.test_mask.sum())
# print(f'Accuracy: {acc:.4f}')

In [None]:
# Reading of the toy graph:
#   users: nodes 0 and 1; items: nodes 2, 3 and 4
#   positive pairs: user 0 - items 2, 3; user 1 - items 3, 4
#   negative pairs: user 0 - item 4;     user 1 - item 2

test_edge_index

In [53]:
# Variant of the toy edge index whose target row uses ids 10-14, keeping the
# source ids (0-4) and target ids in disjoint ranges.
test_edge_index_1 = torch.as_tensor([[0,  0,  1,  1,  2,  3,  3,  4],
                                     [12, 13, 13, 14, 10, 10, 11, 11],])

In [41]:
from torch_geometric.utils import negative_sampling
from torch_geometric.utils import structured_negative_sampling
# Returns a 3-tuple of tensors: (source, positive target, sampled negative target).
# NOTE(review): in the recorded output the negatives include ids such as 1, 4,
# 5 and 6, which lie outside the 10-14 range used for targets above — the
# sampler does not appear to restrict negatives to that range. Confirm before
# using it for user-item training.
neg_edge_index = structured_negative_sampling(test_edge_index_1)
neg_edge_index

(tensor([0, 0, 1, 1, 2, 3, 3, 4]),
 tensor([12, 13, 13, 14, 10, 10, 11, 11]),
 tensor([ 6,  1, 12,  6,  4, 14,  5,  4]))

In [70]:
# NOTE: this rebinds `DataLoader` to the torch.utils.data class and `loader`
# to a loader over edge positions 0..7.
from torch.utils.data import DataLoader
idx = list(range(8))
# NOTE(review): the loader below already uses shuffle=True, so this extra
# shuffle looks redundant — confirm.
random.shuffle(idx)

loader = DataLoader(idx, batch_size=2, shuffle=True)

In [75]:
# For every mini-batch of edge positions, pick the matching entries out of
# each of the three sampled tensors (source, positive, negative).
for batch in loader:
    print(batch)
    p = tuple(component[batch] for component in neg_edge_index)
    print(p)

tensor([2, 6])
(tensor([1, 3]), tensor([13, 11]), tensor([12,  5]))
tensor([1, 7])
(tensor([0, 4]), tensor([13, 11]), tensor([1, 4]))
tensor([4, 0])
(tensor([2, 0]), tensor([10, 12]), tensor([4, 6]))
tensor([5, 3])
(tensor([3, 1]), tensor([10, 14]), tensor([14,  6]))


In [30]:
from torch_geometric.data import Data
# Wrap the toy edge index in a PyG Data object so transforms can be applied to it.
data = Data(edge_index=test_edge_index_1)

In [36]:
from torch_geometric.transforms import RandomLinkSplit

# Split the toy edges into train/val/test sets: no validation edges
# (num_val=0), num_test=2 for the test split, edges treated as directed, and
# no negative samples added to the training set. split_labels=True exposes the
# positive edges as `pos_edge_label_index`, which the next cell prints.
transform = RandomLinkSplit(is_undirected=False, split_labels=True, add_negative_train_samples=False, num_val=0, num_test=2)
train_data, val_data, test_data = transform(data)



In [40]:
# Show the positive supervision edges assigned to each split.
print(train_data.pos_edge_label_index)
print(val_data.pos_edge_label_index)
print(test_data.pos_edge_label_index)

tensor([[ 1,  1,  3,  3,  0,  0],
        [14, 13, 10, 11, 12, 13]])
tensor([], size=(2, 0), dtype=torch.int64)
tensor([[ 4,  2],
        [11, 10]])


tensor([[ 4,  0],
        [11, 12]])

In [None]:
test_data.edge_index

In [None]:
x_s = torch.randn(2, 16)

In [None]:
x_s