In [None]:
# Namespace
import torch.nn.functional as f
import torch_geometric as pyg
import itertools as it
# import networkx as nx
import torch

# Constructor.
from torch_geometric.transforms import RandomLinkSplit
from torch_geometric.typing import EdgeType, NodeType
from torch_geometric.loader import LinkNeighborLoader
from torch.nn.modules.loss import _Loss as Loss
from torch_geometric.data import HeteroData
from torch.utils.data import DataLoader
from torch.optim import Adam, Optimizer
from torch.nn import (
    Dropout1d,
    Embedding, 
    Module, 
    ModuleDict,
    ModuleList, 
    Linear, 
    LeakyReLU
)
from torch import Tensor

In [None]:
# Constant.
from project.config import TRG_EDGE

In [None]:
# Namespace.
import tqdm

## Data Handling

In [None]:
# Loads the pre-built training and validation graphs from disk.
trn_data = torch.load('data/out/trn_Video_Games.pt')
vld_data = torch.load('data/out/vld_Video_Games.pt')

# Supervision edges of the target edge type that each loader iterates over.
trn_edge_label_index = trn_data[TRG_EDGE].edge_label_index
vld_edge_label_index = vld_data[TRG_EDGE].edge_label_index

# Keyword arguments shared by the training and validation loaders.
kwargs = {
    'num_neighbors': [0],  # [8, 4, 2],
    'neg_sampling': 'triplet',
    'num_workers': 10,
    'shuffle': True,
    'pin_memory': True,
}

# Builds the mini-batch loaders used to compute the loss.
trn_loader = LinkNeighborLoader(
    data=trn_data,
    edge_label_index=[TRG_EDGE, trn_edge_label_index],
    batch_size=2048,
    **kwargs
)
vld_loader = LinkNeighborLoader(
    data=vld_data,
    edge_label_index=[TRG_EDGE, vld_edge_label_index],
    batch_size=2048,
    **kwargs
)

In [None]:
# Loads the graph prepared for the all-ranking protocol.
rnk_data = torch.load('data/out/rnk_vld_Video_Games.pt')
# Edges of the target edge type that are to be ranked.
rnk_edge_label_index = rnk_data[TRG_EDGE].edge_label_index
# Batch loader over the all-ranking edges (no shuffling, no negative sampling).
rnk_loader = LinkNeighborLoader(
    data=rnk_data,
    edge_label_index=[TRG_EDGE, rnk_edge_label_index],
    batch_size=8192,
    num_neighbors=[0],  # [8, 4, 2],
    num_workers=10,
    pin_memory=True
)

## Architecture

In [None]:
class EdgePredictor(Module):
    """Base class for models that score edges of a single target edge type.

    The target edge type is stored as given and is indexed positionally as
    ``(source node type, relation name, destination node type)``.
    """

    def __init__(self, *, trg_edge: EdgeType = TRG_EDGE) -> None:
        """Stores the target edge type (defaults to the project's ``TRG_EDGE``)."""
        super().__init__()
        self.trg_edge = trg_edge

    @property
    def trg_src_node(self) -> NodeType:
        """Node type at position 0 of the target edge type."""
        return self.trg_edge[0]

    @property
    def trg_edge_name(self) -> str:
        """Relation name at position 1 of the target edge type."""
        return self.trg_edge[1]

    @property
    def trg_dst_node(self) -> NodeType:
        """Node type at position 2 of the target edge type."""
        return self.trg_edge[2]

    @property
    def trg_nodes(self) -> tuple[NodeType, NodeType]:
        """Pair ``(source node type, destination node type)``."""
        return self.trg_src_node, self.trg_dst_node

In [None]:
class NodeEmbedding(ModuleDict):
    """Holds one ``Embedding`` table per node type, keyed by the node type."""

    def __init__(self, 
        num_embeddings: dict[NodeType, int],
        embedding_dim: int,
        **kwargs
    ) -> None:
        """Creates the embedding tables.

        Args:
            num_embeddings: Number of rows of the table for each node type.
            embedding_dim: Width shared by all tables.
            **kwargs: Forwarded unchanged to every ``Embedding``.
        """
        tables = {}
        for node_type, num_nodes in num_embeddings.items():
            tables[node_type] = Embedding(
                num_embeddings=num_nodes,
                embedding_dim=embedding_dim,
                **kwargs
            )
        super().__init__(tables)

    def forward(self, n_id_dict: dict[NodeType, Tensor]) -> dict[NodeType, Tensor]:
        """Looks up the embeddings of the given ids, node type by node type.

        The result has the same keys, in the same order, as ``n_id_dict``.
        """
        x_dict = {}
        for node_type, n_id in n_id_dict.items():
            x_dict[node_type] = self[node_type](n_id)
        return x_dict

In [None]:
class InnerProduct(Module): 
    """Scores each (source, destination) pair by the dot product of its rows."""

    def forward(self, x_src: Tensor, x_dst: Tensor) -> Tensor:
        """Computes the row-wise inner product.

        Args:
            x_src: Source features of shape ``(batch, dim)``.
            x_dst: Destination features of shape ``(batch, dim)``.

        Returns:
            Scores of shape ``(batch,)``.
        """
        scores = torch.bmm(
            x_src.unsqueeze(-2),
            x_dst.unsqueeze(-1)
        )
        # Only the two singleton matrix dimensions are removed. A bare
        # squeeze() would also drop the batch dimension when batch == 1 and
        # hand callers a 0-d tensor.
        return scores.squeeze(-1).squeeze(-1)

In [None]:
class EdgeRegressor(Linear):
    """Linear layer applied to the element-wise product of two feature sets.

    The weight starts at one and the bias (if any) at zero, so a freshly
    built regressor with ``out_dim=1`` computes a plain inner product.

    NOTE(review): a later cell in this notebook defines another class named
    ``EdgeRegressor`` with a different signature, which shadows this one once
    that cell has run.
    """

    def __init__(self, in_dim: int, out_dim: int = 1, bias: bool = False, **kwargs) -> None:
        """Builds the layer.

        Args:
            in_dim: Size of the node features.
            out_dim: Number of outputs per edge.
            bias: Whether to learn an additive bias.
            **kwargs: Forwarded to ``Linear``.
        """
        super().__init__(
            in_features=in_dim,
            out_features=out_dim,
            bias=bias,
            **kwargs
        )
        torch.nn.init.ones_(self.weight)
        # The bias must be checked against None. Testing its truthiness
        # inspects the tensor's values and raises for more than one element.
        if self.bias is not None:
            torch.nn.init.zeros_(self.bias)


    def forward(self, x_src: Tensor, x_dst: Tensor) -> Tensor:
        """Applies the linear map to ``x_src * x_dst``."""
        return super().forward(x_src * x_dst)

### Matrix Factorization

In [None]:
class MF(EdgePredictor):
    """Matrix factorisation: node embeddings scored by an inner product."""

    def __init__(self, 
        num_embeddings: dict[NodeType, int], 
        embedding_dim: int,
        *,
        trg_edge: EdgeType,
        **kwargs
    ) -> None:
        """Builds the embedding tables and the scoring module.

        Args:
            num_embeddings: Number of embeddings per node type.
            embedding_dim: Width of every embedding.
            trg_edge: Edge type whose edges are scored.
            **kwargs: Forwarded to ``NodeEmbedding``.
        """
        super().__init__(trg_edge=trg_edge)
        self.embedding = NodeEmbedding(
            num_embeddings=num_embeddings, 
            embedding_dim=embedding_dim,
            **kwargs
        )
        self.regressor = InnerProduct()

    def forward(self, 
        n_id: dict[NodeType, Tensor], 
        edge_label_index: dict[EdgeType, Tensor], 
    ) -> Tensor:
        """Scores the target edges listed in ``edge_label_index``.

        Args:
            n_id: Node ids per node type; only the target edge's two node
                types are read.
            edge_label_index: Per edge type, the (source, destination) index
                rows into the corresponding ``n_id`` entries.
        """
        # Embeds only the two node types of the target edge. The unpacking
        # below relies on them being distinct, otherwise the dictionary holds
        # a single entry.
        trg_n_id = {node_type: n_id[node_type] for node_type in self.trg_nodes}
        x_src, x_dst = self.embedding(trg_n_id).values()
        # Selects the end points of the edges to score.
        i_src, i_dst = edge_label_index[self.trg_edge]
        scores = self.regressor(x_src[i_src], x_dst[i_dst])
        return scores

### Generalized Matrix Factorization

In [None]:
class GMF(EdgePredictor):
    """Generalised matrix factorisation: embeddings scored by a learned
    linear map over their element-wise product.

    NOTE(review): this class builds ``EdgeRegressor(in_dim=...)``. A later
    cell redefines ``EdgeRegressor`` with an ``in_features`` parameter, so
    instantiating ``GMF`` after that cell has run would fail on the keyword.
    """

    def __init__(self, 
        num_embeddings: dict[NodeType, int], 
        embedding_dim: int,
        *,
        trg_edge: EdgeType,
        **kwargs
    ) -> None:
        """Builds the embedding tables and the edge regressor.

        Args:
            num_embeddings: Number of embeddings per node type.
            embedding_dim: Width of every embedding.
            trg_edge: Edge type whose edges are scored.
            **kwargs: Forwarded to ``NodeEmbedding``.
        """
        super().__init__(trg_edge=trg_edge)
        self.embedding = NodeEmbedding(
            num_embeddings=num_embeddings, 
            embedding_dim=embedding_dim,
            **kwargs
        )
        self.regressor = EdgeRegressor(in_dim=embedding_dim)

    def forward(self, 
        n_id: dict[NodeType, Tensor], 
        edge_label_index: dict[EdgeType, Tensor], 
    ) -> Tensor:
        """Scores the target edges listed in ``edge_label_index``.

        Args:
            n_id: Node ids per node type; only the target edge's two node
                types are read.
            edge_label_index: Per edge type, the (source, destination) index
                rows into the corresponding ``n_id`` entries.
        """
        # Embeds only the two node types of the target edge. The unpacking
        # relies on them being distinct.
        trg_n_id = {node_type: n_id[node_type] for node_type in self.trg_nodes}
        x_src, x_dst = self.embedding(trg_n_id).values()
        # Selects the end points of the edges to score.
        i_src, i_dst = edge_label_index[self.trg_edge]
        scores = self.regressor(x_src[i_src], x_dst[i_dst])
        return scores

### Neural Graph Collaborative Filtering

In [None]:
class EmbeddingPropagationCell(Module):
    """Single-relation message-passing step in the style of NGCF.

    Each edge sends ``loop(x_src) + intr(x_src * x_dst)`` to its destination.
    Messages are summed per destination, added to the destination's own
    ``loop(x_dst)`` and passed through a LeakyReLU.
    """
    
    def __init__(self, 
        in_dim: int,
        out_dim: int | None = None, 
        bias: bool = False,
        dropout: float = .5
    ) -> None:
        """Builds the cell.

        Args:
            in_dim: Width of the incoming embeddings.
            out_dim: Width of the produced embeddings; ``in_dim`` if omitted.
            bias: Whether the two linear maps learn a bias.
            dropout: Probability used by the shared ``Dropout1d`` module.
        """
        super().__init__()
        self.drop = Dropout1d(dropout)
        self.loop = Linear(in_dim, out_dim or in_dim, bias=bias)
        self.intr = Linear(in_dim, out_dim or in_dim, bias=bias)
        self.actv = LeakyReLU()

    
    def forward(self, 
        x_src: Tensor, 
        x_dst: Tensor, 
        edge_index: Tensor,
        edge_weight: Tensor | None = None
    ) -> Tensor:
        """Computes the updated destination embeddings.

        Args:
            x_src: Source node embeddings, one row per node.
            x_dst: Destination node embeddings, one row per node.
            edge_index: Two index rows (source, destination) describing the
                edges.
            edge_weight: Optional per-edge factor applied to the messages;
                messages are left unweighted when omitted.

        Returns:
            New embeddings with one row per row of ``x_dst``.
        """
        # Node dropout. NOTE(review): on 2-D input Dropout1d presumably
        # zeroes whole rows - confirm against the PyTorch docs.
        x_src = self.drop(x_src)
        x_dst = self.drop(x_dst)
        # Computes the messages to pass along every edge.
        i_src, i_dst = edge_index
        z_src = self.loop(x_src)[i_src]
        z_int = self.intr(x_src[i_src] * x_dst[i_dst])
        z_msg = z_src + z_int
        # The signature advertises edge_weight as optional, so the weighting
        # is skipped rather than multiplying by None.
        if edge_weight is not None:
            z_msg = edge_weight * z_msg
        z_msg = self.drop(z_msg)  # message dropout
        z_sum = pyg.utils.scatter(z_msg, i_dst, 
            dim_size=x_dst.size(0)
        )
        # Computes the self-messages.
        z_dst = self.loop(x_dst)
        z_dst = self.drop(z_dst)  # message dropout
        # Combines the self-message with the aggregated messages.
        x_new = self.actv(z_dst + z_sum)
        return x_new
    

class EmbeddingPropagationLayer(ModuleDict):
    """One propagation step over all edge types.

    Holds one ``EmbeddingPropagationCell`` per relation name (the middle
    element of each edge type), so edge types sharing a relation name share
    a cell.
    """

    def __init__(self, 
        edge_types: list[EdgeType], 
        in_dim: int, 
        out_dim: int = None, 
        **kwargs
    ) -> None:
        """Creates a cell for every relation name found in ``edge_types``.

        Args:
            edge_types: Edge types as (source, relation, destination) triples.
            in_dim: Width of the incoming embeddings.
            out_dim: Width of the produced embeddings.
            **kwargs: Forwarded to every ``EmbeddingPropagationCell``.
        """
        cells = {}
        for _, edge_label, _ in edge_types:
            cells[edge_label] = EmbeddingPropagationCell(
                in_dim=in_dim, 
                out_dim=out_dim, 
                **kwargs
            )
        super().__init__(cells)

    def forward(self, 
        x: dict[NodeType, Tensor], 
        edge_index: dict[EdgeType, Tensor], 
        edge_weight: dict[EdgeType, Tensor]
    ) -> dict[NodeType, Tensor]:
        """Propagates embeddings along every edge type in ``edge_index``.

        The result is keyed by destination node type. When several edge
        types share a destination, the last one processed wins. Node types
        that are never a destination are absent from the result.
        """
        x_new = {}
        for edge_type in edge_index:
            src_node, edge_label, dst_node = edge_type
            cell = self[edge_label]
            x_new[dst_node] = cell(
                x_src=x[src_node], 
                x_dst=x[dst_node], 
                edge_index=edge_index[edge_type],
                edge_weight=edge_weight[edge_type]
            )
        return x_new
    

class EmbeddingPropagation(ModuleList):
    """Stack of propagation layers whose outputs are concatenated."""
    
    def __init__(self, 
        embedding_dims: list[int], 
        edge_types: list[EdgeType],
        **kwargs
    ) -> None:
        """Creates one layer per consecutive pair of ``embedding_dims``.

        Args:
            embedding_dims: Embedding widths, input width first.
            edge_types: Edge types handed to every layer.
            **kwargs: Forwarded to every ``EmbeddingPropagationLayer``.
        """
        layers = []
        for in_dim, out_dim in it.pairwise(embedding_dims):
            layers.append(EmbeddingPropagationLayer(
                edge_types=edge_types,
                in_dim=in_dim,
                out_dim=out_dim,
                **kwargs
            ))
        super().__init__(layers)

    def forward(self, 
        x: dict[NodeType, Tensor], 
        edge_index: dict[EdgeType, Tensor]
    ) -> dict[NodeType, Tensor]:
        """Runs all layers and concatenates every layer's embeddings.

        Returns:
            Per node type, the input embeddings followed by each layer's
            output, joined along the last dimension. The node types are
            those present in the last layer's output.
        """
        # Symmetric degree normalisation: (deg(src) * deg(dst)) ** -0.5 per
        # edge, as a column vector.
        edge_weight = {}
        for edge_type, (i_src, i_dst) in edge_index.items():
            deg_src = pyg.utils.degree(i_src)[i_src]
            deg_dst = pyg.utils.degree(i_dst)[i_dst]
            edge_weight[edge_type] = (deg_src * deg_dst).pow(-.5).unsqueeze(-1)
        # Applies the layers one after the other, keeping every result.
        xs = [x]
        for layer in self:
            x = layer(x, edge_index, edge_weight)
            xs.append(x)
        # Joins the per-layer embeddings of every remaining node type.
        x_new = {}
        for node_type in x.keys():
            x_new[node_type] = torch.cat(
                [x_[node_type] for x_ in xs], dim=-1
            )
        return x_new
    

class EdgeRegressor(Linear): 
    """Linear layer applied to the element-wise product of two feature sets.

    NOTE(review): this definition shadows the earlier ``EdgeRegressor`` of
    this notebook, which takes ``in_dim``/``out_dim`` and starts from unit
    weights.
    """

    def __init__(self, 
        in_features: int,
        out_features: int = 1,
        *,
        bias: bool = True,
        **kwargs
    ) -> None:
        """Builds the layer with ``Linear``'s default initialisation."""
        super().__init__(in_features, out_features, bias=bias, **kwargs)

    def forward(self, x_src: Tensor, x_dst: Tensor) -> Tensor:
        """Applies the linear map to ``x_src * x_dst``."""
        interaction = torch.mul(x_src, x_dst)
        return Linear.forward(self, interaction)


# class EdgeRegressor(Module): 

#     def forward(self, x_src: Tensor, x_dst: Tensor) -> Tensor:
#         return (x_src * x_dst).sum(-1)
    
    
class NGCF(Module): 
    """Neural graph collaborative filtering: embeddings, propagation and an
    edge regressor over the concatenated per-layer embeddings."""

    def __init__(self, 
        num_embeddings: dict[NodeType, int], 
        embedding_dims: list[int], 
        *,
        edge_types: list[EdgeType],
        src_node: NodeType,
        dst_node: NodeType,
        **kwargs
    ) -> None:
        """Builds the model.

        Args:
            num_embeddings: Number of embeddings per node type.
            embedding_dims: Embedding widths; the first is the width of the
                embedding tables, every further entry adds a layer.
            edge_types: Edge types the propagation runs over.
            src_node: Node type of the scored edges' sources.
            dst_node: Node type of the scored edges' destinations.
            **kwargs: Forwarded to ``EmbeddingPropagation``.
        """
        super().__init__()
        self.embedding = NodeEmbedding(
            num_embeddings=num_embeddings, 
            embedding_dim=embedding_dims[0]
        )
        self.propagation = EmbeddingPropagation(
            embedding_dims=embedding_dims, 
            edge_types=edge_types,
            **kwargs
        )
        # The regressor sees all layers' embeddings side by side, hence the
        # summed width.
        self.regressor = EdgeRegressor(in_features=sum(embedding_dims))
        # self.regressor = EdgeRegressor()
        self.src_node = src_node
        self.dst_node = dst_node

    def forward(self, 
        n_id: dict[NodeType, Tensor],
        edge_index: dict[EdgeType, Tensor],
        edge_label_index: Tensor
    ) -> Tensor:
        """Scores the edges given by ``edge_label_index``.

        Args:
            n_id: Node ids per node type.
            edge_index: Message-passing edges per edge type.
            edge_label_index: Two index rows (source, destination) of the
                edges to score.
        """
        # Generates and propagates the node embeddings.
        x = self.propagation(self.embedding(n_id), edge_index)
        # Picks the end points of the edges to score.
        i_src, i_dst = edge_label_index[0], edge_label_index[1]
        x_src = x[self.src_node][i_src]
        x_dst = x[self.dst_node][i_dst]
        # Returns the predicted scores.
        return self.regressor(x_src, x_dst)

## Utility

In [None]:
def dispatch_epoch(
    module: type[Module], 
    loader: type[DataLoader],
    criterion: type[Loss],
    *,
    batch_handler: callable,
    optimizer: type[Optimizer] = None, 
    device: torch.device = None,
    verbose: bool | int = None
) -> list[float]:

    # Ensures a device is specified.
    if device is None:
        device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu'
        )
    # Sends the model to the specified device.
    module = module.to(device)

    # Initializes the data structures for the verbose output.
    if verbose:
        cum_loss = 0

    # Initializes the loss trace buffer.
    loss_trace = []
    # Iterates over the data-loader's batches.
    for batch_id, batch in enumerate(loader, start=1):

        # Resets the gradient if an optimizer exists.
        if optimizer:
            optimizer.zero_grad()

        # Constructs the design and target data structures.
        loss = batch_handler(module, batch, criterion, 
            device=device
        )

        # Updates the module, if an optimizer has been given
        if optimizer:
            loss.backward()
            optimizer.step()

        # Logs the computed loss.
        loss = loss.item()
        loss_trace.append(loss)
        
        # Handles the verbose messaging, if verbose is set.
        if verbose:
            # Updates the cumulative loss sum.
            cum_loss += loss
            # Outputs the current statistics, if the correct index is present.
            if batch_id % verbose == 0:
                # Updates the tracked statistics.
                avg_loss = cum_loss / batch_id
                # Outputs the tracked staistics.
                print(
                    f'Batch({batch_id}): '
                    f'CumAvgLoss({avg_loss:.4f}) & '
                    f'BatchLoss({loss:.4f})',
                    end='\r',
                    flush=True
                )
    
    # Returns the traced loss.
    return loss_trace

In [None]:
def triplet_handler(
        data: HeteroData, 
        x: dict[NodeType, Tensor],
        *,
        src_node: NodeType,
        dst_node: NodeType
    ) -> tuple[Tensor, Tensor, Tensor]:
    """Gathers the features of a batch's (source, positive, negative) triplets.

    Args:
        data: Batch exposing ``src_index`` on the source node type and
            ``dst_pos_index`` / ``dst_neg_index`` on the destination node
            type.
        x: Node features per node type.
        src_node: Node type of the triplets' sources.
        dst_node: Node type of the triplets' destinations.

    Returns:
        The source, positive and negative feature rows, in that order.
    """
    src_store = data[src_node]
    dst_store = data[dst_node]

    # Source features.
    x_src = x[src_node][src_store.src_index]
    # Positive and negative destination features.
    x_dst = x[dst_node]
    x_pos = x_dst[dst_store.dst_pos_index]
    x_neg = x_dst[dst_store.dst_neg_index]

    return x_src, x_pos, x_neg

In [None]:
def mf_handler(
    module: type[EdgePredictor], 
    data: HeteroData, 
    criterion: type[Loss],
    *,
    device: torch.device
) -> Tensor:
    """Computes the triplet loss of one batch for an embedding-only model.

    Args:
        module: Model exposing ``embedding``, ``regressor`` and the target
            node types.
        data: Batch with triplet indices; moved to ``device``.
        criterion: Called as ``criterion(y_pos, y_neg)``.
        device: Device the batch is sent to.

    Returns:
        The loss returned by ``criterion``.
    """
    # Sends the batch to the correct device.
    data = data.to(device)
    # Looks up the embeddings of the batch's nodes.
    x = module.embedding(data.n_id_dict)

    # Gathers the features of the triplets' end points.
    x_src, x_pos, x_neg = triplet_handler(
        data, x,
        src_node=module.trg_src_node,
        dst_node=module.trg_dst_node
    )

    # Scores the positive and the negative links, then compares them.
    y_pos = module.regressor(x_src, x_pos)
    y_neg = module.regressor(x_src, x_neg)
    return criterion(y_pos, y_neg)

## Training

In [None]:
class BPRLoss(Loss):
    """Bayesian personalised ranking loss over positive/negative score pairs."""

    def forward(self, y_pos: Tensor, y_neg: Tensor) -> Tensor:
        """Returns the mean of ``-log(sigmoid(y_pos - y_neg))``."""
        margin = y_pos - y_neg
        return f.logsigmoid(margin).mean().neg()

In [None]:
# Builds the matrix-factorisation model. The embedding tables are sized from
# the training graph, because the previously referenced `data` is not defined
# anywhere in this notebook.
# NOTE(review): assumes trn_data holds every node id the loaders can emit.
model = MF(
    num_embeddings=trn_data.num_nodes_dict,
    embedding_dim=64,
    trg_edge=TRG_EDGE
)
display(model)

# Instantiates the learning algorithm and loss criterion.
optimizer = Adam(model.parameters(), 
    lr=1e-2,
    weight_decay=2e-6
)
criterion = BPRLoss()

In [None]:
# Clears the GPU cache.
torch.cuda.empty_cache()
bst_loss = float('inf')
bst_state = None
trn_trace = []
vld_trace = []
for epoch_id in range(1, 16+1):
    print(f'Epoch({epoch_id})')

    # Dispatches one epoch of training.
    trn_loss = dispatch_epoch(
        module=model,
        loader=trn_loader,
        criterion=criterion,
        optimizer=optimizer,
        batch_handler=mf_handler,
        verbose=32
    )
    trn_trace.append(trn_loss)
    print()

    # Dispatches one epoch of validation.
    with torch.no_grad():
        vld_loss = dispatch_epoch(
            module=model,
            loader=vld_loader,
            criterion=criterion,
            batch_handler=mf_handler,
            verbose=32
        )
    vld_trace.append(vld_loss)
    print()

    # Keeps the best model. The tensors are cloned because `.cpu()` returns
    # the parameter's own storage when the model already lives on the CPU,
    # so later epochs would silently overwrite the snapshot.
    avg_loss = sum(vld_loss) / len(vld_loss)
    if bst_loss > avg_loss:
        bst_loss = avg_loss
        bst_state = {
            k: v.detach().cpu().clone()
            for k, v in model.state_dict().items()
        }
# Reinstates the best model parameters, if any epoch produced a finite loss.
if bst_state is not None:
    model.load_state_dict(bst_state)

### Evaluation

In [None]:
# Loader feeding the ranking evaluation on the validation graph.
# NOTE(review): `MetricSampleLoader` is neither defined nor imported in the
# visible part of this notebook - confirm where it comes from.
vld_ranker = MetricSampleLoader(
    data=vld_data,
    edge_label_index=[TRG_EDGE, vld_edge_label_index],
    batch_size=512,
    neg_ratio=10,
    num_neighbors=[0],
    num_workers=10,
    pin_memory=True
)

In [None]:
def precision_score(labels: Tensor, index: Tensor) -> Tensor:
    """Fraction of the entries selected by ``index`` that carry a positive label.

    Args:
        labels: Label of every candidate.
        index: Positions of the retrieved candidates within ``labels``.
    """
    retrieved = labels[index]
    return torch.mean(retrieved.float())


def recall_score(labels: Tensor, index: Tensor) -> Tensor:
    """Share of all positive labels that is found among the retrieved entries.

    Args:
        labels: Label of every candidate.
        index: Positions of the retrieved candidates within ``labels``.

    Returns:
        The recall as a 0-d tensor. Zero is returned when ``labels`` holds no
        positives, instead of the NaN that 0/0 would produce and that would
        poison any average taken over several queries.
    """
    n_relevant = labels.sum()
    if n_relevant == 0:
        return torch.zeros((), device=labels.device)
    return labels[index].sum() / n_relevant


def ap_score(labels: Tensor, index: Tensor) -> Tensor:
    """Average precision of the ranking given by ``index``.

    The precision at every rank that holds a positive is averaged over the
    number of retrieved positives.

    Args:
        labels: Label of every candidate.
        index: Positions of the retrieved candidates, best first.

    Returns:
        The average precision as a 0-d tensor; zero when no positive was
        retrieved (instead of NaN from 0/0).
    """
    indicator = labels[index]
    n_hits = indicator.sum()
    if n_hits == 0:
        return torch.zeros((), device=labels.device)
    # The ranks are created on the labels' device so the division also works
    # for tensors living on an accelerator.
    ranks = torch.arange(1, indicator.size(0) + 1, device=indicator.device)
    precision = indicator.cumsum(0) / ranks
    return (indicator * precision).sum() / n_hits


def ndcg_score(labels: Tensor, index: Tensor, *, coefs: Tensor = None) -> Tensor:
    """Normalised discounted cumulative gain of the ranking given by ``index``.

    The gain at rank ``i`` (0-based) is weighted by ``coefs[i]``. The result
    is divided by the weighted gain of the best possible ranking of
    ``labels`` of the same length.

    The previous implementation multiplied by ``log2(i + 2)``, which rewarded
    positives at low ranks, and normalised by the coefficient sum.

    Args:
        labels: Label (gain) of every candidate.
        index: Positions of the retrieved candidates, best first.
        coefs: Per-rank weights, one per entry of ``index``; defaults to the
            standard discount ``1 / log2(i + 2)``.

    Returns:
        The NDCG as a 0-d tensor; zero when ``labels`` holds no gain at all.
    """
    k = index.size(0)
    if coefs is None:
        positions = torch.arange(k, device=labels.device)
        coefs = 1.0 / torch.log2(2 + positions)
    gains = labels.to(coefs.dtype)
    # Discounted gain of the ranking under evaluation.
    dcg = (gains[index] * coefs).sum()
    # Discounted gain of the ideal ranking: largest gains first.
    ideal = gains.sort(descending=True).values[:k]
    idcg = (ideal * coefs[:ideal.size(0)]).sum()
    if idcg == 0:
        return torch.zeros((), dtype=coefs.dtype, device=labels.device)
    return dcg / idcg

In [None]:
# Stand-ins for the arguments of a future evaluation function: the trained
# model, moved to the CPU, and the ranking loader.
module = model.cpu()
loader = vld_ranker

In [None]:
# Collects, for every source id, the scores, destination ids and labels of
# all of its evaluated edges across the batches.
# NOTE(review): the ids are taken from edge_label_index as the loader emits
# them. If they are batch-local positions rather than global node ids,
# different users would be merged under one key - confirm against the loader.
buffer = {}
with torch.no_grad():
    for batch in tqdm.tqdm(loader):
        # One score per edge. reshape(-1) keeps a 1-D tensor even when the
        # batch holds a single edge, where a bare squeeze() would yield a
        # 0-d tensor that cannot be masked below.
        output = module(
            n_id=batch.n_id_dict, 
            edge_label_index=batch.edge_label_index_dict
        ).reshape(-1)
        edge_store = batch[module.trg_nodes]
        src_id, dst_id = edge_store.edge_label_index
        edge_label = edge_store.edge_label
        # One boolean row per distinct source, marking that source's edges.
        src_uid = src_id.unique()
        src_mask = src_uid.unsqueeze(1) == src_id.unsqueeze(0)
        for uid, mask in zip(src_uid.tolist(), src_mask):
            scores, ids, labels = buffer.setdefault(uid, ([], [], []))
            scores.append(output[mask])
            ids.append(dst_id[mask])
            labels.append(edge_label[mask])
# Joins every source's per-batch chunks into single tensors.
buffer = {
    uid: tuple(torch.cat(chunks) for chunks in parts)
    for uid, parts in buffer.items()
}


In [None]:
# Evaluates every source node's top-k ranking and reports the averages.
at_k = 20
metrics = []
for scores, _, labels in buffer.values():
    # Positions of the k best-scored candidates (fewer if there are not
    # enough candidates).
    index = torch.topk(scores, k=min(scores.size(0), at_k)).indices
    metrics.append([
        precision_score(labels, index),
        recall_score(labels, index),
        ap_score(labels, index),
        ndcg_score(labels, index),
    ])
# Distinct names keep the builtin `map` usable in later cells and avoid
# re-using one name for both per-user tensors and the final floats.
avg_precision, avg_recall, mean_ap, avg_ndcg = torch.tensor(metrics).mean(0).tolist()
print(f'Precision@{at_k}({avg_precision:.2%})')
print(f'Recall@{at_k}({avg_recall:.2%})')
print(f'MAP@{at_k}({mean_ap:.2%})')
print(f'NDCG@{at_k}({avg_ndcg:.2%})')

In [None]:
# Namespace.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Constructor.
from pandas import DataFrame


# Typing: one list of per-batch losses for every epoch.
TraceType = list[list[float]]

# Applies seaborn's default theme to all following plots.
sns.set_theme()

In [None]:
def parse_trace(trace: TraceType) -> DataFrame:
    """Turns a loss trace into a long-format frame.

    Args:
        trace: One list of per-batch losses for every epoch.

    Returns:
        A frame with the columns ``batch``, ``epoch`` and ``loss``, one row
        per recorded loss.
    """
    wide = DataFrame(trace).rename_axis(index='epoch', columns='batch')
    long = wide.T.stack().rename('loss')
    return long.reset_index()


def make_frame(traces: list[TraceType], *, labels: list[str]) -> DataFrame:
    """Stacks several parsed traces into one frame.

    Args:
        traces: Loss traces to combine.
        labels: Name stored in the ``trace`` column for each trace; paired
            with ``traces`` by position.
    """
    frames = []
    for label, trace in zip(labels, traces):
        frames.append(parse_trace(trace).assign(trace=label))
    return pd.concat(frames)

In [None]:
# Reformats the loss traces.
trace = make_frame(
    traces=[trn_trace, vld_trace], 
    labels=['Train', 'Valid.']
)

# Plots the per-epoch mean loss with a standard-deviation band.
fig, ax = plt.subplots(figsize=[7, 3], dpi=192)
ax = sns.lineplot(
    data=trace, 
    x='epoch', 
    y='loss', 
    hue='trace', 
    style='trace', 
    errorbar='sd', 
    ax=ax
)
ax.set_xlabel('Epoch')
ax.set_ylabel('Average Loss')
ax.legend(title='Dataset')
# plt.show() takes no figure argument (only the keyword `block`), so the
# figure is no longer passed positionally.
plt.show()

In [None]:
def ngcf_handler(
    module: Module, 
    data: HeteroData, 
    criterion: Loss,
    *,
    device: torch.device
) -> Tensor:
    """Computes the triplet loss of one batch for a propagation model.

    Args:
        module: Model exposing ``embedding``, ``propagation`` and
            ``regressor``. Its ``src_node`` / ``dst_node`` attributes select
            the node types of the triplets; ``'user'`` / ``'item'`` are used
            when the attributes are missing.
        data: Batch with triplet indices; moved to ``device``.
        criterion: Called as ``criterion(y_pos, y_neg)``.
        device: Device the batch is sent to.

    Returns:
        The loss returned by ``criterion``.
    """
    # Sends the batch to the correct device.
    data = data.to(device)

    # Computes the embedding propagation.
    x = module.embedding(data.n_id_dict)
    x = module.propagation(x, data.edge_index_dict)

    # Gathers the triplets' features with the helper also used by
    # mf_handler, using the node types the model was built with instead of
    # hard-coded names.
    x_src, x_pos, x_neg = triplet_handler(
        data, x,
        src_node=getattr(module, 'src_node', 'user'),
        dst_node=getattr(module, 'dst_node', 'item')
    )

    # Computes the link scores.
    y_pos = module.regressor(x_src, x_pos)
    y_neg = module.regressor(x_src, x_neg)

    # Computes and returns the loss.
    return criterion(y_pos, y_neg)

In [None]:
# Initializes the neural graph model.
# Builds the neural graph model from the training graph's metadata, because
# the previously referenced `data` is not defined anywhere in this notebook.
# NOTE(review): assumes trn_data holds every node id and edge type the
# loaders can emit.
ngcf = NGCF(
    num_embeddings=trn_data.num_nodes_dict,
    embedding_dims=[16] * 5,
    edge_types=trn_data.edge_types,
    src_node='user',
    dst_node='item',
    dropout=.1,
)
display(ngcf)

# Instantiates the learning algorithm and loss criterion.
optimizer = Adam(ngcf.parameters(), 
    lr=1e-3,
    weight_decay=1e-5
)
criterion = BPRLoss()

In [None]:
# Releases cached GPU memory before training starts.
torch.cuda.empty_cache()

# Per-epoch lists of batch losses for training and validation.
trn_trace, vld_trace = [], []
for epoch_id in range(1, 16+1):
    print(f'Epoch({epoch_id})')

    # Training pass: the optimizer is handed over, so the model is updated.
    trn_loss = dispatch_epoch(
        module=ngcf,
        loader=trn_loader,
        criterion=criterion,
        batch_handler=ngcf_handler,
        optimizer=optimizer,
        verbose=16
    )
    trn_trace.append(trn_loss)
    print()

    # Validation pass: no optimizer and no gradient tracking.
    with torch.no_grad():
        vld_loss = dispatch_epoch(
            module=ngcf,
            loader=vld_loader,
            criterion=criterion,
            batch_handler=ngcf_handler,
            verbose=16
        )
    vld_trace.append(vld_loss)
    print()