# Graph recommendation

In [None]:
# Load Kedro's IPython extension (presumably what provides the %reload_kedro magic used in the next cell).
%load_ext kedro.extras.extensions.ipython

In [None]:
# (Re)load the Kedro project into this session.
# NOTE(review): `context` is used further down without being assigned anywhere in this notebook —
# presumably this magic injects it; confirm against the Kedro version in use.
%reload_kedro

In [None]:
from typing import Iterator, Tuple
import re
import pickle
import _pickle as cPickle

from kedro.extras.datasets.pandas import CSVDataSet
from kedro.io.core import get_filepath_str
import numpy as np
import pandas as pd
import dgl
from dgl.sampling import sample_neighbors, select_topk
from dgl import load_graphs, save_graphs, DGLHeteroGraph
from dgl.data.heterograph_serialize import HeteroGraphData
from typing import Any, Dict, List, Tuple
import torch

from recommender_gnn.extras.datasets.graph_dataset import DGSRSubGraphsDataSet
from recommender_gnn.pipelines.graph_recommendation_modeling.nodes import generate_graph_dgsr, preprocess_dgsr, sample_negatives_dgsr
from recommender_gnn.extras.datasets.chunks_dataset import (
 _concat_chunks,
)

# Notebook-wide pandas settings.
# Setting chained_assignment to None turns off pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None
# Display options use fully qualified keys so they do not depend on pattern matching
# of abbreviated option names.
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_colwidth", 100)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
# Keep wide frames on a single line. The earlier `expand_frame_repr = True` call was
# dropped because this later setting always overrode it.
pd.set_option("display.expand_frame_repr", False)

In [None]:
# Names of the catalog entries for the selected dataset.
# NOTE(review): `dataset` is not assigned in any visible cell — it must be defined before running this one.
# NOTE(review): the first name joins with "." while the others use "_" — confirm this is intentional.
transactions_graph_path = f"{dataset}.transactions_graph"
transactions_mapped_path = f"{dataset}_transactions_mapped"
users_mapping_path = f"{dataset}_users_mapping"
items_mapping_path = f"{dataset}_items_mapping"
neg_transactions_path = f"{dataset}_dgsr_negative_transactions_samples"

In [None]:
# Load both datasets through the catalog; the mapped transactions are passed through
# _concat_chunks (presumably because that entry is stored in chunks — confirm).
transactions_mapped = _concat_chunks(context.catalog.load(transactions_mapped_path))
neg_transactions = context.catalog.load(neg_transactions_path)

In [None]:
from ast import literal_eval

In [None]:
# Display the entry at key/position 0 of the loaded negative samples.
neg_transactions[0]

# DGL graph loading

In [None]:
def save_graphs_python(save_filepath: str, graph: dgl.DGLGraph, graph_dict: Dict) -> None:
    """Pickle one or more heterographs, each paired with its own dict.

    Pure-Python alternative to dgl's C-backed graph serialization. The file
    content is a pickled list of ``[graph, dict]`` pairs, one per graph.

    Args:
        save_filepath: Destination file; opened in ``"wb"`` mode, so an
            existing file is overwritten.
        graph: A single ``DGLHeteroGraph`` or an iterable of them.
        graph_dict: Data stored next to the graph(s). For a single graph it is
            stored as-is; for several graphs it is indexed by graph position.
            ``None`` stores an empty dict for every graph.

    Raises:
        TypeError: If any element of ``graph`` is not a ``DGLHeteroGraph``.
    """
    if isinstance(graph, DGLHeteroGraph):
        graphs = [graph]
        graph_dicts = [{} if graph_dict is None else graph_dict]
    else:
        # Materialize once so the input can be validated and then enumerated.
        graphs = list(graph)
        # One empty dict per graph: indexing a single shared `{}` by position
        # (the previous behaviour) raised KeyError for any non-empty list.
        graph_dicts = [{} for _ in graphs] if graph_dict is None else graph_dict
    # Explicit raise instead of `assert`, which is stripped when running with -O.
    if not all(isinstance(g, DGLHeteroGraph) for g in graphs):
        raise TypeError("Invalid DGLHeteroGraph in graph argument")
    gdata_list = [[g, graph_dicts[i]] for i, g in enumerate(graphs)]
    with open(save_filepath, "wb") as file:
        pickle.dump(gdata_list, file)

In [None]:
# Placeholder — replace with the real location of the saved graph file before running.
graph_path = "path"

In [None]:
# Read the graph file back as a plain pickle.
# NOTE: pickle.load can execute arbitrary code — only use on files you created yourself.
with open(graph_path, 'rb') as f:
    gdata_list = pickle.load(f)

In [None]:
# Display element [0][1]; with the [graph, dict] pair layout written by save_graphs_python
# this would be the first graph's dict.
gdata_list[0][1]

In [None]:
# Rebinds gdata_list, this time reading the same path with dgl's own load_graphs.
gdata_list = load_graphs(graph_path)

In [None]:
# Take the first graph and the accompanying dict out of the load_graphs result
# (per the DGL docs load_graphs returns a (graph list, labels dict) pair — confirm for the installed version).
graph = gdata_list[0][0]
graph_dict = gdata_list[1]

In [None]:
# Exact-type probe of the first loaded graph (a subclass instance would show False).
type(gdata_list[0][0]) is DGLHeteroGraph

In [None]:
# Placeholder — replace with the real output location before running.
save_filepath = "path"

In [None]:
# Write the graph and dict extracted above with the pure-Python saver.
save_graphs_python(save_filepath, graph, graph_dict)

In [None]:
# Pickle gdata_list (as currently bound in the notebook) straight to save_filepath,
# overwriting whatever that file held.
with open(save_filepath, "wb") as out_file:
    pickle.dump(gdata_list, out_file)

# Saving test fixture graphs

## Whole graph fixture

In [None]:
def mapped_transactions_custom():
    """Build the small hand-written transactions frame used to create test fixtures.

    Returns:
        A 21-row DataFrame with columns ``user_id``, ``item_id`` and ``time``
        whose row labels are the strings ``"0"`` to ``"20"``.
    """
    # Users 0-3 have five rows each; user 4 has a single row.
    user_ids = [0] * 5 + [1] * 5 + [2] * 5 + [3] * 5 + [4]
    item_ids = [
        0, 1, 2, 2, 3,
        0, 1, 2, 5, 3,
        0, 1, 2, 4, 3,
        7, 1, 2, 6, 3,
        8,
    ]
    times = [
        1453939200, 1453039200, 1453032200, 1453132200, 1453132200,
        1453939201, 1453039202, 1453032203, 1453132204, 1453132205,
        1453939206, 1453039207, 1453032208, 1453132209, 1453132210,
        1453939211, 1453039212, 1453032213, 1453132214, 1453132215,
        1453939216,
    ]
    column_values = {"user_id": user_ids, "item_id": item_ids, "time": times}
    # Each column becomes a {row label: value} mapping keyed by the stringified
    # row position, so the frame is labelled with strings rather than integers.
    labelled_columns = {
        column: {str(position): value for position, value in enumerate(values)}
        for column, values in column_values.items()
    }
    return pd.DataFrame(labelled_columns)

In [None]:
# Materialize the hand-written transactions frame.
transactions_custom = mapped_transactions_custom()

In [None]:
# Build the whole-graph fixture from the custom transactions.
# NOTE: a function also named `graph_custom` is defined further down in this notebook; once that
# cell has run, this name refers to the function, so re-run this cell before reusing the graph.
graph_custom = generate_graph_dgsr(transactions_custom)

In [None]:
# Output location of the whole-graph fixture, relative to the notebook's working directory.
graph_path = "../src/tests/fixtures/graphs/graph_custom.pkl"

In [None]:
# Persist the whole-graph fixture using the highest pickle protocol available.
with open(graph_path, "wb") as fixture_file:
    pickle.dump(graph_custom, fixture_file, protocol=pickle.HIGHEST_PROTOCOL)

## Subsets graph fixtures

In [None]:
# First preprocessing pass: only the 2nd and 3rd results (validation and test lists) are kept.
# NOTE(review): the meaning of the positional arguments (50, 50, 3 and the three flags)
# is defined by preprocess_dgsr — check its signature before changing them.
_, val_list, test_list, _ = preprocess_dgsr(
    transactions_custom,
    graph_custom,
    50,
    50,
    3,
    True,
    True,
    False,
)

In [None]:
# Second preprocessing pass with the flags flipped: only the 1st and 4th results
# (train and predict lists) are kept.
# NOTE(review): the meaning of the positional arguments is defined by preprocess_dgsr — confirm.
train_list, _, _, predict_list = preprocess_dgsr(
    transactions_custom,
    graph_custom,
    50,
    50,
    3,
    False,
    False,
    True,
)

In [None]:
# Parallel lists: subnames[i] is the label used in the output path for subsets[i].
subsets = [train_list, val_list, test_list, predict_list]
subnames = ["train", "val", "test", "predict"]


In [None]:
# Save each subset under ../src/tests/fixtures/graphs/<name>_subgraphs through the project's dataset class.
for subname, subset in zip(subnames, subsets):
    # A fresh save_args dict is built for every dataset instance.
    save_args = {"file_extension": "pkl"}
    subset_dataset = DGSRSubGraphsDataSet(f"../src/tests/fixtures/graphs/{subname}_subgraphs", save_args)
    # NOTE(review): this calls the private _save directly rather than a public save method — confirm that is intended.
    subset_dataset._save(subset)

## Negative samples fixture

In [None]:
# Rebuild the custom transactions, sample negatives from them and write the result
# as a CSV fixture without the index column.
transactions_custom = mapped_transactions_custom()
negatives = sample_negatives_dgsr(transactions_custom)
negatives.to_csv("../src/tests/fixtures/dataframes/negatives.csv", index=False)

# Subgraphs lists fixture

In [None]:
def graph_custom():
    """Load and return the pickled whole-graph fixture.

    The path is relative to the current working directory.
    """
    # pickle can execute arbitrary code on load; only trusted fixture files belong here.
    with open("../src/tests/fixtures/graphs/graph_custom.pkl", "rb") as fixture_file:
        return pickle.load(fixture_file)

In [None]:
def create_subgraphs_lists_custom():
    """Rebuild the train/val/test/predict subgraph lists from the custom fixtures.

    Intended only for regenerating test fixtures. ``preprocess_dgsr`` is run
    twice with different flag combinations: the validation and test lists are
    taken from the first run, the train and predict lists from the second.

    Returns:
        A ``(subsets, subnames)`` pair of parallel lists ordered as
        train, val, test, predict.
    """
    transactions = mapped_transactions_custom()
    full_graph = graph_custom()
    # Positional arguments common to both preprocessing runs.
    shared_args = (transactions, full_graph, 50, 50, 3)
    _, val_list, test_list, _ = preprocess_dgsr(*shared_args, True, True, False)
    train_list, _, _, predict_list = preprocess_dgsr(*shared_args, False, False, True)
    return (
        [train_list, val_list, test_list, predict_list],
        ["train", "val", "test", "predict"],
    )

In [None]:
def create_subgraphs_lists_pickles_custom():
    """Pickle each custom subgraph list into the test fixtures folder.

    Intended only for regenerating test fixtures. One file per subset is written to
    ``../src/tests/fixtures/graphs/<name>_subgraphs_lists.pkl``.
    """
    subsets, subnames = create_subgraphs_lists_custom()
    for subname, subset in zip(subnames, subsets):
        target = f"../src/tests/fixtures/graphs/{subname}_subgraphs_lists.pkl"
        with open(target, "wb") as out_file:
            # HIGHEST_PROTOCOL is what a negative protocol number selects.
            pickle.dump(subset, out_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Regenerate the subgraph list pickles on disk.
create_subgraphs_lists_pickles_custom()