In [1]:
from src.data_reader import DataReader
from src.constant import *
import matplotlib.pyplot as plt
import numpy as np
from src.datasets.dataset import Dataset
from src.models.itemknn.itemknn import ItemKNN
import pandas as pd
from src.evaluation import compute_mrr
from src.utils.sparse_matrix import interactions_to_sparse_matrix

In [2]:
dataset = Dataset()

In [3]:
split_dict = dataset.get_split()

In [4]:
train, train_label = split_dict[TRAIN]

In [5]:
sparse_interaction, user_mapping_dict, _ = interactions_to_sparse_matrix(
            train,
            items_num=dataset._ITEMS_NUM,
            users_num=None,
        )

In [6]:
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
sim = cosine_similarity(sparse_interaction.T, dense_output=False)

In [8]:
sim

<23691x23691 sparse matrix of type '<class 'numpy.float32'>'
	with 17019887 stored elements in Compressed Sparse Row format>

In [9]:
(17019887)/23691**2

0.030324237325535172

In [10]:
import similaripy

In [11]:

sim = similaripy.cosine(sparse_interaction.T, k=200)

Done: 100%|██████████| 23691/23691 [00:00<00:00, 28096.75it/s]


In [12]:
sim

<23691x23691 sparse matrix of type '<class 'numpy.float32'>'
	with 4738200 stored elements in COOrdinate format>

### Try to construct graph with PyG

In [13]:
from torch_geometric.data import Data
import torch
import numpy as np

In [14]:
coo_sim = sim.tocoo()

In [15]:
coo_sim.row

array([    0,     0,     0, ..., 23690, 23690, 23690], dtype=int32)

In [16]:
coo_sim.col

array([14648,     0,     0, ..., 21674, 19169,  7989], dtype=int32)

In [17]:
sim.data

array([0.02181141, 1.        , 0.        , ..., 0.04032389, 0.03492151,
       0.01893885], dtype=float32)

In [18]:
# we have to create the edges both ways
start_node_edge = np.concatenate([coo_sim.row, coo_sim.col])
arrival_node_edge = np.concatenate([coo_sim.col, coo_sim.row])

In [19]:

# create the graph
edge_index = torch.tensor(np.array([start_node_edge, arrival_node_edge]), dtype=torch.long)

In [20]:
item_features = dataset.get_oh_item_features()

In [21]:
item_features

Unnamed: 0_level_0,cat_1,cat_2,cat_3,cat_4,cat_5,cat_6,cat_7,cat_8,cat_9,cat_10,...,val_896,val_897,val_898,val_899,val_900,val_901,val_902,val_903,val_904,val_905
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,1,1,1,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,0,0,1,1,1,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,0,0,1,1,1,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23686,0,0,1,1,1,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
23687,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
23688,1,0,1,1,1,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
23689,0,0,0,0,0,0,1,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [22]:
x = torch.tensor(item_features.values, dtype=torch.float)

In [23]:
x

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [1., 0., 1.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [24]:
data = Data(x=x, edge_index=edge_index)

In [25]:
data.num_nodes

23691

In [26]:
data.num_edges

9476400

In [27]:
data.has_isolated_nodes()

True

In [28]:
data.has_self_loops()

True

In [29]:
data.is_directed()

False

In [30]:
from torch_geometric.utils import to_networkx

In [31]:
#nx_graph = to_networkx(data, to_undirected=True)

In [32]:
#import networkx as nx
#[len(c) for c in sorted(nx.connected_components(nx_graph), key=len, reverse=True)]

In [33]:
graph_dataset = data

In [34]:
graph_dataset

Data(x=[23691, 963], edge_index=[2, 9476400])

In [35]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(graph_dataset.num_node_features, 256)
        self.conv2 = GCNConv(256, 128)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return x

In [36]:
torch.cuda.is_available()

True

In [37]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = 'cpu'
model = GCN().to(device)
data = graph_dataset.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)    
out = model(data)
    
    

RuntimeError: CUDA out of memory. Tried to allocate 6.78 GiB (GPU 0; 10.92 GiB total capacity; 7.17 GiB already allocated; 3.05 GiB free; 7.19 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
out.shape