# Practicing Link Prediction on Heterogneous Graphs with PyG
source: https://medium.com/@pytorch_geometric/link-prediction-on-heterogeneous-graphs-with-pyg-6d5c29677c70 \
companion notebooks: 
1. https://colab.research.google.com/drive/1r_FWLSFf9iL0OWeHeD31d_Opt031P1Nq
2. https://colab.research.google.com/drive/1xpzn1Nvai1ygd_P5Yambc_oe4VBPK_ZT [some values needs to be filled]

In [1]:
from torch_geometric.data import download_url, extract_zip, HeteroData
import pandas as pd
import torch
import torch_geometric.transforms as T
from torch_geometric.loader import LinkNeighborLoader
from torch_geometric.nn import SAGEConv, to_hetero
import torch.nn.functional as F
import tqdm

## Dataset

### Downloading the dataset
in the `datasets/` folder under the current directory

In [2]:
url = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'
extract_zip(download_url(url, 'datasets'), 'datasets')

movies_path = 'datasets/ml-latest-small/movies.csv'
ratings_path = 'datasets/ml-latest-small/ratings.csv'

Using existing file ml-latest-small.zip
Extracting datasets\ml-latest-small.zip


### Exploring the dataset

In [3]:
movies_df = pd.read_csv(movies_path)
ratings_df = pd.read_csv(ratings_path)

In [4]:
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [6]:
movies_df.shape, ratings_df.shape

((9742, 3), (100836, 4))

## Preprocessing

In [7]:
# Split genres and convert into indicator variables:
genres = movies_df['genres'].str.get_dummies('|') # did not know it was possible to do this
# here creating a feature matrix for the movies using the genres
genres.head()

Unnamed: 0,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [8]:
# Use genres as movie input features:
movie_feat = torch.from_numpy(genres.values).to(torch.float)
# converting the features df to tensor
movie_feat.shape

torch.Size([9742, 20])

In [9]:
movie_feat # 9742 movies, 20 genres, 1 if movie has genre, 0 if not, each row is a movie, each column is a genre

tensor([[0., 0., 1.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [10]:
# mapping user id to consecutive unique ids
unique_user_id = ratings_df['userId'].unique()
unique_user_id = pd.DataFrame(data={
    'userId': unique_user_id,
    'mappedID': pd.RangeIndex(len(unique_user_id)),
})
unique_user_id.head()

Unnamed: 0,userId,mappedID
0,1,0
1,2,1
2,3,2
3,4,3
4,5,4


In [11]:
unique_user_id.shape

(610, 2)

In [12]:
# mapping movie id to consecutive unique ids
# `unique_movie_id = ratings_df['movieId'].unique()` # for users it was ok as there were no user features to map
# but for movies this technique is not right, as we have features for movies, so we need to map the unique movie ids to the movie ids in the features df
# notebook 1 made this mistake, notebook 2 corrects it
unique_movie_id = pd.DataFrame(data={
    'movieId': movies_df['movieId'].unique(), # unique is unnecessary here, as movieId is already unique, at least should be, if not then we have a problem
    'mappedID': pd.RangeIndex(len(movies_df['movieId'].unique())),
})
unique_movie_id.head()

Unnamed: 0,movieId,mappedID
0,1,0
1,2,1
2,3,2
3,4,3
4,5,4


In [13]:
unique_movie_id.tail()

Unnamed: 0,movieId,mappedID
9737,193581,9737
9738,193583,9738
9739,193585,9739
9740,193587,9740
9741,193609,9741


In [14]:
unique_movie_id.shape

(9742, 2)

In [15]:
# merging the ratings df with the unique user id df in order to get the mapped id as the first column of the edge index
ratings_user_id_df = pd.merge(ratings_df['userId'], unique_user_id,
                            left_on='userId', right_on='userId', how='left')

# creating the first column of the edge index, which is the user id
ratings_user_id = torch.from_numpy(ratings_user_id_df['mappedID'].values)
ratings_user_id

tensor([  0,   0,   0,  ..., 609, 609, 609])

In [16]:
ratings_user_id_df.head()

Unnamed: 0,userId,mappedID
0,1,0
1,1,0
2,1,0
3,1,0
4,1,0


In [17]:
# merging the ratings df with the unique movie id df in order to get the mapped id as the second column of the edge index
ratings_movie_id_df = pd.merge(ratings_df['movieId'], unique_movie_id,
                            left_on='movieId', right_on='movieId', how='left')
# creating the second column of the edge index, which is the movie id
ratings_movie_id = torch.from_numpy(ratings_movie_id_df['mappedID'].values)
ratings_movie_id

tensor([   0,    2,    5,  ..., 9462, 9463, 9503])

In [18]:
ratings_movie_id_df.head()

Unnamed: 0,movieId,mappedID
0,1,0
1,3,2
2,6,5
3,47,43
4,50,46


In [19]:
# creating the edge index
edge_index = torch.stack([
        ratings_user_id, ratings_movie_id
], dim=0)
edge_index

tensor([[   0,    0,    0,  ...,  609,  609,  609],
        [   0,    2,    5,  ..., 9462, 9463, 9503]])

### Creating the heterogeneous graph

In [20]:
unique_movie_id

Unnamed: 0,movieId,mappedID
0,1,0
1,2,1
2,3,2
3,4,3
4,5,4
...,...,...
9737,193581,9737
9738,193583,9738
9739,193585,9739
9740,193587,9740


In [21]:
movies_df.movieId.unique().shape

(9742,)

In [22]:
data = HeteroData()

data["user"].node_id = torch.arange(len(unique_user_id))
data["movie"].node_id = torch.arange(len(unique_movie_id))

In [23]:
(torch.arange(len(unique_user_id)) == torch.from_numpy(unique_user_id['mappedID'].values)).unique(return_counts=True) # not sure, but possibly `arrange` is the faster way to do this

(tensor([True]), tensor([610]))

In [24]:
data["movie"].x = movie_feat
# NOTE: the `x` attribute is used to store the feature matrix of each node type
# and there's no feature matrix for the user nodes
data["movie"].x.shape

torch.Size([9742, 20])

In [25]:
data["user", "rates", "movie"].edge_index = edge_index

In [26]:
# `user` nodes only hase the node_id attribute, no features or other attributes
# `movie` nodes have the node_id attribute and a feature matrix
# `user` nodes are connected to `movie` nodes by edges with the `rates` relation

data

HeteroData(
  user={ node_id=[610] },
  movie={
    node_id=[9742],
    x=[9742, 20],
  },
  (user, rates, movie)={ edge_index=[2, 100836] }
)

In [27]:
# transforming the directed graph into an undirected graph by introducing the reverse edges, `rev_rates` relation 
data = T.ToUndirected()(data)
data

HeteroData(
  user={ node_id=[610] },
  movie={
    node_id=[9742],
    x=[9742, 20],
  },
  (user, rates, movie)={ edge_index=[2, 100836] },
  (movie, rev_rates, user)={ edge_index=[2, 100836] }
)

In [28]:
test_data = HeteroData()
test_data["yo", "gos", "bro"].edge_index = torch.randint(0, 10, (2, 5))
test_data

HeteroData(
  (yo, gos, bro)={ edge_index=[2, 5] }
)

In [29]:
T.ToUndirected()(test_data)

HeteroData(
  (yo, gos, bro)={ edge_index=[2, 5] },
  (bro, rev_gos, yo)={ edge_index=[2, 5] }
)

### Train-val-test split

In [30]:
transform = T.RandomLinkSplit(
    num_val = 0.1, # 10% of the edges will be used for validation
    num_test = 0.1, # 10% of the edges will be used for testing, the rest 80% of the edges will be used for training
    disjoint_train_ratio= 0.3, # i think 30% of the training edges will be for supervision during training and the rest 70% is for message passing
    # `edge_index` will hold the training edges and `edge_label_index` will hold the supervision edges of trainning data
    # not sure about the other two sets though
    # when `disjoint_train_ratio` is set to 0, all training edges will be used for supervision during training
    neg_sampling_ratio= 2.0, # 2 negative samples will be added for each positive sample
    add_negative_train_samples=False, # not going to add negative samples to the training set, is this not removing the effect
    # of `neg_sampling_ratio` parameter?
    edge_types=('user', 'rates', 'movie'), # defining the relation type"
    rev_edge_types=('movie', 'rev_rates', 'user'), # defining the reverse relation type
)

In [31]:
train_data, val_data, test_data = transform(data)
[train_data, val_data, test_data]
# train data set has 49411+21175=70586 edges, 49411 edges for supervision and 21175 edges for message passing
# all unique
# val data set has 70586 edges, 30249 of which has labels
# test data set has 80699 edges, 60501 of which has labels
# so how is that? 
# how can the test data set have more edges than the train data set?

[HeteroData(
   user={ node_id=[610] },
   movie={
     node_id=[9742],
     x=[9742, 20],
   },
   (user, rates, movie)={
     edge_index=[2, 56469],
     edge_label=[24201],
     edge_label_index=[2, 24201],
   },
   (movie, rev_rates, user)={ edge_index=[2, 56469] }
 ),
 HeteroData(
   user={ node_id=[610] },
   movie={
     node_id=[9742],
     x=[9742, 20],
   },
   (user, rates, movie)={
     edge_index=[2, 80670],
     edge_label=[30249],
     edge_label_index=[2, 30249],
   },
   (movie, rev_rates, user)={ edge_index=[2, 80670] }
 ),
 HeteroData(
   user={ node_id=[610] },
   movie={
     node_id=[9742],
     x=[9742, 20],
   },
   (user, rates, movie)={
     edge_index=[2, 90753],
     edge_label=[30249],
     edge_label_index=[2, 30249],
   },
   (movie, rev_rates, user)={ edge_index=[2, 90753] }
 )]

### Mini-batching

In [32]:
def linkLoaderWrapper(data: HeteroData):

    edge_label_index = data["user", "rates", "movie"].edge_label_index
    edge_label = data["user", "rates", "movie"].edge_label
    
    loader = LinkNeighborLoader(
        data = data,
        
        num_neighbors = [20, 10], # In the first hop, we sample at most 20 neighbors. In the second hop, we sample at most 10 neighbors. not sure yet what this means
        neg_sampling_ratio = 2.0, # 2 negative samples will be added for each positive sample 'on-the-fly'
        edge_label_index = (
            ("user", "rates", "movie"), 
            edge_label_index
        ), # edges used to create the mini-batches / subgraphs
        edge_label = edge_label,

        batch_size = 128,
        shuffle = True
    )
    return loader

In [33]:
train_loader = linkLoaderWrapper(train_data)
train_loader

LinkNeighborLoader()

In [34]:
sampled_data = next(iter(train_loader))
sampled_data

HeteroData(
  user={
    node_id=[608],
    n_id=[608],
  },
  movie={
    node_id=[2760],
    x=[2760, 20],
    n_id=[2760],
  },
  (user, rates, movie)={
    edge_index=[2, 17181],
    edge_label=[384],
    edge_label_index=[2, 384],
    e_id=[17181],
    input_id=[128],
  },
  (movie, rev_rates, user)={
    edge_index=[2, 7645],
    e_id=[7645],
  }
)

In [35]:
sampled_data = next(iter(train_loader))
sampled_data

HeteroData(
  user={
    node_id=[607],
    n_id=[607],
  },
  movie={
    node_id=[2787],
    x=[2787, 20],
    n_id=[2787],
  },
  (user, rates, movie)={
    edge_index=[2, 17406],
    edge_label=[384],
    edge_label_index=[2, 384],
    e_id=[17406],
    input_id=[128],
  },
  (movie, rev_rates, user)={
    edge_index=[2, 7718],
    e_id=[7718],
  }
)

In [36]:
sampled_data = next(iter(train_loader))
sampled_data

HeteroData(
  user={
    node_id=[610],
    n_id=[610],
  },
  movie={
    node_id=[2769],
    x=[2769, 20],
    n_id=[2769],
  },
  (user, rates, movie)={
    edge_index=[2, 17207],
    edge_label=[384],
    edge_label_index=[2, 384],
    e_id=[17207],
    input_id=[128],
  },
  (movie, rev_rates, user)={
    edge_index=[2, 7846],
    e_id=[7846],
  }
)

In [37]:
sampled_data["user"].node_id.shape, sampled_data["user"].num_nodes

(torch.Size([610]), 610)

In [38]:
sampled_data.edge_index_dict

{('user',
  'rates',
  'movie'): tensor([[  45,   10,   95,  ...,  119,  127,  353],
         [   0,    0,    0,  ..., 2300, 2300, 2300]]),
 ('movie',
  'rev_rates',
  'user'): tensor([[375, 106, 376,  ..., 458, 775, 494],
         [  0,   0,   0,  ..., 545, 545, 545]])}

In [39]:
sampled_data.x_dict

{'movie': tensor([[0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 1.,  ..., 0., 0., 0.]])}

## Model

In [40]:
class GNN(torch.nn.Module): # encoder
    def __init__(self, hidden_channels):
        super().__init__()
        self.conv1 = SAGEConv(hidden_channels, hidden_channels)
        self.conv2 = SAGEConv((hidden_channels, hidden_channels), hidden_channels)
        
    def forward(self, x: torch.Tensor, edge_index: torch.Tensor) -> torch.Tensor:
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2((x, x), edge_index)
        return x
    
    def __call__(self, *args, **kwargs):
        print("GNN called")
        print(args, kwargs)
        return super().__call__(*args, **kwargs)

In [41]:
class Classifier(torch.nn.Module): # decoder
    def forward(self, x_user: torch.Tensor, x_movie: torch.Tensor, edge_label_index: torch.Tensor):
        edge_feat_user = x_user[edge_label_index[0]] # selecting the features of the nodes only in the subgraph defined by the edge_label_index (supervision set)
        edge_feat_movie = x_movie[edge_label_index[1]]
        return (edge_feat_user * edge_feat_movie).sum(dim=-1) # computing the dot product of the features of the nodes in the subgraph, sum(subject_nodes * object_nodes)

In [42]:
class Model(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()

        self.movie_lin = torch.nn.Linear(20, hidden_channels)

        # as dataset is not very feature rich, we generate (learn) two embeddings for each node type
        self.user_emb = torch.nn.Embedding(data["user"].num_nodes, hidden_channels) # generating random(!?) embeddings for the user nodes
        self.movie_emb = torch.nn.Embedding(data["movie"].num_nodes, hidden_channels)

        self.gnn = GNN(hidden_channels) # encoder
        self.gnn = to_hetero(self.gnn, data.metadata()) # converting the GNN to a heterogenous GNN!
        self.classifier = Classifier() # decoder
    
    
    def forward(self, data: HeteroData) -> torch.Tensor:
        # `x_dict` holds the feature matrix of all node type
        # the `x_dict` with the `data` does not have anything for the `user` node type, as the `user` nodes do not have any features
        # so we are building an `x_dict` with the features of the `movie` nodes and the embeddings of the `user` nodes
        x_dict = {
            "user": self.user_emb(data["user"].node_id), # selecting the embeddings of the user nodes, are we not selecting all the embeddings?
            "movie": self.movie_lin(data["movie"].x) + self.movie_emb(data["movie"].node_id), # adding [element-wise] the (trained) features of the movies with the new embeddings of the movies
            # so we are learning embeddings for the movies twice?
        }

        print("x_dict:-", x_dict)
        print("data.edge_index_dict:-", data.edge_index_dict)
        print(x_dict["movie"])
        
        x_dict = self.gnn(x_dict, data.edge_index_dict)
        # `edge_index_dict` holds the edge index of all relation types

        pred = self.classifier(x_dict["user"], x_dict["movie"], data["user", "rates", "movie"].edge_label_index)

        return pred

In [43]:
test_emb = torch.nn.Embedding(data["user"].num_nodes, 10)
test_emb

Embedding(610, 10)

In [44]:
test_emb.weight.shape

torch.Size([610, 10])

In [45]:
data.metadata() # metadata of the heterogenous graph
# metadata[0] holds the node types
# metadata[1] holds the edge types

(['user', 'movie'],
 [('user', 'rates', 'movie'), ('movie', 'rev_rates', 'user')])

this whole thing's quite confusing, specially the namings \
it seems like sometimes same thing can be accessed by different names \
or different things can be accessed by names quite similar to each other \
hella confusing, at least so far

examples: \
`edge_index` and `edge_label_index` \
`data["user", "rates", "movie"].edge_label_index` and `data.edge_index_dict[("user", "rates", "movie")]` \
etc.

## Trainning

In [46]:
train_loader = linkLoaderWrapper(train_data)
train_loader

LinkNeighborLoader()

In [47]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [48]:
model = Model(hidden_channels=64).to(device)
model

  torch.has_cuda,
  torch.has_cudnn,
  torch.has_mps,
  torch.has_mkldnn,


Model(
  (movie_lin): Linear(in_features=20, out_features=64, bias=True)
  (user_emb): Embedding(610, 64)
  (movie_emb): Embedding(9742, 64)
  (gnn): GraphModule(
    (conv1): ModuleDict(
      (user__rates__movie): SAGEConv(64, 64, aggr=mean)
      (movie__rev_rates__user): SAGEConv(64, 64, aggr=mean)
    )
    (conv2): ModuleDict(
      (user__rates__movie): SAGEConv((64, 64), 64, aggr=mean)
      (movie__rev_rates__user): SAGEConv((64, 64), 64, aggr=mean)
    )
  )
  (classifier): Classifier()
)

In [49]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)

In [50]:
model(train_data.to(device))

x_dict:- {'user': tensor([[ 0.9520, -1.4156,  1.9573,  ...,  1.3740, -1.2360, -1.0918],
        [-0.3410,  0.3875,  0.3528,  ...,  0.4325, -0.0878,  0.7170],
        [ 1.8927, -1.8302, -0.5237,  ...,  1.6886,  1.0078, -0.7601],
        ...,
        [ 0.3011, -1.6824, -0.1533,  ..., -0.8090, -0.3014, -0.7080],
        [ 0.1532, -1.4077,  0.3137,  ...,  0.7894, -0.8490,  0.1494],
        [-0.4863, -0.5454, -1.9786,  ...,  1.6544, -0.5266, -0.4472]],
       device='cuda:0', grad_fn=<EmbeddingBackward0>), 'movie': tensor([[ 0.6425,  0.1221,  1.1334,  ...,  0.7525, -0.5464, -1.1403],
        [-0.5630, -1.7999, -0.6234,  ..., -0.1322, -0.0713, -0.0610],
        [-0.2109,  0.0455,  0.2262,  ...,  1.6161,  1.3001,  0.8588],
        ...,
        [-0.5602, -1.1230, -0.9346,  ...,  0.0422, -1.0303,  1.5769],
        [ 0.1707,  0.9993, -0.0961,  ..., -0.9955,  1.3842,  0.9349],
        [ 0.2288, -0.0666, -0.4697,  ..., -0.7250, -0.5584, -0.8381]],
       device='cuda:0', grad_fn=<AddBackward0>)}
d

AttributeError: 'tuple' object has no attribute 'dim'

In [51]:
for epoch in range(1, 6):
    total_loss = total_examples = 0
    for sampled_data in tqdm.tqdm(train_loader):
        optimizer.zero_grad() # zeroing the gradients
        sampled_data = sampled_data.to(device)
        
        print(sampled_data)
        pred = model(sampled_data) # forward pass
        
        ground_truth = sampled_data["user", "rates", "movie"].edge_label
        loss = F.binary_cross_entropy_with_logits(pred, ground_truth) # computing the loss
        loss.backward() # backward pass
        
        optimizer.step() # updating the parameters
        
        total_loss += loss.item() * pred.numel() # `numel` returns the number of elements in the tensor
        total_examples += pred.numel()
    
    print(f"Epoch {epoch:03d}: train loss {total_loss / total_examples:.4f}")

  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/190 [00:00<?, ?it/s]


HeteroData(
  user={
    node_id=[605],
    n_id=[605],
  },
  movie={
    node_id=[2762],
    x=[2762, 20],
    n_id=[2762],
  },
  (user, rates, movie)={
    edge_index=[2, 17605],
    edge_label=[384],
    edge_label_index=[2, 384],
    e_id=[17605],
    input_id=[128],
  },
  (movie, rev_rates, user)={
    edge_index=[2, 7851],
    e_id=[7851],
  }
)
x_dict:- {'user': tensor([[-0.3410,  0.3875,  0.3528,  ...,  0.4325, -0.0878,  0.7170],
        [ 1.8927, -1.8302, -0.5237,  ...,  1.6886,  1.0078, -0.7601],
        [-0.0106, -0.5854, -1.3801,  ..., -0.7361, -0.9928,  0.4445],
        ...,
        [-0.2627,  0.8648, -0.2811,  ...,  1.6395,  1.5637, -1.5371],
        [-0.9634,  1.6853,  1.1703,  ...,  1.2882, -1.0497, -0.3536],
        [ 0.8950, -1.8852, -0.7841,  ..., -0.6088,  0.4444,  0.9362]],
       device='cuda:0', grad_fn=<EmbeddingBackward0>), 'movie': tensor([[-0.3844, -0.2293, -1.3449,  ..., -1.7227,  0.7767, -1.0496],
        [ 0.5838, -2.1732,  1.2072,  ...,  0.3382, -0.518

AttributeError: 'tuple' object has no attribute 'dim'

In [52]:
torch.Tensor([1, 2, 3, 3]).numel()

4

## Example of a het gnn gone right

In [53]:
import torch
from torch_geometric.nn import SAGEConv, to_hetero

class GNN2(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), 32)
        self.conv2 = SAGEConv((32, 32), 32)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index).relu()
        return x

model2 = GNN2()

node_types = ['author', 'paper']
edge_types = [
    ('author', 'writes', 'paper'),
    ('paper', 'cites', 'paper'),
    ('paper', 'written_by', 'author'),
]
metadata = (node_types, edge_types)

x_dict = {
    "author": torch.randn((20, 5)),
    "paper": torch.randn((25, 4))
}
auths = torch.randint(0, 20, (10,))
paps = torch.randint(0, 25, (10,))
edge_index_dict = {
    ('author', 'writes', 'paper'): torch.stack((auths, paps)),
    ('paper', 'cites', 'paper'): torch.randint(0, 25, (2, 10)),
    ('paper', 'written_by', 'author'): torch.stack((paps, auths))
}

model2 = to_hetero(model2, metadata)
model2(x_dict, edge_index_dict)

{'author': tensor([[0.0000, 0.1573, 0.0000, 0.0000, 0.2531, 0.1678, 0.2737, 0.3328, 0.0206,
          0.1651, 0.0845, 0.0000, 0.0000, 0.0000, 0.0571, 0.0000, 0.0000, 0.0193,
          0.0000, 0.0702, 0.0000, 0.1840, 0.2005, 0.0000, 0.0000, 0.0000, 0.0000,
          0.1876, 0.0000, 0.3220, 0.2971, 0.3778],
         [0.0000, 0.1108, 0.0000, 0.0000, 0.0000, 0.0000, 0.4385, 0.3311, 0.0000,
          0.4554, 0.0000, 0.0000, 0.1417, 0.0000, 0.0000, 0.0000, 0.2335, 0.1601,
          0.0000, 0.3007, 0.0000, 0.1033, 0.3556, 0.0071, 0.0000, 0.0412, 0.0831,
          0.0000, 0.0000, 0.0000, 0.0000, 0.6886],
         [0.1687, 0.0000, 0.0000, 0.1705, 0.0000, 0.2716, 0.4504, 0.0701, 0.1509,
          0.3530, 0.0307, 0.0000, 0.1314, 0.0000, 0.0000, 0.1756, 0.1084, 0.0000,
          0.0000, 0.3543, 0.4484, 0.0000, 0.0000, 0.1295, 0.0000, 0.0000, 0.8107,
          0.0569, 0.0000, 0.5997, 0.2878, 0.0376],
         [0.0000, 0.0000, 0.0750, 0.0000, 0.2361, 0.0000, 0.1362, 0.0000, 0.2585,
          0.0000,

In [54]:
model3 = GNN2()
model3 = to_hetero(model3, train_data.metadata())
model3.to(device)
model3

GraphModule(
  (conv1): ModuleDict(
    (user__rates__movie): SAGEConv((-1, -1), 32, aggr=mean)
    (movie__rev_rates__user): SAGEConv((-1, -1), 32, aggr=mean)
  )
  (conv2): ModuleDict(
    (user__rates__movie): SAGEConv((32, 32), 32, aggr=mean)
    (movie__rev_rates__user): SAGEConv((32, 32), 32, aggr=mean)
  )
)

In [55]:
x_dict = sampled_data.x_dict
x_dict["user"] = torch.zeros(len(sampled_data["user"].node_id), 64).to(device)
model3(
    # {
    #     "user": torch.zeros(len(sampled_data["user"].node_id), 64).to(device),
    #     "movie": sampled_data["movie"].x.to(device)
    # },
    x_dict,
    # {
    #     ('user', 'rates', 'movie'): sampled_data["user", "rates", "movie"].edge_index.to(device),
    #     ('movie', 'rev_rates', 'user'): sampled_data["movie", "rev_rates", "user"].edge_index.to(device)
    # }
    sampled_data.edge_index_dict
)


{'user': tensor([[0.0121, 0.0000, 0.0120,  ..., 0.0000, 0.0257, 0.0000],
         [0.0000, 0.0000, 0.0295,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0191,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0927, 0.0000, 0.0974,  ..., 0.0000, 0.0000, 0.0000],
         [0.0927, 0.0000, 0.0974,  ..., 0.0000, 0.0000, 0.0000],
         [0.0927, 0.0000, 0.0974,  ..., 0.0000, 0.0000, 0.0000]],
        device='cuda:0', grad_fn=<ReluBackward0>),
 'movie': tensor([[0.0000, 0.0000, 0.0875,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.1182, 0.0560,  ..., 0.0000, 0.0000, 0.0709],
         [0.0000, 0.0000, 0.0478,  ..., 0.0000, 0.0000, 0.0982],
         ...,
         [0.0000, 0.0067, 0.0778,  ..., 0.0000, 0.0000, 0.0352],
         [0.0000, 0.0139, 0.1604,  ..., 0.0000, 0.0000, 0.1290],
         [0.0062, 0.0000, 0.2689,  ..., 0.0000, 0.0000, 0.0463]],
        device='cuda:0', grad_fn=<ReluBackward0>)}

In [56]:
torch.zeros(len(train_data["user"].node_id), 64).shape, train_data["movie"].x.shape

(torch.Size([610, 64]), torch.Size([9742, 20]))

In [57]:
sampled_data.edge_index_dict

{('user',
  'rates',
  'movie'): tensor([[ 119,  138,  277,  ...,  514,  276,  276],
         [   0,    0,    0,  ..., 2353, 2354, 2355]], device='cuda:0'),
 ('movie',
  'rev_rates',
  'user'): tensor([[ 370,  371,  372,  ..., 2760, 2761, 2194],
         [   0,    0,    0,  ...,  537,  537,  537]], device='cuda:0')}