In [None]:
# Shell escape: run nvidia-smi to report the NVIDIA GPU(s) available to this runtime, if any.
!nvidia-smi

In [None]:
!pip install pykeen networkx matplotlib -q

In [None]:
import torch
import pykeen
import numpy as np
import matplotlib.pyplot as plt
from pykeen.pipeline import pipeline
from pykeen.evaluation import RankBasedEvaluator
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances, manhattan_distances
from pykeen.datasets import DBpedia50
from sklearn.neighbors import NearestNeighbors

In [None]:
# Function for visualizing node embeddings
def visualize_embeddings(embeddings, labels, title='Scatter Plot of Embeddings'):
    """Scatter-plot the first two embedding dimensions and annotate every point.

    Only columns 0 and 1 of ``embeddings`` are drawn; further columns are
    ignored (no dimensionality reduction is applied). Points are coloured by
    their row position.

    Args:
        embeddings: 2-D array-like with one row per node and at least two columns.
        labels: Sequence of annotation texts, one per row of ``embeddings``.
        title: Title shown above the plot.

    Raises:
        ValueError: If ``embeddings`` is not 2-D with at least two columns, or
            if the number of rows differs from ``len(labels)``.
    """
    embeddings = np.asarray(embeddings)
    if embeddings.ndim != 2 or embeddings.shape[1] < 2:
        raise ValueError(
            f"embeddings must be 2-D with at least two columns, got shape {embeddings.shape}"
        )
    # Fail early with a clear message instead of an obscure matplotlib colour
    # error or an IndexError halfway through the annotation loop.
    if embeddings.shape[0] != len(labels):
        raise ValueError(
            f"got {embeddings.shape[0]} embedding rows but {len(labels)} labels"
        )

    plt.figure(figsize=(10, 8))
    plt.scatter(embeddings[:, 0], embeddings[:, 1], c=np.arange(len(labels)), cmap='viridis', s=25)
    for i, label in enumerate(labels):
        plt.annotate(label, (embeddings[i, 0], embeddings[i, 1]), alpha=1)
    plt.title(title)
    plt.show()


# Function for visualizing the similarity matrix
def display_matrix(similarity_matrix, title='Node Similarity Matrix'):
    """Render a matrix as a heat map with a colour bar.

    Args:
        similarity_matrix: 2-D array-like of values to draw, one cell per entry.
        title: Title shown above the heat map.
    """
    fig, ax = plt.subplots(figsize=(8, 8))
    heatmap = ax.imshow(similarity_matrix, cmap='viridis', interpolation='nearest')
    fig.colorbar(heatmap, ax=ax)
    ax.set_title(title)
    plt.show()

In [None]:
# Loading the graph dataset DBpedia50
dbpedia_dataset = DBpedia50()

# Labels ordered by entity ID: the code below uses entity_labels[i] as the
# label of embedding row i, so sort by ID explicitly instead of relying on the
# iteration order of the id -> label dictionary.
entity_labels = [
    label for _, label in sorted(dbpedia_dataset.testing.entity_id_to_label.items())
]

# Parameters for research
num_epochs_list = [5, 25, 125]
models = ["HolE", "DistMult"]
node_indices = [1324, 12240, 15386, 17242]  # Example node indices for analysis
random_seed = 42         # passed to every training run for reproducibility
num_neighbors = 5        # neighbors reported per node (the node itself excluded)
num_plotted_nodes = 25   # how many leading entities appear in the scatter plot

# Validate the selected nodes before training, so a bad index fails
# immediately rather than after a long training run.
invalid_indices = [i for i in node_indices if not 0 <= i < len(entity_labels)]
if invalid_indices:
    raise IndexError(
        f"node_indices {invalid_indices} are outside the valid range "
        f"[0, {len(entity_labels)})"
    )

# The device does not change between runs, so pick it once.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pairwise measures to plot, each with a readable name so that the three
# matrices produced per run can be told apart by their titles.
pairwise_measures = [
    ("Euclidean distance", euclidean_distances),
    ("Cosine similarity", cosine_similarity),
    ("Manhattan distance", manhattan_distances),
]

for model_name in models:
    for num_epochs in num_epochs_list:
        # Header so the plots and neighbor lists of each run can be attributed.
        print(f"=== {model_name} - {num_epochs} epochs ===")

        # Model training
        pipeline_results = pipeline(
            model=model_name,
            dataset=dbpedia_dataset,
            training_kwargs=dict(num_epochs=num_epochs, batch_size=128),
            evaluation_kwargs=dict(batch_size=64),
            device=device,
            random_seed=random_seed,
        )

        # Obtaining node embeddings through the public representation call
        # (indices=None requests all entities) instead of the private
        # `_embeddings` attribute.
        node_embeddings = (
            pipeline_results.model.entity_representations[0](indices=None)
            .detach()
            .cpu()
            .numpy()
        )

        # Visualizing nodes (first two embedding dimensions of the leading entities)
        visualize_embeddings(
            node_embeddings[:num_plotted_nodes],
            entity_labels[:num_plotted_nodes],
        )

        # Measuring pairwise distance/similarity for the selected nodes
        nodes = node_embeddings[node_indices]
        for measure_name, method in pairwise_measures:
            similarity_matrix = method(nodes)
            display_matrix(
                similarity_matrix,
                title=f'{model_name} - {num_epochs} epochs - {measure_name}',
            )

        # Searching for neighbors of the selected nodes
        nn_model = NearestNeighbors(n_neighbors=num_neighbors, metric='euclidean')
        nn_model.fit(node_embeddings)
        for node_index in node_indices:
            # The query node is part of the fitted data and is returned as its
            # own nearest neighbor (distance 0). Request one extra result and
            # drop the node itself so that real neighbors are reported.
            distances, indices = nn_model.kneighbors(
                node_embeddings[[node_index]],
                n_neighbors=num_neighbors + 1,
            )
            neighbor_ids = [i for i in indices.flatten() if i != node_index][:num_neighbors]
            neighbor_labels = [entity_labels[i] for i in neighbor_ids]
            print(f"Closest neighbors of node {entity_labels[node_index]}: {neighbor_labels}")