In [56]:
import os
import sys

sys.path.append(os.path.join(os.path.dirname(os.getcwd()), "src"))

from torch_geometric.datasets import Planetoid
import networkx as nx
import matplotlib.pyplot as plt
from torch_geometric.utils import to_dense_adj

from utils.conn_data import save_pickle

## Generate dataset

In [2]:
# Load the Cora citation benchmark through PyG's Planetoid loader.
# `root` is relative to the current working directory; the captured output of
# this cell shows the raw files being downloaded and processed on this run.
dataset = Planetoid(root='tmp/Cora', name='Cora')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [4]:
# Dataset-level summary: how many graphs it holds, the per-node feature
# width, and the number of target classes.
dataset_summary = (
    ("Number of graphs: ", len(dataset)),
    ("Number of features: ", dataset.num_features),
    ("Number of classes: ", dataset.num_classes),
)
for caption, stat in dataset_summary:
    print(caption, stat)

Number of graphs:  1
Number of features:  1433
Number of classes:  7


In [16]:
# Show the summary repr (tensor names and shapes) of the first graph in the dataset.
dataset[0]

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [5]:
# Pull out the first graph object; every later cell works on `data`.
data = dataset[0]

# Basic structural facts about the graph: node count, edge count, and
# whether the stored edge list is directed.
for caption, stat in (
    ("Number of nodes: ", data.num_nodes),
    ("Number of edges: ", data.num_edges),
    ("Is directed: ", data.is_directed()),
):
    print(caption, stat)

Number of nodes:  2708
Number of edges:  10556
Is directed:  False


## Description

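The dataset consists of 2708 scientific publications, each classified into one of seven classes. Edges represent citations between publications; the original Cora release lists 5429 citation links. Note that `data.num_edges` above reports 10556 because PyTorch Geometric stores the graph as undirected, with every edge listed once per direction after duplicate links are removed.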

The node feature matrix `data.x` has shape (number of publications, number of unique words).

In [30]:
# Node feature matrix: print its shape, then let the notebook display the
# (truncated) tensor itself as the cell output.
print(data.x.shape)
data.x

torch.Size([2708, 1433])


tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [45]:
# Shape of the label tensor.
data.y.shape

torch.Size([2708])

In [32]:
# Distinct class ids that occur in the label tensor.
data.y.unique()

tensor([0, 1, 2, 3, 4, 5, 6])

In [40]:
# Lookup table from Cora topic name to integer class id (0..6), built from
# the names listed in id order.
# NOTE(review): the name -> id order has not been checked against the label
# encoding Planetoid uses for `data.y` — confirm before using this mapping to
# interpret predictions.
_cora_topics = [
    "Neural_Networks",
    "Probabilistic_Methods",
    "Genetic_Algorithms",
    "Theory",
    "Case_Based",
    "Reinforcement_Learning",
    "Rule_Learning",
]
y_lables = {topic: class_id for class_id, topic in enumerate(_cora_topics)}

In [48]:
# Build an undirected NetworkX view of the Cora graph.
G = nx.Graph()

# Register every node id up front so that any node without an edge is still
# part of the graph. No node attributes are attached.
G.add_nodes_from(range(data.num_nodes))

# `edge_index` is a 2 x num_edges tensor: row 0 holds the source ids, row 1
# the target ids. Converting it to nested Python lists once and zipping the
# two rows yields plain-int (source, target) tuples, which avoids a Python
# loop that calls `.item()` twice per edge.
edge_index = data.edge_index
G.add_edges_from(zip(*edge_index.tolist()))

# Dense adjacency matrix of the same graph. `to_dense_adj` returns a batched
# tensor, so index 0 selects the single graph's matrix.
adj_matrix = to_dense_adj(edge_index, max_num_nodes=data.num_nodes)[0]


In [51]:
# Bundle every representation of the graph into one dict so that a single
# pickle carries all of them.
graph_info = {
    "G": G,  # NetworkX graph built from data.edge_index
    "Adj": adj_matrix,  # dense adjacency matrix from to_dense_adj
    "Node": data.num_nodes,  # node count (not a node list, despite the key name)
    "torch_graph_data": data,  # the original PyG data object
}

In [57]:
# Destination folder: <parent of the current working directory>/src/data/inputs/cora.
inputs_path = os.path.join(os.path.dirname(os.getcwd()), "src", "data", "inputs", "cora")

# Create the nested folder if it is missing so the save does not fail on a
# fresh checkout; `exist_ok=True` makes re-running this cell harmless.
os.makedirs(inputs_path, exist_ok=True)

# `save_pickle` is the project helper imported from utils.conn_data;
# presumably it serialises `obj` to `path` — verify against its definition.
save_pickle(obj=graph_info, path=os.path.join(inputs_path, "graph_info.pickle"))