### Install PyTorch Geometric if not installed

In [None]:
#| code-fold: true

import os
import torch
# Export the installed torch version as $TORCH so the shell commands below can
# substitute it into the wheel-index URL.
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# `-f` points pip at the wheel index page for this torch version; `-q` keeps pip quiet.
# NOTE(review): these installs run unconditionally every time the cell is executed.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# torch_geometric itself is installed straight from the GitHub repository.
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

In [None]:
import pandas as pd
import numpy as np
import networkx as nx

In [None]:
import torch
from torch_geometric.datasets import Planetoid, KarateClub, Amazon, AmazonProducts
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
from torch_geometric.utils import train_test_split_edges
from torch_geometric.loader import DataLoader
from torch_geometric.data import Data

from sklearn.neighbors import KNeighborsTransformer

# Seed torch's global random number generator so the cells below are repeatable.
torch.manual_seed(12345)

### Public dataset

In [None]:
#TODO add my dataset
# Load the CiteSeer graph through Planetoid; T.NormalizeFeatures() is applied
# as the dataset transform.
# Raw string: "\.." contained an invalid "\." escape sequence. The resulting
# path is unchanged (a literal backslash followed by two dots).
# NOTE(review): this was probably meant to be ".." (the parent directory) — confirm.
dataset_public = Planetoid(r"\..", "CiteSeer", transform=T.NormalizeFeatures())
# Index the dataset that was just created; the previous `dataset[0]` referred
# to a name that is not defined in the visible notebook cells.
data_public = dataset_public[0]

In [None]:
# Clear the predefined node masks before splitting edges. All three masks
# belong to `data_public`; the previous chained assignment wrote two of them
# to `data`, which does not exist yet at this point in the notebook.
data_public.train_mask = data_public.val_mask = data_public.test_mask = None
# Split the edges of the public graph for link prediction.
data_public = train_test_split_edges(data_public)

In [None]:
from torch_geometric.transforms import Compose, RandomRotate

torch.manual_seed(123)

# Chain three random rotations (degrees=180), one per coordinate axis 0, 1 and 2,
# applied in that order.
random_rotate = Compose(
    [RandomRotate(degrees=180, axis=axis_id) for axis_id in range(3)]
)


### Simulated dataset

In [None]:
# `glob` is not imported anywhere else in the visible cells, so import it here.
import glob
import os

root_dir = '/home/dim26fa/data/imod_models/mitochondria/'

# Collect the per-sample localization CSVs of the train and test splits.
# os.path.join avoids the doubled slash produced by string concatenation, and
# sorted() makes the order deterministic (glob returns matches in arbitrary
# order, and the position in this list is later used to name saved files).
train_pc = sorted(
    glob.glob(os.path.join(root_dir, 'preprocessed', 'train', '*', '*Localizations*.csv'))
)
test_pc = sorted(
    glob.glob(os.path.join(root_dir, 'preprocessed', 'test', '*', '*Localizations*.csv'))
)

In [None]:
#| code-fold: true


from torch_geometric.transforms import RandomLinkSplit, RandomNodeSplit

# Link-level split transform, designed for transductive learning.
# (For HeteroData, `edge_types` / `rev_edge_types` would also have to be given.)
tfs = RandomLinkSplit(
    key="edge_label",  # attribute name used for the supervision labels
    is_undirected=True,
    disjoint_train_ratio=0,  # a value > 0 switches on disjoint mode
    add_negative_train_samples=True,
    neg_sampling_ratio=1.0,
)

In [None]:
# gotta fix this

from torch_geometric.transforms import KNNGraph

# The k-NN transform (k=3) does not depend on the sample, so build it once
# instead of once per file.
transf = KNNGraph(3)

# Turn every training CSV into a graph, split its links, and cache the result
# next to the source file.
train_list = []
for idx, path in enumerate(train_pc):
    processed_dir = os.path.dirname(path)
    df = pd.read_csv(path)
    tens = torch.tensor(np.array(df), dtype=torch.float)
    # Only positions are stored here; edges are added by the KNNGraph transform.
    data = Data(pos=tens)
    data_t = transf(data)
    # NOTE(review): `tfs` is unpacked into three splits elsewhere in this
    # notebook, so `split` is presumably a (train, val, test) tuple and the
    # whole tuple is saved / appended here — confirm that this is intended.
    split = tfs(data_t)
    # os.path.join: only `os` is imported in this notebook; the `osp` alias
    # used previously was never defined.
    torch.save(split, os.path.join(processed_dir, f'data_{idx}.pt'))
    train_list.append(split)

train_loader = DataLoader(train_list, batch_size=20, shuffle=True)

### Simulated triangles

In [None]:
# Load the simulated triangle localizations and keep a float tensor copy.
data_ = pd.read_csv('/home/dim26fa/data/suresim_simulations/triangles/triangles_2.csv')
tens = torch.tensor(np.array(data_), dtype=torch.float)

# Fit a brute-force 17-nearest-neighbour model on the table.
transformer = KNeighborsTransformer(algorithm='brute', n_neighbors=17)
transformer.fit_transform(data_)  # return value is not used

# Neighbour graph of the fitted points: sparse -> dense array -> networkx graph.
graph = transformer.kneighbors_graph()
graph_array = graph.toarray()
nx_graph = nx.from_numpy_array(graph_array)

In [None]:
# Show the loaded table as the cell output.
data_

In [None]:
# Node features: the whole table converted to a NumPy array.
node_feat = np.array(data_)

In [None]:
# Edge list with one column per edge. The EdgeView is materialised with
# list(...) first so NumPy receives a plain sequence of (u, v) pairs; this
# matches how the edge index is built for the `Data` object in this notebook.
edges = np.transpose(list(nx_graph.edges))

In [None]:
# Attach a "labels" attribute to the nodes of the k-NN graph.
# NOTE(review): `labels` is an empty list, not a per-node dict; per the networkx
# docs a non-dict value is assigned to every node, so all nodes would share this
# one list object — confirm that this is intended.
labels = []
nx.set_node_attributes(nx_graph, labels, "labels")
# Appended after the attribute was set, on the same list object passed above.
labels.append("foo")

### Try Delaunay triangulation

In [None]:
from scipy.spatial import Delaunay

In [None]:
# Point coordinates for the triangulation: the table as a NumPy array.
points = np.array(data_)

In [None]:
# Delaunay triangulation of all points.
tri = Delaunay(points)

In [None]:
# Inspect the `neighbors` entry of the first simplex.
tri.neighbors[0]

In [None]:
# get node feature matrix
# NOTE(review): the "labels" attribute was set from a list earlier in this
# notebook; attr_matrix groups nodes by attribute value, which presumably needs
# hashable values — confirm this call works and returns what is expected.
nx.attr_matrix(nx_graph, node_attr='labels')

In [None]:
# Assemble the PyG graph object: node features come from `node_feat`, and the
# connectivity from the networkx edge list, transposed so each column is one edge.
# `pos` is left unset (the table is used as features only).
data = Data(
    x=torch.tensor(node_feat),
    edge_index=torch.tensor(np.transpose(list(nx_graph.edges))),
)

In [None]:
# deprecated function - gotta figure out RandomLinkSplit
# Split a clone: train_test_split_edges modifies the object it is given (it
# removes `edge_index`), and `data` is still needed intact for the
# RandomLinkSplit call later in this notebook.
data_split = train_test_split_edges(data.clone())

In [None]:
# Show the result of the edge split above.
data_split

In [None]:
# Apply the RandomLinkSplit transform; the result is unpacked into three objects.
# NOTE(review): assumes `data` still has its `edge_index` populated at this point — confirm.
ls_train, ls_val, ls_test = tfs(data)

In [None]:
# Show the training split.
ls_train

In [None]:
# Show the `edge_label_index` attribute of the training split.
ls_train.edge_label_index

In [None]:
# Show the first split again for comparison.
data_split

In [None]:
# quickly see graph 
import matplotlib.pyplot as plt
def show_graph_with_labels(adjacency_matrix, tresh):
    """Draw the graph formed by all matrix entries that reach a threshold.

    Args:
        adjacency_matrix: 2-D array; entry (i, j) becomes an edge i-j when it
            is greater than or equal to ``tresh``.
        tresh: inclusive lower bound an entry must reach to become an edge.

    Returns:
        The undirected ``nx.Graph`` that was drawn. Nodes are only created
        through edges, so indices without any qualifying entry are absent.
    """
    selected = adjacency_matrix >= tresh
    src_idx, dst_idx = np.nonzero(selected)

    gr = nx.Graph()
    # tolist() hands plain Python ints to networkx as node ids.
    gr.add_edges_from(zip(src_idx.tolist(), dst_idx.tolist()))

    nx.draw(gr, with_labels=False, node_size=1)
    plt.show()
    return gr

In [None]:
# Draw the k-NN graph, keeping the entries of `graph_array` that are >= 0.8.
gr_orig = show_graph_with_labels(graph_array, 0.8)

In [None]:
# `Network` is the pyvis graph widget; it is not imported anywhere else in the
# visible cells, so import it here.
from pyvis.network import Network

# Interactive view of the original graph, with the physics controls shown.
net = Network(notebook = True)
net.show_buttons(filter_=['physics'])
net.from_nx(gr_orig)
net.show('graph.html')

In [None]:
def Extract(lst, index):
    """Return a list holding ``item[index]`` for every item of ``lst``.

    Args:
        lst: iterable of indexable items (tuples, lists, strings, dicts, ...).
        index: index or key looked up in each item.

    Returns:
        A new list with one extracted element per item, in the original order.
    """
    picked = []
    for entry in lst:
        picked.append(entry[index])
    return picked

### Define encoder layer

In [None]:
class GCNEncoder(torch.nn.Module):
    """Two-layer GCN encoder.

    The first layer maps ``in_channels`` to ``2 * out_channels`` and is followed
    by a ReLU; the second layer maps down to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        hidden_channels = 2 * out_channels
        # cached=True on both layers: only for transductive learning.
        self.conv1 = GCNConv(in_channels, hidden_channels, cached=True)
        self.conv2 = GCNConv(hidden_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        """Encode node features ``x`` over the graph given by ``edge_index``."""
        hidden = self.conv1(x, edge_index)
        hidden = hidden.relu()
        return self.conv2(hidden, edge_index)

### Define autoencoder

In [None]:
from torch_geometric.nn import GAE

In [None]:
# parameters
out_channels = 2
epochs = 200

# use the GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# inputs: node features as float32 and the training edges, both on `device`
x = data_split.x.to(device)
x = x.float()
train_pos_edge_index = data_split.train_pos_edge_index.to(device)

# Encoder input width is read from the feature matrix instead of being
# hard-coded to 3, so it always matches the loaded table.
num_features = x.size(1)

# model
model = GAE(GCNEncoder(num_features, out_channels))
model = model.float()
model = model.to(device)

# initialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


In [None]:
def train():
    """Run one optimisation step on the training edges.

    Uses the module-level ``model``, ``optimizer``, ``x`` and
    ``train_pos_edge_index``.

    Returns:
        The reconstruction loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()
    embeddings = model.encode(x, train_pos_edge_index)
    recon = model.recon_loss(embeddings, train_pos_edge_index)
    # A variational model would additionally add
    # (1 / data.num_nodes) * model.kl_loss() to the loss here.
    recon.backward()
    optimizer.step()
    return float(recon)


def test(pos_edge_index, neg_edge_index):
    """Evaluate the model on the given positive and negative edges.

    Node embeddings are computed from the module-level ``x`` and
    ``train_pos_edge_index`` with gradients disabled.

    Args:
        pos_edge_index: edges that should be predicted as present.
        neg_edge_index: edges that should be predicted as absent.

    Returns:
        Whatever ``model.test`` returns; the caller in this notebook unpacks it
        as ``(auc, ap)``.
    """
    model.eval()
    with torch.no_grad():
        embeddings = model.encode(x, train_pos_edge_index)
    scores = model.test(embeddings, pos_edge_index, neg_edge_index)
    return scores

In [None]:
# Train for `epochs` epochs; after every step, score the held-out test edges
# and print the metrics.
for epoch in range(1, epochs + 1):
    loss = train()  # kept for inspection; not part of the printed line
    auc, ap = test(
        data_split.test_pos_edge_index,
        data_split.test_neg_edge_index,
    )
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))

In [None]:
# Encode the node features with the current model to get the node embeddings.
Z = model.encode(x, train_pos_edge_index)

In [None]:
# Score every node pair: sigmoid of the embedding inner products (Z @ Z^T).
decoded = torch.sigmoid(torch.matmul(Z,torch.transpose(Z,0,1)))

In [None]:
# Detach from autograd and move to the CPU before converting to NumPy.
decoded_array = np.array(decoded.detach().cpu())

In [None]:
# Show the reconstructed scores ...
decoded_array

In [None]:
# ... next to the original k-NN adjacency array.
graph_array

In [None]:
# Draw the reconstructed graph, keeping the entries of `decoded_array` that are >= 0.8.
gr_decoded = show_graph_with_labels(decoded_array, 0.8)

In [None]:
# `Network` is the pyvis graph widget; it is not imported anywhere else in the
# visible cells, so import it here.
from pyvis.network import Network

# Interactive view of the reconstructed graph, with the physics controls shown.
net = Network(notebook = True)
net.show_buttons(filter_=['physics'])
net.from_nx(gr_decoded)
# Written to its own file: the view of the original graph is saved as
# 'graph.html', and reusing that name would overwrite it.
net.show('graph_decoded.html')