# Link Prediction

In [6]:
import networkx as nx
import matplotlib.pyplot as plt
from torch_geometric.utils import from_networkx 
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv, to_hetero_with_bases
from sklearn.metrics import confusion_matrix
import seaborn as sns
from torch_geometric.utils import train_test_split_edges
from torch_geometric.nn import VGAE

## 1. data analysis

In [None]:
# Load the airport graph from the GraphML file; `g` is reused by the later cells.
g = nx.read_graphml("airportsAndCoordAndPop.graphml")
# First look at the graph: no positions are passed, so NetworkX picks the layout.
nx.draw_networkx(g)

In [None]:
# Place every node at its geographic position: x = 'lon', y = 'lat'.
# The attributes are converted with float() before being used as coordinates.
pos = {
    name: (float(attrs['lon']), float(attrs['lat']))
    for name, attrs in g.nodes(data=True)
}
# Small markers and no labels keep the drawing readable.
nx.draw_networkx(g, with_labels=False, node_size=10, pos=pos)

With Gephi we obtained this graph:
![](./gephi_analyse/airports_coord_pop_graph.svg)

In [None]:
# Inspect every node together with its attribute dictionary
# (the later cells read the 'lon', 'lat' and 'population' attributes).
g.nodes(data=True)

In [None]:
# Drop the graph-level attributes before converting.
# NOTE(review): presumably done so that only node/edge data reaches the PyG
# object -- confirm against from_networkx.
g.graph = {}
# The three node attributes are grouped into the feature matrix `data.x`.
data = from_networkx(g, group_node_attrs=["lon", "lat", "population"])

num_nodes = data.num_nodes
train_ratio = 0.80  # each node is a training node with probability 0.8
# Random boolean mask: True marks a training node. The draw is unseeded, so
# the split (and its exact size) differs from one run to the next.
mask = torch.rand(num_nodes) < train_ratio
data.train_mask = mask
data.test_mask = ~data.train_mask
# Hide the attributes of the nodes that are not in the training set: their
# feature rows are replaced by zeros. The feature width is read from `data.x`
# rather than hard-coded so it follows the `group_node_attrs` list above.
temp = torch.zeros((num_nodes, data.x.size(1)), dtype=torch.float)
temp[data.train_mask] = data.x[data.train_mask]
data.x = temp
# Split the edges on a clone so that `data` itself is left untouched.
# `clone()` is the documented copy method of PyG `Data` objects.
# No validation edges, 20% of the edges held out for testing.
data_split = train_test_split_edges(
    data.clone(), val_ratio=0.0, test_ratio=0.2
)


## 2. First Model

class Encoder(nn.Module):
    """Graph-convolutional encoder with one shared layer and two output heads.

    A first GCNConv layer maps the input features to ``2 * out_channels``
    hidden channels; two further GCNConv layers, ``conv_mu`` and
    ``conv_logstd``, each map that hidden representation to ``out_channels``.
    NOTE(review): the head names suggest the (mu, logstd) pair expected by a
    variational graph auto-encoder such as the imported VGAE -- confirm at
    the call site.
    """

    def __init__(self, in_channels, out_channels):
        """Build the three GCNConv layers.

        Args:
            in_channels: Number of input features per node.
            out_channels: Number of channels produced by each output head.
        """
        super().__init__()
        hidden_channels = 2 * out_channels
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv_mu = GCNConv(hidden_channels, out_channels)
        self.conv_logstd = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return the outputs of both heads as a ``(mu, logstd)`` tuple.

        Args:
            x: Node features passed to the first convolution.
            edge_index: Graph connectivity passed to every convolution.
        """
        # Shared hidden representation, ReLU-activated.
        hidden = torch.relu(self.conv1(x, edge_index))
        mu = self.conv_mu(hidden, edge_index)
        logstd = self.conv_logstd(hidden, edge_index)
        return mu, logstd