# **Imports**

In [9]:
import torch

def format_pytorch_version(version):
  """Return a PyTorch version string without its local build suffix.

  Everything from the first '+' onward is dropped, so '2.1.0+cu118'
  becomes '2.1.0'. A string containing no '+' is returned unchanged.
  """
  release, _plus, _local_tag = version.partition('+')
  return release

# Version string of the installed PyTorch, and the same string with any
# '+<build tag>' suffix removed; the pip commands below interpolate TORCH
# into the wheel index URL as {TORCH}.
TORCH_version = torch.__version__
TORCH = format_pytorch_version(TORCH_version)

def format_cuda_version(version):
  """Convert a CUDA version into the tag used in the PyG wheel index URL.

  Args:
    version: The value of ``torch.version.cuda`` -- a string such as
      '11.8', or None when the installed PyTorch is a CPU-only build.

  Returns:
    'cu' followed by the version with its dots removed (e.g. 'cu118'),
    or 'cpu' when ``version`` is None.
  """
  # CPU-only PyTorch builds report no CUDA version at all. Calling
  # .replace on None would raise AttributeError, so map that case to the
  # 'cpu' wheel tag instead.
  if version is None:
    return 'cpu'
  return 'cu' + version.replace('.', '')

# CUDA version reported by the installed PyTorch build, and the tag derived
# from it; the pip commands below interpolate CUDA into the wheel index URL
# as {CUDA}.
CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)

!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install pyg-lib -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-geometric

import requests
import pandas as pd
import io
import gzip
from torch_geometric.data import Data
import networkx as nx
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn.models import InnerProductDecoder, VGAE
from torch_geometric.nn.conv import GCNConv
from torch_geometric.utils import negative_sampling, remove_self_loops, add_self_loops
import os
from torch.optim import Adam
import torch_geometric.transforms as T
from torch_geometric.utils import train_test_split_edges
from torch_geometric.nn.models import Node2Vec

# Write the package list printed by `pip freeze` to requirements.txt
# (shell redirection overwrites any existing file of that name).
!pip freeze > requirements.txt

Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu118.html
Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu118.html
Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu118.html
Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu118.html
Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu118.html


# **Data acquisition**

In [10]:
# Location of the gzip-compressed edge list on the SNAP server
url = "https://snap.stanford.edu/data/facebook_combined.txt.gz"

# Download the archive. The timeout keeps a stalled connection from hanging
# the notebook indefinitely.
response = requests.get(url, timeout=60)

# Stop right here on an HTTP error status. Merely printing a message and
# carrying on would leave `df` undefined, and the failure would only surface
# in a later cell as an unrelated-looking NameError.
response.raise_for_status()

# Raw (still compressed) bytes of the response body
content = response.content

# Decompress in memory and parse the space-separated "source target" pairs
# into a two-column DataFrame
with gzip.open(io.BytesIO(content), 'rt') as f:
    df = pd.read_csv(f, sep=" ", header=None, names=["source", "target"])

# **Data cleansing**

In [11]:
# Put every edge into a canonical orientation (smaller node id in 'source'),
# so that (a, b) and (b, a) collapse into the same row
reversed_rows = df['source'] > df['target']
flipped_pairs = df.loc[reversed_rows, ['target', 'source']].to_numpy()
df.loc[reversed_rows, ['source', 'target']] = flipped_pairs

# Keep a single copy of each edge, then discard self-loops
df = df.drop_duplicates()
is_self_loop = df['source'] == df['target']
df = df[~is_self_loop]

# **Data preparation**

In [12]:
# Run on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
learning_rate = 0.01
num_features = 128
epoch = 100
hidden_channels = 32
out_channels = 16

# Seed PyTorch's global RNG so the embedding initialisation and the random
# edge split below come out the same on every Restart & Run All
SEED = 42
torch.manual_seed(SEED)

# Generating node feature matrix using node2vec
edge_index = torch.tensor(df[['source', 'target']].values.T, dtype=torch.long)

# The cleansing step stores each undirected edge once, with source < target.
# Node2Vec samples its random walks along `edge_index` as directed edges, so
# it is given both directions; otherwise walks could only move from lower to
# higher node ids. `edge_index` itself is left untouched for the split below.
walk_edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
n2v_model = Node2Vec(
    edge_index=walk_edge_index,
    embedding_dim=num_features,
    walk_length=80,
    context_size=10,
    walks_per_node=10,
    sparse=True,
)

# Call the module rather than .forward() directly (the idiomatic nn.Module
# entry point, which also runs any registered hooks).
# NOTE(review): n2v_model is not trained in this cell, so `x` is the embedding
# table as initialised -- confirm whether training is meant to happen first.
data = Data(x=n2v_model(), edge_index=edge_index)

# Splitting data into train, test and validation datasets.
# The full edge list and feature matrix are captured before the split
# replaces `data`.
all_edge_index = data.edge_index.to(device)
all_edge_x = data.x.to(device)
data = train_test_split_edges(data, val_ratio=0.05, test_ratio=0.1).to(device)

