In [1]:
## mount google drive
# Attach Google Drive to the Colab runtime at a fixed mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [23]:
## import library 
import numpy as np
import pandas as pd
import scipy.sparse as sp
import torch
import torch
import torch.nn.functional as F
import torch.optim as optim 
import torch.nn as nn
import random
import math
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import copy
from tqdm import tqdm

In [5]:
## load adj matrix and features
# Name the two CSV inputs first, then read each one into its own DataFrame.
ADJ_CSV_PATH = '/content/drive/MyDrive/Thesis_5_1/data/master-graph-adj-undirected.csv'
FEATURE_CSV_PATH = '/content/drive/MyDrive/Thesis_5_1/data/feature-vector-final.csv'

adj_matrix = pd.read_csv(filepath_or_buffer=ADJ_CSV_PATH)
feature_matrix = pd.read_csv(filepath_or_buffer=FEATURE_CSV_PATH)

In [6]:
# Make the node-name column the index of both frames.
# Written to be safe to re-run: the previous in-place version raised KeyError
# on a second execution, because 'Unnamed: 0', 'Name' and 'Nodes' no longer
# exist as columns once the cell has run.

# Drop the leftover row-number column only if it is still present.
feature_matrix = feature_matrix.drop(columns='Unnamed: 0', errors='ignore')
# After the first run 'Name' / 'Nodes' are already the index, so skip them.
if 'Name' in feature_matrix.columns:
  feature_matrix = feature_matrix.set_index('Name')
if 'Nodes' in adj_matrix.columns:
  adj_matrix = adj_matrix.set_index('Nodes')

In [12]:
## converting to sparse matrix and then torch.tensor
# Node features: DataFrame -> SciPy CSR -> dense float32 tensor.
features = sp.csr_matrix(feature_matrix.astype(pd.SparseDtype("float64",0)).sparse.to_coo())
feature_vector = torch.tensor(features.toarray(), dtype=torch.float32)

# Adjacency: DataFrame -> SciPy COO (float32) -> torch sparse COO tensor.
adj = adj_matrix.astype(pd.SparseDtype("float64",0)).sparse.to_coo()
sparse_mx = adj.tocoo().astype(np.float32)
# Row 0 holds the row indices and row 1 the column indices of the non-zeros.
indices = torch.from_numpy(np.vstack((sparse_mx.row,sparse_mx.col)).astype(np.int64))
values = torch.from_numpy(sparse_mx.data)
shape = torch.Size(sparse_mx.shape)
# torch.sparse.FloatTensor(indices, values, shape) is a deprecated legacy
# constructor; torch.sparse_coo_tensor builds the same float32 COO tensor.
adj_vector = torch.sparse_coo_tensor(indices, values, shape)
# NOTE(review): no self-loops or degree normalisation are applied in this
# cell -- confirm the CSV already holds the matrix the GCN should propagate over.

In [13]:
## Graph Convolution Layer Module 
class GraphConvolution(Module):
  """Single graph-convolution layer: ``adj @ (input @ weight)`` plus an optional bias.

  Args:
    in_features: number of columns in the input feature matrix.
    out_features: number of columns in the layer output.
    bias: if True, a learnable bias of length ``out_features`` is added to the
      output; if False, ``bias`` is registered as ``None``.
  """

  def __init__(self,in_features,out_features,bias=True):
    super().__init__()
    self.in_features = in_features
    self.out_features = out_features
    # Storage is allocated uninitialised here; reset_parameters() fills it.
    self.weight = Parameter(torch.FloatTensor(in_features,out_features))
    if not bias:
      self.register_parameter('bias',None)
    else:
      self.bias = Parameter(torch.FloatTensor(out_features))
    self.reset_parameters()

  def reset_parameters(self):
    """Re-draw the weight, then the bias (if any), from U(-b, b) with b = 1/sqrt(out_features)."""
    bound = 1.0 / math.sqrt(self.weight.size(1))
    # Weight first, bias second: keeps the order of random draws unchanged.
    to_init = [self.weight] if self.bias is None else [self.weight, self.bias]
    for param in to_init:
      param.data.uniform_(-bound, bound)

  def forward(self,input,adj):
    """Project ``input`` with the weight matrix, then multiply by ``adj``.

    Args:
      input: dense 2-D tensor passed to ``torch.mm`` with the weight.
      adj: matrix passed as the first operand of ``torch.spmm``.

    Returns:
      ``adj @ (input @ weight)``, with the bias added when the layer has one.
    """
    projected = torch.mm(input, self.weight)
    aggregated = torch.spmm(adj, projected)
    if self.bias is None:
      return aggregated
    return aggregated + self.bias

In [40]:
## model class
class GCN(Module):
  """Three stacked GraphConvolution layers producing node embeddings.

  Args:
    nfeat: input feature width fed to the first layer.
    nhid: output width of the first layer.
    nhid2: output width of the second layer.
    nembed: output width of the last layer (the embedding size).
    dropout: drop probability applied after each of the first two layers;
      only active while the module is in training mode.
  """

  def __init__(self,nfeat,nhid,nhid2,nembed,dropout):
    super().__init__()
    self.dropout = dropout
    # Layers are created in order gcn1 -> gcn2 -> gcn3.
    self.gcn1 = GraphConvolution(nfeat,nhid)
    self.gcn2 = GraphConvolution(nhid,nhid2)
    self.gcn3 = GraphConvolution(nhid2,nembed)

  def forward(self,x,adj):
    """Run ``x`` through the three layers using ``adj`` and return the last layer's output."""
    hidden = x
    # The first two layers share the same post-processing: ReLU then dropout.
    for layer in (self.gcn1, self.gcn2):
      hidden = F.relu(layer(hidden, adj))
      hidden = F.dropout(hidden, self.dropout, training=self.training)
    # The final layer is linear: no activation or dropout on the embeddings.
    return self.gcn3(hidden, adj)

In [41]:
# Seed torch's RNG so the randomly initialised weights are the same on every
# fresh run of the notebook.
SEED = 0
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GCN(nfeat=feature_vector.shape[1],nhid=64,nhid2=32,nembed=16,dropout=0.25)

# Move the model and both inputs to the selected device.
# Two problems in the previous version are fixed here:
#   * `device == 'cuda'` compared a torch.device with a str, which does not
#     compare equal, so the GPU branch never ran;
#   * `feature_vector.cuda()` returns a new tensor, so its result must be
#     re-bound or the original CPU tensor keeps being used.
# .to(device) does nothing when the data is already on that device.
model = model.to(device)
feature_vector = feature_vector.to(device)
adj_vector = adj_vector.to(device)

# Build the optimizer after the model has been moved to its final device.
optimizer = optim.Adam(model.parameters())

In [42]:
## compute node embeddings
# NOTE(review): no loss is computed and backward() is never called in this
# notebook, so the previous 50-iteration loop did not train anything:
# optimizer.step() / zero_grad() under torch.no_grad() had no gradients to
# apply. Each pass only re-sampled a dropout mask (the model was in train
# mode), and only the last pass was kept.
# The embeddings therefore come from the model's current weights. A single
# forward pass in eval mode gives the same kind of output without dropout noise.
# TODO: add a training objective (loss + backward) if learned embeddings are wanted.
model.eval()
with torch.no_grad():
  node_embedd = model(feature_vector,adj_vector)
# Bring the result back to host memory as a NumPy array for export.
embed = node_embedd.cpu().detach().numpy()

In [43]:
# Sanity check: show the shape of the embedding array produced by the model.
embed.shape

(1082, 16)

In [44]:
# Label each embedding row with its node name and write the table to Drive.
NODES_CSV_PATH = '/content/drive/MyDrive/Thesis_5_1/data/final-nodes.csv'
EMBEDDING_CSV_PATH = '/content/drive/MyDrive/Thesis_5_1/data/nodes-embedding-GCN-3-layer.csv'

embedding_df = pd.DataFrame(embed)
all_nodes_list = pd.read_csv(NODES_CSV_PATH)
# assign() aligns on the row index, the same way column assignment does.
embedding_df = embedding_df.assign(nodes=all_nodes_list['Nodes'])
embedding_df.to_csv(EMBEDDING_CSV_PATH)