# Part 2: Graph Convolutional Networks (GCN)
## Assignment 2
### Name: Saikat Moi
### Roll Number: 20CS10050

In [33]:
# import important libbraries

import os
import math
import numpy as np
import time

#for plotting

import matplotlib.pyplot as plt
import seaborn as sns

import tqdm

# pytorch

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim

# torchvision
import torchvision
from torchvision.datasets import CIFAR10
from torchvision import transforms


import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cpu


In [34]:
import torch_geometric
import torch_geometric.nn as g_nn
import torch_geometric.data as g_data
import networkx as nx

In [35]:
import numpy as np
import networkx as nx
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.functional import relu, dropout
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

def load_data():
    # Read .content file to get node features and labels
    with open("../../dataset/cora.content", "r") as content_file:
        content_lines = content_file.readlines()
    
    # Read .cites files to build the citation graph
    train_cites = np.loadtxt("../../dataset/cora_train.cites", dtype=int)
    test_cites = np.loadtxt("../../dataset/cora_test.cites", dtype=int)
    
    # Create a directed graph
    citation_graph = nx.DiGraph()
    train_graph = nx.DiGraph()
    test_graph = nx.DiGraph()
    
    # Add edges to the graph
    for paper1, paper2 in train_cites:
        citation_graph.add_edge(paper2, paper1)  # Adding the edge with correct direction
        train_graph.add_edge(paper2, paper1)
        

    
    for paper1, paper2 in test_cites:
        citation_graph.add_edge(paper2, paper1)  # Adding the edge with correct direction
        test_graph.add_edge(paper2, paper1)

    
    # Extract node features and labels
    node_features = {}
    node_labels = {}
    labels = []
    for line in content_lines:
        data = line.strip().split()
        paper_id = int(data[0])
        class_label = data[-1]
        node_features[paper_id] =([int(x) for x in data[1:-1]])
        global features_dimension
        features_dimension=len(node_features[paper_id])
        node_labels[paper_id] = class_label 
        labels.append(class_label)      
    

    #print(labels)
    my_set = set(labels)
    print(my_set)
    labels=list(my_set)
    
    
    # Map paper IDs to one-hot encoded labels
    for line in content_lines:
        data = line.strip().split()
        paper_id = int(data[0])
        class_label = data[-1]
        node_labels[paper_id] = labels.index(class_label)
          

        

    return node_features, node_labels, citation_graph,train_graph,test_graph


node_features, node_labels, citation_graph,train_graph,test_graph= load_data()

features_train = []
labels_train=[]
for node_id in train_graph.nodes():
    features_train.append(node_features[node_id])
    labels_train.append(node_labels[node_id])

features_test = []
labels_test=[]

for node_id in test_graph.nodes():

    features_test.append(node_features[node_id])
    labels_test.append(node_labels[node_id])


features_all = []
labels_all=[]
for node_id in citation_graph.nodes():
    features_all.append(node_features[node_id])
    labels_all.append(node_labels[node_id])


adj_train= nx.adjacency_matrix(train_graph).todense()
adj_test=nx.adjacency_matrix(test_graph).todense()
adj_all=nx.adjacency_matrix(citation_graph).todense()


# Convert adjacency matrix and feature matrix to PyTorch tensors
adj_train = torch.FloatTensor(adj_train)
adj_test = torch.FloatTensor(adj_test)
features_train = torch.FloatTensor(features_train)
features_test = torch.FloatTensor(features_test)
labels_train = torch.LongTensor(labels_train)
labels_test = torch.LongTensor(labels_test)
adj_all = torch.FloatTensor(adj_all)
labels_all = torch.LongTensor(labels_all)
features_all = torch.FloatTensor(features_all)

{'Theory', 'Reinforcement_Learning', 'Probabilistic_Methods', 'Case_Based', 'Genetic_Algorithms', 'Rule_Learning', 'Neural_Networks'}


In [36]:

class GraphConvolutionLayer(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(GraphConvolutionLayer, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)
    
    def forward(self, adjacency_matrix, input_features):
        output = torch.mm(adjacency_matrix, input_features)  
        output = self.linear(output)  
        return output

class GCN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate):
        super(GCN, self).__init__()
        self.gc1 = GraphConvolutionLayer(input_dim, hidden_dim)
        self.gc2 = GraphConvolutionLayer(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout_rate)
    
    def forward(self, adjacency_matrix, input_features):
        h = self.gc1(adjacency_matrix, input_features)
        h = F.relu(h)
        h = self.dropout(h)
        h = self.gc2(adjacency_matrix, h)
        return h

In [37]:
# Initialize model
input_dim = features_dimension
hidden_dim = 16
output_dim = 7
dropout_rate = 0.5
model = GCN(input_dim, hidden_dim, output_dim, dropout_rate)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop
epochs = 500
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(adj_train, features_train)
    loss = criterion(output, labels_train)
    loss.backward()
    optimizer.step()
    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item()}')

Epoch [1/500], Loss: 1.9907572269439697
Epoch [2/500], Loss: 1.8694099187850952
Epoch [3/500], Loss: 1.724833607673645
Epoch [4/500], Loss: 1.6306660175323486
Epoch [5/500], Loss: 1.5436969995498657
Epoch [6/500], Loss: 1.4944558143615723
Epoch [7/500], Loss: 1.4548755884170532
Epoch [8/500], Loss: 1.4074416160583496
Epoch [9/500], Loss: 1.3831820487976074
Epoch [10/500], Loss: 1.3370227813720703
Epoch [11/500], Loss: 1.279876947402954
Epoch [12/500], Loss: 1.2909314632415771
Epoch [13/500], Loss: 1.2848511934280396
Epoch [14/500], Loss: 1.2377833127975464
Epoch [15/500], Loss: 1.2289363145828247
Epoch [16/500], Loss: 1.2160353660583496
Epoch [17/500], Loss: 1.2297110557556152
Epoch [18/500], Loss: 1.18934166431427
Epoch [19/500], Loss: 1.197855830192566
Epoch [20/500], Loss: 1.1703835725784302
Epoch [21/500], Loss: 1.1651896238327026
Epoch [22/500], Loss: 1.1777784824371338
Epoch [23/500], Loss: 1.1843152046203613
Epoch [24/500], Loss: 1.163120985031128
Epoch [25/500], Loss: 1.1713186

In [38]:
from sklearn.metrics import classification_report

# Define the class labels
class_labels = ['Theory', 'Reinforcement_Learning', 'Probabilistic_Methods', 
                'Case_Based', 'Genetic_Algorithms', 'Rule_Learning', 'Neural_Networks']


model.eval()
with torch.no_grad():
    output = model(adj_test, features_test)
    predicted_labels = torch.argmax(output, dim=1)

# Convert predicted labels and ground truth labels to numpy arrays
predicted_labels_np = predicted_labels.numpy()
labels_test_np = labels_test.numpy()

In [39]:
report = classification_report(labels_test_np, predicted_labels_np, target_names=class_labels)

# Print the classification report
print(report)

# Write the classification report to a file
with open("gcn_metrics.txt", "w") as file:
    file.write(report)

                        precision    recall  f1-score   support

                Theory       0.40      0.10      0.16       174
Reinforcement_Learning       0.56      0.04      0.07       128
 Probabilistic_Methods       0.72      0.10      0.18       207
            Case_Based       0.72      0.16      0.26       166
    Genetic_Algorithms       0.82      0.18      0.29       210
         Rule_Learning       0.37      0.07      0.12        98
       Neural_Networks       0.30      0.95      0.45       366

              accuracy                           0.34      1349
             macro avg       0.56      0.23      0.22      1349
          weighted avg       0.54      0.34      0.26      1349

