In [1]:
import dgl
import torch
import torch.nn as nn
import _pickle as cPickle
import torch.nn.functional as F
import bz2
from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from dgl.nn import GraphConv
from sklearn.metrics import classification_report
from torchmetrics.functional import precision_recall

Using backend: pytorch


In [2]:
class GCN(nn.Module):
    def __init__(self, in_feats, h_feats,out_feats):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, out_feats)

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        with g.local_scope():
            g.ndata['h'] = h
            return dgl.mean_nodes(g, 'h')

In [3]:
class MLP(nn.Module):
    def __init__(self,in_feats,h_feats,num_classes):
        super(MLP,self).__init__()
        self.fc1=nn.Linear(in_feats,h_feats)
        self.fc2=nn.Linear(h_feats,num_classes)
    
    def forward(self,x):
        x=F.relu(self.fc1(x))
        return self.fc2(x)

In [4]:
class Model(nn.Module):
    def __init__(self,in_feats,h_feats,out_feats,num_class):
        self.conv=GCN(in_feats,h_feats,out_feats)
        self.fc=MLP(out_feats,10,num_class)
    
    def forward(self,g,feats):
        c1=F.relu(self.conv(g,feats))
        c2=F.softmax(self.fc(feats))
        return c2

In [3]:
g= bz2.BZ2File("kfold2/graph_dataset/graphs_data_All.pbz2", 'rb')
dataset=cPickle.load(g)
# g1= bz2.BZ2File("kfold/graph_dataset/graphs_data_1000.pbz2", 'rb')
# g2= bz2.BZ2File("kfold/graph_dataset/graphs_data_2000.pbz2", 'rb')
# g3= bz2.BZ2File("kfold/graph_dataset/graphs_data_3000.pbz2", 'rb')
# g4= bz2.BZ2File("kfold/graph_dataset/graphs_data_3600.pbz2", 'rb')
# dataset1 = cPickle.load(g1)
# dataset2 = cPickle.load(g2)
# dataset3 = cPickle.load(g3)
# dataset4 = cPickle.load(g4)

In [5]:
# dataset=dataset1+dataset2+dataset3+dataset4

In [6]:
g_test=bz2.BZ2File("kfold2/graph_dataset/graphs_data_All.pbz2", 'rb')
dataset_test = cPickle.load(g_test)

In [7]:
import random
random.shuffle(dataset)

In [8]:
num_test=len(dataset_test)
print(num_test)

1200


In [9]:
test_sampler = SubsetRandomSampler(torch.arange(num_test))
test_dataloader = GraphDataLoader(dataset_test, sampler=test_sampler, batch_size=5, drop_last=False)

In [10]:
# Create the model with given dimensions
model = GCN(5, 16, 3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [11]:
folds=10
num_examples=len(dataset)
fsize=num_examples//10
val_reports=[]
for i in range(folds):
    trll = 0
    trlr = i * fsize
    vall = trlr
    valr = i * fsize + fsize
    trrl = valr
    trrr = num_examples

    train_left_indices = torch.arange(trll,trlr)
    train_right_indices = torch.arange(trrl,trrr)

    train_indices = torch.cat([train_left_indices,train_right_indices])
    val_indices = torch.arange(vall,valr)

    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    train_dataloader = GraphDataLoader(dataset, sampler=train_sampler, batch_size=5, drop_last=False)
    val_dataloader = GraphDataLoader(dataset, sampler=val_sampler, batch_size=5, drop_last=False)


    for epoch in range(100):
        l=0.0
        cnt=0
        for batched_graph, labels in train_dataloader:
            pred = model(batched_graph, batched_graph.ndata['h'].float())
            loss = F.cross_entropy(pred, labels)
            l+=loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            cnt+=1
        if(epoch%10==0):
            print(f"epoch : {epoch+1} loss : {l/cnt}")
    
    with torch.no_grad():

        num_correct = 0
        num_tests = 0
        pred_labels=[]
        correct_labels=[]

        for batched_graph, labels in val_dataloader:
            pred = model(batched_graph, batched_graph.ndata['h'].float())
            num_correct += (pred.argmax(1) == labels).sum().item()
            pred_labels+=list(pred.argmax(1))
            correct_labels+=list(labels)

            num_tests += len(labels)
        print('Val accuracy:', num_correct / num_tests)
        val_reports.append(classification_report(correct_labels, pred_labels, target_names=["low","medium","high"]))
    torch.save(model.state_dict,f"kfold/trained_model/classification_{i}.model")



In [75]:
torch.save(model.state_dict,"kfold/trained_model/classification")

In [87]:
print(val_reports[4])

              precision    recall  f1-score   support

         low       0.54      0.68      0.60        41
      medium       0.38      0.21      0.27        39
        high       0.53      0.62      0.57        40

    accuracy                           0.51       120
   macro avg       0.48      0.50      0.48       120
weighted avg       0.49      0.51      0.48       120



In [78]:
num_correct = 0
num_tests = 0
pred_labels=[]
correct_labels=[]

for batched_graph, labels in test_dataloader:
    pred = model(batched_graph, batched_graph.ndata['h'].float())
    num_correct += (pred.argmax(1) == labels).sum().item()
    pred_labels+=list(pred.argmax(1))
    correct_labels+=list(labels)

    num_tests += len(labels)
print('Test accuracy:', num_correct / num_tests)

Test accuracy: 0.6033057851239669


In [79]:
print(classification_report(correct_labels, pred_labels, target_names=["low","medium","high"]))

              precision    recall  f1-score   support

         low       0.62      0.97      0.76       136
      medium       0.00      0.00      0.00        59
        high       0.48      0.30      0.37        47

    accuracy                           0.60       242
   macro avg       0.37      0.42      0.37       242
weighted avg       0.44      0.60      0.50       242



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
# rec,rec=precision_recall(torch.tensor(pred_labels,dtype=torch.long), torch.tensor(correct_labels,dtype=torch.long), average='none', num_classes=3)

In [80]:
num_examples = len(dataset)
num_train = int(num_examples)

train_sampler = SubsetRandomSampler(torch.arange(num_train))
train_dataloader = GraphDataLoader(dataset, sampler=train_sampler, batch_size=5, drop_last=False)

In [83]:
num_correct = 0
num_tests = 0
pred_labels=[]
correct_labels=[]
with torch.no_grad():
    for batched_graph, labels in train_dataloader:
        pred = model(batched_graph, batched_graph.ndata['h'].float())
        num_correct += (pred.argmax(1) == labels).sum().item()
        pred_labels+=list(pred.argmax(1))
        correct_labels+=list(labels)
        num_tests += len(labels)
    print('Train accuracy:', num_correct / num_tests)

Train accuracy: 0.4866666666666667


In [82]:
print(classification_report(correct_labels, pred_labels, target_names=["low","medium","high"]))

              precision    recall  f1-score   support

         low       0.53      0.66      0.59       400
      medium       0.39      0.29      0.33       400
        high       0.50      0.51      0.51       400

    accuracy                           0.49      1200
   macro avg       0.48      0.49      0.48      1200
weighted avg       0.48      0.49      0.48      1200



In [28]:
# prec,rec=precision_recall(torch.tensor(pred_labels,dtype=torch.long), torch.tensor(correct_labels,dtype=torch.long), average='none', num_classes=3)
# print("precision : ",prec,"Recall : ",rec)

precision :  tensor([0.5212, 0.4233, 0.4444]) Recall :  tensor([0.6434, 0.2447, 0.5294])
