In [38]:
import dgl
import torch
import torch.nn as nn
import _pickle as cPickle
import torch.nn.functional as F
import bz2
from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from dgl.nn import GraphConv
from sklearn.metrics import classification_report
from torchmetrics.functional import precision_recall

In [39]:
class GCN(nn.Module):
    def __init__(self, in_feats, h_feats,out_feats):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, out_feats)

    def forward(self, g, in_feat):
        # print("In GCN")
        h = F.relu(self.conv1(g, in_feat))
        h = F.relu(self.conv2(g, h))
        with g.local_scope():
            g.ndata['h'] = h
            return dgl.mean_nodes(g, 'h')

In [40]:
class MLP(nn.Module):
    def __init__(self,in_feats,h_feats,num_classes):
        super(MLP,self).__init__()
        self.fc1=nn.Linear(in_feats,h_feats)
        self.fc2=nn.Linear(h_feats,num_classes)
    
    def forward(self,x):
        # print("In MLP")
        x=F.relu(self.fc1(x))
        return F.softmax(self.fc2(x),dim=1)

In [41]:
class Model(nn.Module):
    def __init__(self,in_feats,h_feats,out_feats,num_class):
        super(Model,self).__init__()
        self.conv=GCN(in_feats,h_feats,out_feats)
        self.fc=MLP(out_feats,10,num_class)
    
    def forward(self,g,feats):
        # print("In Model")
        c1=self.conv(g,feats)
        c2=self.fc(c1)
        return c2

In [42]:
# g= bz2.BZ2File("kfold2/graph_dataset/graphs_data_All.pbz2", 'rb')
# dataset=cPickle.load(g)

g1= bz2.BZ2File("kfold3/graph_dataset/graphs_data_1000.pbz2", 'rb')
g2= bz2.BZ2File("kfold3/graph_dataset/graphs_data_2000.pbz2", 'rb')
g3= bz2.BZ2File("kfold3/graph_dataset/graphs_data_3000.pbz2", 'rb')
g4= bz2.BZ2File("kfold3/graph_dataset/graphs_data_3600.pbz2", 'rb')
dataset1 = cPickle.load(g1)
dataset2 = cPickle.load(g2)
dataset3 = cPickle.load(g3)
dataset4 = cPickle.load(g4)
dataset=dataset1+dataset2+dataset3+dataset4

In [43]:
g_test=bz2.BZ2File("kfold2/graph_dataset/graphs_data_All_real.pbz2", 'rb')
dataset_test = cPickle.load(g_test)

In [44]:
import random
random.shuffle(dataset)

In [45]:
num_test=len(dataset_test)
print(num_test)

242


In [46]:
test_sampler = SubsetRandomSampler(torch.arange(num_test))
test_dataloader = GraphDataLoader(dataset_test, sampler=test_sampler, batch_size=5, drop_last=False)

In [47]:
# Create the model with given dimensions
model = Model(5,10,5,3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [48]:
folds=10
num_examples=len(dataset)
fsize=num_examples//10
val_reports=[]
for i in range(folds):
    trll = 0
    trlr = i * fsize
    vall = trlr
    valr = i * fsize + fsize
    trrl = valr
    trrr = num_examples

    train_left_indices = torch.arange(trll,trlr)
    train_right_indices = torch.arange(trrl,trrr)

    train_indices = torch.cat([train_left_indices,train_right_indices])
    val_indices = torch.arange(vall,valr)

    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    train_dataloader = GraphDataLoader(dataset, sampler=train_sampler, batch_size=5, drop_last=False)
    val_dataloader = GraphDataLoader(dataset, sampler=val_sampler, batch_size=5, drop_last=False)


    for epoch in range(100):
        l=0.0
        cnt=0
        for batched_graph, labels in train_dataloader:
            pred = model(batched_graph, batched_graph.ndata['h'].float())
            loss = F.cross_entropy(pred, labels)
            l+=loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            cnt+=1
        if(epoch%10==0):
            print(f"epoch : {epoch+1} loss : {l/cnt}")
    
    with torch.no_grad():

        num_correct = 0
        num_tests = 0
        pred_labels=[]
        correct_labels=[]

        for batched_graph, labels in val_dataloader:
            pred = model(batched_graph, batched_graph.ndata['h'].float())
            num_correct += (pred.argmax(1) == labels).sum().item()
            pred_labels+=list(pred.argmax(1))
            correct_labels+=list(labels)

            num_tests += len(labels)
        print('Val accuracy:', num_correct / num_tests)
        val_reports.append(classification_report(correct_labels, pred_labels, target_names=["low","medium","high"]))
    torch.save(model.state_dict,f"kfold3/trained_model/classification_{i}.model")



In [37]:
print(val_reports[0])

              precision    recall  f1-score   support

         low       0.61      0.56      0.59        39
      medium       0.44      0.40      0.42        45
        high       0.44      0.53      0.48        36

    accuracy                           0.49       120
   macro avg       0.50      0.50      0.50       120
weighted avg       0.50      0.49      0.49       120



In [23]:
num_correct = 0
num_tests = 0
pred_labels=[]
correct_labels=[]

for batched_graph, labels in test_dataloader:
    pred = model(batched_graph, batched_graph.ndata['h'].float())
    num_correct += (pred.argmax(1) == labels).sum().item()
    pred_labels+=list(pred.argmax(1))
    correct_labels+=list(labels)

    num_tests += len(labels)
print('Test accuracy:', num_correct / num_tests)

Test accuracy: 0.5247933884297521


In [24]:
print(classification_report(correct_labels, pred_labels, target_names=["low","medium","high"]))

              precision    recall  f1-score   support

         low       0.65      0.66      0.65       136
      medium       0.24      0.10      0.14        59
        high       0.40      0.66      0.50        47

    accuracy                           0.52       242
   macro avg       0.43      0.47      0.43       242
weighted avg       0.50      0.52      0.50       242



In [25]:
# rec,rec=precision_recall(torch.tensor(pred_labels,dtype=torch.long), torch.tensor(correct_labels,dtype=torch.long), average='none', num_classes=3)

In [25]:
num_examples = len(dataset)
num_train = int(num_examples)

train_sampler = SubsetRandomSampler(torch.arange(num_train))
train_dataloader = GraphDataLoader(dataset, sampler=train_sampler, batch_size=5, drop_last=False)

In [26]:
num_correct = 0
num_tests = 0
pred_labels=[]
correct_labels=[]
with torch.no_grad():
    for batched_graph, labels in train_dataloader:
        pred = model(batched_graph, batched_graph.ndata['h'].float())
        num_correct += (pred.argmax(1) == labels).sum().item()
        pred_labels+=list(pred.argmax(1))
        correct_labels+=list(labels)
        num_tests += len(labels)
    print('Train accuracy:', num_correct / num_tests)

Train accuracy: 0.5166666666666667


In [27]:
print(classification_report(correct_labels, pred_labels, target_names=["low","medium","high"]))

              precision    recall  f1-score   support

         low       0.60      0.58      0.59       400
      medium       0.50      0.25      0.33       400
        high       0.47      0.72      0.57       400

    accuracy                           0.52      1200
   macro avg       0.52      0.52      0.50      1200
weighted avg       0.52      0.52      0.50      1200



In [28]:
# prec,rec=precision_recall(torch.tensor(pred_labels,dtype=torch.long), torch.tensor(correct_labels,dtype=torch.long), average='none', num_classes=3)
# print("precision : ",prec,"Recall : ",rec)

precision :  tensor([0.5212, 0.4233, 0.4444]) Recall :  tensor([0.6434, 0.2447, 0.5294])


In [9]:
import json
with open("kfold_500/Results/train_result.json","r") as f:
    train_res=json.load(f)

with open("kfold_500/Results/test_result.json","r") as f:
    test_res=json.load(f)

with open("kfold_500/Results/val_result.json","r") as f:
    val_res=json.load(f)

In [10]:
print(train_res)

              precision    recall  f1-score   support

         low       0.58      0.48      0.53      2000
      medium       0.41      0.40      0.41      2000
        high       0.51      0.61      0.56      2000

    accuracy                           0.50      6000
   macro avg       0.50      0.50      0.50      6000
weighted avg       0.50      0.50      0.50      6000



In [11]:
print(test_res)

              precision    recall  f1-score   support

         low       0.62      0.51      0.56       400
      medium       0.41      0.36      0.39       400
        high       0.51      0.65      0.57       400

    accuracy                           0.51      1200
   macro avg       0.51      0.51      0.51      1200
weighted avg       0.51      0.51      0.51      1200



In [12]:
print(val_res[9])

              precision    recall  f1-score   support

         low       0.58      0.43      0.50       189
      medium       0.44      0.41      0.42       216
        high       0.47      0.62      0.53       195

    accuracy                           0.48       600
   macro avg       0.49      0.49      0.48       600
weighted avg       0.49      0.48      0.48       600

