In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric
import os
import javalang
from javaprep import *
from edge_index import edges
import os
from loaders import get_loaders
import matplotlib.pyplot as plt
import random
from datetime import datetime
from datetime import date
import csv
import pandas as pd
import torch.optim as optim
import numpy as np

In [2]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool
import torch

class GCN(torch.nn.Module):
    def __init__(self, seed):
        torch.manual_seed(seed)
        super(GCN, self).__init__()
        self.conv1 = GCNConv(50, 64)
        self.conv2 = GCNConv(64, 64)
        self.conv3 = GCNConv(64, 64)
        self.lin = Linear(64, 2)

    def forward(self, x, edge_index, batch):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)
        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)

        
        
        return x

In [3]:
def learn(train, test, cwe):
    
    seed = random.randint(1,10000)
    model = GCN(seed)
    learning_rate = 0.01
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = loss_function
    current_time = (datetime.now()).strftime("%H:%M:%S")
    current_date = (date.today()).strftime("%d/%m/%Y")
    epochs, losses, test_accs, train_accs, precisions, recalls, fscores = [], [], [], [], [], [], []
    score_report = {
        'Test Acc': [], 
        'Losses': [],
        'FScores': [],
        'Recalls': [],
        'Precisions': [],
        'CWE': cwe, 
        'Model': str(model).replace('\n', ' ').replace('\t', ' '), 
        'Date': current_date, 
        'Time': current_time, 
        'Seed': seed, 
        'Learning Rate': learning_rate, 
        'Loss Function': loss_function
    }
    epochs_no_improve = 0
    saved = False
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.08)
    for epoch in range(40):
        print("Epoch: ", epoch, end='\r')
        train_correct, train_total = 0, 0
        for data in train:   
            optimizer.zero_grad()  
            out = model(data.x, data.edge_index, data.batch)  
            loss = criterion(out, data.y)
            pred = out.argmax(dim=1)
            train_correct+=sum(1 for x,y in zip(pred.tolist(),data.y.tolist()) if x == y)
            train_total+=len(pred.tolist())
            loss.backward()  
            optimizer.step()
        scheduler.step()
        train_accs.append(round(train_correct/train_total, 3))
        test_correct, test_total = 0, 0
        tp, fp = 0, 0
        tn, fn = 0, 0
        for quiz in test:
            out = model(quiz.x, quiz.edge_index, quiz.batch) 
            pred = out.argmax(dim=1)
            for x,y in zip(pred.tolist(),quiz.y.tolist()):
                if x == y:
                    test_correct+=1
                if x == 1 and y == 0:
                    fp+=1
                if x == 0 and y == 0:
                    tp+=1
                if x == 0 and y == 0:
                    tn+=1
                if x == 0 and y ==1:
                    fn+=1
            test_total+=len(pred.tolist())
        try:
            precisions.append(round(tp/(tp+fp), 3)) 
            recalls.append(round(tp/(tp+fn), 3)) 
            fscores.append(2*(((tp/(tp+fp))*(tp/(tp+fn)))/((tp/(tp+fp))+(tp/(tp+fn)))))
        except:
            print("Divide by zero error")
            precisions.append(0) 
            recalls.append(0) 
            fscores.append(0)
        epochs.append(epoch)
        losses.append(loss)
        test_accs.append(test_correct/test_total) 
        #print("Training ", str(cwe.split('/')[3]), "on epoch ", epoch, "with acc: ", str(round(correct/total, 4)), end = '\r')
        if epoch+1 in [1,3,5,10,20,40]:
            score_report['Test Acc'].append(round(test_correct/test_total, 3))
            score_report['Losses'].append(round(loss.item(), 3))
            score_report['FScores'].append(round((2*(((tp/(tp+fp))*(tp/(tp+fn)))/((tp/(tp+fp))+(tp/(tp+fn))))), 3))
            score_report['Recalls'].append(round(tp/(tp+fn), 3)) 
            score_report['Precisions'].append(round(tp/(tp+fp), 3)) 
        
        
    plt.figure(figsize=(20,10))
    x_ticks = np.arange(0, 40, 5)
    plt.xticks(x_ticks)
    y_ticks = np.arange(0, 1, 0.1)
    plt.yticks(y_ticks)
    plt.plot(epochs, losses, label='train loss', color='darkviolet', linewidth=2)
    plt.plot(epochs, train_accs, label='train acc', color='gold', linewidth=2)
    plt.plot(epochs, test_accs, label='test acc', color='forestgreen', linewidth=2)
    plt.plot(epochs, precisions, label='precision', color='dodgerblue', linewidth=2)
    plt.plot(epochs, recalls, label='recall', color='gray', linewidth=2)
    plt.plot(epochs, fscores, label='f-score', color='crimson', linewidth=2)
    plt.legend(prop={'size': 20})
    plt.savefig('../pngs/JULIET-'+str(cwe.split('/')[3])+'-'+str(current_date).replace('/','-')+'-'+str(current_time)+'.png')
    plt.clf()
    print()
                                             
    return score_report, model

"Methods used to create data flow patterns (“source” and “sink” methods) in More Complex test cases are not considered “helper” methods because they are not part of the flaw construct."

In [None]:
for cwe in [x[0] for x in os.walk('../juliet/prep/')][1:]:
    try:
        current_time = (datetime.now()).strftime("%H:%M:%S")
        current_date = (date.today()).strftime("%d/%m/%Y")
        models = []
        ending_acc = []
        sps = []
        for i in range(3):
            print(cwe.split('/')[3], " Round: ", i)
            train, test = get_loaders(cwe, "juliet")
            score_report, mod = learn(train, test, cwe)
            ending_acc.append(score_report['Test Acc'][-1])
            models.append(mod)
            sps.append(score_report)
            if max(ending_acc) == 1:
                break
        best_run = ending_acc.index(max(ending_acc))
        score_report = sps[best_run]
        model = models[best_run]
        torch.save(model.state_dict(), '../models/'+str(cwe.split('/')[3])+'-'+(current_date).replace('/','')+'-'+current_time.replace('/',''))
        row = [
            score_report['CWE'],
            score_report['Date'],
            score_report['Time'],
            score_report['Model'],
            score_report['Seed'],
            score_report['Learning Rate'],
            score_report['Loss Function'],
        ]
        for epoch in zip(score_report['Test Acc'], score_report['Precisions'], score_report['FScores'], score_report['Recalls']):
            for item in epoch:
                row.append(item)

        print(row)

        with open('../juliet_score_report.csv','a') as fd:
            writer = csv.writer(fd)
            writer.writerow(row)
    except:
        print("failed for "+cwe)
        pass

print("Done")

CWE606  Round:  0
../juliet/prep/CWE606
generating asts...
generating trees...
training word2vec...
generating graphs...
balancing dataset (pass 1)
balancing dataset (pass 2)
Number of good methods:  1248  Number of bad methods:  1248
splitting dataset
Epoch:  39
['../juliet/prep/CWE606', '27/12/2020', '01:12:13', 'GCN(   (conv1): GCNConv(50, 64)   (conv2): GCNConv(64, 64)   (conv3): GCNConv(64, 64)   (lin): Linear(in_features=64, out_features=2, bias=True) )', 8961, 0.01, CrossEntropyLoss(), 0.696, 0.802, 0.724, 0.659, 0.994, 0.988, 0.994, 1.0, 0.998, 0.996, 0.998, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
CWE606  Round:  0
../juliet/prep/CWE606
generating asts...


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/gparte1/.local/lib/python3.6/site-packages/javalang/parser.py", line 1345, in parse_block_statement
    statement = self.parse_local_variable_declaration_statement()
  File "/home/gparte1/.local/lib/python3.6/site-packages/javalang/parser.py", line 1355, in parse_local_variable_declaration_statement
    declarators = self.parse_variable_declarators()
  File "/home/gparte1/.local/lib/python3.6/site-packages/javalang/parser.py", line 1207, in parse_variable_declarators
    declarator = self.parse_variable_declarator()
  File "/home/gparte1/.local/lib/python3.6/site-packages/javalang/parser.py", line 1217, in parse_variable_declarator
    identifier = self.parse_identifier()
  File "/home/gparte1/.local/lib/python3.6/site-packages/javalang/parser.py", line 232, in parse_identifier
    return self.accept(Identifier)
  File "/home/gparte1/.local/lib/python3.6/site-packages/javalang/parser.py", line 133, in accept
    self.illegal("Expected %s

<Figure size 1440x720 with 0 Axes>