In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric
import os
import javalang
from javaprep import *
from edge_index import edges
import os
from loaders import get_loaders
import matplotlib.pyplot as plt
from model import GCN
import random
from datetime import datetime
from datetime import date
import csv
import pandas as pd

In [2]:
def learn(train, test, cwe, num_epochs=100, learning_rate=0.01):
    """Train a freshly seeded GCN on ``train`` and score it on ``test`` every epoch.

    Parameters
    ----------
    train : iterable
        Batches exposing ``x``, ``edge_index``, ``batch`` and ``y``; iterated
        once per epoch for the optimisation steps.
    test : iterable
        Batches with the same attributes; iterated once per epoch to measure
        accuracy.
    cwe : object
        Identifier stored in the report under ``'CWE'`` and used (via ``str``)
        in the file name of the saved plot.
    num_epochs : int, optional
        Number of passes over ``train`` (default 100, the original constant).
    learning_rate : float, optional
        Step size handed to Adam (default 0.01, the original constant).

    Returns
    -------
    dict
        ``score_report`` with keys ``'Accuracies'``, ``'Losses'``, ``'CWE'``,
        ``'Model'``, ``'Date'``, ``'Time'``, ``'Seed'``, ``'Learning Rate'`` and
        ``'Loss Function'``.  The two lists hold values rounded to two
        decimals, sampled after epochs 1, 3, 5, 10, 20, 50 and 100.

    Side effects
    ------------
    Saves a loss/accuracy curve to ``../pngs/<cwe>-<dd-mm-YYYY>-<HH:MM:SS>.png``.

    Notes
    -----
    The loss recorded for an epoch is the loss of the *last* training batch of
    that epoch, not an epoch average.  If a loader yields nothing, the
    corresponding loss / accuracy is reported as ``nan`` instead of raising.
    """
    seed = random.randint(1, 10000)
    model = GCN(seed)
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Read the clock once so the date and the time can never straddle midnight.
    started = datetime.now()
    current_time = started.strftime("%H:%M:%S")
    current_date = started.strftime("%d/%m/%Y")

    # Epochs (1-based) whose metrics are copied into the report.
    report_epochs = (1, 3, 5, 10, 20, 50, 100)

    epoch_ids, losses, accs = [], [], []
    score_report = {
        'Accuracies': [],
        'Losses': [],
        'CWE': cwe,
        'Model': str(model).replace('\n', ' ').replace('\t', ' '),
        'Date': current_date,
        'Time': current_time,
        'Seed': seed,
        'Learning Rate': learning_rate,
        'Loss Function': loss_function
    }

    last_loss = float('nan')  # stays nan only if `train` never yields a batch
    for epoch in range(num_epochs):
        # NOTE(review): train()/eval() assume GCN is a torch.nn.Module (it
        # already exposes .parameters()); they matter if it uses dropout/batch-norm.
        model.train()
        for data in train:
            optimizer.zero_grad()
            out = model(data.x, data.edge_index, data.batch)
            loss = loss_function(out, data.y)
            loss.backward()
            optimizer.step()
            # Keep a plain float: storing the tensor would retain its autograd
            # graph and cannot be plotted by matplotlib.
            last_loss = loss.item()

        model.eval()
        correct, total = 0, 0
        with torch.no_grad():  # evaluation needs no gradients
            for quiz in test:
                pred = model(quiz.x, quiz.edge_index, quiz.batch).argmax(dim=1)
                correct += int((pred == quiz.y).sum())
                total += quiz.y.numel()
        accuracy = correct / total if total else float('nan')

        epoch_ids.append(epoch)
        losses.append(last_loss)
        accs.append(accuracy)
        if epoch + 1 in report_epochs:
            score_report['Accuracies'].append(round(accuracy, 2))
            score_report['Losses'].append(round(last_loss, 2))

    # Draw on a dedicated figure and close it afterwards so repeated calls do
    # not accumulate open figures or leave an empty one in the notebook output.
    fig, ax = plt.subplots()
    ax.plot(epoch_ids, losses, label='Loss', color='magenta', linewidth=2)
    ax.plot(epoch_ids, accs, label='Accuracy', color='blue', linewidth=2)
    ax.legend()
    fig.savefig('../pngs/'+str(cwe)+'-'+str(current_date).replace('/','-')+'-'+str(current_time)+'.png')
    plt.close(fig)

    return score_report

In [4]:
# CWE identifiers to train on; one model is trained and one CSV row written per entry.
vulnerabilities = [22,78,79,89,90,327,328,330,501,614,643]
for vul in vulnerabilities:
    print("Learning on CWE"+str(vul)+"...")
    train, test = get_loaders(vul, "owasp")
    score_report = learn(train, test, vul)

    # Fixed metadata columns first, then the sampled accuracies, then the sampled losses.
    row = [
        score_report['CWE'],
        score_report['Date'],
        score_report['Time'],
        score_report['Model'],
        score_report['Seed'],
        score_report['Learning Rate'],
        score_report['Loss Function'],
    ]
    row.extend(score_report['Accuracies'])
    row.extend(score_report['Losses'])

    # Append after every CWE so finished results survive an interrupted run.
    # newline='' is required by the csv module; without it Windows gets blank rows.
    with open('../owasp_score_report.csv', 'a', newline='') as fd:
        writer = csv.writer(fd)
        writer.writerow(row)

print("Done")

Learning on CWE22...
22
2630
Learning on CWE78...
78


KeyboardInterrupt: 

<Figure size 432x288 with 0 Axes>