In [625]:
import os
import pickle
import random
import numpy as np
import pandas as pd
from sklearn import svm
from typing import Dict
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer

import glob
import torch
import wget
import zipfile
from keras.preprocessing.text import Tokenizer

from gensim.models.keyedvectors import KeyedVectors
from gensim.models.fasttext import FastText

from mittens import GloVe as Glove
from keras.preprocessing import sequence
from keras.models import load_model

from setfit import SetFitModel, SetFitTrainer
from datasets import load_dataset, logging
logging.set_verbosity_error()

from transformers import BertTokenizer, BertForSequenceClassification
from transformers import XLNetForSequenceClassification, RobertaForSequenceClassification
from transformers import XLMRobertaForSequenceClassification, DistilBertForSequenceClassification
from transformers import RobertaTokenizer, XLMRobertaTokenizer, DistilBertTokenizer, XLNetTokenizer

### Common Helper Functions

In [626]:
def get_avg_report(results, folds):
    
    """
    function takes the input of predicted model results on five folds and returns
    average of weighted and macro Precision, Recall, F-1 
    
    """
    
    weighted_precision = []
    weighted_recall = []
    weighted_f1 = []
    
    macro_precision = []
    macro_recall = []
    macro_f1 = []
    
    for result_df in results:                        
        res_rows = result_df.tail(3)

        precision_scores =  res_rows['precision'].tolist()
        recall_scores =  res_rows['recall'].tolist()
        f1_scores =  res_rows['f1-score'].tolist()

        precision_macro_avg =  precision_scores[1]
        precision_weighted_avg = precision_scores[2]

        recall_macro_avg =  recall_scores[1]
        recall_weighted_avg = recall_scores[2]

        fl_accuracy = f1_scores[0]
        f1_scores_macro_avg =  f1_scores[1]
        f1_scores_weighted_avg = f1_scores[2]
                
        weighted_precision.append(precision_weighted_avg)
        weighted_recall.append(recall_weighted_avg)
        weighted_f1.append(f1_scores_weighted_avg)
        
        macro_precision.append(precision_macro_avg)
        macro_recall.append(recall_macro_avg)
        macro_f1.append(f1_scores_macro_avg)
                
    weighted_average = round(sum(weighted_precision) / folds, 2), round(sum(weighted_recall) / folds, 2), round(sum(weighted_f1) / folds, 2)
    macro_average = round(sum(macro_precision) / folds, 2), round(sum(macro_recall) / folds, 2), round(sum(macro_f1) / folds, 2)
            
    return weighted_average, macro_average

def get_accuracy(y_actual, y_predicted):
    """
    function takes the actual and predicted labels to return
    the accuracy per fold
    
    """
    count = 0
    for index in zip(y_actual, y_predicted):
        
        if index[0] == index[1]:
                count += 1
    topk_acc = round(count / len(y_actual), 2)
    return topk_acc


def get_f1_score(y_actual, y_predicted):
    """
    function takes the actual and predicted labels to return
    the accuracy per fold

    """
    count = 0
    for index in zip(y_actual, y_predicted):

        if index[0] == index[1]:
                count += 1
    topk_acc = round(count / len(y_actual), 2)
    return topk_acc




### ML alogrithms Pipeline

In [627]:
def load_ML_model_files(model_name, model_path, pca=None):

    """
    function load the ML models relevant files based
    on the parameters given

    """
    ML_model = pickle.load(open(model_path + '/'+ model_name + '.pickle', 'rb'))
    if pca:
        pca_vectorizer = pickle.load(open(model_path + 'pca_vectorizer.pickle', 'rb'))
    else:
        pca_vectorizer = None
    tfidf_vectorizer = pickle.load(open(model_path + 'tfidf_vectorizer.pickle', 'rb'))

    return ML_model, pca_vectorizer, tfidf_vectorizer

In [628]:
# load dataset for testing
fold_parent = './data/dronology_five_folds/'

sub_folders = []
for folder in os.listdir(fold_parent):
    if 'fold' in folder: 
        sub_folders.append(os.path.join(fold_parent, folder))
        

In [629]:
# replace the value of 'model_name' with desired tradional ML model's name to get results for the model
# to trigger more traditional ML models check the names in: model/ML_models. examples, DT, SVM, pLR etc. 
# put 'p' infront of the model name to couple our pre-processing pipeline
model_name = 'LR'
PCA = True
map_labels = {0: 'information', 1: 'requirement'}

In [630]:
# load test data & make prediction

ml_results = []
avg_accuracy = []
fold_count = 1
lr_overall_predicted_labels = []

for subs in sorted(sub_folders):
    test_path = subs + '/test_' + 'fold_' + str(fold_count) + '.csv'

    df_test = pd.read_csv(test_path)
    df_test['STR.REQ'] = df_test['STR.REQ'].str.lower()
    X_test = df_test['STR.REQ']
    y_test = df_test['class']

    model_path = './models/ML_models/' + model_name + '/fold_' + str(fold_count) + '/'
    ML_model, pca_vectorizer, tfidf_vectorizer = load_ML_model_files(model_name, model_path, PCA)

    tfidf_vecs = tfidf_vectorizer.transform(X_test)
    normalized_tfidf = normalize(tfidf_vecs)

    test_vecs = pca_vectorizer.transform(normalized_tfidf.toarray())
    predicted_labels = ML_model.predict(test_vecs)

    evaluation_results = classification_report(y_test.tolist(), predicted_labels.tolist(),
                                               target_names=list(map_labels.values()),
                                               output_dict=True)

    avg_accuracy.append(get_accuracy(y_test.tolist(), predicted_labels.tolist()))

    report_df = pd.DataFrame(evaluation_results).transpose()
    ml_results.append(report_df)

    lr_overall_predicted_labels.append(predicted_labels)
    fold_count += 1

    print('\nResults for dataset fold number :',fold_count, 'on model :', model_name)
    print('\n',report_df)
    print('--------------------------------------')




Results for dataset fold number : 2 on model : LR

               precision    recall  f1-score    support
information    0.756757  1.000000  0.861538  56.000000
requirement    1.000000  0.100000  0.181818  20.000000
accuracy       0.763158  0.763158  0.763158   0.763158
macro avg      0.878378  0.550000  0.521678  76.000000
weighted avg   0.820768  0.763158  0.682665  76.000000
--------------------------------------

Results for dataset fold number : 3 on model : LR

               precision    recall  f1-score    support
information    0.767123  1.000000  0.868217  56.000000
requirement    1.000000  0.150000  0.260870  20.000000
accuracy       0.776316  0.776316  0.776316   0.776316
macro avg      0.883562  0.575000  0.564543  76.000000
weighted avg   0.828407  0.776316  0.708389  76.000000
--------------------------------------

Results for dataset fold number : 4 on model : LR

               precision    recall  f1-score  support
information    0.729730  0.981818  0.837209    55.

In [552]:
# replace the value of 'model_name' with desired tradional ML model's name to get results for the model
# to trigger more traditional ML models check the names in: model/ML_models. examples, DT, SVM, pLR etc.
# put 'p' infront of the model name to couple our pre-processing pipeline
model_name = 'pLR'
PCA = True
map_labels = {0: 'information', 1: 'requirement'}


In [553]:
# load test data & make prediction

ml_results = []
avg_accuracy = []
fold_count = 1
plr_overall_predicted_labels = []

for subs in sorted(sub_folders):
    test_path = subs + '/test_' + 'fold_' + str(fold_count) + '.csv'

    df_test = pd.read_csv(test_path)
    df_test['STR.REQ'] = df_test['STR.REQ'].str.lower()
    X_test = df_test['STR.REQ']
    y_test = df_test['class']

    model_path = './models/ML_models/' + model_name + '/fold_' + str(fold_count) + '/'
    ML_model, pca_vectorizer, tfidf_vectorizer = load_ML_model_files(model_name, model_path, PCA)

    tfidf_vecs = tfidf_vectorizer.transform(X_test)
    normalized_tfidf = normalize(tfidf_vecs)

    test_vecs = pca_vectorizer.transform(normalized_tfidf.toarray())
    predicted_labels = ML_model.predict(test_vecs)

    evaluation_results = classification_report(y_test.tolist(), predicted_labels.tolist(),
                                               target_names=list(map_labels.values()),
                                               output_dict=True)

    avg_accuracy.append(get_accuracy(y_test.tolist(), predicted_labels.tolist()))

    report_df = pd.DataFrame(evaluation_results).transpose()
    ml_results.append(report_df)

    plr_overall_predicted_labels.append(predicted_labels)
    print('\n',predicted_labels.tolist())

    fold_count += 1


 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

 [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

 [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [631]:
# replace the value of 'model_name' with desired tradional ML model's name to get results for the model
# to trigger more traditional ML models check the names in: model/ML_models. examples, DT, SVM, pLR etc.
# put 'p' infront of the model name to couple our pre-processing pipeline
model_name = 'SVM'
PCA = True
map_labels = {0: 'information', 1: 'requirement'}

In [632]:
# load test data & make prediction

ml_results = []
avg_accuracy = []
fold_count = 1
svm_overall_predicted_labels = []


for subs in sorted(sub_folders):
    test_path = subs + '/test_' + 'fold_' + str(fold_count) + '.csv'

    df_test = pd.read_csv(test_path)
    df_test['STR.REQ'] = df_test['STR.REQ'].str.lower()
    X_test = df_test['STR.REQ']
    y_test = df_test['class']

    model_path = './models/ML_models/' + model_name + '/fold_' + str(fold_count) + '/'
    ML_model, pca_vectorizer, tfidf_vectorizer = load_ML_model_files(model_name, model_path, PCA)

    tfidf_vecs = tfidf_vectorizer.transform(X_test)
    normalized_tfidf = normalize(tfidf_vecs)

    test_vecs = pca_vectorizer.transform(normalized_tfidf.toarray())
    predicted_labels = ML_model.predict(test_vecs)

    evaluation_results = classification_report(y_test.tolist(), predicted_labels.tolist(),
                                               target_names=list(map_labels.values()),
                                               output_dict=True)

    avg_accuracy.append(get_accuracy(y_test.tolist(), predicted_labels.tolist()))

    report_df = pd.DataFrame(evaluation_results).transpose()
    ml_results.append(report_df)

    svm_overall_predicted_labels.append(predicted_labels)

    print('\nResults for dataset fold number :',fold_count, 'on model :', model_name)
    print('\n',report_df)
    print('--------------------------------------')

    fold_count += 1


Results for dataset fold number : 1 on model : SVM

               precision    recall  f1-score    support
information    0.823529  1.000000  0.903226  56.000000
requirement    1.000000  0.400000  0.571429  20.000000
accuracy       0.842105  0.842105  0.842105   0.842105
macro avg      0.911765  0.700000  0.737327  76.000000
weighted avg   0.869969  0.842105  0.815911  76.000000
--------------------------------------

Results for dataset fold number : 2 on model : SVM

               precision    recall  f1-score    support
information    0.787879  0.928571  0.852459  56.000000
requirement    0.600000  0.300000  0.400000  20.000000
accuracy       0.763158  0.763158  0.763158   0.763158
macro avg      0.693939  0.614286  0.626230  76.000000
weighted avg   0.738437  0.763158  0.733391  76.000000
--------------------------------------

Results for dataset fold number : 3 on model : SVM

               precision    recall  f1-score  support
information    0.768116  0.963636  0.854839    

In [633]:
# replace the value of 'model_name' with desired tradional ML model's name to get results for the model
# to trigger more traditional ML models check the names in: model/ML_models. examples, DT, SVM, pLR etc.
# put 'p' infront of the model name to couple our pre-processing pipeline
model_name = 'pSVM'
PCA = True
map_labels = {0: 'information', 1: 'requirement'}

In [635]:
# load test data & make prediction

ml_results = []
avg_accuracy = []
fold_count = 1
psvm_overall_predicted_labels = []

for subs in sorted(sub_folders):
    test_path = subs + '/test_' + 'fold_' + str(fold_count) + '.csv'

    df_test = pd.read_csv(test_path)
    df_test['STR.REQ'] = df_test['STR.REQ'].str.lower()
    X_test = df_test['STR.REQ']
    y_test = df_test['class']

    model_path = './models/ML_models/' + model_name + '/fold_' + str(fold_count) + '/'
    ML_model, pca_vectorizer, tfidf_vectorizer = load_ML_model_files(model_name, model_path, PCA)

    tfidf_vecs = tfidf_vectorizer.transform(X_test)
    normalized_tfidf = normalize(tfidf_vecs)

    test_vecs = pca_vectorizer.transform(normalized_tfidf.toarray())
    predicted_labels = ML_model.predict(test_vecs)

    evaluation_results = classification_report(y_test.tolist(), predicted_labels.tolist(),
                                               target_names=list(map_labels.values()),
                                               output_dict=True)

    avg_accuracy.append(get_accuracy(y_test.tolist(), predicted_labels.tolist()))

    report_df = pd.DataFrame(evaluation_results).transpose()
    ml_results.append(report_df)

    psvm_overall_predicted_labels.append(predicted_labels)

    print('\nResults for dataset fold number :',fold_count, 'on model :', model_name)
    print('\n',report_df)
    print('--------------------------------------')


    fold_count += 1


Results for dataset fold number : 1 on model : pSVM

               precision    recall  f1-score    support
information    0.794118  0.964286  0.870968  56.000000
requirement    0.750000  0.300000  0.428571  20.000000
accuracy       0.789474  0.789474  0.789474   0.789474
macro avg      0.772059  0.632143  0.649770  76.000000
weighted avg   0.782508  0.789474  0.754548  76.000000
--------------------------------------

Results for dataset fold number : 2 on model : pSVM

               precision    recall  f1-score    support
information    0.782609  0.964286  0.864000  56.000000
requirement    0.714286  0.250000  0.370370  20.000000
accuracy       0.776316  0.776316  0.776316   0.776316
macro avg      0.748447  0.607143  0.617185  76.000000
weighted avg   0.764629  0.776316  0.734097  76.000000
--------------------------------------

Results for dataset fold number : 3 on model : pSVM

               precision    recall  f1-score  support
information    0.760563  0.981818  0.857143 

### BERT Family Pipeline

In [590]:
def load_tokenizer(model_name):
    
    """
    loads and returns the relevant tokenizer for passed parameter BERT model name
    
    """
    if model_name in ('BERT_base_uncased', 
                      'pBERT_base_uncased'):
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased",
                                                  do_lower_case=True)
                
    elif model_name in ('BERT_base_cased',
                        'pBERT_base_cased'):
        tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    
    elif model_name in ('pXLNet_base', 
                        'XLNet_base'):
        tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
    
    elif model_name in ('SciBERT_uncased', 
                        'pSciBERT_uncased'):
        tokenizer = BertTokenizer.from_pretrained('allenai/scibert_scivocab_uncased', 
                                                  do_lower_case=True)
    
    elif model_name in ('pRoBERTa_base', 
                        'RoBERTa_base'):
        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

    elif model_name in ('DisBERT_base_cased', 
                        'pDisBERT_base_cased'):
        tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-cased")
    
    elif model_name in ('DisBERT_base_uncased', 
                        'pDisBERT_base_uncased'):
        tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

    else:
        #'pXRBERT_base', 'XRBERT_base'
        tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
    
    return tokenizer
        

In [591]:
def load_BERT_model(model_name, model_path):
    """
    loads and returns the BERT model based on the model name and path parameters
    
    """
    
    if model_name in ('BERT_base_uncased', 'pBERT_base_cased',
                      'pBERT_base_uncased', 'BERT_base_cased',
                      'SciBERT_uncased', 'pSciBERT_uncased'
                     ):
        model = BertForSequenceClassification.from_pretrained(model_path)                
    elif model_name in ('pXLNet_base', 
                        'XLNet_base'
                       ):
        model = XLNetForSequenceClassification.from_pretrained(model_path)
    
    elif model_name in ('pRoBERTa_base', 
                        'RoBERTa_base'
                       ):
        model = RobertaForSequenceClassification.from_pretrained(model_path)

    elif model_name in ('DisBERT_base_cased', 'DisBERT_base_uncased',
                        'pDisBERT_base_cased', 'pDisBERT_base_uncased'
                       ):
        model = DistilBertForSequenceClassification.from_pretrained(model_path)    
    
    else:
        #'pXRBERT_base', 'XRBERT_base'
        model = XLMRobertaForSequenceClassification.from_pretrained(model_path)
    
    return model

In [592]:
# replace the value of 'model_name' with BERT model's name to get results for the model
# to trigger more BERT models check the names in: model/BERT_family. examples, BERT_base_cased etc. 
# put 'p' infront of the model name to couple our pre-processing pipeline

map_labels = {0: 'information', 1: 'requirement'}

prefix = './models/DL_models/BERT_family/'
model_name = 'BERT_base_uncased'

fold_parent = './data/dronology_five_folds/'

sub_folders = []
for folder in os.listdir(fold_parent):
    if 'fold' in folder: 
        sub_folders.append(os.path.join(fold_parent, folder))

tokenizer = load_tokenizer(model_name)
MAX_SEQ_LENGTH = 128

In [593]:
tokenizer

BertTokenizer(name_or_path='bert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [594]:
fold_count = 1
results = []
avg_accuracy = []
bert_overall_predicted_labels = []

for subs in sorted(sub_folders):
    test_path = subs + '/test_' + 'fold_' + str(fold_count) + '.csv'

    df_test = pd.read_csv(test_path)
    selected_test = df_test[['STR.REQ','class']]

    test_sequences = selected_test['STR.REQ'].tolist()

    test_encodings = tokenizer(test_sequences, truncation=True,
                               padding=True,
                               max_length=MAX_SEQ_LENGTH,
                               return_tensors="pt")

    # load model
    model_path = glob.glob(prefix + model_name + '/fold_' + str(fold_count) + '/*')[0]
    bert_model = load_BERT_model(model_name, model_path)

    with torch.no_grad():
        logits = bert_model(**test_encodings).logits

    predictions = np.argmax(logits, axis=1)
    evaluation_results = classification_report(selected_test['class'].tolist(),
                                               predictions.tolist(),
                                               target_names=list(map_labels.values()),
                                               output_dict=True)

    avg_accuracy.append(get_accuracy(selected_test['class'].tolist(),
                                     predictions.tolist()))

    report_df = pd.DataFrame(evaluation_results).transpose()
    results.append(report_df)

    bert_overall_predicted_labels.append(predicted_labels)

    evaluation_results = classification_report(y_test.tolist(), predicted_labels.tolist(),
                                               target_names=list(map_labels.values()),
                                               output_dict=True)

    avg_accuracy.append(get_accuracy(y_test.tolist(), predicted_labels.tolist()))

    report_df = pd.DataFrame(evaluation_results).transpose()
    ml_results.append(report_df)

    print('\nResults for dataset fold number :',fold_count, 'on model :', model_name)
    print('\n',report_df)
    print('--------------------------------------')


    fold_count += 1


Results for dataset fold number : 1 on model : BERT_base_uncased

               precision    recall  f1-score    support
information    0.760563  0.964286  0.850394  56.000000
requirement    0.500000  0.105263  0.173913  19.000000
accuracy       0.746667  0.746667  0.746667   0.746667
macro avg      0.630282  0.534774  0.512153  75.000000
weighted avg   0.694554  0.746667  0.679019  75.000000
--------------------------------------

Results for dataset fold number : 2 on model : BERT_base_uncased

               precision    recall  f1-score    support
information    0.760563  0.964286  0.850394  56.000000
requirement    0.500000  0.105263  0.173913  19.000000
accuracy       0.746667  0.746667  0.746667   0.746667
macro avg      0.630282  0.534774  0.512153  75.000000
weighted avg   0.694554  0.746667  0.679019  75.000000
--------------------------------------

Results for dataset fold number : 3 on model : BERT_base_uncased

               precision    recall  f1-score    support
inf

In [595]:
fold_count = 1
results = []
avg_accuracy = []
for subs in sorted(sub_folders):
    test_path = subs + '/test_' + 'fold_' + str(fold_count) + '.csv'

    df_test = pd.read_csv(test_path)
    selected_test = df_test[['STR.REQ','class']]

    test_sequences = selected_test['STR.REQ'].tolist()

    test_encodings = tokenizer(test_sequences, truncation=True,
                               padding=True,
                               max_length=MAX_SEQ_LENGTH,
                               return_tensors="pt")
    # load model
    model_path = glob.glob(prefix + model_name + '/fold_' + str(fold_count) + '/*')[0]
    bert_model = load_BERT_model(model_name, model_path)

    with torch.no_grad():
        logits = bert_model(**test_encodings).logits

    predictions = np.argmax(logits, axis=1)
    evaluation_results = classification_report(selected_test['class'].tolist(),
                                               predictions.tolist(),
                                               target_names=list(map_labels.values()),
                                               output_dict=True)

    avg_accuracy.append(get_accuracy(selected_test['class'].tolist(),
                                     predictions.tolist()))

    report_df = pd.DataFrame(evaluation_results).transpose()
    results.append(report_df)

    print('\nResults for dataset fold number :',fold_count, 'on model :', model_name)
    print('\n',report_df)
    print('--------------------------------------')

    fold_count += 1


Results for dataset fold number : 1 on model : BERT_base_uncased

               precision    recall  f1-score    support
information    1.000000  0.928571  0.962963  56.000000
requirement    0.833333  1.000000  0.909091  20.000000
accuracy       0.947368  0.947368  0.947368   0.947368
macro avg      0.916667  0.964286  0.936027  76.000000
weighted avg   0.956140  0.947368  0.948786  76.000000
--------------------------------------

Results for dataset fold number : 2 on model : BERT_base_uncased

               precision    recall  f1-score    support
information    0.828125  0.946429  0.883333  56.000000
requirement    0.750000  0.450000  0.562500  20.000000
accuracy       0.815789  0.815789  0.815789   0.815789
macro avg      0.789062  0.698214  0.722917  76.000000
weighted avg   0.807566  0.815789  0.798904  76.000000
--------------------------------------

Results for dataset fold number : 3 on model : BERT_base_uncased

               precision    recall  f1-score    support
inf

In [624]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

for row in range (5):

    # Get the shape of the arrays
    num_rows = len(lr_overall_predicted_labels)
    num_cols = len(lr_overall_predicted_labels[row])
    # Initialize an empty array for the final result
    result_array = np.zeros((num_rows, num_cols), dtype=int)

    test_path = './data/dronology_five_folds/fold_' + str(row + 1) + '/test_' + 'fold_' + str(row + 1) + '.csv'

    df_test = pd.read_csv(test_path)
    df_test['STR.REQ'] = df_test['STR.REQ'].str.lower()
    X_test = df_test['STR.REQ']
    y_test = df_test['class']
    for col in range (len(lr_overall_predicted_labels[row]) - 1):
        prediction_sum = lr_overall_predicted_labels[row][col]  + svm_overall_predicted_labels[row][col] + psvm_overall_predicted_labels[row][col]

        if prediction_sum >= 1:
            result_array[row][col] = 1
        else:
            result_array[row][col] = 0

        evaluation_results = classification_report(y_test.tolist(), result_array.tolist()[row],
                                                   target_names=list(map_labels.values()),
                                                   output_dict=True)

        avg_accuracy.append(get_accuracy(y_test.tolist(), result_array.tolist()[row]))

        report_df = pd.DataFrame(evaluation_results).transpose()
        ml_results.append(report_df)

    print('\nResults for dataset fold number :',str(row + 1))
    print('\n',round(report_df.loc['weighted avg']['f1-score'], 4))





Results for dataset fold number : 1

 0.8442

Results for dataset fold number : 2

 0.7414

Results for dataset fold number : 3

 0.6991

Results for dataset fold number : 4

 0.7825

Results for dataset fold number : 5

 0.7064
