In [1]:
import os
import pickle
import random
import numpy as np
import pandas as pd
from sklearn import svm
from typing import Dict
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer

import glob
import torch
import wget
import zipfile
from keras.preprocessing.text import Tokenizer

from gensim.models.keyedvectors import KeyedVectors
from gensim.models.fasttext import FastText

from mittens import GloVe as Glove
from keras.preprocessing import sequence
from keras.models import load_model
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from setfit import SetFitModel, SetFitTrainer
from datasets import load_dataset, logging
logging.set_verbosity_error()

from transformers import BertTokenizer, BertForSequenceClassification
from transformers import XLNetForSequenceClassification, RobertaForSequenceClassification
from transformers import XLMRobertaForSequenceClassification, DistilBertForSequenceClassification
from transformers import RobertaTokenizer, XLMRobertaTokenizer, DistilBertTokenizer, XLNetTokenizer


2023-12-23 12:40:41.837214: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Common Helper Functions

In [2]:
def get_avg_report(results, folds):
    
    """
    function takes the input of predicted model results on five folds and returns
    average of weighted and macro Precision, Recall, F-1 
    
    """
    
    weighted_precision = []
    weighted_recall = []
    weighted_f1 = []
    
    macro_precision = []
    macro_recall = []
    macro_f1 = []
    
    for result_df in results:                        
        res_rows = result_df.tail(3)

        precision_scores =  res_rows['precision'].tolist()
        recall_scores =  res_rows['recall'].tolist()
        f1_scores =  res_rows['f1-score'].tolist()

        precision_macro_avg =  precision_scores[1]
        precision_weighted_avg = precision_scores[2]

        recall_macro_avg =  recall_scores[1]
        recall_weighted_avg = recall_scores[2]

        fl_accuracy = f1_scores[0]
        f1_scores_macro_avg =  f1_scores[1]
        f1_scores_weighted_avg = f1_scores[2]
                
        weighted_precision.append(precision_weighted_avg)
        weighted_recall.append(recall_weighted_avg)
        weighted_f1.append(f1_scores_weighted_avg)
        
        macro_precision.append(precision_macro_avg)
        macro_recall.append(recall_macro_avg)
        macro_f1.append(f1_scores_macro_avg)
                
    weighted_average = round(sum(weighted_precision) / folds, 2), round(sum(weighted_recall) / folds, 2), round(sum(weighted_f1) / folds, 2)
    macro_average = round(sum(macro_precision) / folds, 2), round(sum(macro_recall) / folds, 2), round(sum(macro_f1) / folds, 2)
            
    return weighted_average, macro_average

def get_accuracy(y_actual, y_predicted):
    """
    function takes the actual and predicted labels to return
    the accuracy per fold
    
    """
    count = 0
    for index in zip(y_actual, y_predicted):
        
        if index[0] == index[1]:
                count += 1
    topk_acc = round(count / len(y_actual), 2)
    return topk_acc


### ML alogrithms Pipeline

In [3]:
def load_ML_model_files(model_name, model_path, pca):
    
    """
    function load the ML models relevant files based 
    on the parameters given
    
    """
    
    ML_model = pickle.load(open(model_path + '/'+ model_name + '.pickle', 'rb'))
    if pca:
        pca_vectorizer = pickle.load(open(model_path + 'pca_vectorizer.pickle', "rb"))
    else:
        pca_vectorizer = None
    tfidf_vectorizer = pickle.load(open(model_path + 'tfidf_vectorizer.pickle', "rb"))
    
    return ML_model, pca_vectorizer, tfidf_vectorizer

In [4]:
# load dataset for testing
fold_parent = './data/dronology_five_folds/'

sub_folders = []
for folder in os.listdir(fold_parent):
    if 'fold' in folder: 
        sub_folders.append(os.path.join(fold_parent, folder))
        

In [5]:
# replace the value of 'model_name' with desired tradional ML model's name to get results for the model
# to trigger more traditional ML models check the names in: model/ML_models. examples, DT, SVM, pLR etc. 
# put 'p' infront of the model name to couple our pre-processing pipeline
model_name = 'SVM'
PCA = True
map_labels = {0: 'information', 1: 'requirement'}

In [6]:
# load test data & make prediction

ml_results = []
avg_accuracy = []
fold_count = 1

for subs in sorted(sub_folders):
    test_path = subs + '/test_' + 'fold_' + str(fold_count) + '.csv'
    
    df_test = pd.read_csv(test_path)
    df_test['STR.REQ'] = df_test['STR.REQ'].str.lower()
    X_test = df_test['STR.REQ']
    y_test = df_test['class']
    
    model_path = './models/ML_models/' + model_name + '/fold_' + str(fold_count) + '/'
    ML_model, pca_vectorizer, tfidf_vectorizer = load_ML_model_files(model_name, model_path, PCA)

    tfidf_vecs = tfidf_vectorizer.transform(X_test)
    normalized_tfidf = normalize(tfidf_vecs)

    test_vecs = pca_vectorizer.transform(normalized_tfidf.toarray())
    predicted_labels = ML_model.predict(test_vecs)
    
    evaluation_results = classification_report(y_test.tolist(), predicted_labels.tolist(), 
                                               target_names=list(map_labels.values()), 
                                               output_dict=True)
    
    avg_accuracy.append(get_accuracy(y_test.tolist(), predicted_labels.tolist()))
    
    report_df = pd.DataFrame(evaluation_results).transpose()
    ml_results.append(report_df)
    
    print('\nResults for dataset fold number :',fold_count, 'on model :', model_name)
    print('\n',report_df)
    print('--------------------------------------')
    
    fold_count += 1


Results for dataset fold number : 1 on model : SVM

               precision    recall  f1-score    support
information    0.823529  1.000000  0.903226  56.000000
requirement    1.000000  0.400000  0.571429  20.000000
accuracy       0.842105  0.842105  0.842105   0.842105
macro avg      0.911765  0.700000  0.737327  76.000000
weighted avg   0.869969  0.842105  0.815911  76.000000
--------------------------------------

Results for dataset fold number : 2 on model : SVM

               precision    recall  f1-score    support
information    0.787879  0.928571  0.852459  56.000000
requirement    0.600000  0.300000  0.400000  20.000000
accuracy       0.763158  0.763158  0.763158   0.763158
macro avg      0.693939  0.614286  0.626230  76.000000
weighted avg   0.738437  0.763158  0.733391  76.000000
--------------------------------------

Results for dataset fold number : 3 on model : SVM

               precision    recall  f1-score  support
information    0.768116  0.963636  0.854839    

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  tfidf_vectorizer = pickle.load(open(model_path + 'tfidf_vectorizer.pickle', "rb"))
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  tfidf_vectorizer = pickle.load(open(model_path + 'tfidf_vectorizer.pickle', "rb"))
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
ht

In [7]:
# Average results of ML pipeline

avg_acc_score = round(np.mean(avg_accuracy), 2)
weighted_avg, macro_avg = get_avg_report(ml_results, folds=5)

avg_scores = list([weighted_avg, macro_avg, (avg_acc_score, avg_acc_score, avg_acc_score)])

final_df = pd.DataFrame([x for x in avg_scores], columns=(['Precision', 'Recall', 'F1_score']),
                      index=['weighted_avg','macro_avg', 'accuracy_avg'])

final_df.rename_axis('5-folds')

Unnamed: 0_level_0,Precision,Recall,F1_score
5-folds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
weighted_avg,0.78,0.79,0.75
macro_avg,0.76,0.63,0.64
accuracy_avg,0.78,0.78,0.78


### BERT Family Pipeline

In [8]:
def load_tokenizer(model_name):
    
    """
    loads and returns the relevant tokenizer for passed parameter BERT model name
    
    """
    if model_name in ('BERT_base_uncased', 
                      'pBERT_base_uncased'):
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased",
                                                  do_lower_case=True)
                
    elif model_name in ('BERT_base_cased',
                        'pBERT_base_cased'):
        tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    
    elif model_name in ('pXLNet_base', 
                        'XLNet_base'):
        tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
    
    elif model_name in ('SciBERT_uncased', 
                        'pSciBERT_uncased'):
        tokenizer = BertTokenizer.from_pretrained('allenai/scibert_scivocab_uncased', 
                                                  do_lower_case=True)
    
    elif model_name in ('pRoBERTa_base', 
                        'RoBERTa_base'):
        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

    elif model_name in ('DisBERT_base_cased', 
                        'pDisBERT_base_cased'):
        tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-cased")
    
    elif model_name in ('DisBERT_base_uncased', 
                        'pDisBERT_base_uncased'):
        tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

    else:
        #'pXRBERT_base', 'XRBERT_base'
        tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
    
    return tokenizer
        

In [9]:
def load_BERT_model(model_name, model_path):
    """
    loads and returns the BERT model based on the model name and path parameters
    
    """
    
    if model_name in ('BERT_base_uncased', 'pBERT_base_cased',
                      'pBERT_base_uncased', 'BERT_base_cased',
                      'SciBERT_uncased', 'pSciBERT_uncased'
                     ):
        model = BertForSequenceClassification.from_pretrained(model_path)                
    elif model_name in ('pXLNet_base', 
                        'XLNet_base'
                       ):
        model = XLNetForSequenceClassification.from_pretrained(model_path)
    
    elif model_name in ('pRoBERTa_base', 
                        'RoBERTa_base'
                       ):
        model = RobertaForSequenceClassification.from_pretrained(model_path)

    elif model_name in ('DisBERT_base_cased', 'DisBERT_base_uncased',
                        'pDisBERT_base_cased', 'pDisBERT_base_uncased'
                       ):
        model = DistilBertForSequenceClassification.from_pretrained(model_path)    
    
    else:
        #'pXRBERT_base', 'XRBERT_base'
        model = XLMRobertaForSequenceClassification.from_pretrained(model_path)
    
    return model

In [10]:
# replace the value of 'model_name' with BERT model's name to get results for the model
# to trigger more BERT models check the names in: model/BERT_family. examples, BERT_base_cased etc. 
# put 'p' infront of the model name to couple our pre-processing pipeline

map_labels = {0: 'information', 1: 'requirement'}

prefix = './models/DL_models/BERT_family/'
model_name = 'DisBERT_base_uncased'

fold_parent = './data/dronology_five_folds/'

sub_folders = []
for folder in os.listdir(fold_parent):
    if 'fold' in folder: 
        sub_folders.append(os.path.join(fold_parent, folder))

tokenizer = load_tokenizer(model_name)
MAX_SEQ_LENGTH = 128

In [11]:
tokenizer

DistilBertTokenizer(name_or_path='distilbert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [12]:
fold_count = 1
results = []
avg_accuracy = []
for subs in sorted(sub_folders):
    test_path = subs + '/test_' + 'fold_' + str(fold_count) + '.csv'
    
    df_test = pd.read_csv(test_path)
    selected_test = df_test[['STR.REQ','class']]

    test_sequences = selected_test['STR.REQ'].tolist()

    test_encodings = tokenizer(test_sequences, truncation=True, 
                               padding=True, 
                               max_length=MAX_SEQ_LENGTH, 
                               return_tensors="pt")
    # load model
    model_path = glob.glob(prefix + model_name + '/fold_' + str(fold_count) + '/*')[0]
    bert_model = load_BERT_model(model_name, model_path)

    with torch.no_grad():
        logits = bert_model(**test_encodings).logits

    predictions = np.argmax(logits, axis=1)
    evaluation_results = classification_report(selected_test['class'].tolist(), 
                                               predictions.tolist(), 
                                               target_names=list(map_labels.values()), 
                                               output_dict=True)
    
    avg_accuracy.append(get_accuracy(selected_test['class'].tolist(), 
                                     predictions.tolist()))

    report_df = pd.DataFrame(evaluation_results).transpose()
    results.append(report_df)
    
    print('\nResults for dataset fold number :',fold_count, 'on model :', model_name)
    print('\n',report_df)
    print('--------------------------------------')
    
    fold_count += 1

IndexError: list index out of range

In [None]:
# Average results of BERT model

avg_acc_score = round(np.mean(avg_accuracy), 2)
weighted_avg, macro_avg = get_avg_report(results, folds=5)

avg_scores = list([weighted_avg, macro_avg, (avg_acc_score, avg_acc_score, 
                                             avg_acc_score)])

final_df = pd.DataFrame([x for x in avg_scores], 
                        columns=(['Precision', 'Recall', 'F1_score']),
                        index=['weighted_avg','macro_avg', 'accuracy_avg'])

final_df.rename_axis('5-folds')

### Fewshot Family pipeline

In [None]:
def get_dataset(path):
    
    """
    load and return the dataset in the format fine-tuned few shot sentence-BERT 
    expects
    
    """
    dataset = load_dataset(path)
    test_dataset = dataset['test']
    return test_dataset

def _apply_column_mapping(dataset, column_mapping: Dict[str, str]):
    
    """
    apply the column mapping required for the loaded dataset
    
    """
    dataset = dataset.rename_columns(
        {
            **column_mapping,
            **{col: f"feat_{col}" for col in dataset.column_names if col not in column_mapping},
        }
    )
    dset_format = dataset.format
    dataset = dataset.with_format(
        type=dset_format["type"],
        columns=dataset.column_names,
        output_all_columns=dset_format["output_all_columns"],
        **dset_format["format_kwargs"],
    )
    return dataset

def evaluate_ST(test_data, Sent_tf_model):
    
    """
    load and evaluate the Sentence-BERT model on the given test dataset
    
    """
    eval_dataset = _apply_column_mapping(test_data, 
                                         column_mapping={"STR.REQ": "text", "class": "label"})   
    x_test = eval_dataset["text"]
    y_test = eval_dataset["label"]
    predicted_labels = Sent_tf_model.predict(x_test)
    
    return predicted_labels, y_test

In [None]:
map_labels = {0: 'information', 1: 'requirement'}
# replace the value of 'model_name' with desired few shot model's name to get results for the model
# to trigger morefew shot models models check the names in: model/Fewshot_family. examples, S-BERT_10% or pMiniLM_10%. 
# put 'p' infront of the model name to couple our pre-processing pipeline
model_name = 'pS-BERT_20%'

prefix = './models/DL_models/Fewshot_family/'
fold_parent = './data/dronology_preprocess_five_folds/'

sub_folders = []
for folder in os.listdir(fold_parent):
    if 'fold' in folder: 
        sub_folders.append(os.path.join(fold_parent, folder))

In [None]:
fold_count = 1
st_results = []
avg_accuracy = []

for subs in sorted(sub_folders):
    test_dataset = get_dataset(subs)
    
    model_path = prefix + model_name + '/fold_' + str(fold_count)
    ST_model = SetFitModel.from_pretrained(model_path)
    
    predicted_labels, y_test = evaluate_ST(test_dataset, ST_model)
    
    evaluation_results = classification_report(y_test, predicted_labels.tolist(), 
                                               target_names=list(map_labels.values()), 
                                               output_dict=True)
    
    avg_accuracy.append(get_accuracy(y_test, 
                                     predicted_labels.tolist()))

    report_df = pd.DataFrame(evaluation_results).transpose()
    st_results.append(report_df)
    
    print('\nResults for dataset fold number :',fold_count, 'on model :', model_name)
    print('\n',report_df)
    print('--------------------------------------')

    fold_count += 1


In [None]:
avg_acc_score = round(np.mean(avg_accuracy), 2)
weighted_avg, macro_avg = get_avg_report(st_results, folds=5)

avg_scores = list([weighted_avg, macro_avg, (avg_acc_score, avg_acc_score, 
                                             avg_acc_score)])

final_df = pd.DataFrame([x for x in avg_scores], 
                        columns=(['Precision', 'Recall', 'F1_score']),
                        index=['weighted_avg','macro_avg', 'accuracy_avg'])

final_df.rename_axis('5-folds')

### LSTM pipeline

In [None]:
def get_embeddings_model(model_name, prefix, model_path):
    
    embeddings_model = None
    if model_name in ('LSTM_FT_pre-train', 'pLSTM_FT_pre-train'):
        
        # Load FastText pre trained embeddings
        if not 'wiki-news-300d-1M-subword.vec' in os.listdir(prefix):
            
            print('Downloading FastText pretrained model for the first time...')
            url = 'https://dl.fbaipublicfiles.com/fasttext/vectors-english/wiki-news-300d-1M-subword.vec.zip'
            
            wget.download(url, out=prefix)
            with zipfile.ZipFile(prefix + 'wiki-news-300d-1M-subword.vec.zip', 'r') as zip_ref:
                zip_ref.extractall(prefix)
        
        embeddings_model = KeyedVectors.load_word2vec_format(prefix + 
                                                          'wiki-news-300d-1M-subword.vec')
        print('\nFastText pretrained Model loaded...')        

    elif model_name in ('LSTM_GLV_pre-train', 'pLSTM_GLV_pre-train'):
        
        # load pre trained Glove embeddings model
        if not 'glove.6B.100d.txt' in os.listdir(prefix):
            url = 'https://nlp.stanford.edu/data/glove.6B.zip'
            wget.download(url, out=prefix)
            
            with zipfile.ZipFile(prefix + 'glove.6B.zip', 'r') as zip_ref:
                zip_ref.extractall(prefix)
            
        embeddings_model = {}
        with open(prefix + 'glove.6B.100d.txt','r') as f:
            for line in f:
                
                split_line = line.split()
                word = split_line[0]
                embedding = np.array(split_line[1:], dtype=np.float64)
                embeddings_model[word] = embedding
        
        print('\nPretrained Glove Model loaded...')        
                
    elif model_name in ('LSTM_GLV_custom', 'pLSTM_GLV_custom'):
        
        # load custom glove embeddings model
        embeddings_model = Glove.load(model_path + '/glove_custom_100d.model')
        print('\nGlove custom pretrained Model loaded...')        
      
    elif model_name in ('LSTM_FT_custom', 'pLSTM_FT_custom'):
        
        # load custom FastText embeddings model
        embeddings_model = KeyedVectors.load(model_path + '/fast_text.model')
        print('\nFastText custom Model loaded...')        
    
    return embeddings_model          

In [None]:
def convert_data_to_index(string_data, wv, model_name):
    
    index_data = []
    for word in string_data:
        if word in wv:
            try:
                if 'GLV_custom' in model_name:
                    index_data.append(wv[word])

                else:
                    index_data.append(wv.vocab[word].index)
            except:
                pass

    return index_data

def convert_data(train_sentences, test_sentences, modelf, model_name):
    
    train_data = []
    i = 0
    if 'GLV_custom' in model_name:
        word_vectors = modelf.dictionary
    else:
        word_vectors = modelf.wv
    
    while i<len(train_sentences):
        for seq in train_sentences[i]:
            train_data.append(convert_data_to_index(train_sentences[i], word_vectors, 
                                                    model_name))
            break

        i+=1
    
    test_data = []
    i = 0
    while i<len(test_sentences):
        for seq in test_sentences[i]:
            test_data.append(convert_data_to_index(test_sentences[i], word_vectors, 
                                                   model_name))
            break

        i+=1
    return train_data, test_data

def pad_sequences(train_data, test_data):
    
    max_length_f = max([len(seq) for seq in train_data])   
    test_padded = sequence.pad_sequences(test_data, maxlen=max_length_f, padding='pre')
    return test_padded


In [None]:
# load dataset for testing
fold_parent = './data/dronology_preprocess_five_folds/'
#fold_parent = '../dataset/dronology_basic_data/'


sub_folders = []
for folder in os.listdir(fold_parent):
    if 'fold' in folder: 
        sub_folders.append(os.path.join(fold_parent, folder))
        

map_labels = {0: 'information', 1: 'requirement'}
prefix = './models/DL_models/LSTM_family/'
# replace the value of 'model_name' with desired LSTM model's name to get results for the model
# to trigger more LSTM models check the names in: model/LSTM_family. examples, LSTM_FT_custom. 
# put 'p' infront of the model name to couple our pre-processing pipeline
model_name = 'pLSTM_FT_custom'

In [None]:
fold_count = 1
results = []
avg_accuracy = []

for subs in sorted(sub_folders):
    train_path = subs + '/train_' + 'fold_' + str(fold_count) + '.csv'
    test_path = subs + '/test_' + 'fold_' + str(fold_count) + '.csv'
    
    test_df=pd.read_csv(test_path)
    train_df=pd.read_csv(train_path)
    
    model_path = prefix + model_name + '/fold_' + str(fold_count)
    
    test_df['STR.REQ'] =  test_df['STR.REQ'].str.lower()
    train_df['STR.REQ'] =  train_df['STR.REQ'].str.lower()
    train_sentences = train_df['STR.REQ'].apply(str.split).values.tolist()
    test_sentences = test_df['STR.REQ'].apply(str.split).values.tolist()

    actual = test_df['class'].tolist()
    
    embeddings_model = get_embeddings_model(model_name, prefix, model_path)

    if 'p' in model_name.split('_')[0]:
        lstm_model = load_model(model_path + '/pLSTM.h5')
    else:
        lstm_model = load_model(model_path + '/LSTM.h5')
    
    train_data, test_data = convert_data(train_sentences, test_sentences, 
                                         embeddings_model, model_name)
    test_padded = pad_sequences(train_data, test_data)
    
    test_padded = np.array(test_padded)              
    
    predictions = lstm_model.predict(test_padded)
    sorted_predictions = (-predictions).argsort()
    top_label_int = sorted_predictions[:, :1].flatten().tolist()
    
    evaluation_results = metrics.classification_report(actual, top_label_int, 
                                                       target_names=list(map_labels.values()),
                                                       output_dict=True)
    report_df = pd.DataFrame(evaluation_results).transpose()
    results.append(report_df)
    print('\nResults for dataset fold number :',fold_count, 'on model :', model_name)
    print('\n',report_df)
    print('--------------------------------------')
    
    avg_accuracy.append(get_accuracy(actual, top_label_int))
    fold_count += 1

In [None]:

avg_acc_score = round(np.mean(avg_accuracy), 2)
weighted_avg, macro_avg = get_avg_report(results, folds=5)

avg_scores = list([weighted_avg, macro_avg, (avg_acc_score, avg_acc_score, 
                                             avg_acc_score)])

final_df = pd.DataFrame([x for x in avg_scores], 
                        columns=(['Precision', 'Recall', 'F1_score']),
                        index=['weighted_avg','macro_avg', 'accuracy_avg'])

final_df.rename_axis('5-folds')

### Random Pipeline

In [None]:
def get_random_label(ranges):
    """
    returns the random label from the defined ranges of the labels
    """
    temp=random.randint(1, ranges[-1][-1])
    
    for r in ranges:
        if(temp>r[1] and temp<=r[-1]):
            return r[0]
    return None

def get_ranges(df):
    """
    predicts the random labels on the given test dataset
    
    """
    csum = 0
    ranges = []
    total_tr = len(df)

    for k, v in df['class'].value_counts().to_dict().items():

        csum_old = csum
        csum += round((v/total_tr) * 100,0)
        #print (k,"from", csum_old, "to",csum)
        ranges.append([k, csum_old, csum])
    
    r_out = []
    for row in test_df.iterrows():
        r3labels = []

        while len(r3labels)!=1:
            rl = get_random_label(ranges)
            if not rl in r3labels:
                r3labels.append(rl)

        r_out.append([row[1]['issueid'], row[1]['class'], r3labels])

    return ranges, r_out

In [None]:
# load dataset
fold_parent = './data/dronology_five_folds/'

sub_folders = []
for folder in os.listdir(fold_parent):
    if 'fold' in folder: 
        sub_folders.append(os.path.join(fold_parent, folder)) 

In [None]:
fold_count = 1
for subs in sorted(sub_folders):
    
    test_path = subs + '/test_' + 'fold_' + str(fold_count) + '.csv'
    test_df = pd.read_csv(test_path)
    ranges, r_out = get_ranges(test_df)
    
    random_out = pd.DataFrame()
    random_out['issueid'] = [i[0] for i in r_out]
    random_out['class'] = [i[1] for i in r_out]
    random_out['top_label'] = [i[2][0] for i in r_out]
    evaluation_results = classification_report(random_out['class'], random_out['top_label'], 
                                               target_names=list(map_labels.values()), 
                                               output_dict=True)
    
    report_df = pd.DataFrame(evaluation_results).transpose()
    print('\nResults for fold number :',fold_count)
    print('\n',report_df)
    print('--------------------------------------')
    
    fold_count += 1