In [1]:
import sys
import arff
from io import StringIO
from sklearn import svm
import numpy as np
import pandas as pd
import time
import json
import csv
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
import re
from pandasgui import show

from sklearn.model_selection import cross_validate
from sklearn.feature_selection import mutual_info_classif
from sklearn.preprocessing import minmax_scale
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import learning_curve
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import plot_confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.preprocessing import Binarizer
from sklearn.calibration import calibration_curve
from sklearn.feature_selection import RFECV
from sklearn.svm import SVR
from sklearn import preprocessing
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import VotingClassifier


# Project names used further down to select rows of the 'idFlakies' dataset
# for the "inter-projects" evaluation subset (matched against the `project` column).
idFlakiesProjects = ['redpipe', 'vertexium', 'javaCasClient', 'c2mon', 'vertx', 'excelastic', 'rxjava2', 'tyrus', 'esper', 'yawp', 'luwak', 'fluentLoggerJava', 'delightNashornSandbox', 'dbScheduler', 'one', 'sawmill', 'springCloudZuulRatelimit', 'timely', 'sos', 'openpojo', 'ociJavaSdk', 'aletheia', 'pippo', 'recast4j', 'noxy', 'springCloudAws', 'vertxMqtt', 'vertxRabbitmqClient', 'admiral', 'carbonApimgt', 'riptide', 'fastjson', 'dubbo', 'webcollector', 'doanduyhaiAchilles', 'elasticjoblite', 'disconf', 'hutool', 'oryx', 'querydsl', 'helios', 'retrofit', 'javaWebsocket', 'undertow', 'alien4cloud', 'cukes', 'hsac', 'googdDataCl', 'springDataBean', 'jhispster', 'marineApi', 'junitQuickcheck', 'nexus', 'springDataEnvers', 'springWs', 'aismessages', 'unix4j', 'wikidata', 'activiti', 'jackrabbit', 'struts', 'jfreechart', 'httpRequest', 'wildflymaven', 'nifiRegistry', 'arangoddb', 'dnsjava', 'as2lib', 'whois', 'dbean', 'searchHighlighter', 'wildflymavenplugin', 'balana', 'limfs', 'jodatime', 'otto', 'dropwizard']

# Project names used further down both to restrict the 'msr4flakiness' training
# rows and to select the "intra-projects" evaluation subset of 'idFlakies'.
msr4FlakinessProjects = ['Achilles', 'ambari', 'assertj-core', 'checkstyle', 'commons-exec', 'dropwizard', 'hadoop', 'handlebars', 'hbase', 'hector', 'httpcore', 'jackrabbit-oak', 'jimfs', 'logback', 'ninja', 'okhttp', 'oozie', 'orbit', 'oryx', 'spring-boot', 'alluxio', 'togglz', 'undertow', 'wro4j', 'zxing']

In [2]:
# Load the sampled dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('sampled.csv')

In [3]:
# Columns that are not used as features. Per the original author's note, the
# smell columns `exceptionCatchingThrowing` and `dependentTest` are removed
# because they carry no values.
unused_columns = [
    'commit', 'testClass', 'testMethod', 'testFilePath', 'productionFilePath',
    'relativeTestFilePath', 'relativeProductionFilePath', 'tsTestClass',
    'tsTestMethod', 'is_sampled', 'dependentTest', 'exceptionCatchingThrowing',
    'vocabulary', 'tokens_parser', 'keywords_parser', 'strings_parser',
    'string_type_parser', 'anotations_parser',
]

data = (
    data
    .reset_index()                 # keeps the old index as an 'index' column (dropped later)
    .drop(columns=unused_columns)
    .replace(np.nan, 0)            # missing values -> 0
    .replace(True, 1)              # booleans -> 0/1
    .replace(False, 0)
    .replace('flaky', 1)           # textual labels -> 1 (flaky) / 0 (nonflaky)
    .replace('nonflaky', 0)
)

In [4]:
# `loc`, `smellsCount` and every per-smell column are stored as 32-bit integers.
int32_columns = [
    'loc',
    'smellsCount',
    'assertionRoulette',
    'conditionalTestLogic',
    'constructorInitialization',
    'defaultTest',
    'duplicateAssert',
    'eagerTest',
    'emptyTest',
    'generalFixture',
    'ignoredTest',
    'lazyTest',
    'magicNumberTest',
    'mysteryGuest',
    'printStatement',
    'redundantAssertion',
    'resourceOptimism',
    'sensitiveEquality',
    'sleepyTest',
    'unknownTest',
    'verboseTest',
]
data = data.astype(dict.fromkeys(int32_columns, 'int32'))


In [5]:
# Keep only the rows that belong to the 'msr4flakiness' dataset ...
is_msr4flakiness = data['dataset'] == 'msr4flakiness'
allMsr4flakiness = data[is_msr4flakiness]
before = allMsr4flakiness.shape[0]

# ... and, among those, only the projects listed in msr4FlakinessProjects.
in_selected_project = allMsr4flakiness['project'].isin(msr4FlakinessProjects)
# reset_index() keeps the previous index as an extra column; it is dropped
# together with the other non-feature columns before training.
smells = allMsr4flakiness[in_selected_project].reset_index()
after = smells.shape[0]

# Row counts before and after the project filter.
print(before, after)

2800 2777


In [6]:
# `train` starts as another name for the filtered `smells` frame (no copy is made here).
train = smells

In [7]:
# Build the feature matrix and label vector from `smells` (the frame `train`
# was bound to) rather than from `train` itself. The original overwrote `train`
# with a frame that no longer had 'klass', so re-running the cell raised
# KeyError; deriving from `smells` makes the cell idempotent with identical
# results on a fresh Run All.
train_y = smells['klass']

# 'level_0' and 'index' are the leftovers of the two earlier reset_index()
# calls; 'project', 'klass' and 'dataset' are identifiers / the label.
train = smells.drop(columns=['project', 'klass', 'dataset', 'level_0', 'index'])

# 80/20 hold-out split with a fixed seed so the split is reproducible.
X_train, x_test, y_train, y_test = train_test_split(
    train, train_y, test_size=0.2, random_state=1
)

In [8]:
# Registry of the baseline classifiers, keyed by the name used in the
# statistics table.
#
# DecisionTreeClassifier now gets an explicit random_state, matching the
# RandomForest entry: scikit-learn permutes features at every split, so an
# unseeded tree can differ between runs when several splits tie.
# LinearSVC and Perceptron are wrapped in CalibratedClassifierCV so that
# predict_proba is available for them.
classifiers_old = {
    'randomForest': RandomForestClassifier(random_state=1),
    'decisionTree': DecisionTreeClassifier(min_samples_leaf=1, random_state=1),
    'naiveBayes': GaussianNB(),
    'smo': CalibratedClassifierCV(
        LinearSVC(fit_intercept=False, tol=0.001, C=1, dual=False, max_iter=100000),
        method='sigmoid',
    ),
    'knn': KNeighborsClassifier(n_neighbors=1, metric='euclidean'),
    'logisticRegression': LogisticRegression(max_iter=1000),
    'perceptron': CalibratedClassifierCV(Perceptron()),
    'lda': LinearDiscriminantAnalysis(),
}

# Empty results table; each evaluation step adds one row per classifier.
classifierStatistics = pd.DataFrame(columns=[
    'features', 'process', 'step', 'classifier',
    'acc', 'precision', 'recall', 'f1', 'mcc', 'auc', 'VP', 'FN',
])

In [9]:
# Fit every baseline classifier on the training split and score it on the
# held-out split (x_test / y_test).
#
# Rows are collected in a list and added to `classifierStatistics` with a
# single pd.concat: DataFrame.append was removed in pandas 2.0 and copied the
# whole frame on every iteration.
trained_classifier_old = {}   # classifier name -> fitted estimator
predict_classifiers = {}      # classifier name -> predictions on x_test
holdout_rows = []

for key, classifier in classifiers_old.items():

    classifier.fit(X_train, y_train)
    predict = classifier.predict(x_test)
    # Second predict_proba column, used as the score for ROC AUC.
    y_probs = classifier.predict_proba(x_test)[:, 1]

    predict_classifiers[key] = predict

    acc = classifier.score(x_test, y_test)
    f1 = f1_score(y_test, predict, average='weighted')
    cr = classification_report(y_test, predict, output_dict=True)
    mcc = matthews_corrcoef(y_test, predict)
    auc = roc_auc_score(y_test, y_probs)

    # NOTE: the row is labelled step='training' although the metrics above are
    # computed on the held-out split. The 'tradicional' spelling is the value
    # the summary cell filters on, whereas the later evaluation rows use
    # 'traditional'. Both strings are kept unchanged here.
    classStatistics = {
        'features': 'smells',
        'process': 'tradicional',
        'step': 'training',
        'classifier': key,
        'acc': acc,
        'precision': cr['weighted avg']['precision'],
        'recall': cr['weighted avg']['recall'],
        'f1': f1,
        'auc': auc,
        'mcc': mcc
    }

    holdout_rows.append(classStatistics)
    trained_classifier_old[key] = classifier

# Columns absent from the row dicts ('VP', 'FN') are filled with NaN.
classifierStatistics = pd.concat(
    [classifierStatistics, pd.DataFrame(holdout_rows, columns=classifierStatistics.columns)],
    ignore_index=True,
)

In [10]:
# Number of feature columns fed to the classifiers.
X_train.shape[1]

21

In [11]:
# Rows recorded by the fit-and-score loop (they are tagged 'tradicional').
classifierStatistics.loc[classifierStatistics['process'].eq('tradicional')]

Unnamed: 0,features,process,step,classifier,acc,precision,recall,f1,mcc,auc,VP,FN
0,smells,tradicional,training,randomForest,0.836331,0.836912,0.836331,0.836402,0.672862,0.905892,,
1,smells,tradicional,training,decisionTree,0.834532,0.834601,0.834532,0.834556,0.668567,0.862114,,
2,smells,tradicional,training,naiveBayes,0.652878,0.738687,0.652878,0.610184,0.368766,0.783951,,
3,smells,tradicional,training,smo,0.751799,0.752165,0.751799,0.751188,0.502338,0.829971,,
4,smells,tradicional,training,knn,0.81295,0.812908,0.81295,0.812918,0.625101,0.812445,,
5,smells,tradicional,training,logisticRegression,0.793165,0.793934,0.793165,0.792616,0.585713,0.873619,,
6,smells,tradicional,training,perceptron,0.776978,0.777729,0.776978,0.77634,0.553177,0.864558,,
7,smells,tradicional,training,lda,0.782374,0.783548,0.782374,0.781608,0.564326,0.861758,,


In [12]:
# Information-gain ranking: mutual information between each feature column and
# the label, with every feature treated as discrete.
mi_scores = mutual_info_classif(train, train_y, discrete_features=True)
informationGain = {column: score for column, score in zip(train.columns, mi_scores)}

# Feature names ordered from highest to lowest information gain.
sortedInformationGain = sorted(
    informationGain,
    key=lambda column: informationGain[column],
    reverse=True,
)

# Features and label side by side, used for the per-feature occurrence counts.
train_xy = train.join(train_y)

In [13]:
def _information_gain_row(position, token):
    """Build one ranking row for `token`: its rank, its information gain and
    how many rows have a positive value for it (overall, flaky, non-flaky)."""
    # Computed once; the original filtered the frame three times per feature.
    present = train_xy[token] > 0
    return {
        'position': position,
        'token': token,
        'information_gain': informationGain[token],
        'total_ocurences': int(present.sum()),
        'total_flaky_occurences': int((present & (train_xy['klass'] == 1)).sum()),
        'total_nonflaky_occurences': int((present & (train_xy['klass'] == 0)).sum()),
    }


# enumerate() replaces the hand-maintained position counter.
sortedInformationGainPosition = [
    _information_gain_row(position, token)
    for position, token in enumerate(sortedInformationGain)
]

infGainDataset = pd.DataFrame(sortedInformationGainPosition)

In [14]:
# Show the (up to) 50 highest-ranked features.
infGainDataset.iloc[:50]

Unnamed: 0,position,token,information_gain,total_ocurences,total_flaky_occurences,total_nonflaky_occurences
0,0,loc,0.2544574,2777,1377,1400
1,1,assertionRoulette,0.08323976,1389,968,421
2,2,smellsCount,0.02705301,2655,1356,1299
3,3,sleepyTest,0.01948161,112,105,7
4,4,generalFixture,0.01600704,267,61,206
5,5,duplicateAssert,0.01552284,376,269,107
6,6,constructorInitialization,0.01094276,68,63,5
7,7,printStatement,0.01056366,58,55,3
8,8,sensitiveEquality,0.005852377,129,95,34
9,9,lazyTest,0.00549026,1788,817,971


In [15]:
# Inter-project and intra-project tests

In [16]:
# External evaluation data: every row of the 'idFlakies' dataset.
# reset_index() keeps the previous index as an extra column, which is dropped
# with the other non-feature columns before prediction.
is_idflakies = data['dataset'] == 'idFlakies'
test = data[is_idflakies].reset_index()

In [17]:
# Split the idFlakies rows into two evaluation subsets by project name:
#   msr4flakiness -> projects listed in msr4FlakinessProjects (the list used to filter the training data)
#   idflakies     -> projects listed in idFlakiesProjects
# NOTE(review): 'oryx', 'undertow' and 'dropwizard' appear in both lists, so
# the two subsets may share rows - confirm this is intended.
in_training_projects = test['project'].isin(msr4FlakinessProjects)
in_idflakies_projects = test['project'].isin(idFlakiesProjects)

msr4flakiness = test.loc[in_training_projects]
idflakies = test.loc[in_idflakies_projects]

# Labels are taken before the label column is removed from the features.
msr4flakiness_y = msr4flakiness['klass']
idflakies_y = idflakies['klass']

# Identifier, label and leftover index columns are not features.
non_feature_columns = ['index', 'project', 'klass', 'dataset', 'level_0']
msr4flakiness = msr4flakiness.drop(columns=non_feature_columns)
idflakies = idflakies.drop(columns=non_feature_columns)

In [18]:
# Sanity check: features and labels of each subset must have the same length.
print(f"msr4flakiness samples {len(msr4flakiness)} {len(msr4flakiness_y)}")
print(f"idflakies samples {len(idflakies)} {len(idflakies_y)}")

msr4flakiness samples 35 35
idflakies samples 120 120


In [19]:
def _external_eval_row(classifier, key, step, features, labels):
    """Score one fitted classifier on one external subset and return the
    statistics row (accuracy, recall of class 1, true positives, false negatives).
    """
    predicted = classifier.predict(features)
    report = classification_report(labels, predicted, output_dict=True, zero_division=1)
    # labels=[0, 1] pins the matrix to 2x2 even when a class is absent, so the
    # four-way unpacking of ravel() is always valid.
    _tn, _fp, fn, tp = confusion_matrix(labels, predicted, labels=[0, 1]).ravel()
    return {
        'features': 'smells',
        'process': 'traditional',
        'step': step,
        'classifier': key,
        'acc': classifier.score(features, labels),
        # Report keys are the stringified labels; '1' is the flaky class.
        'recall': report['1']['recall'],
        'VP': tp,
        'FN': fn,
    }


# Both external subsets go through the same code path instead of two
# copy-pasted blocks. Row order is unchanged: for each classifier, the
# intra-project row comes first and the inter-project row second.
external_sets = [
    ('testing-intra-projects', msr4flakiness, msr4flakiness_y),
    ('testing-inter-projects', idflakies, idflakies_y),
]

external_rows = [
    _external_eval_row(classifier, key, step, features, labels)
    for key, classifier in trained_classifier_old.items()
    for step, features, labels in external_sets
]

# One concat replaces the per-row DataFrame.append calls (removed in pandas 2.0).
# Metrics not computed here (precision, f1, mcc, auc) stay NaN.
classifierStatistics = pd.concat(
    [classifierStatistics, pd.DataFrame(external_rows, columns=classifierStatistics.columns)],
    ignore_index=True,
)

In [20]:
# Intra-project evaluation rows.
classifierStatistics.loc[
    classifierStatistics['process'].eq('traditional')
    & classifierStatistics['step'].eq('testing-intra-projects')
]

Unnamed: 0,features,process,step,classifier,acc,precision,recall,f1,mcc,auc,VP,FN
8,smells,traditional,testing-intra-projects,randomForest,0.685714,,0.685714,,,,24.0,11.0
10,smells,traditional,testing-intra-projects,decisionTree,0.657143,,0.657143,,,,23.0,12.0
12,smells,traditional,testing-intra-projects,naiveBayes,0.571429,,0.571429,,,,20.0,15.0
14,smells,traditional,testing-intra-projects,smo,0.657143,,0.657143,,,,23.0,12.0
16,smells,traditional,testing-intra-projects,knn,0.514286,,0.514286,,,,18.0,17.0
18,smells,traditional,testing-intra-projects,logisticRegression,0.742857,,0.742857,,,,26.0,9.0
20,smells,traditional,testing-intra-projects,perceptron,0.714286,,0.714286,,,,25.0,10.0
22,smells,traditional,testing-intra-projects,lda,0.657143,,0.657143,,,,23.0,12.0


In [21]:
# Inter-project evaluation rows.
classifierStatistics.loc[
    classifierStatistics['process'].eq('traditional')
    & classifierStatistics['step'].eq('testing-inter-projects')
]

Unnamed: 0,features,process,step,classifier,acc,precision,recall,f1,mcc,auc,VP,FN
9,smells,traditional,testing-inter-projects,randomForest,0.541667,,0.541667,,,,65.0,55.0
11,smells,traditional,testing-inter-projects,decisionTree,0.483333,,0.483333,,,,58.0,62.0
13,smells,traditional,testing-inter-projects,naiveBayes,0.141667,,0.141667,,,,17.0,103.0
15,smells,traditional,testing-inter-projects,smo,0.55,,0.55,,,,66.0,54.0
17,smells,traditional,testing-inter-projects,knn,0.508333,,0.508333,,,,61.0,59.0
19,smells,traditional,testing-inter-projects,logisticRegression,0.475,,0.475,,,,57.0,63.0
21,smells,traditional,testing-inter-projects,perceptron,0.475,,0.475,,,,57.0,63.0
23,smells,traditional,testing-inter-projects,lda,0.475,,0.475,,,,57.0,63.0
