In [1]:
import os
import sys
import pandas as pd
import joblib

from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import pickle

# Make the project-local ColumnsProcessing module importable.
# sys.path entries must be directories, so register the folder that contains
# ColumnsProcessing.py (resolved against the current working directory) rather
# than the path of the file itself, which the import system would ignore.
_columns_processing_dir = os.path.dirname(os.path.abspath("ColumnsProcessing.py"))
if _columns_processing_dir not in sys.path:
    sys.path.append(_columns_processing_dir)
# NOTE(review): the wildcard import hides which names this notebook relies on;
# only ColumnsProcessing is used in the visible cells — consider importing it explicitly.
from ColumnsProcessing import *

# Render up to 100 columns when a DataFrame is displayed.
pd.set_option("display.max_columns", 100)

In [2]:
# Load the pickled vectorizer object that is later applied to the 'all_texts' column.
# The context manager closes the file handle as soon as unpickling finishes
# (the bare open() call previously left the handle unclosed).
# NOTE(review): pickle.load can execute arbitrary code — only load files from a trusted source.
with open("/Users/borisperezg/rebelmodels_storing/models/Vectorizer/vectorizer.pickle", "rb") as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

In [3]:
# Helper instance from the project-local ColumnsProcessing module; it provides the
# lemmatization, vectorization, normalization and categorical-expansion steps used below.
cp = ColumnsProcessing()

In [205]:
# Base name of the dataset: the input is read from '<fileName>_pa.csv' and the
# predictions are written to '<fileName>_done.csv'.
fileName = 'facts_prediction_dataset_1617483308628'

In [206]:
# The same input dataset is used for both ATD identification and QA identification.
myDF = read_csv(
    '/Users/borisperezg/rebelmodels_storing/datasets_to_classify/'
    + fileName
    + '_pa.csv'
)
# Work on an independent (deep) copy so that myDF keeps the raw input columns.
df_ = myDF.copy()

In [207]:
# Display the working copy of the input dataset for a visual check.
df_

Unnamed: 0,factid,drivertype,goaltype,sourceelementname,sourceelementtype,layersource,targetelementname,targetelementtype,layertarget,isnewelement,iscyclic,facttype,relatontype,actiontype,incoming,outcoming,ratiolinks,mostlinkedlayer,property,propertynewvalue,propertyoldvalue,commitlogs_ngrams,chatlogs_ngrams,adrlogs_ngrams
0,127,improve process,minimal process definition,production service,service,business,,,,,False,element,,create,1,1,0.17,business,,,,,,
1,128,improve process,minimal process definition,production process,process,business,,,,,False,element,,create,0,4,0.33,business,,,,,,
2,129,improve process,minimal process definition,production planning,process,business,,,,,False,element,,create,1,1,0.17,business,,,,,,
3,130,improve process,minimal process definition,processing,process,business,,,,,False,element,,create,2,1,0.25,business,,,,,,
4,131,improve process,minimal process definition,packaging,process,business,,,,,False,element,,create,2,1,0.25,business,,,,,,
5,132,improve process,minimal process definition,delivering process,process,business,,,,,False,element,,create,1,0,0.08,business,,,,,,
6,133,improve process,minimal process definition,production service,service,business,simplification of product manufacturing process,requirement,motivation,True,False,relation,realization,create,0,0,0.0,business,,,,,,
7,134,improve process,minimal process definition,production process,process,business,production planning,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,
8,135,improve process,minimal process definition,production process,process,business,processing,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,
9,136,improve process,minimal process definition,production process,process,business,packaging,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,


In [208]:
# --------------------------------------
# NULL VALUE HANDLING
# --------------------------------------

In [209]:
# Placeholder written into each column wherever the value is missing.
null_placeholders = {
    'targetelementname': 'NoName',
    'targetelementtype': 'NoType',
    'sourceelementtype': 'NoType',
    'mostlinkedlayer': 'NoLayer',
    'isnewelement': 'NoNew',
    'layertarget': 'NoLayer',
    'layersource': 'NoLayer',
    'relatontype': 'NoRelation',
    'property': 'NoProperty',
    'propertynewvalue': 'NoValue',
    'propertyoldvalue': 'NoValue',
}
for column_name, placeholder in null_placeholders.items():
    df_[[column_name]] = df_[[column_name]].fillna(value=placeholder)

# isnewelement mixes booleans with the 'NoNew' placeholder; store everything as text.
df_[['isnewelement']] = df_[['isnewelement']].astype(str)

In [210]:
# --------------------------------------
# LEMMATIZATION AND STOPWORD REMOVAL
# --------------------------------------

In [211]:
# Run the ColumnsProcessing stopword-removal/lemmatization helper over each
# free-text column and store the result in a new *_lemma column.
lemma_sources = {
    'driver_lemma': 'drivertype',
    'goal_lemma': 'goaltype',
    'sourceelementname_lemma': 'sourceelementname',
    'targetelementname_lemma': 'targetelementname',
}
for lemma_column, raw_column in lemma_sources.items():
    df_[lemma_column] = df_[raw_column].apply(cp.remove_stopwords_and_lemma)

# The commitlogs_ngrams, chatlogs_ngrams and adrlogs_ngrams fields were already processed in Step 1

In [212]:
# --------------------------------------
# CONCATENATION OF LEMMATIZED TEXT FIELDS
# --------------------------------------

In [213]:
# Build one space-separated text field per row from the lemmatized columns and
# the n-gram log columns; every piece is converted with str() first.
# NOTE(review): str() turns missing values into the literal token 'nan' (visible as a
# 'nan' feature in the vectorized output) — confirm this matches how the vectorizer was trained.
text_source_columns = [
    'driver_lemma',
    'goal_lemma',
    'sourceelementname_lemma',
    'targetelementname_lemma',
    'commitlogs_ngrams',
    'chatlogs_ngrams',
    'adrlogs_ngrams',
]
combined_text = df_[text_source_columns[0]].map(str)
for text_column in text_source_columns[1:]:
    combined_text = combined_text + ' ' + df_[text_column].map(str)
df_['all_texts'] = combined_text

In [214]:
# Remove the raw text columns and their lemmatized intermediates; their content
# now lives in the combined 'all_texts' column.
df_.drop(
    columns=[
        'drivertype',
        'goaltype',
        'sourceelementname',
        'driver_lemma',
        'goal_lemma',
        'sourceelementname_lemma',
        'targetelementname',
        'targetelementname_lemma',
        'commitlogs_ngrams',
        'chatlogs_ngrams',
        'adrlogs_ngrams',
    ],
    inplace=True,
)

In [215]:
# Drop the fact identifier from the feature frame; it stays available in myDF
# and is written to the output file at the end.
df_.drop(columns=['factid'], inplace=True)

In [216]:
# --------------------------------------
# VECTORIZATION OF THE TEXT COLUMN
# --------------------------------------

In [217]:
# Vectorize the 'all_texts' column with the vectorizer loaded from disk.
# The displayed `result` has one numeric column per token and no 'all_texts'
# column; the exact behavior lives in ColumnsProcessing — TODO confirm.
result = cp.textColumnsVectorizationUsed(df_, 'all_texts', vectorizer)

In [218]:
# --------------------------------------
# NORMALIZATION OF NUMERIC COLUMNS
# --------------------------------------

In [219]:
# Normalize the 'incoming' link count; the displayed output is consistent with
# scaling into [0, 1] (e.g. 1 -> 0.5, 2 -> 1.0).
# NOTE(review): if the scaling parameters are derived from this batch rather than from
# the training data, features may not match what the models were trained on — confirm.
result = cp.numericalNormalization(result, 'incoming')

In [220]:
# Normalize the 'outcoming' link count; the displayed output is consistent with
# scaling into [0, 1] (e.g. 1 -> 0.25, 4 -> 1.0).
# NOTE(review): confirm in ColumnsProcessing whether the scale comes from this batch or from training data.
result = cp.numericalNormalization(result, 'outcoming')

In [221]:
# --------------------------------------
# PROCESSING OF CATEGORICAL COLUMNS
# --------------------------------------

In [222]:
# Expand every categorical column through the ColumnsProcessing helper; in the
# displayed output each one becomes a group of '<column>_<value>' indicator columns.
# The sequence is kept exactly as before, since the indicator groups appear in
# `result` in call order.
categorical_columns = (
    'sourceelementtype',
    'targetelementtype',
    'propertynewvalue',
    'propertyoldvalue',
    'mostlinkedlayer',
    'isnewelement',
    'layersource',
    'layertarget',
    'relatontype',
    'actiontype',
    'property',
    'facttype',
)
for categorical_column in categorical_columns:
    result = cp.openCategoricalColumnsUsed(result, categorical_column)

In [223]:
# Display the fully encoded feature frame for a visual check.
result

Unnamed: 0,iscyclic,incoming,outcoming,ratiolinks,action,and,application,assigment,assignment,automation,between,business,care,caring,chosen,collaboration,communication,component,connection,control,cost,data,distributed,element,estimation,estimator,event,exit,for,functionality,future,general,guidance,historical,inbound,information,is,it,kind,linked,moving,nan,new,next,noname,of,option,outbound,outcome,outpatient,...,two,type,validation,waiting,sourceelementtype_NoType,sourceelementtype_collaboration,sourceelementtype_component,sourceelementtype_interface,sourceelementtype_process,sourceelementtype_service,targetelementtype_NoType,targetelementtype_collaboration,targetelementtype_component,targetelementtype_interface,targetelementtype_process,targetelementtype_requirement,targetelementtype_service,propertynewvalue_NoValue,propertynewvalue_event,propertynewvalue_procedure_call,propertynewvalue_sync,propertyoldvalue_NoValue,propertyoldvalue_none,mostlinkedlayer_NoLayer,mostlinkedlayer_application,mostlinkedlayer_business,isnewelement_False,isnewelement_NoNew,isnewelement_True,layersource_NoLayer,layersource_application,layersource_business,layertarget_NoLayer,layertarget_application,layertarget_business,layertarget_motivation,relatontype_NoRelation,relatontype_composition,relatontype_consume,relatontype_realization,relatontype_serving,relatontype_triggering,actiontype_create,actiontype_delete,actiontype_update,property_NoProperty,property_service type,property_sync,facttype_element,facttype_relation
0,False,0.5,0.25,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.73479,0.0,0.0,0.396091,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
1,False,0.0,1.0,0.33,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.660735,0.0,0.0,0.356172,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
2,False,0.5,0.25,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.759184,0.0,0.0,0.409241,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
3,False,1.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.759184,0.0,0.0,0.409241,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
4,False,1.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.759184,0.0,0.0,0.409241,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
5,False,0.5,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.660735,0.0,0.0,0.356172,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
6,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.68727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
7,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
8,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
9,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0


## ---------------------------------------------
## PREDICTION FOR ATD

In [224]:
# --------------------------------------
# DATA PREPARATION FOR PREDICTION
# --------------------------------------

In [225]:
# Feature matrix for the ATD model: the encoded frame as a NumPy array
# (column order follows `result`).
x = result.values

In [226]:
# --------------------------------------
# MODEL LOADING AND PREDICTION
# --------------------------------------

In [227]:
# Path of the serialized ATD identification model, loaded with joblib.
# NOTE(review): `filename` (model path) differs only by case from `fileName`
# (dataset base name) — easy to mix up; a more specific name would be safer.
filename = '/Users/borisperezg/rebelmodels_storing/models/ATDIdentification/atdidentificationmodel.sav'
model_for_ATD = joblib.load(filename)



In [228]:
# Predict one ATD cause label per input row; the printed labels include
# 'noatd' and 'dependency violations'.
y_pred_ATD = model_for_ATD.predict(x)
print(y_pred_ATD)

['noatd' 'dependency violations' 'dependency violations'
 'dependency violations' 'dependency violations' 'dependency violations'
 'dependency violations' 'dependency violations' 'dependency violations'
 'dependency violations' 'dependency violations' 'dependency violations'
 'dependency violations' 'dependency violations' 'noatd' 'noatd' 'noatd'
 'noatd' 'dependency violations' 'noatd' 'dependency violations' 'noatd'
 'noatd' 'noatd' 'noatd' 'noatd' 'noatd' 'noatd' 'dependency violations'
 'noatd' 'noatd' 'noatd' 'dependency violations' 'noatd'
 'dependency violations' 'noatd']


In [229]:
# Add the predicted ATD cause to the feature frame and expand it like the other
# categorical columns, so it becomes an input to the quality-attribute model below.
# NOTE(review): this cell mutates `result`; re-running it without rebuilding `result`
# would encode 'atdcause' a second time — presumably changing the feature layout; verify.
result['atdcause'] = y_pred_ATD
result = cp.openCategoricalColumnsUsed(result, 'atdcause')

In [230]:
# Display the original input frame (still holding the raw columns) before predictions are attached.
myDF

Unnamed: 0,factid,drivertype,goaltype,sourceelementname,sourceelementtype,layersource,targetelementname,targetelementtype,layertarget,isnewelement,iscyclic,facttype,relatontype,actiontype,incoming,outcoming,ratiolinks,mostlinkedlayer,property,propertynewvalue,propertyoldvalue,commitlogs_ngrams,chatlogs_ngrams,adrlogs_ngrams
0,127,improve process,minimal process definition,production service,service,business,,,,,False,element,,create,1,1,0.17,business,,,,,,
1,128,improve process,minimal process definition,production process,process,business,,,,,False,element,,create,0,4,0.33,business,,,,,,
2,129,improve process,minimal process definition,production planning,process,business,,,,,False,element,,create,1,1,0.17,business,,,,,,
3,130,improve process,minimal process definition,processing,process,business,,,,,False,element,,create,2,1,0.25,business,,,,,,
4,131,improve process,minimal process definition,packaging,process,business,,,,,False,element,,create,2,1,0.25,business,,,,,,
5,132,improve process,minimal process definition,delivering process,process,business,,,,,False,element,,create,1,0,0.08,business,,,,,,
6,133,improve process,minimal process definition,production service,service,business,simplification of product manufacturing process,requirement,motivation,True,False,relation,realization,create,0,0,0.0,business,,,,,,
7,134,improve process,minimal process definition,production process,process,business,production planning,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,
8,135,improve process,minimal process definition,production process,process,business,processing,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,
9,136,improve process,minimal process definition,production process,process,business,packaging,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,


In [231]:
# Start the output frame from the raw input (shallow copy: the new frame shares
# the underlying data with myDF) and attach the ATD predictions row by row.
# Assumes y_pred_ATD has exactly one entry per row of myDF, in the same order.
finalDF = myDF.copy(deep=False)
finalDF['atdcause'] = y_pred_ATD
# Display the input columns together with the new 'atdcause' column.
finalDF

Unnamed: 0,factid,drivertype,goaltype,sourceelementname,sourceelementtype,layersource,targetelementname,targetelementtype,layertarget,isnewelement,iscyclic,facttype,relatontype,actiontype,incoming,outcoming,ratiolinks,mostlinkedlayer,property,propertynewvalue,propertyoldvalue,commitlogs_ngrams,chatlogs_ngrams,adrlogs_ngrams,atdcause
0,127,improve process,minimal process definition,production service,service,business,,,,,False,element,,create,1,1,0.17,business,,,,,,,noatd
1,128,improve process,minimal process definition,production process,process,business,,,,,False,element,,create,0,4,0.33,business,,,,,,,dependency violations
2,129,improve process,minimal process definition,production planning,process,business,,,,,False,element,,create,1,1,0.17,business,,,,,,,dependency violations
3,130,improve process,minimal process definition,processing,process,business,,,,,False,element,,create,2,1,0.25,business,,,,,,,dependency violations
4,131,improve process,minimal process definition,packaging,process,business,,,,,False,element,,create,2,1,0.25,business,,,,,,,dependency violations
5,132,improve process,minimal process definition,delivering process,process,business,,,,,False,element,,create,1,0,0.08,business,,,,,,,dependency violations
6,133,improve process,minimal process definition,production service,service,business,simplification of product manufacturing process,requirement,motivation,True,False,relation,realization,create,0,0,0.0,business,,,,,,,dependency violations
7,134,improve process,minimal process definition,production process,process,business,production planning,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,,dependency violations
8,135,improve process,minimal process definition,production process,process,business,processing,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,,dependency violations
9,136,improve process,minimal process definition,production process,process,business,packaging,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,,dependency violations


In [232]:
#df_.to_csv('/Users/borisperezg/rebelmodels_storing/datasets_to_classify/multiclass_entrydataset_atdidentification_done1.csv', index=False, index_label=True)

## -----------------------------------------------------------------
## PREDICTION FOR QUALITY ATTRIBUTES

In [233]:
# --------------------------------------
# DATA PREPARATION FOR PREDICTION
# --------------------------------------

In [234]:
# Use the dataframe as it stands now, i.e. including the encoded ATD prediction
# added after the first model was applied.
x = result.values

In [235]:
# --------------------------------------
# MODEL LOADING AND PREDICTION
# --------------------------------------

In [236]:
# Path of the serialized affected-quality-attribute model (stored under the
# ATDIdentification models folder), loaded with joblib. Reuses the `filename`
# variable, overwriting the ATD model path set earlier.
filename = '/Users/borisperezg/rebelmodels_storing/models/ATDIdentification/affectedqa_model.sav'
model_for_QA = joblib.load(filename)

In [237]:
# Predict one affected quality attribute per input row; the printed labels
# include 'none' and 'maintainability'.
y_pred_QA = model_for_QA.predict(x)
print(y_pred_QA)

['none' 'none' 'none' 'none' 'none' 'none' 'none' 'none' 'none' 'none'
 'none' 'none' 'none' 'none' 'none' 'none' 'none' 'none' 'maintainability'
 'none' 'maintainability' 'none' 'none' 'none' 'none' 'none' 'none' 'none'
 'maintainability' 'none' 'maintainability' 'maintainability' 'none'
 'none' 'none' 'none']


In [238]:
# Attach the quality-attribute predictions next to the ATD cause in the output frame.
finalDF['affectedqa'] = y_pred_QA

In [239]:
# Display the input columns together with both predictions.
finalDF

Unnamed: 0,factid,drivertype,goaltype,sourceelementname,sourceelementtype,layersource,targetelementname,targetelementtype,layertarget,isnewelement,iscyclic,facttype,relatontype,actiontype,incoming,outcoming,ratiolinks,mostlinkedlayer,property,propertynewvalue,propertyoldvalue,commitlogs_ngrams,chatlogs_ngrams,adrlogs_ngrams,atdcause,affectedqa
0,127,improve process,minimal process definition,production service,service,business,,,,,False,element,,create,1,1,0.17,business,,,,,,,noatd,none
1,128,improve process,minimal process definition,production process,process,business,,,,,False,element,,create,0,4,0.33,business,,,,,,,dependency violations,none
2,129,improve process,minimal process definition,production planning,process,business,,,,,False,element,,create,1,1,0.17,business,,,,,,,dependency violations,none
3,130,improve process,minimal process definition,processing,process,business,,,,,False,element,,create,2,1,0.25,business,,,,,,,dependency violations,none
4,131,improve process,minimal process definition,packaging,process,business,,,,,False,element,,create,2,1,0.25,business,,,,,,,dependency violations,none
5,132,improve process,minimal process definition,delivering process,process,business,,,,,False,element,,create,1,0,0.08,business,,,,,,,dependency violations,none
6,133,improve process,minimal process definition,production service,service,business,simplification of product manufacturing process,requirement,motivation,True,False,relation,realization,create,0,0,0.0,business,,,,,,,dependency violations,none
7,134,improve process,minimal process definition,production process,process,business,production planning,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,,dependency violations,none
8,135,improve process,minimal process definition,production process,process,business,processing,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,,dependency violations,none
9,136,improve process,minimal process definition,production process,process,business,packaging,process,business,True,False,relation,composition,create,0,0,0.0,business,,,,,,,dependency violations,none


In [240]:
# Keep only the fact identifier and the two predicted labels for export.
finalDFNoOtherFields = finalDF[['factid', 'atdcause', 'affectedqa']]

In [241]:
# Write the per-fact predictions to '<fileName>_done.csv' without the DataFrame index.
# index_label was removed: it only names the index column, which index=False
# already omits, and True is not a meaningful label.
finalDFNoOtherFields.to_csv('/Users/borisperezg/rebelmodels_storing/datasets_classified/'+fileName+'_done.csv', index=False)
