In [56]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import chart_studio.plotly as py
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import os

In [57]:
# Load kc2.csv into a DataFrame; the path is relative to the notebook's working directory.
data = pd.read_csv('kc2.csv')

In [58]:
# Scatter plot of column `v` (x-axis, labelled "Volume") against column `b`
# (y-axis, labelled "Bug"), drawn with markers only.
trace = go.Scatter(
    x=data['v'],
    y=data['b'],
    mode="markers",
    name="Volume - Bug",
    marker={'color': 'darkblue'},
    text="Bug (b)",
)

scatter_data = [trace]
scatter_layout = {
    'title': 'Volume - Bug',
    'xaxis': {'title': 'Volume', 'ticklen': 5},
    'yaxis': {'title': 'Bug', 'ticklen': 5},
}
fig = {'data': scatter_data, 'layout': scatter_layout}
iplot(fig)

In [59]:
# Count missing values per column.
data.isna().sum()

loc                 0
v(g)                0
ev(g)               0
iv(g)               0
n                   0
v                   0
l                   0
d                   0
i                   0
e                   0
b                   0
t                   0
lOCode              0
lOComment           0
lOBlank             0
lOCodeAndComment    0
uniq_Op             0
uniq_Opnd           0
total_Op            0
total_Opnd          0
branchCount         0
problems            0
dtype: int64

In [60]:
# Box plot of the `uniq_Op` column, with the values placed on the x-axis.
trace1 = go.Box(
    x=data['uniq_Op'],
    name='Unique Operators',
    marker={'color': 'blue'},
)
box_data = [trace1]
iplot(box_data)

In [61]:
def evaluation_control(data):
    """Label every row of ``data`` as 'Succesful' or 'Redesign', in place.

    A row is labelled 'Succesful' only when all five thresholds hold:
    ``n < 300``, ``v < 1000``, ``d < 50``, ``e < 500000`` and ``t < 5000``.
    Every other row is labelled 'Redesign'. Comparisons against NaN are
    False, so a row with a missing value in any of these columns is
    labelled 'Redesign'.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``n``, ``v``, ``d``, ``e`` and ``t``.
        A ``complexityEvaluation`` column is added (or overwritten).

    Returns
    -------
    None
        The frame is modified in place.
    """
    # Bracket access cannot be shadowed by DataFrame attributes/methods,
    # unlike attribute-style access on short column names.
    within_limits = (
        (data['n'] < 300)
        & (data['v'] < 1000)
        & (data['d'] < 50)
        & (data['e'] < 500000)
        & (data['t'] < 5000)
    )
    # Vectorised mapping of the boolean mask to labels. The label spelling is
    # kept exactly as-is because it is a runtime value other cells display
    # and group on.
    data['complexityEvaluation'] = np.where(within_limits, 'Succesful', 'Redesign')

In [62]:
# evaluation_control adds the `complexityEvaluation` column to `data` in place;
# the bare `data` expression then displays the updated frame.
evaluation_control(data)
data

Unnamed: 0,loc,v(g),ev(g),iv(g),n,v,l,d,i,e,...,lOComment,lOBlank,lOCodeAndComment,uniq_Op,uniq_Opnd,total_Op,total_Opnd,branchCount,problems,complexityEvaluation
0,1.1,1.4,1.4,1.4,1.3,1.30,1.30,1.30,1.30,1.30,...,2,2,2,1.2,1.2,1.2,1.2,1.4,no,Succesful
1,1.0,1.0,1.0,1.0,1.0,1.00,1.00,1.00,1.00,1.00,...,1,1,1,1.0,1.0,1.0,1.0,1.0,yes,Succesful
2,415.0,59.0,50.0,51.0,1159.0,8411.31,0.01,103.53,81.24,870848.58,...,35,9,10,47.0,106.0,692.0,467.0,106.0,yes,Redesign
3,230.0,33.0,10.0,16.0,575.0,3732.82,0.03,39.82,93.74,148644.06,...,15,34,5,23.0,67.0,343.0,232.0,65.0,yes,Redesign
4,175.0,26.0,12.0,13.0,500.0,3123.96,0.03,29.48,105.96,92103.07,...,7,19,4,18.0,58.0,310.0,190.0,51.0,yes,Redesign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
517,4.0,1.0,1.0,1.0,5.0,11.61,0.50,2.00,5.80,23.22,...,0,0,0,4.0,1.0,4.0,1.0,1.0,yes,Succesful
518,4.0,1.0,1.0,1.0,4.0,8.00,0.67,1.50,5.33,12.00,...,0,0,0,3.0,1.0,3.0,1.0,1.0,yes,Succesful
519,4.0,1.0,1.0,1.0,4.0,8.00,0.67,1.50,5.33,12.00,...,0,0,0,3.0,1.0,3.0,1.0,1.0,yes,Succesful
520,4.0,1.0,1.0,1.0,5.0,11.61,0.67,1.50,7.74,17.41,...,0,0,0,3.0,2.0,3.0,2.0,1.0,yes,Succesful


In [63]:
# Summarise column dtypes and non-null counts after adding `complexityEvaluation`.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522 entries, 0 to 521
Data columns (total 23 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   loc                   522 non-null    float64
 1   v(g)                  522 non-null    float64
 2   ev(g)                 522 non-null    float64
 3   iv(g)                 522 non-null    float64
 4   n                     522 non-null    float64
 5   v                     522 non-null    float64
 6   l                     522 non-null    float64
 7   d                     522 non-null    float64
 8   i                     522 non-null    float64
 9   e                     522 non-null    float64
 10  b                     522 non-null    float64
 11  t                     522 non-null    float64
 12  lOCode                522 non-null    int64  
 13  lOComment             522 non-null    int64  
 14  lOBlank               522 non-null    int64  
 15  lOCodeAndComment      5

In [64]:
# Number of rows carrying each complexityEvaluation label.
data.groupby("complexityEvaluation").size()

complexityEvaluation
Redesign      73
Succesful    449
dtype: int64

In [65]:
# Histogram of the complexityEvaluation labels (row count per label).
trace = go.Histogram(
    x=data['complexityEvaluation'],
    opacity=0.75,
    name='Complexity Evaluation',
    marker={'color': 'darkorange'},
)
hist_data = [trace]
hist_layout = go.Layout(
    barmode='overlay',
    title='Complexity Evaluation',
    xaxis={'title': 'Succesful - Redesign'},
    yaxis={'title': 'Frequency'},
)
fig = go.Figure(data=hist_data, layout=hist_layout)
iplot(fig)

In [66]:
from sklearn import preprocessing

# Min-max scale the `v` and `b` columns, each on its own, and store the
# results next to the originals as `v_ScaledUp` / `b_ScaledUp`.
scale_v = data[['v']]
scale_b = data[['b']]

minmax_scaler = preprocessing.MinMaxScaler()

v_scaled = minmax_scaler.fit_transform(scale_v)
# The same scaler object is refitted here, so after this line it only holds
# the fit for `b`.
b_scaled = minmax_scaler.fit_transform(scale_b)

# Assign the flattened arrays positionally. Wrapping them in pd.DataFrame
# would make the assignment align on index labels and silently produce NaN
# whenever `data` does not have the default 0..n-1 index.
data['v_ScaledUp'] = np.asarray(v_scaled).ravel()
data['b_ScaledUp'] = np.asarray(b_scaled).ravel()

data

Unnamed: 0,loc,v(g),ev(g),iv(g),n,v,l,d,i,e,...,lOCodeAndComment,uniq_Op,uniq_Opnd,total_Op,total_Opnd,branchCount,problems,complexityEvaluation,v_ScaledUp,b_ScaledUp
0,1.1,1.4,1.4,1.4,1.3,1.30,1.30,1.30,1.30,1.30,...,2,1.2,1.2,1.2,1.2,1.4,no,Succesful,0.000038,0.115350
1,1.0,1.0,1.0,1.0,1.0,1.00,1.00,1.00,1.00,1.00,...,1,1.0,1.0,1.0,1.0,1.0,yes,Succesful,0.000030,0.088731
2,415.0,59.0,50.0,51.0,1159.0,8411.31,0.01,103.53,81.24,870848.58,...,10,47.0,106.0,692.0,467.0,106.0,yes,Redesign,0.248748,0.248447
3,230.0,33.0,10.0,16.0,575.0,3732.82,0.03,39.82,93.74,148644.06,...,5,23.0,67.0,343.0,232.0,65.0,yes,Redesign,0.110391,0.110027
4,175.0,26.0,12.0,13.0,500.0,3123.96,0.03,29.48,105.96,92103.07,...,4,18.0,58.0,310.0,190.0,51.0,yes,Redesign,0.092385,0.092280
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
517,4.0,1.0,1.0,1.0,5.0,11.61,0.50,2.00,5.80,23.22,...,0,4.0,1.0,4.0,1.0,1.0,yes,Succesful,0.000343,0.000000
518,4.0,1.0,1.0,1.0,4.0,8.00,0.67,1.50,5.33,12.00,...,0,3.0,1.0,3.0,1.0,1.0,yes,Succesful,0.000237,0.000000
519,4.0,1.0,1.0,1.0,4.0,8.00,0.67,1.50,5.33,12.00,...,0,3.0,1.0,3.0,1.0,1.0,yes,Succesful,0.000237,0.000000
520,4.0,1.0,1.0,1.0,5.0,11.61,0.67,1.50,7.74,17.41,...,0,3.0,2.0,3.0,2.0,1.0,yes,Succesful,0.000343,0.000000


In [67]:
# Raw and min-max-scaled `v` / `b` columns side by side, as an independent frame.
scaled_data = data[['v', 'b', 'v_ScaledUp', 'b_ScaledUp']].copy()
scaled_data

Unnamed: 0,v,b,v_ScaledUp,b_ScaledUp
0,1.30,1.30,0.000038,0.115350
1,1.00,1.00,0.000030,0.088731
2,8411.31,2.80,0.248748,0.248447
3,3732.82,1.24,0.110391,0.110027
4,3123.96,1.04,0.092385,0.092280
...,...,...,...,...
517,11.61,0.00,0.000343,0.000000
518,8.00,0.00,0.000237,0.000000
519,8.00,0.00,0.000237,0.000000
520,11.61,0.00,0.000343,0.000000


In [68]:
# Re-check dtypes and non-null counts after adding the two scaled columns.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522 entries, 0 to 521
Data columns (total 25 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   loc                   522 non-null    float64
 1   v(g)                  522 non-null    float64
 2   ev(g)                 522 non-null    float64
 3   iv(g)                 522 non-null    float64
 4   n                     522 non-null    float64
 5   v                     522 non-null    float64
 6   l                     522 non-null    float64
 7   d                     522 non-null    float64
 8   i                     522 non-null    float64
 9   e                     522 non-null    float64
 10  b                     522 non-null    float64
 11  t                     522 non-null    float64
 12  lOCode                522 non-null    int64  
 13  lOComment             522 non-null    int64  
 14  lOBlank               522 non-null    int64  
 15  lOCodeAndComment      5

In [69]:
# (rows, columns) of the frame at this point.
data.shape

(522, 25)

In [70]:
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn import model_selection

# Feature columns, selected by name. These are the same 15 columns the
# positional slice `iloc[:, :-10]` picked out of the 25-column frame, but
# naming them keeps X stable if derived columns are added or removed.
FEATURE_COLUMNS = [
    'loc', 'v(g)', 'ev(g)', 'iv(g)', 'n', 'v', 'l', 'd', 'i', 'e', 'b', 't',
    'lOCode', 'lOComment', 'lOBlank',
]

# NOTE(review): complexityEvaluation is computed by evaluation_control from
# n, v, d, e and t, and all five are features here, so a classifier can
# reproduce the label from its inputs. Confirm this is the intended target.
X = data[FEATURE_COLUMNS].to_numpy()            # feature matrix
Y = data['complexityEvaluation'].to_numpy()     # class labels

In [71]:
# Display the label array built from complexityEvaluation.
Y

array(['Succesful', 'Succesful', 'Redesign', 'Redesign', 'Redesign',
       'Redesign', 'Redesign', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Succesful', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Succesful', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Succesful', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Redesign', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Succesful', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Redesign', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Succesful', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Succesful', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Succesful', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Succesful', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Succesful', 'Succesful', 'Succesful', 'Succesful',
       'Succesful', 'Succesful', 'Succesful', 'Succesful', 'Succesful',

In [72]:
# Split X / Y into training and validation sets: 20% held out, fixed seed.
# NOTE(review): a later cell calls train_test_split again with random_state=0
# and rebinds X_train. After that cell runs, X_train no longer lines up
# row-for-row with Y_train from this split.
validation_size = 0.20
seed = 42
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(X, Y, test_size = validation_size, random_state = seed)

In [73]:
from sklearn import svm

In [74]:
# Support vector classifier with a linear kernel and C=0.01.
model = svm.SVC(kernel='linear', C=0.01)

In [75]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# 80/20 split with random_state=0. This rebinds X_train from the earlier
# split (random_state=42); the lower-case y_train / y_test below are the
# labels that match this split.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# NOTE(review): the label is a threshold rule on n, v, d, e and t, and those
# columns are part of X, so near-perfect scores are expected here and do not
# indicate predictive power on unseen defects.

#Summary of the predictions made by the classifier
print("SVM Algorithm")
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
#Accuracy score (arguments in the documented order: y_true first, then y_pred)
print("ACC: ",accuracy_score(y_test, y_pred))

SVM Algorithm
              precision    recall  f1-score   support

    Redesign       1.00      1.00      1.00        13
   Succesful       1.00      1.00      1.00        92

    accuracy                           1.00       105
   macro avg       1.00      1.00      1.00       105
weighted avg       1.00      1.00      1.00       105

[[13  0]
 [ 0 92]]
ACC:  1.0
