# Análise dos valores adicionados em modificações por bug report para os campos: status, resolution, severity e priority

Nas mudanças de cada bug report, existe o campo "added", que indica o novo valor do campo em questão.

## Importações

In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
%matplotlib inline

# Plot styling and float display: show every float with two decimal places.
plt.style.use('ggplot')
pd.set_option('display.float_format', lambda x: '%.2f' % x)

# Load the pre-processed bug reports. The handle is not called `input` so the
# builtin of the same name is not shadowed for the rest of the notebook, and
# the encoding is explicit so the read does not depend on the platform default.
with open('processed_bugs_counting_added_values_final.json', encoding='utf-8') as bugs_file:
  bugs = json.load(bugs_file)

## Pré-processamento

In [2]:
# Split the loaded bug reports into one list per analysed field. Each list
# holds, for every bug report, the object stored under that field's key
# (presumably a mapping "added value" -> count; confirm against the JSON).
bugs_status = [bug['status'] for bug in bugs]
bugs_resolution = [bug['resolution'] for bug in bugs]
bugs_priority = [bug['priority'] for bug in bugs]
bugs_severity = [bug['severity'] for bug in bugs]

# One DataFrame per field: one row per bug report.
df_bugs_status = pd.DataFrame(bugs_status)
df_bugs_resolution = pd.DataFrame(bugs_resolution)
df_bugs_priority = pd.DataFrame(bugs_priority)
df_bugs_severity = pd.DataFrame(bugs_severity)

# The raw structures are no longer needed once the DataFrames exist; release
# all of them (the resolution list was previously left behind) to cut memory.
bugs.clear()
bugs_status.clear()
bugs_resolution.clear()
bugs_priority.clear()
bugs_severity.clear()

## 1. Estatísticas por campo

### 1.1 Status

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for each status column.
df_bugs_status.describe()

Unnamed: 0,RESOLVED,ASSIGNED,REOPENED,VERIFIED,NEW,UNCONFIRMED
count,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0
mean,1.07,0.15,0.07,0.0,0.04,0.0
std,0.38,0.37,0.37,0.03,0.21,0.08
min,1.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,0.0,0.0,0.0,0.0,0.0
75%,1.0,0.0,0.0,0.0,0.0,0.0
max,20.0,6.0,19.0,4.0,7.0,6.0


### 1.2 Resolution

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for each resolution column.
df_bugs_resolution.describe()

Unnamed: 0,Unnamed: 1,FIXED,INVALID,INCOMPLETE,DUPLICATE,WORKSFORME,WONTFIX,INACTIVE,MOVED
count,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0
mean,0.07,0.57,0.08,0.12,0.13,0.08,0.09,0.0,0.0
std,0.38,0.56,0.27,0.45,0.35,0.28,0.29,0.07,0.04
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,19.0,20.0,16.0,17.0,4.0,4.0,5.0,2.0,9.0


### 1.3 Priority

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for each priority column.
df_bugs_priority.describe()

Unnamed: 0,P3,P4,P2,P1,--,P5
count,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0
mean,0.07,0.01,0.05,0.05,0.01,0.02
std,0.26,0.09,0.23,0.22,0.1,0.13
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0
max,3.0,3.0,7.0,5.0,4.0,4.0


### 1.4 Severity

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for each severity column.
df_bugs_severity.describe()

Unnamed: 0,minor,blocker,normal,critical,enhancement,major,trivial,S4,N/A,--,S3,S2,S1
count,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0,690817.0
mean,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0
std,0.06,0.05,0.07,0.09,0.09,0.08,0.03,0.05,0.05,0.04,0.1,0.05,0.02
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,2.0,2.0,2.0,3.0,3.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0


## 2. Percentual de bug reports da base em que cada valor do campo foi adicionado ao menos uma vez

### 2.1 Status

In [7]:
# For each status column, print the percentage of rows (bug reports) whose
# value is truthy, i.e. non-zero.
total_status_rows = len(df_bugs_status)
for column in df_bugs_status.columns:
  rows_with_value = df_bugs_status[column].astype(bool).sum(axis=0)
  percent = (rows_with_value / total_status_rows) * 100
  print(f'Percentual do valor "{column}": {percent: .2f}%')

Percentual do valor "RESOLVED":  100.00%
Percentual do valor "ASSIGNED":  14.95%
Percentual do valor "REOPENED":  5.08%
Percentual do valor "VERIFIED":  0.09%
Percentual do valor "NEW":  4.17%
Percentual do valor "UNCONFIRMED":  0.47%


### 2.2 Resolution

In [8]:
# For each resolution column, print the percentage of rows (bug reports)
# whose value is truthy, i.e. non-zero.
total_resolution_rows = len(df_bugs_resolution)
for column in df_bugs_resolution.columns:
  rows_with_value = df_bugs_resolution[column].astype(bool).sum(axis=0)
  percent = (rows_with_value / total_resolution_rows) * 100
  print(f'Percentual do valor "{column}": {percent: .2f}%')

Percentual do valor "":  5.38%
Percentual do valor "FIXED":  54.33%
Percentual do valor "INVALID":  7.49%
Percentual do valor "INCOMPLETE":  9.64%
Percentual do valor "DUPLICATE":  13.32%
Percentual do valor "WORKSFORME":  8.15%
Percentual do valor "WONTFIX":  9.03%
Percentual do valor "INACTIVE":  0.46%
Percentual do valor "MOVED":  0.16%


### 2.3 Priority

In [9]:
# For each priority column, print the percentage of rows (bug reports) whose
# value is truthy, i.e. non-zero.
total_priority_rows = len(df_bugs_priority)
for column in df_bugs_priority.columns:
  rows_with_value = df_bugs_priority[column].astype(bool).sum(axis=0)
  percent = (rows_with_value / total_priority_rows) * 100
  print(f'Percentual do valor "{column}": {percent: .2f}%')

Percentual do valor "P3":  7.07%
Percentual do valor "P4":  0.72%
Percentual do valor "P2":  5.03%
Percentual do valor "P1":  4.93%
Percentual do valor "--":  0.84%
Percentual do valor "P5":  1.69%


### 2.4 Severity

In [10]:
# For each severity column, print the percentage of rows (bug reports) whose
# value is truthy, i.e. non-zero.
total_severity_rows = len(df_bugs_severity)
for column in df_bugs_severity.columns:
  rows_with_value = df_bugs_severity[column].astype(bool).sum(axis=0)
  percent = (rows_with_value / total_severity_rows) * 100
  print(f'Percentual do valor "{column}": {percent: .2f}%')

Percentual do valor "minor":  0.31%
Percentual do valor "blocker":  0.27%
Percentual do valor "normal":  0.45%
Percentual do valor "critical":  0.82%
Percentual do valor "enhancement":  0.74%
Percentual do valor "major":  0.67%
Percentual do valor "trivial":  0.10%
Percentual do valor "S4":  0.29%
Percentual do valor "N/A":  0.27%
Percentual do valor "--":  0.15%
Percentual do valor "S3":  0.94%
Percentual do valor "S2":  0.25%
Percentual do valor "S1":  0.06%
