# **Format Data**

In this notebook we format the resulting table obtained from the previous section (integration).

In [1]:
import pandas as pd
import numpy as np

We first read the Data Frame from the previous step, *Integrate Data*:

In [2]:
# Load the table written by the previous step. The positional slice keeps
# every row but discards the first column of the CSV.
dataFrame = pd.read_csv(
    "../../data/interim/DataPreparation/DATA_FRAME.csv"
).iloc[:, 1:]
dataFrame.head()

Unnamed: 0,committer,numberCommits,fixedSZZIssues,fixedSonarIssues,fixedJiraIssues,inducedSZZIssues,inducedSonarIssues,timeInProject,resolutionTime,type_Bug,...,securityRemediationEffort,debt,closeTime,type_BUG,type_CODE_SMELL,type_VULNERABILITY,severity_BLOCKER,severity_CRITICAL,severity_INFO,severity_MAJOR
0,-l,27.0,0.0,0.0,0.0,0.0,2.0,4235880.0,0.0,0.0,...,0.0,0.833333,22617.766389,0.0,2.0,0.0,0.0,0.0,0.0,2.0
1,1028332163,14.0,0.0,0.0,0.0,0.0,0.0,77939.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,A. J. David Bosschaert,432.0,51.0,0.0,0.0,1.0,0.0,173937105.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,A195882,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.933333,809.977238,0.0,18.0,0.0,0.0,0.0,7.0,8.0
4,A744013,5.0,0.0,4.0,0.0,0.0,3.0,351970.0,0.0,0.0,...,0.0,17.133333,821.388563,14.0,78.0,3.0,0.0,25.0,0.0,24.0


This is the list of the attributes included in this Data Frame:

In [3]:
# Column labels of the loaded frame, as a plain Python list.
dataFrame.columns.tolist()

['committer',
 'numberCommits',
 'fixedSZZIssues',
 'fixedSonarIssues',
 'fixedJiraIssues',
 'inducedSZZIssues',
 'inducedSonarIssues',
 'timeInProject',
 'resolutionTime',
 'type_Bug',
 'type_Dependency upgrade',
 'type_Documentation',
 'type_Epic',
 'type_Improvement',
 'type_New Feature',
 'type_Question',
 'type_Story',
 'type_Sub-task',
 'type_Task',
 'type_Technical task',
 'type_Test',
 'type_Wish',
 'priority_Blocker',
 'priority_Critical',
 'priority_Major',
 'priority_Minor',
 'priority_Trivial',
 'ADD',
 'DELETE',
 'MODIFY',
 'RENAME',
 'UNKNOWN',
 'linesAdded',
 'linesRemoved',
 'bug',
 'refactoringType_Change Package',
 'refactoringType_Extract And Move Method',
 'refactoringType_Extract Class',
 'refactoringType_Extract Interface',
 'refactoringType_Extract Method',
 'refactoringType_Extract Subclass',
 'refactoringType_Extract Superclass',
 'refactoringType_Extract Variable',
 'refactoringType_Inline Method',
 'refactoringType_Inline Variable',
 'refactoringType_Move And

### RENAME ATTRIBUTES:

The first thing we can do is rename the attributes so that they are consistent and their meaning is clearer:

In [4]:
# Mapping from old column label to new column label.
# New labels carry a prefix per group: jira*, commitChange*, refactoring*,
# code*, sonar*.
dic = {
    "time_between_commits": "timeBetweenCommits",

    # type_* -> jira*
    "type_Bug": "jiraBug",
    "type_Dependency upgrade": "jiraDependencyUpgrade",
    "type_Documentation": "jiraDocumentation",
    "type_Epic": "jiraEpic",
    "type_Improvement": "jiraImprovement",
    "type_New Feature": "jiraNewFeature",
    "type_Question": "jiraQuestion",
    "type_Story": "jiraStory",
    # NOTE(review): this target keeps its hyphen while the other targets are
    # camelCase; confirm downstream usage before normalizing it.
    "type_Sub-task": "jiraSub-task",
    "type_Task": "jiraTask",
    "type_Technical task": "jiraTechnicalTask",
    "type_Test": "jiraTest",
    "type_Wish": "jiraWish",

    # priority_* -> jira*
    "priority_Blocker": "jiraBlocker",
    "priority_Critical": "jiraCritical",
    "priority_Major": "jiraMajor",
    "priority_Minor": "jiraMinor",
    "priority_Trivial": "jiraTrivial",

    # change kinds -> commitChange*
    "ADD": "commitChangeAdd",
    "DELETE": "commitChangeDelete",
    "MODIFY": "commitChangeModify",
    "RENAME": "commitChangeRename",
    "UNKNOWN": "commitChangeUnknown",

    # bug / refactoringType_* -> refactoring*
    "bug": "refactoringInducedBug",
    "refactoringType_Change Package": "refactoringChangePackage",
    "refactoringType_Extract And Move Method": "refactoringExtractAndMoveMethod",
    "refactoringType_Extract Class": "refactoringExtractClass",
    "refactoringType_Extract Interface": "refactoringExtractInterface",
    "refactoringType_Extract Method": "refactoringExtractMethod",
    "refactoringType_Extract Subclass": "refactoringExtractSubclass",
    "refactoringType_Extract Superclass": "refactoringExtractSuperclass",
    "refactoringType_Extract Variable": "refactoringExtractVariable",
    "refactoringType_Inline Method": "refactoringInlineMethod",
    "refactoringType_Inline Variable": "refactoringInlineVariable",
    "refactoringType_Move And Rename Attribute": "refactoringMoveAndRenameAttribute",
    "refactoringType_Move And Rename Class": "refactoringMoveAndRenameClass",
    "refactoringType_Move Attribute": "refactoringMoveAttribute",
    "refactoringType_Move Class": "refactoringMoveClass",
    "refactoringType_Move Method": "refactoringMoveMethod",
    "refactoringType_Move Source Folder": "refactoringMoveSourceFolder",
    "refactoringType_Parameterize Variable": "refactoringParameterizeVariable",
    "refactoringType_Pull Up Attribute": "refactoringPullUpAttribute",
    "refactoringType_Pull Up Method": "refactoringPullUpMethod",
    "refactoringType_Push Down Attribute": "refactoringPushDownAttribute",
    "refactoringType_Push Down Method": "refactoringPushDownMethod",
    "refactoringType_Rename Attribute": "refactoringRenameAttribute",
    "refactoringType_Rename Class": "refactoringRenameClass",
    "refactoringType_Rename Method": "refactoringRenameMethod",
    "refactoringType_Rename Package": "refactoringRenamePackage",
    "refactoringType_Rename Parameter": "refactoringRenameParameter",
    "refactoringType_Rename Variable": "refactoringRenameVariable",
    "refactoringType_Replace Attribute": "refactoringReplaceAttribute",
    "refactoringType_Replace Variable With Attribute": "refactoringReplaceVariableWithAttribute",

    # measures -> code*
    "functions": "codeFunctions",
    "commentLinesDensity": "codeCommentLinesDensity",
    "complexity": "codeComplexity",
    "functionComplexity": "codeFunctionComplexity",
    "duplicatedLinesDensity": "codeDuplicatedLinesDensity",
    "violations": "codeViolations",
    "blockerViolations": "codeBlockerViolations",
    "criticalViolations": "codeCriticalViolations",
    "infoViolations": "codeInfoViolations",
    "majorViolations": "codeMajorViolations",
    "minorViolations": "codeMinorViolations",
    "codeSmells": "codeCodeSmells",
    "bugs": "codeBugs",
    "vulnerabilities": "codeVulnerabilities",
    "cognitiveComplexity": "codeCognitiveComplexity",
    "ncloc": "codeNcloc",
    "sqaleIndex": "codeSqaleIndex",
    "sqaleDebtRatio": "codeSqaleDebtRatio",
    "reliabilityRemediationEffort": "codeReliabilityRemediationEffort",
    "securityRemediationEffort": "codeSecurityRemediationEffort",

    # debt / closeTime / type_* (upper case) / severity_* -> sonar*
    "debt": "sonarDebt",
    "closeTime": "sonarCloseTime",
    "type_BUG": "sonarBug",
    "type_CODE_SMELL": "sonarCodeSmell",
    "type_VULNERABILITY": "sonarVulnerability",
    "severity_BLOCKER": "sonarBlocker",
    "severity_CRITICAL": "sonarCritical",
    "severity_INFO": "sonarInfo",
    "severity_MAJOR": "sonarMajor",
    "severity_MINOR": "sonarMinor",
}

In [5]:
# Apply the mapping along the column axis; with pandas' default settings,
# labels that have no entry in `dic` keep their current name.
dataFrame = dataFrame.rename(dic, axis="columns")
dataFrame.head()

Unnamed: 0,committer,numberCommits,fixedSZZIssues,fixedSonarIssues,fixedJiraIssues,inducedSZZIssues,inducedSonarIssues,timeInProject,resolutionTime,jiraBug,...,codeSecurityRemediationEffort,sonarDebt,sonarCloseTime,sonarBug,sonarCodeSmell,sonarVulnerability,sonarBlocker,sonarCritical,sonarInfo,sonarMajor
0,-l,27.0,0.0,0.0,0.0,0.0,2.0,4235880.0,0.0,0.0,...,0.0,0.833333,22617.766389,0.0,2.0,0.0,0.0,0.0,0.0,2.0
1,1028332163,14.0,0.0,0.0,0.0,0.0,0.0,77939.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,A. J. David Bosschaert,432.0,51.0,0.0,0.0,1.0,0.0,173937105.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,A195882,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.933333,809.977238,0.0,18.0,0.0,0.0,0.0,7.0,8.0
4,A744013,5.0,0.0,4.0,0.0,0.0,3.0,351970.0,0.0,0.0,...,0.0,17.133333,821.388563,14.0,78.0,3.0,0.0,25.0,0.0,24.0


### **Save the final Data Frame:**

In [6]:
# Persist the renamed table, column names included. No `index` argument is
# passed, so pandas' default applies and the row index is written as well.
dataFrame.to_csv(
    path_or_buf='../../data/processed/DEVELOPERS_DATA.csv',
    header=True,
)