In [82]:
# ! pip3 install GitPython
# ! pip3 install seaborn

import numpy as np
import pandas as pd
import os
import time

import git
from git import RemoteProgress
from git import Repo

import matplotlib.pyplot as plt
import seaborn as sns

# ANSI escape sequences used to colour printed text; ENDC switches the colour off again.
BLUE, GREEN, ORANGE, RED = '\033[94m', '\033[92m', '\033[93m', '\033[91m'
ENDC = '\033[0m'

### Set 'repository', 'fixing commit' and 'affected file'

In [87]:
# Every case study in one place: the directory of the local clone, the commit
# that fixed the vulnerability, and the files that commit is analysed for.
# Keeping all of them in a table (instead of commented-out assignments) means
# switching repository is a one-line change to SELECTED_CASE below.
CASE_STUDIES = {
    ### (1) ONOS
    "onos": {
        "local_link": "onos",
        "fixing_commit": "af1fa39a53c0016e92c1de246807879c16f507d6",
        "affected_files": [
            "cli/src/main/java/org/onosproject/cli/net/DeviceSetControllersCommand.java",
            "drivers/utilities/src/main/java/org/onosproject/drivers/utilities/XmlConfigParser.java",
        ],
    },
    ### (2) OFBIZ
    # The fixing commit that was originally provided,
    # "e47a65896bd12e23e090436c0b6e2478f162ae3e", can't be found in the
    # repository; the hash below is the actual fixing commit.
    "ofbiz": {
        "local_link": "ofbiz",
        "fixing_commit": "34125e42d1db74064482c296c871e11c92dc4527",
        "affected_files": [
            "framework/base/src/org/ofbiz/base/util/template/FreeMarkerWorker.java",
        ],
    },
    ### (3) STRUTS
    "struts": {
        "local_link": "struts",
        "fixing_commit": "9d47af6ffa355977b5acc713e6d1f25fac260a28",
        "affected_files": [
            "core/src/main/java/com/opensymphony/xwork2/validator/validators/URLValidator.java",
        ],
    },
}

# Change this single value to analyse a different repository, then re-run
# every cell below so that the outputs match the selection.
SELECTED_CASE = "onos"

# Names read by the following cells.
local_link = CASE_STUDIES[SELECTED_CASE]["local_link"]
fixing_commit = CASE_STUDIES[SELECTED_CASE]["fixing_commit"]
affected_files = CASE_STUDIES[SELECTED_CASE]["affected_files"]

### (a) Message and title of fixing commit

In [88]:
# Open the local clone selected in the configuration cell.
repo = Repo(local_link)

# `git show -s <commit>`: the captured output contains the commit header and
# message only (hash, author, date, title and body), without the diff.
show_data = repo.git.show("-s", fixing_commit).splitlines()
if show_data:
    print("\n".join(show_data))

commit af1fa39a53c0016e92c1de246807879c16f507d6
Author: Andrea Campanella <andrea@opennetworking.org>
Date:   Tue Jun 26 11:16:51 2018 +0200

    [OS] Fix for XXE in netconf drivers xml utils
    
    Change-Id: Ie38901decb59712c7cf6c717f42bbb746c1e1921


### (b) total files affected 

In [48]:
# Walk the history reachable from HEAD once and reuse the list for both the
# total count and the lookup (the walk is expensive on large repositories).
all_commits = list(repo.iter_commits())
print("total repo commits: ", len(all_commits), "\n")

# A Commit object never compares equal to a plain string, so the comparison is
# done on `hexsha` (`str(commit)` gives the same hash).
fixing = next((c for c in all_commits if c.hexsha == fixing_commit), None)

if fixing is None:
    # Make a wrong or unreachable hash visible instead of printing nothing.
    print("fixing commit not found in the history of HEAD: ", fixing_commit)
else:
    # `stats` is looked up once and reused for both the listing and the count.
    changed_files = fixing.stats.files
    print(changed_files)
    print("number of files affected: ", len(changed_files), "\n")
    print("commit hash: ", fixing)

total repo commits:  24427 

{'framework/base/src/org/ofbiz/base/util/template/FreeMarkerWorker.java': {'insertions': 31, 'deletions': 0, 'lines': 31}}
number of files affected:  1 

commit hash:  34125e42d1db74064482c296c871e11c92dc4527


### (c) total directories affected 

In [37]:
# Locate the fixing commit; the generator stops walking the history as soon as
# the commit is found.
fixing = next((c for c in repo.iter_commits() if str(c) == fixing_commit), None)

if fixing is None:
    # Make a wrong or unreachable hash visible instead of printing nothing.
    print("fixing commit not found in the history of HEAD: ", fixing_commit)
else:
    # Directory part of every changed path (everything before the last "/";
    # empty for a file at the repository root). dict.fromkeys() drops the
    # duplicates produced by several files in one directory while keeping the
    # order in which the directories first appear.
    affected_dirs = list(dict.fromkeys(path.rpartition("/")[0] for path in fixing.stats.files))

    for folder in affected_dirs:
        print("folder: " + folder)
    print("total directories affected: ", len(affected_dirs))

folder: framework/base/src/org/ofbiz/base/util/template


### (d) total lines of code (INCLUDING comments and blank lines) deleted
### (e) total lines of code (INCLUDING comments and blank lines) added
### (f) total lines of code (EXCLUDING comments and blank lines) deleted
### (g) total lines of code (EXCLUDING comments and blank lines) added

In [70]:
# Opening delimiter -> closing delimiter of the multi-line comment styles that
# the line classifier recognises (C/Java block comments, Python docstrings).
_BLOCK_COMMENT_DELIMITERS = {"/*": "*/", '"""': '"""'}


def _classify_source_line(text, closing):
    """Classify one source line as 'blank', 'comment' or 'code'.

    This is a line-prefix heuristic, not a parser: a block comment is only
    recognised when the line starts with its opening delimiter, and '//' or
    '#' only count when they are the first non-blank characters.

    Parameters:
        text:    the source line without its diff marker.
        closing: closing delimiter of the block comment this line is inside,
                 or None when the line is not inside a block comment.

    Returns:
        (kind, closing) where `closing` is the state to pass in for the next
        line of the same file side.
    """
    stripped = text.strip()

    if closing is None:
        for opener, closer in _BLOCK_COMMENT_DELIMITERS.items():
            if stripped.startswith(opener):
                closing = closer
                # Search for the closer only after the opener, so that '/**'
                # or a bare '"""' is not mistaken for its own end.
                remainder = stripped[len(opener):]
                break
        else:
            if not stripped:
                return "blank", None
            if stripped.startswith(("//", "#")):
                return "comment", None
            return "code", None
    else:
        remainder = stripped

    kind = "comment" if stripped else "blank"
    if closing in remainder:
        closing = None
    return kind, closing


def count_diff_lines(diff_lines, verbose=True):
    """Count added and removed lines in a unified diff.

    Parameters:
        diff_lines: the lines of a unified diff (e.g. `git diff` output).
        verbose:    when True, echo every added/removed line in colour
                    (blue = added code, orange = added comment or any blank,
                    red = removed).

    Returns:
        (added_total, added_code, removed_total, removed_code). The *_total
        values include comments and blank lines, the *_code values exclude
        them.
    """
    added_total = added_code = removed_total = removed_code = 0

    # Only lines after an '@@' hunk header are file content. Tracking this is
    # what separates the '--- a/..' / '+++ b/..' file headers from removed or
    # added lines whose own text happens to start with '--' or '++'.
    in_hunk = False

    # Block-comment state is kept per side of the diff: removed lines belong
    # to the old file, added lines to the new one, context lines to both.
    new_closing = old_closing = None

    for raw in diff_lines:
        if raw.startswith("diff --git"):
            in_hunk = False
            continue
        if raw.startswith("@@"):
            in_hunk = True
            # A new hunk starts somewhere else in the file; the comment state
            # there is unknown, so assume it does not start inside a comment.
            new_closing = old_closing = None
            continue
        if not in_hunk:
            continue

        marker, text = raw[:1], raw[1:]

        if marker == "+":
            kind, new_closing = _classify_source_line(text, new_closing)
            added_total += 1
            if kind == "code":
                added_code += 1
            colour = BLUE if kind == "code" else ORANGE
        elif marker == "-":
            kind, old_closing = _classify_source_line(text, old_closing)
            removed_total += 1
            if kind == "code":
                removed_code += 1
            colour = RED
        else:
            # Context lines are not counted but still open/close comments on
            # both sides; '\ No newline at end of file' markers are ignored.
            if marker != "\\":
                _, new_closing = _classify_source_line(text, new_closing)
                _, old_closing = _classify_source_line(text, old_closing)
            continue

        if verbose:
            if kind == "blank":
                print("[" + ORANGE + "BLANK" + ENDC + "]")
            else:
                print("[" + colour + text + ENDC + "]")

    return added_total, added_code, removed_total, removed_code


# Diff of the fixing commit against its first parent.
diff_data = repo.git.diff(fixing_commit + "^", fixing_commit).splitlines()

(total_lines_added_including_blank_comments,
 total_lines_added_excluding_blank_comments,
 total_lines_removed_including_blank_comments,
 total_lines_removed_excluding_blank_comments) = count_diff_lines(diff_data)

print("total lines added (INCLUDING comments and blanks): ", total_lines_added_including_blank_comments)
print("total lines added (EXCLUDING comments and blanks): ", total_lines_added_excluding_blank_comments)
print("total lines removed (INCLUDING comments and blanks): ", total_lines_removed_including_blank_comments)
print("total lines removed (EXCLUDING comments and blanks): ", total_lines_removed_excluding_blank_comments)

[[94mimport java.io.PrintWriter;[0m]
[[94mimport java.io.StringWriter;[0m]
[[94mimport org.ofbiz.base.util.StringUtil.SimpleEncoder;[0m]
[[93mBLANK[0m]
[[94mimport freemarker.template.TemplateExceptionHandler;[0m]
[[93mBLANK[0m]
[[94m        newConfig.setTemplateExceptionHandler(new FreeMarkerWorker.OFBizTemplateExceptionHandler());[0m]
[[93mBLANK[0m]
[[93m    /**[0m]
[[93m     * OFBiz specific TemplateExceptionHandler.  Sanitizes any error messages present in[0m]
[[93m     * the stack trace prior to printing to the output writer.[0m]
[[93m     *[0m]
[[93m     */[0m]
[[94m    static class OFBizTemplateExceptionHandler implements TemplateExceptionHandler {[0m]
[[93mBLANK[0m]
[[94m        @Override[0m]
[[94m        public void handleTemplateException(TemplateException te, Environment env, Writer out) throws TemplateException {[0m]
[[94m            StringWriter tempWriter = new StringWriter();[0m]
[[94m            PrintWriter pw = new PrintWriter(tempW

### (h) How many days between fixing commit and previous commit to the same file?
### (i) How many times has the file been modified since creation?
### (j) Which developers have modified the file?

In [92]:
import math

SECONDS_PER_DAY = 86400

# Union of the committers of all affected files; read by the later cell that
# looks up each contributor's commit count.
contributors = []

for affected_file in affected_files:
    print("\t", ORANGE, affected_file, ENDC)

    # As the captured output shows, commits are listed newest first, so the
    # commit right after the fixing commit in this list is the previous change
    # to the same file.
    commits_touching_path = list(repo.iter_commits(paths=affected_file))

    # All per-file state is computed inside the loop so nothing leaks from one
    # file into the next.
    fixing_index = next(
        (i for i, c in enumerate(commits_touching_path) if c.hexsha == fixing_commit),
        None,
    )
    previous_index = None
    if fixing_index is not None and fixing_index + 1 < len(commits_touching_path):
        previous_index = fixing_index + 1

    file_contributors = []

    for index, commit in enumerate(commits_touching_path):
        # GitPython differentiates between 'author' and 'committer';
        # committed_date is in epoch time.
        # NOTE(review): the committer is used as "the developer"; confirm this
        # is intended rather than the author.
        committed = time.localtime(commit.committed_date)
        date_text = str(committed.tm_mday) + "/" + str(committed.tm_mon) + "/" + str(committed.tm_year)
        committer = str(commit.committer)

        if index == fixing_index:
            hash_text = BLUE + commit.hexsha + ENDC      # the fixing commit
        elif index == previous_index:
            hash_text = GREEN + commit.hexsha + ENDC     # the change right before it
        else:
            hash_text = commit.hexsha
        print(hash_text + "\t" + date_text + "\t" + committer)

        # record the committer once per file and once overall
        if committer not in file_contributors:
            file_contributors.append(committer)
        if committer not in contributors:
            contributors.append(committer)

    print("\nTotal commits to file:", len(commits_touching_path))
    if previous_index is None:
        # Either the fixing commit does not touch this file or it is the
        # oldest commit that does; there is no interval to report.
        between_time = None
        print("Days between fixing commit and previous commit: n/a "
              "(fixing commit or an earlier commit not found in this file's history)")
    else:
        between_time = (commits_touching_path[fixing_index].committed_date
                        - commits_touching_path[previous_index].committed_date)
        print("Days between fixing commit and previous commit:",
              math.ceil(between_time / SECONDS_PER_DAY), "days")
    print("Unique contributors: ", GREEN, file_contributors, ENDC)
    print("Total unique contributors to file: ", len(file_contributors), "\n\n")

	 [93m cli/src/main/java/org/onosproject/cli/net/DeviceSetControllersCommand.java [0m
0068fd084e2ce1bddf113c36cb63347ce4350c59	12/10/2018	Ray Milkey
d84f89ba3d18fb8e9c44fa0bab004d24c56b0a81	29/9/2018	Ray Milkey
[94maf1fa39a53c0016e92c1de246807879c16f507d6[0m	26/6/2018	Andrea Campanella
[92m67de597c5d2ccef246c7f564beab6064695e509c[0m	26/3/2018	Yuta HIGUCHI
a09fe5b0b75659dc9ff23fe938dd50aea0a0b129	4/8/2017	Brian O'Connor
54f28e23ac9a969f7d33d5756c9f929bf97c6520	3/8/2016	Gerrit Code Review
5ab426f9ffd145916bb41df408d7fef1781b9128	9/4/2016	Brian O'Connor
eb70a94b5504180de5772d5229a4aae353960240	20/11/2015	Gerrit Code Review
ed976a405fa3db38903ce369c9ece7023d310a21	9/10/2015	Gerrit Code Review

Total commits to file: 9
Days between fixing commit and previous commit: 93 days
Unique contributors:  [92m ['Ray Milkey', 'Andrea Campanella', 'Yuta HIGUCHI', "Brian O'Connor", 'Gerrit Code Review'] [0m
Total unique contributors to file:  5 


	 [93m drivers/utilities/src/main/java/org/onos

### (k) For each developer in (j), how many commits have they submitted? Are they experienced or new?

In [75]:
# One row per identity across all refs; as the table below shows, each row is
# "<commit count>\t<name> <email>".
# NOTE(review): git shortlog groups by author by default, while `contributors`
# is filled in another cell - confirm both use the same kind of identity.
commit_authors = repo.git.shortlog("-sne", "--all").splitlines()

author_commits = []

for shortlog_row in commit_authors:
    count_text, identity = shortlog_row.split("\t")

    # keep only the name part in front of the "<email>"
    display_name = identity.partition("<")[0].strip()
    if display_name not in contributors:
        continue

    print("author: [" + display_name + "]")
    author_commits.append([int(count_text), identity])

df = pd.DataFrame(author_commits, columns=['Commit', 'Author'])
df.head(100)

author: [Jacques Le Roux]
author: [Jacopo Cappellato]
author: [Johannes Cornelius Bakker]
author: [Adam Heath]
author: [Adrian Crum]
author: [Scott Gray]
author: [David E. Jones]
author: [Marco Risaliti]
author: [Deepak Dixit]
author: [Bruno Busco]
author: [Taher A. Alkhateeb]


Unnamed: 0,Commit,Author
0,9425,Jacques Le Roux <jleroux@apache.org>
1,3245,Jacopo Cappellato <jacopoc@apache.org>
2,2605,Johannes Cornelius Bakker <hansbak@apache.org>
3,2034,Adam Heath <doogie@apache.org>
4,1933,Adrian Crum <adrianc@apache.org>
5,1590,Scott Gray <lektran@apache.org>
6,1411,David E. Jones <jonesde@apache.org>
7,835,Marco Risaliti <mrisaliti@apache.org>
8,648,Deepak Dixit <deepak@apache.org>
9,485,Bruno Busco <buscob@apache.org>
