## Setup

In [1]:
import os, subprocess

In [2]:
# Function to execute git commands
def execute_command(cmd, work_dir):
    """Run a shell command inside ``work_dir`` and return its raw output.

    Parameters
    ----------
    cmd : str
        Command line passed to the shell (``shell=True``), so pipelines
        such as ``a | b`` are allowed.
    work_dir : str
        Directory the command is executed in.

    Returns
    -------
    bytes
        The bytes written to stderr when that stream is non-empty,
        otherwise the bytes written to stdout. Callers decode the result
        themselves.
    """
    pipe = subprocess.Popen(cmd, shell=True, cwd=work_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() reads both streams to the end and waits for the process
    # to terminate, so no additional wait() call is required afterwards.
    out, error = pipe.communicate()
    return error if error else out

## Getting all commit hashes from Butterknife

In [3]:
# Name of the project whose clone is analysed
project = 'butterknife'
# The clone is expected below the current user's home directory
userhome = os.path.expanduser('~')
repository = f'{userhome}/Desktop/diff/datasource/{project}'

In [4]:
# Ask git for one full commit hash per line
git_cmd = 'git log --pretty=format:"%H"'
log = execute_command(git_cmd, repository)
all_hashes = log.decode('utf-8').split('\n')
# Skip the first 180 hashes printed by git log; the recorded run of this
# notebook kept 836 commits after this cut
list_of_all_commits = all_hashes[180:]
total_commits = len(list_of_all_commits)

## Utility to count the number of added or deleted lines between commits

In [5]:
def count_no_of_lines(commit, parent_commit, algorithm, sign, repository):
    """Count the diff lines that start with ``sign`` between a commit and its parent.

    The diff is produced by ``git diff`` with whitespace changes (``-w``)
    and blank-line changes (``--ignore-blank-lines``) ignored, filtered with
    ``grep`` and counted with ``wc -l``.

    Parameters
    ----------
    commit : str
        Revision of the commit under inspection (the "new" side).
    parent_commit : str
        Revision the commit is compared against (the "old" side).
    algorithm : str
        Value passed to ``--diff-algorithm``.
    sign : str
        ``'+'`` to count added lines, ``'-'`` to count deleted lines.
    repository : str
        Directory in which the git command is executed.

    Returns
    -------
    The value returned by ``execute_command`` for the pipeline, i.e. the raw
    ``wc -l`` output, which the caller decodes and converts to ``int``.
    """
    git_cmd = (
        # `git diff <old> <new>` reports changes going from <old> to <new>,
        # so the parent comes first: '+' then marks lines the commit added
        # and '-' lines it deleted.
        f'git diff -w --ignore-blank-lines --diff-algorithm={algorithm} {parent_commit} {commit} | '
        f"grep '^[{sign}]' | "
        # Remove the '---' / '+++' file header lines, which are not content.
        # '/dev/null' is the header used for created or removed files.
        # The backslashes are doubled so Python passes a literal '\+' to grep.
        "grep -Ev '^(--- (a/|/dev/null)|\\+\\+\\+ (b/|/dev/null))' | "
        'wc -l'
    )
    return execute_command(git_cmd, repository)

## Compare differences in NLA (number of lines added) and NLD (number of lines deleted) for the two algorithms specified

In [6]:
# Algorithm names handed to git's --diff-algorithm option
HISTOGRAM = 'histogram'
MYERS = 'myers'

In [7]:
diff_commits = 0 # stores the number of different commits
# Walk the list from its last entry towards its first, pairing every entry
# (treated as the parent) with the entry directly before it (the commit).
# range() excludes its stop value, so the stop has to be 0 for the final
# pair (index 0, index 1) to be compared as well.
# NOTE(review): assumes neighbouring hashes in the log are child and parent;
# confirm this holds for histories containing merge commits.
for parent_commit in range(len(list_of_all_commits) - 1, 0, -1):
    # parent commit hash
    parent_commit_hash = list_of_all_commits[parent_commit]
    # commit hash
    commit_hash = list_of_all_commits[parent_commit - 1]
    # NLA: Myers
    nla_from_myers = int(count_no_of_lines(commit_hash, parent_commit_hash, MYERS, '+', repository).decode('utf-8'))
    # NLA: Histogram
    nla_from_histogram = int(count_no_of_lines(commit_hash, parent_commit_hash, HISTOGRAM, '+', repository).decode('utf-8'))
    # NLD: Myers
    nld_from_myers = int(count_no_of_lines(commit_hash, parent_commit_hash, MYERS, '-', repository).decode('utf-8'))
    # NLD: Histogram
    nld_from_histogram = int(count_no_of_lines(commit_hash, parent_commit_hash, HISTOGRAM, '-', repository).decode('utf-8'))
    # A commit is counted only when both the added and the deleted line
    # counts disagree between the two algorithms.
    if nla_from_histogram != nla_from_myers and nld_from_histogram != nld_from_myers:
        diff_commits += 1

## Results

In [8]:
# Absolute numbers first, then the share of differing commits in percent
print(f'Total Commits: {total_commits} Difference: {diff_commits}')
percent_different = diff_commits/total_commits * 100
print(f'%Different: {percent_different}')

Total Commits: 836 Difference: 36
%Different: 4.30622009569378
