<tt> git log --all --numstat --pretty=format:'--%h--%ad--%ae--%aN' --no-renames > git.log
</tt>

Parameters

In [224]:
GIT_LOG_PATH = r'../../aim42_fork/'
GIT_LOG_REGEX = r'\.\./\.\./aim42_fork/'
GIT_LOG_FILE = r'git.log'
SOURCE_CODE_FILE_EXTENSION = ".adoc"

# Read Git log file

In [225]:
import pandas as pd
import git

g = git.Repo(GIT_LOG_PATH).git()
git_log = g.log('--all', '--numstat', '--no-renames', '--pretty=format:#%aN', '--', "*" + SOURCE_CODE_FILE_EXTENSION)
git_log[0:100]

'#Markus Harrer\n4\t1\tsrc/main/asciidoc/patterns/analyze/social-debt.adoc\n\n#Markus Harrer\n1\t1\tsrc/main/'

In [226]:
from io import StringIO

commits_raw = pd.read_csv(StringIO(git_log), 
    sep="#",
    header=None,
    names=['stats', 'author'])
commits_raw.head()

Unnamed: 0,stats,author
0,,Markus Harrer
1,4\t1\tsrc/main/asciidoc/patterns/analyze/socia...,
2,,Markus Harrer
3,1\t1\tsrc/main/asciidoc/patterns/analyze/socia...,
4,1\t0\tsrc/main/asciidoc/patterns/improve/butte...,


In [227]:
commit_metadata = commits_raw[['author']].fillna(method='ffill')
commit_metadata.head(5)

Unnamed: 0,author
0,Markus Harrer
1,Markus Harrer
2,Markus Harrer
3,Markus Harrer
4,Markus Harrer


In [228]:
file_info = commits_raw['stats'].dropna().str.split("\t", expand=True)
file_info.columns = ['additions', "deletions", "filename"]
file_info['additions'] = pd.to_numeric(file_info['additions'])
file_info['deletions'] = pd.to_numeric(file_info['deletions'])
file_info.head()

Unnamed: 0,additions,deletions,filename
1,4,1,src/main/asciidoc/patterns/analyze/social-debt...
3,1,1,src/main/asciidoc/patterns/analyze/social-debt...
4,1,0,src/main/asciidoc/patterns/improve/butterfly-m...
6,5,0,src/main/asciidoc/pattern-index.adoc
7,1,1,src/main/asciidoc/patterns/improve/anticorrupt...


In [229]:
commit_data = commit_metadata.join(file_info).dropna()
commit_data.head()

Unnamed: 0,author,additions,deletions,filename
1,Markus Harrer,4.0,1.0,src/main/asciidoc/patterns/analyze/social-debt...
3,Markus Harrer,1.0,1.0,src/main/asciidoc/patterns/analyze/social-debt...
4,Markus Harrer,1.0,0.0,src/main/asciidoc/patterns/improve/butterfly-m...
6,Markus Harrer,5.0,0.0,src/main/asciidoc/pattern-index.adoc
7,Markus Harrer,1.0,1.0,src/main/asciidoc/patterns/improve/anticorrupt...


# Import source files

In [230]:
import glob
file_paths = glob.glob(GIT_LOG_PATH + "/**/*" + SOURCE_CODE_FILE_EXTENSION, recursive=True)
file_paths[:5]

['../../aim42_fork\\src\\main\\asciidoc\\about.adoc',
 '../../aim42_fork\\src\\main\\asciidoc\\aim42-overview.adoc',
 '../../aim42_fork\\src\\main\\asciidoc\\aim42_introduction.adoc',
 '../../aim42_fork\\src\\main\\asciidoc\\analyze.adoc',
 '../../aim42_fork\\src\\main\\asciidoc\\crosscutting.adoc']

In [231]:
lengths = pd.DataFrame(file_paths, columns=["path"])
lengths.head()

Unnamed: 0,path
0,../../aim42_fork\src\main\asciidoc\about.adoc
1,../../aim42_fork\src\main\asciidoc\aim42-overv...
2,../../aim42_fork\src\main\asciidoc\aim42_intro...
3,../../aim42_fork\src\main\asciidoc\analyze.adoc
4,../../aim42_fork\src\main\asciidoc\crosscuttin...


In [232]:
def file_length(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return len(file.readlines())

lengths['length'] = lengths['path'].apply(file_length)
lengths.head()

Unnamed: 0,path,length
0,../../aim42_fork\src\main\asciidoc\about.adoc,34
1,../../aim42_fork\src\main\asciidoc\aim42-overv...,46
2,../../aim42_fork\src\main\asciidoc\aim42_intro...,158
3,../../aim42_fork\src\main\asciidoc\analyze.adoc,168
4,../../aim42_fork\src\main\asciidoc\crosscuttin...,199


In [233]:
GIT_LOG_PATH

'../../aim42_fork/'

In [235]:
lengths['filename'] = lengths['path'].\
    str.replace("\\","/").\
    str.replace(GIT_LOG_REGEX, "")
lengths.head()

Unnamed: 0,path,length,filename
0,../../aim42_fork\src\main\asciidoc\about.adoc,34,src/main/asciidoc/about.adoc
1,../../aim42_fork\src\main\asciidoc\aim42-overv...,46,src/main/asciidoc/aim42-overview.adoc
2,../../aim42_fork\src\main\asciidoc\aim42_intro...,158,src/main/asciidoc/aim42_introduction.adoc
3,../../aim42_fork\src\main\asciidoc\analyze.adoc,168,src/main/asciidoc/analyze.adoc
4,../../aim42_fork\src\main\asciidoc\crosscuttin...,199,src/main/asciidoc/crosscutting.adoc


In [236]:
commits = pd.merge(commit_data, lengths)
commits.head()

Unnamed: 0,author,additions,deletions,filename,path,length
0,Markus Harrer,4.0,1.0,src/main/asciidoc/patterns/analyze/social-debt...,../../aim42_fork\src\main\asciidoc\patterns\an...,31
1,Markus Harrer,1.0,1.0,src/main/asciidoc/patterns/analyze/social-debt...,../../aim42_fork\src\main\asciidoc\patterns\an...,31
2,Sven Johann,1.0,2.0,src/main/asciidoc/patterns/analyze/social-debt...,../../aim42_fork\src\main\asciidoc\patterns\an...,31
3,Damian A. Tamburri,29.0,0.0,src/main/asciidoc/patterns/analyze/social-debt...,../../aim42_fork\src\main\asciidoc\patterns\an...,31
4,Markus Harrer,1.0,0.0,src/main/asciidoc/patterns/improve/butterfly-m...,../../aim42_fork\src\main\asciidoc\patterns\im...,69


In [237]:
contributions = commits[['author', 'filename', 'additions', 'deletions', 'length']].dropna()
contributions.head()

Unnamed: 0,author,filename,additions,deletions,length
0,Markus Harrer,src/main/asciidoc/patterns/analyze/social-debt...,4.0,1.0,31
1,Markus Harrer,src/main/asciidoc/patterns/analyze/social-debt...,1.0,1.0,31
2,Sven Johann,src/main/asciidoc/patterns/analyze/social-debt...,1.0,2.0,31
3,Damian A. Tamburri,src/main/asciidoc/patterns/analyze/social-debt...,29.0,0.0,31
4,Markus Harrer,src/main/asciidoc/patterns/improve/butterfly-m...,1.0,0.0,69


In [238]:
additions_sum = contributions.groupby('filename').sum()[['additions']].reset_index()
additions_sum.head()

Unnamed: 0,filename,additions
0,src/main/asciidoc/about.adoc,83.0
1,src/main/asciidoc/aim42-overview.adoc,72.0
2,src/main/asciidoc/aim42_introduction.adoc,268.0
3,src/main/asciidoc/analyze.adoc,252.0
4,src/main/asciidoc/appendices/asciidoc-help.adoc,194.0


In [239]:
contributions_norm = pd.merge(contributions, additions_sum, left_on='filename', right_on='filename', suffixes=['', '_sum'])
contributions_norm.head()

Unnamed: 0,author,filename,additions,deletions,length,additions_sum
0,Markus Harrer,src/main/asciidoc/patterns/analyze/social-debt...,4.0,1.0,31,35.0
1,Markus Harrer,src/main/asciidoc/patterns/analyze/social-debt...,1.0,1.0,31,35.0
2,Sven Johann,src/main/asciidoc/patterns/analyze/social-debt...,1.0,2.0,31,35.0
3,Damian A. Tamburri,src/main/asciidoc/patterns/analyze/social-debt...,29.0,0.0,31,35.0
4,Markus Harrer,src/main/asciidoc/patterns/improve/butterfly-m...,1.0,0.0,69,157.0


In [240]:
grouped_commits = contributions_norm.groupby(
    ['filename', 'author']).agg(
    {'additions' : 'sum',
     'additions_sum' : 'first',
     'length' : 'first'})
grouped_commits.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,additions,additions_sum,length
filename,author,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
src/main/asciidoc/about.adoc,Alexander Heusingfeld,26.0,83.0,34
src/main/asciidoc/about.adoc,Gernot Starke,57.0,83.0,34
src/main/asciidoc/aim42-overview.adoc,Gernot Starke,72.0,72.0,46
src/main/asciidoc/aim42_introduction.adoc,Alexander Heusingfeld,149.0,268.0,158
src/main/asciidoc/aim42_introduction.adoc,Gernot Starke,79.0,268.0,158


In [241]:
grouped_commits['ownership'] = grouped_commits['additions'] / grouped_commits['additions_sum']
grouped_commits.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,additions,additions_sum,length,ownership
filename,author,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
src/main/asciidoc/about.adoc,Alexander Heusingfeld,26.0,83.0,34,0.313253
src/main/asciidoc/about.adoc,Gernot Starke,57.0,83.0,34,0.686747
src/main/asciidoc/aim42-overview.adoc,Gernot Starke,72.0,72.0,46,1.0
src/main/asciidoc/aim42_introduction.adoc,Alexander Heusingfeld,149.0,268.0,158,0.55597
src/main/asciidoc/aim42_introduction.adoc,Gernot Starke,79.0,268.0,158,0.294776


In [242]:
ownerships = grouped_commits.reset_index().groupby(['filename']).first()
ownerships.head()

Unnamed: 0_level_0,author,additions,additions_sum,length,ownership
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
src/main/asciidoc/about.adoc,Alexander Heusingfeld,26.0,83.0,34,0.313253
src/main/asciidoc/aim42-overview.adoc,Gernot Starke,72.0,72.0,46,1.0
src/main/asciidoc/aim42_introduction.adoc,Alexander Heusingfeld,149.0,268.0,158,0.55597
src/main/asciidoc/analyze.adoc,Alexander Heusingfeld,119.0,252.0,168,0.472222
src/main/asciidoc/appendices/asciidoc-help.adoc,Alexander Heusingfeld,154.0,194.0,187,0.793814


In [243]:
plot_data = ownerships.reset_index()
plot_data.head()

Unnamed: 0,filename,author,additions,additions_sum,length,ownership
0,src/main/asciidoc/about.adoc,Alexander Heusingfeld,26.0,83.0,34,0.313253
1,src/main/asciidoc/aim42-overview.adoc,Gernot Starke,72.0,72.0,46,1.0
2,src/main/asciidoc/aim42_introduction.adoc,Alexander Heusingfeld,149.0,268.0,158,0.55597
3,src/main/asciidoc/analyze.adoc,Alexander Heusingfeld,119.0,252.0,168,0.472222
4,src/main/asciidoc/appendices/asciidoc-help.adoc,Alexander Heusingfeld,154.0,194.0,187,0.793814


In [244]:
plot_data.loc[plot_data['ownership'] < 0.75, 'ownership']  = 0
plot_data['responsible']  = plot_data['author']
plot_data.loc[plot_data['ownership'] == 0, 'responsible']  = "None"
plot_data.head()

Unnamed: 0,filename,author,additions,additions_sum,length,ownership,responsible
0,src/main/asciidoc/about.adoc,Alexander Heusingfeld,26.0,83.0,34,0.0,
1,src/main/asciidoc/aim42-overview.adoc,Gernot Starke,72.0,72.0,46,1.0,Gernot Starke
2,src/main/asciidoc/aim42_introduction.adoc,Alexander Heusingfeld,149.0,268.0,158,0.0,
3,src/main/asciidoc/analyze.adoc,Alexander Heusingfeld,119.0,252.0,168,0.0,
4,src/main/asciidoc/appendices/asciidoc-help.adoc,Alexander Heusingfeld,154.0,194.0,187,0.793814,Alexander Heusingfeld


In [245]:
colors = plot_data[['author']].drop_duplicates()
colors.head()

Unnamed: 0,author
0,Alexander Heusingfeld
1,Gernot Starke
20,Markus Harrer
39,Damian A. Tamburri
62,Klemens Dickbauer


In [246]:
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors

number_of_owners = len(plot_data['author'].unique())
rgb_colors = [
                matplotlib.colors.rgb2hex(x) 
                for x in cm.RdYlGn_r(
                    np.linspace(0,1,number_of_owners))
                ]
rgb_colors

['#006837', '#87cb67', '#fffebe', '#f88c51', '#a50026']

In [247]:
colors['color'] = rgb_colors
colors

Unnamed: 0,author,color
0,Alexander Heusingfeld,#006837
1,Gernot Starke,#87cb67
20,Markus Harrer,#fffebe
39,Damian A. Tamburri,#f88c51
62,Klemens Dickbauer,#a50026


In [248]:
plot_data = pd.merge(plot_data, colors, left_on='responsible', right_on='author')
plot_data.head()

Unnamed: 0,filename,author_x,additions,additions_sum,length,ownership,responsible,author_y,color
0,src/main/asciidoc/aim42-overview.adoc,Gernot Starke,72.0,72.0,46,1.0,Gernot Starke,Gernot Starke,#87cb67
1,src/main/asciidoc/appendices/domain-model.adoc,Gernot Starke,67.0,69.0,57,0.971014,Gernot Starke,Gernot Starke,#87cb67
2,src/main/asciidoc/organizational-scenarios.adoc,Gernot Starke,7.0,7.0,7,1.0,Gernot Starke,Gernot Starke,#87cb67
3,src/main/asciidoc/patterns/analyze/infrastruct...,Gernot Starke,41.0,41.0,39,1.0,Gernot Starke,Gernot Starke,#87cb67
4,src/main/asciidoc/patterns/category-improve-an...,Gernot Starke,6.0,6.0,6,1.0,Gernot Starke,Gernot Starke,#87cb67


# Visualizing
Export DataFrame into d3's flare format

In [249]:
import os
import json

json_data = {}
json_data['name'] = 'flare'
json_data['children'] = []
for row in plot_data.iterrows():
    series = row[1]
    path, filename = os.path.split(series['filename'])
    person = series['responsible']
    weigth = series['ownership']
    color = series['color']
    size = series['length']
    
    last_children = None
    children = json_data['children']
    
    for path_part in path.split("/"):
        entry = None
        
        for child in children:
            if "name" in child and child["name"] == path_part:
                entry = child
        if not entry:
            entry = {}
            children.append(entry)

        entry['name'] = path_part
        if not 'children' in entry: 
            entry['children'] = []
        
        children = entry['children']
        last_children = children
    
    last_children.append({
        'name' : filename + " [" + person + "]",
        'weight' : weigth,
        'size' : size,
        'author_color' : color})

with open ("vis/flare.json", mode='w', encoding='utf-8') as json_file:
    json_file.write(json.dumps(json_data, indent=3))