In [2]:
pip install pygal

Collecting pygalNote: you may need to restart the kernel to use updated packages.

  Downloading pygal-3.0.0-py2.py3-none-any.whl (129 kB)
Installing collected packages: pygal
Successfully installed pygal-3.0.0


In [1]:
import pygal as pg
from string import Template
from IPython.core.display import display, HTML

# Load the `cypher` extension that provides the %cypher / %%cypher magics used in the cells below.
%load_ext cypher
# Default connection URI for the Cypher magics.
# NOTE(review): user name and password are embedded in the URI - acceptable for a local
# demo database, but confirm before pointing this notebook at a shared server.
%config CypherMagic.uri='http://neo4j:neo@localhost:7474/db/data'

ModuleNotFoundError: No module named 'pygal'

In [None]:
# HTML scaffold used to embed a rendered chart in the notebook output.
# The two <script> tags load helper scripts from the pygal.js site;
# `{rendered_chart}` is the only placeholder and is filled via
# `base_html.format(rendered_chart=...)`.
# Fix: the second <script> tag carried a stray double quote after its src
# attribute, which made the markup malformed.
base_html = """
<!DOCTYPE html>
<html>
  <head>
  <script type="text/javascript" src="http://kozea.github.com/pygal.js/javascripts/svg.jquery.js"></script>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/2.0.x/pygal-tooltips.min.js"></script>
  </head>
  <body>
    <figure>
      {rendered_chart}
    </figure>
  </body>
</html>
"""

# Analysing organisational structures with Software Analytics

## Question

<center>Which developer is the expert for which domain?</center>

## Data Sources

* Java structures of the Shopizer system scanned by jQAssistant and available in Neo4j
* Git history of the Shopizer system scanned by jQAssistant and available in Neo4j


* identification of functional components in the source code is required (see notebook 01)
* matching between developer and functional component can be accomplished

## Heuristics

* all source code committers are still part of the team
* developers may be assigned to multiple functional components

## Validation

* tabular overview of the committers and the functional components they have already worked on
* tabular overview of the top-committer per functional component (domain expert)


* plausibility check of the results with the team (functional vs technical contribution)

## Implementation

* identification of the functional components (see notebook 01)
* identification of the committers and removal of duplicates

In [None]:
%%cypher
// Overview of all author nodes with their name and e-mail address
MATCH  (a:Author)
RETURN a.name  AS Name,
       a.email AS EMail

In [None]:
%%cypher
// Duplicate removal (manual post processing)
// Each entry is [name to keep, e-mail of the author node to keep, e-mail of the duplicate node].
WITH [
  ["Carl Samson", "csamson777@yahoo.com", "c.samson@cgi.com"],
  ["Carl Samson", "csamson777@yahoo.com", "carlsamson@Carls-MacBook-Pro-2.local"],
  ["Umesh Awasthi", "UAwasthi@rccl.com", "umeshawasthi@gmail.com"]
] AS authors
UNWIND authors AS duplicateAuthor
// Look up the surviving author and its duplicate by e-mail address;
// entries whose nodes are not both present produce no rows.
MATCH (author:Author{email: duplicateAuthor[1]}),
      (duplicate:Author{email: duplicateAuthor[2]})
// Set the surviving author's name to the one given in the list.
SET author.name = duplicateAuthor[0]      
WITH author, duplicate
// Re-attach every commit of the duplicate to the surviving author.
// MERGE only creates the relationship if it does not exist yet.
// Note: a duplicate without any COMMITTED relationship yields no rows here
// and is therefore not deleted by this query.
MATCH (duplicate)-[:COMMITTED]->(c:Commit)
MERGE (author)-[:COMMITTED]->(c)
// Remove the duplicate node together with its remaining relationships.
DETACH DELETE duplicate
// One result row per surviving author with the number of duplicates merged into it.
RETURN author.name AS AuthorName, author.email AS AuthorMail, count(DISTINCT duplicate) AS Duplicates

In [None]:
# Count the distinct non-merge commits per author. Only commits that have at least one
# change with a MODIFIES relationship to a file are matched, and authors with a single
# commit are filtered out. The result is sorted by commit count, highest first.
commitsPerAuthor = %cypher MATCH (a:Author)-[:COMMITTED]->(c:Commit), \
                                 (c)-[:CONTAINS_CHANGE]->(:Change)-[:MODIFIES]->(file:File) \
                           WHERE NOT c:Merge \
                           WITH a, count(DISTINCT c) AS Commits \
                           WHERE Commits > 1 \
                           RETURN a.name as Entwickler, Commits \
                           ORDER BY Commits DESC

# Materialise the query result via get_dataframe(); columns are 'Entwickler' and 'Commits'.
commitsPerAuthor_df = commitsPerAuthor.get_dataframe()

# Visualization: one bar series per developer, embedded in the HTML scaffold.
chart_options = dict(
    show_legend=True,
    human_readable=True,
    fill=True,
    legend_at_bottom=True,
    legend_at_bottom_columns=2,
)
bar_chart = pg.Bar(**chart_options)
bar_chart.title = 'Entwickler mit den meisten Commits'

# Every developer is added as a separate series holding a single value.
for _, row in commitsPerAuthor_df.iterrows():
    bar_chart.add(row['Entwickler'], [{"value": row['Commits']}])

chart_svg = bar_chart.render(is_unicode=True)
display(HTML(base_html.format(rendered_chart=chart_svg)))

In [None]:
%%cypher
// Tag the com.shopizer main artifacts and everything they directly contain
// with the :Shopizer label (same query as in notebook 00)
MATCH (shopizerArtifact:Main:Artifact {group: "com.shopizer"})
SET   shopizerArtifact:Shopizer
WITH  shopizerArtifact
MATCH (shopizerArtifact)-[:CONTAINS]->(content)
SET   content:Shopizer

In [None]:
%%cypher
// One :BoundedContext node per direct sub-package of the services package
// (same query as in notebook 01)
MATCH  (services:Package:Shopizer)-[:CONTAINS]->(subPackage:Package:Shopizer)
WHERE  services.fqn = "com.salesmanager.core.business.services"
WITH   collect(DISTINCT subPackage.name) AS contextNames
UNWIND contextNames AS contextName
MERGE  (:BoundedContext {name: contextName})

In [None]:
%%cypher
// Link each bounded context to all types located below the package of the same name
// (same query as in notebook 01)
MATCH (context:BoundedContext),
      (pkg:Package:Shopizer)-[:CONTAINS*]->(containedType:Type:Shopizer)
WHERE context.name = pkg.name
MERGE (context)-[:CONTAINS]->(containedType)

## Results

In [None]:
%%cypher
// Number of non-merge commits per author and bounded context
MATCH (author:Author)-[:COMMITTED]->(gitCommit:Commit),
      (gitCommit)-[:CONTAINS_CHANGE]->(:Change)-[]->(sourceFile:Git:File),
      (sourceFile)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(context:BoundedContext)
WHERE NOT gitCommit:Merge
RETURN context.name AS BoundedContext,
       author.name  AS Author,
       count(DISTINCT gitCommit) AS Commits
ORDER BY BoundedContext, Commits DESC

In [None]:
%%cypher
// Top-committer per bounded context
// Match non-merge commits whose changes touch a Git file that is the source of a
// Java type contained in a bounded context, together with the commit's author.
MATCH    (c:Commit)-[:CONTAINS_CHANGE]->(:Change)-[]->(f:Git:File),
         (f)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(bC:BoundedContext),
         (a:Author)-[:COMMITTED]->(c)
WHERE    NOT c:Merge
// Count distinct commits per (bounded context, author) pair and sort so that, within
// each context, the author with the most commits comes first.
WITH     bC.name AS BoundedContext, a.name AS Author, count(DISTINCT c) AS Commits
ORDER BY BoundedContext, Commits Desc
// Keep only the first author of each context's list.
// NOTE(review): assumes collect() keeps the row order established by ORDER BY; when
// several authors tie for the top count only one of them is reported.
WITH     BoundedContext, collect(Author)[..1] AS TopAuthor
// Turn the one-element list back into a plain value per row.
UNWIND   TopAuthor AS Author
RETURN   BoundedContext, Author

## Next Steps

* evaluation of the results with the team
* domain specific trainings to increase knowledge and to establish domain experts