# Set up connection to Neo4J

Esablish connection to Neo4j graph database

In [1]:
from neo4j import GraphDatabase

URI = "bolt://localhost"
AUTH = ("neo4j", "neo4j")

driver = GraphDatabase.driver(URI, auth=AUTH)
driver.verify_connectivity()
session = driver.session()

Clean data from previous run

In [2]:
query="""
MATCH (a) -[r] -> () DELETE a, r
"""
session.run(query);

In [3]:
query="""
   MATCH (a) DELETE a
"""
session.run(query);

# Data import

## Import dependencies (from jdeps)

### Generating dataset

You can generate this kind of data with any tool that can show you dependencies between your classes. E.g. in Java, using `jdeps`:
    
    
`jdeps -e 'org.springframework.samples.petclinic.*' -v target/classes/ > spring_petclinic_deps.txt`

*Vorsicht: `jdeps` ist nur eine Annäherung an die Abhängigkeiten. Z. B. werden hier Typen, die in Generics verwendet werden, nicht angezeigt.*

### Show dataset

In [4]:
!head data/spring_petclinic_deps.txt

   org.springframework.samples.petclinic.model.NamedEntity -> org.springframework.samples.petclinic.model.BaseEntity classes
   org.springframework.samples.petclinic.model.Owner  -> org.springframework.samples.petclinic.model.Person classes
   org.springframework.samples.petclinic.model.Owner  -> org.springframework.samples.petclinic.model.Pet    classes
   org.springframework.samples.petclinic.model.Person -> org.springframework.samples.petclinic.model.BaseEntity classes
   org.springframework.samples.petclinic.model.Pet    -> org.springframework.samples.petclinic.model.NamedEntity classes
   org.springframework.samples.petclinic.model.Pet    -> org.springframework.samples.petclinic.model.Owner  classes
   org.springframework.samples.petclinic.model.Pet    -> org.springframework.samples.petclinic.model.PetType classes
   org.springframework.samples.petclinic.model.Pet    -> org.springframework.samples.petclinic.model.Visit  classes
   org.springframework.samples.petclinic.mode

### Import dataset to pandas

In [5]:
import pandas as pd

deps = pd.read_csv("data/spring_petclinic_deps.txt", names=["raw"], sep="\r")
deps.head()

Unnamed: 0,raw
0,org.springframework.samples.petclinic.model...
1,org.springframework.samples.petclinic.model...
2,org.springframework.samples.petclinic.model...
3,org.springframework.samples.petclinic.model...
4,org.springframework.samples.petclinic.model...


### Normalize data

In [6]:
# class entries begin with three whitespaces
deps = deps[deps['raw'].str.startswith("   ")]
# separates the source from the target
splitted = deps['raw'].str.split("->", n=1, expand=True)
# remove whitespaces from source and get rid of inner classes
deps['from'] = splitted[0].str.strip().str.split("\$").str[0]
# get the target and the artifact names
splitted_2 = splitted[1].str.split(" ", n=2)
# get also rid of inner classes
deps['to'] = splitted_2.str[1].str.split("\$").str[0]
deps['type'] = splitted_2.str[2].str.strip()
deps['name'] = deps['from'].str.split(".").str[-1]
deps.head()

Unnamed: 0,raw,from,to,type,name
0,org.springframework.samples.petclinic.model...,org.springframework.samples.petclinic.model.Na...,org.springframework.samples.petclinic.model.Ba...,classes,NamedEntity
1,org.springframework.samples.petclinic.model...,org.springframework.samples.petclinic.model.Owner,org.springframework.samples.petclinic.model.Pe...,classes,Owner
2,org.springframework.samples.petclinic.model...,org.springframework.samples.petclinic.model.Owner,org.springframework.samples.petclinic.model.Pet,classes,Owner
3,org.springframework.samples.petclinic.model...,org.springframework.samples.petclinic.model.Pe...,org.springframework.samples.petclinic.model.Ba...,classes,Person
4,org.springframework.samples.petclinic.model...,org.springframework.samples.petclinic.model.Pet,org.springframework.samples.petclinic.model.Na...,classes,Pet


### Transform data for source code file names into dictionary

In [7]:
names_data = deps[['from', 'name']].drop_duplicates().to_dict(orient='records')
names_data[:5]

[{'from': 'org.springframework.samples.petclinic.model.NamedEntity',
  'name': 'NamedEntity'},
 {'from': 'org.springframework.samples.petclinic.model.Owner',
  'name': 'Owner'},
 {'from': 'org.springframework.samples.petclinic.model.Person',
  'name': 'Person'},
 {'from': 'org.springframework.samples.petclinic.model.Pet', 'name': 'Pet'},
 {'from': 'org.springframework.samples.petclinic.model.PetType',
  'name': 'PetType'}]

### Import data into Neo4j

In [8]:
query="""
    UNWIND {data} as dep_name
    CREATE
        (t:Type)
    SET
        t.fqn = dep_name.from,
        t.name = dep_name.name
    RETURN t.fqn, t.name
"""
session.run(query, data=names_data).to_df().head()

Unnamed: 0,t.fqn,t.name
0,org.springframework.samples.petclinic.model.Na...,NamedEntity
1,org.springframework.samples.petclinic.model.Owner,Owner
2,org.springframework.samples.petclinic.model.Pe...,Person
3,org.springframework.samples.petclinic.model.Pet,Pet
4,org.springframework.samples.petclinic.model.Pe...,PetType


### Create index for `fqn` for faster queries

In [9]:
query="""
  CREATE INDEX ON :Type(fqn)
"""
session.run(query);

### Transform data for dependencies into a dictionary

In [10]:
deps_data = deps.to_dict(orient='records')
deps_data[:2]

[{'raw': '   org.springframework.samples.petclinic.model.NamedEntity -> org.springframework.samples.petclinic.model.BaseEntity classes',
  'from': 'org.springframework.samples.petclinic.model.NamedEntity',
  'to': 'org.springframework.samples.petclinic.model.BaseEntity',
  'type': 'classes',
  'name': 'NamedEntity'},
 {'raw': '   org.springframework.samples.petclinic.model.Owner  -> org.springframework.samples.petclinic.model.Person classes',
  'from': 'org.springframework.samples.petclinic.model.Owner',
  'to': 'org.springframework.samples.petclinic.model.Person',
  'type': 'classes',
  'name': 'Owner'}]

### Connect nodes that depend on each other

In [11]:
query="""
    UNWIND {data} as dep
    MATCH (from:Type {fqn : dep.from})
    MATCH (to:Type {fqn: dep.to})
    MERGE (from)-[:DEPENDS_ON]->(to)
    RETURN from.fqn, to.fqn
"""
session.run(query, data=deps_data).to_df().head()

Unnamed: 0,from.fqn,to.fqn
0,org.springframework.samples.petclinic.model.Owner,org.springframework.samples.petclinic.model.Pe...
1,org.springframework.samples.petclinic.model.Owner,org.springframework.samples.petclinic.model.Pet
2,org.springframework.samples.petclinic.model.Pet,org.springframework.samples.petclinic.model.Na...
3,org.springframework.samples.petclinic.model.Pet,org.springframework.samples.petclinic.model.Owner
4,org.springframework.samples.petclinic.model.Pet,org.springframework.samples.petclinic.model.Pe...


### Prepare results for dependency analysis

In [12]:
query="""
MATCH
    (t:Type)
WITH DISTINCT t
MATCH
    (type)-[:DEPENDS_ON*0..1]->(directDependency:Type)
RETURN type.fqn as name, COLLECT(DISTINCT directDependency.fqn) as imports
"""

json_data = session.run(query).to_df().to_json(orient="records")
print(json_data[:200])

[{"name":"org.springframework.samples.petclinic.model.Vet","imports":["org.springframework.samples.petclinic.model.Vet","org.springframework.samples.petclinic.model.Person","org.springframework.sample


### Visualize dependencies

In [13]:
with open ( "output/hierarchical-edge-bundling.json", mode='w') as json_file:
    json_file.write(json_data)

### Show link for visualization
<a href="vis/hierarchical-edge-bundling.html">hierarchical-edge-bundling.html</a>

## Import lines of code information

### Generate dataset

You can generate this data for various source code projects e.g. via `cloc`:

`src/main/java/cloc . --by-file --quiet --csv --out spring_petclinic_cloc.csv`


### Show dataset

In [14]:
!head data/spring_petclinic_cloc.csv

language,filename,blank,comment,code,"github.com/AlDanial/cloc v 1.82  T=0.19 s (244.7 files/s, 16290.7 lines/s)"
Java,./org/springframework/samples/petclinic/repository/jdbc/JdbcOwnerRepositoryImpl.java,19,41,98
Java,./org/springframework/samples/petclinic/model/Owner.java,22,35,96
Java,./org/springframework/samples/petclinic/web/OwnerController.java,18,32,85
Java,./org/springframework/samples/petclinic/web/PetController.java,15,20,78
Java,./org/springframework/samples/petclinic/repository/jdbc/JdbcPetRepositoryImpl.java,15,26,75
Java,./org/springframework/samples/petclinic/service/ClinicServiceImpl.java,18,21,74
Java,./org/springframework/samples/petclinic/repository/jdbc/OneToManyResultSetExtractor.java,15,74,70
Java,./org/springframework/samples/petclinic/model/Pet.java,20,22,69
Java,./org/springframework/samples/petclinic/repository/jdbc/JdbcVisitRepositoryImpl.java,17,29,57


### Import data

In [15]:
cloc = pd.read_csv("data/spring_petclinic_cloc.csv")[:-1].copy()
cloc.tail()

Unnamed: 0,language,filename,blank,comment,code,"github.com/AlDanial/cloc v 1.82 T=0.19 s (244.7 files/s, 16290.7 lines/s)"
42,Java,./org/springframework/samples/petclinic/reposi...,2,21,6,
43,Java,./org/springframework/samples/petclinic/web/pa...,1,3,1,
44,Java,./org/springframework/samples/petclinic/model/...,1,3,1,
45,Java,./org/springframework/samples/petclinic/reposi...,1,4,1,
46,Java,./org/springframework/samples/petclinic/reposi...,1,4,1,


### Normalize data
cloc delivers paths, but we need a full qualified name ("fqn") that matches with exiting data

In [16]:
cloc['fqn'] = cloc['filename'].str.replace("./", "", regex=False)\
                              .str.replace("/",".", regex=False)\
                              .str.replace(".java","", regex=False)
cloc.head()

Unnamed: 0,language,filename,blank,comment,code,"github.com/AlDanial/cloc v 1.82 T=0.19 s (244.7 files/s, 16290.7 lines/s)",fqn
0,Java,./org/springframework/samples/petclinic/reposi...,19,41,98,,org.springframework.samples.petclinic.reposito...
1,Java,./org/springframework/samples/petclinic/model/...,22,35,96,,org.springframework.samples.petclinic.model.Owner
2,Java,./org/springframework/samples/petclinic/web/Ow...,18,32,85,,org.springframework.samples.petclinic.web.Owne...
3,Java,./org/springframework/samples/petclinic/web/Pe...,15,20,78,,org.springframework.samples.petclinic.web.PetC...
4,Java,./org/springframework/samples/petclinic/reposi...,15,26,75,,org.springframework.samples.petclinic.reposito...


### Clean up dataset

In [17]:
loc = cloc[['fqn', 'code', 'comment', 'blank']].dropna().copy()
loc.head()

Unnamed: 0,fqn,code,comment,blank
0,org.springframework.samples.petclinic.reposito...,98,41,19
1,org.springframework.samples.petclinic.model.Owner,96,35,22
2,org.springframework.samples.petclinic.web.Owne...,85,32,18
3,org.springframework.samples.petclinic.web.PetC...,78,20,15
4,org.springframework.samples.petclinic.reposito...,75,26,15


### Generate dictionary

In [18]:
loc_data = loc.to_dict(orient='records')
loc_data[:2]

[{'fqn': 'org.springframework.samples.petclinic.repository.jdbc.JdbcOwnerRepositoryImpl',
  'code': 98,
  'comment': 41,
  'blank': 19},
 {'fqn': 'org.springframework.samples.petclinic.model.Owner',
  'code': 96,
  'comment': 35,
  'blank': 22}]

### Import into Neo4j

In [19]:
query="""
    UNWIND {data} as loc
    MATCH (t:Type {fqn : loc.fqn})
    SET
        t.lines = loc.code,
        t.comments = loc.comment,
        t.blanks = loc.blank
    RETURN t.fqn, t.name, t.lines, t.comments, t.blanks
"""

session.run(query, data=loc_data).to_df().head()

Unnamed: 0,t.fqn,t.name,t.lines,t.comments,t.blanks
0,org.springframework.samples.petclinic.reposito...,JdbcOwnerRepositoryImpl,98,41,19
1,org.springframework.samples.petclinic.model.Owner,Owner,96,35,22
2,org.springframework.samples.petclinic.web.Owne...,OwnerController,85,32,18
3,org.springframework.samples.petclinic.web.PetC...,PetController,78,20,15
4,org.springframework.samples.petclinic.reposito...,JdbcPetRepositoryImpl,75,26,15


## Import usage data

### Generate dataset

E.g. via coverage tools like JaCoCo you can get a glimpse on what's happening during the usage of your application.

See here for more details: https://www.feststelltaste.de/visualizing-production-coverage-with-jacoco-pandas-and-d3/

### Show dataset

In [20]:
!head data/spring_petclinic_production_coverage_data.csv

PACKAGE,CLASS,LINE_MISSED,LINE_COVERED
org.springframework.samples.petclinic,PetclinicInitializer,0,24
org.springframework.samples.petclinic.model,NamedEntity,1,4
org.springframework.samples.petclinic.model,Specialty,0,1
org.springframework.samples.petclinic.model,PetType,0,1
org.springframework.samples.petclinic.model,Vets,4,0
org.springframework.samples.petclinic.model,Visit,0,12
org.springframework.samples.petclinic.model,BaseEntity,0,5
org.springframework.samples.petclinic.model,Person,0,7
org.springframework.samples.petclinic.model,Owner,14,26


### Import dataset

In [21]:
coverage = pd.read_csv("data/spring_petclinic_production_coverage_data.csv")
coverage.head()

Unnamed: 0,PACKAGE,CLASS,LINE_MISSED,LINE_COVERED
0,org.springframework.samples.petclinic,PetclinicInitializer,0,24
1,org.springframework.samples.petclinic.model,NamedEntity,1,4
2,org.springframework.samples.petclinic.model,Specialty,0,1
3,org.springframework.samples.petclinic.model,PetType,0,1
4,org.springframework.samples.petclinic.model,Vets,4,0


### Enrich data
Calculate the percentage of executed lines of code per class

In [22]:
coverage['lines'] = coverage.LINE_COVERED + coverage.LINE_MISSED
coverage['ratio'] = coverage.LINE_COVERED / coverage.lines
coverage.head()

Unnamed: 0,PACKAGE,CLASS,LINE_MISSED,LINE_COVERED,lines,ratio
0,org.springframework.samples.petclinic,PetclinicInitializer,0,24,24,1.0
1,org.springframework.samples.petclinic.model,NamedEntity,1,4,5,0.8
2,org.springframework.samples.petclinic.model,Specialty,0,1,1,1.0
3,org.springframework.samples.petclinic.model,PetType,0,1,1,1.0
4,org.springframework.samples.petclinic.model,Vets,4,0,4,0.0


### Normalize data

In [23]:
coverage['fqn'] = coverage["PACKAGE"] + "." + coverage["CLASS"]
coverage.head()

Unnamed: 0,PACKAGE,CLASS,LINE_MISSED,LINE_COVERED,lines,ratio,fqn
0,org.springframework.samples.petclinic,PetclinicInitializer,0,24,24,1.0,org.springframework.samples.petclinic.Petclini...
1,org.springframework.samples.petclinic.model,NamedEntity,1,4,5,0.8,org.springframework.samples.petclinic.model.Na...
2,org.springframework.samples.petclinic.model,Specialty,0,1,1,1.0,org.springframework.samples.petclinic.model.Sp...
3,org.springframework.samples.petclinic.model,PetType,0,1,1,1.0,org.springframework.samples.petclinic.model.Pe...
4,org.springframework.samples.petclinic.model,Vets,4,0,4,0.0,org.springframework.samples.petclinic.model.Vets


### Import data into Neo4j

In [24]:
query="""
    UNWIND {data} as coverage
    MATCH (t:Type {fqn : coverage.fqn})
    MERGE (t)-[:HAS_MEASURE]->(m)
    SET 
        m:Measure:Coverage,
        m.ratio = coverage.ratio,
        m.lines = coverage.lines
    RETURN t.fqn as fqn, m.ratio as ratio, m.lines as lines
"""

session.run(query, data=coverage.to_dict(orient='records')).to_df().head()

Unnamed: 0,fqn,ratio,lines
0,org.springframework.samples.petclinic.model.Na...,0.8,5
1,org.springframework.samples.petclinic.model.Sp...,1.0,1
2,org.springframework.samples.petclinic.model.Pe...,1.0,1
3,org.springframework.samples.petclinic.model.Vets,0.0,4
4,org.springframework.samples.petclinic.model.Visit,1.0,12


# Check data

## Query Nodes

### List measures

In [25]:
query="""
   MATCH (n:Type)-[:HAS_MEASURE]->(m:Measure)
   RETURN n.fqn as fqn, n.lines as lines, m.ratio as ratio
"""

module_options = session.run(query).to_df()
module_options.head()

Unnamed: 0,fqn,lines,ratio
0,org.springframework.samples.petclinic.model.Na...,18,0.8
1,org.springframework.samples.petclinic.model.Sp...,7,1.0
2,org.springframework.samples.petclinic.model.Pe...,7,1.0
3,org.springframework.samples.petclinic.model.Vets,16,0.0
4,org.springframework.samples.petclinic.model.Visit,43,1.0


# Explore modularization options

## Explore existing modularization

### Extract existing main module structure

In [26]:
module_options['base_module'] = module_options['fqn'].str.split(".").str[4]
module_options.head()

Unnamed: 0,fqn,lines,ratio,base_module
0,org.springframework.samples.petclinic.model.Na...,18,0.8,model
1,org.springframework.samples.petclinic.model.Sp...,7,1.0,model
2,org.springframework.samples.petclinic.model.Pe...,7,1.0,model
3,org.springframework.samples.petclinic.model.Vets,16,0.0,model
4,org.springframework.samples.petclinic.model.Visit,43,1.0,model


### Add base module information to graph

In [27]:
query="""
    UNWIND {data} as module
    MATCH (t:Type {fqn : module.fqn})
    MERGE (t)-[:BELONGS_TO]->(m)
    SET 
        m:Base:Module,
        m.name = module.base_module
    RETURN t.fqn as fqn, m.name
"""
session.run(query, data=module_options.to_dict(orient='records')).to_df().head()

Unnamed: 0,fqn,m.name
0,org.springframework.samples.petclinic.model.Na...,model
1,org.springframework.samples.petclinic.model.Sp...,model
2,org.springframework.samples.petclinic.model.Pe...,model
3,org.springframework.samples.petclinic.model.Vets,model
4,org.springframework.samples.petclinic.model.Visit,model


### Query for basic statisics

In [28]:
query="""
    MATCH (t:Type)-[:BELONGS_TO]->(m:Base:Module)
    RETURN m.name as module_name, count(t) as classes
"""

session.run(query).to_df().head()

Unnamed: 0,module_name,classes
0,model,9
1,service,1
2,web,6
3,util,1
4,repository,12


### Add base module dependencies to graph

In [29]:
query = """
    MATCH (m1:Base:Module)<-[:BELONGS_TO]-(t1:Type)<-[:DEPENDS_ON]-(t2:Type)-[:BELONGS_TO]->(m2:Base:Module)
    MERGE (m2)-[:USES]->(m1)
    RETURN DISTINCT(m2.name) as module, m1.name as dependent_module, COUNT(t2) as dependencies, SUM(t2.lines) as lines
"""
base_module_dependencies = session.run(query).to_df()
base_module_dependencies.head()

Unnamed: 0,module,dependent_module,dependencies,lines
0,model,model,12,629
1,service,model,5,370
2,web,model,10,510
3,web,web,1,78
4,repository,model,18,929


### Export data for visualization

In [30]:
import json
json_data = base_module_dependencies.to_dict(orient='split')['data']
with open ( "output/chord-diagram.json", mode='w') as json_file:
    json_file.write(json.dumps(json_data, indent=3))
json_data

[['model', 'model', 12, 629],
 ['service', 'model', 5, 370],
 ['web', 'model', 10, 510],
 ['web', 'web', 1, 78],
 ['repository', 'model', 18, 929],
 ['repository', 'util', 3, 221],
 ['repository', 'repository', 9, 465]]

### Analyze fan in and fan out

In [31]:
query="""
MATCH (t1:Type)-[:BELONGS_TO]->(m1:Module)
WITH m1, t1
MATCH (t1)-[:DEPENDS_ON]->(t2:Type)
WHERE NOT (t2)-[:BELONGS_TO]->(m1)
WITH m1, t1, count(DISTINCT t2) AS totalTypes,
     size(()-[:DEPENDS_ON]->(t1)) AS fanIn,
     size((t1)-[:DEPENDS_ON]->()) AS fanOut
RETURN m1.name AS moduleName, 
       count(DISTINCT t1) AS totalTypesInModule, 
       sum(fanIn) AS totalFanIn, 
       sum(fanOut) AS totalFanOut

"""
session.run(query).to_df()

Unnamed: 0,moduleName,totalTypesInModule,totalFanIn,totalFanOut
0,model,7,55,12
1,service,1,0,10
2,web,6,1,16
3,repository,12,10,41


In [32]:
query="""
    MATCH (currentModule:Domain:Module)<-[:BELONGS_TO]-(dependent:Type)
        <-[:DEPENDS_ON]-(dependency:Type)-[:BELONGS_TO]->(otherModule:Domain:Module)
    RETURN currentModule.name as module, COUNT(DISTINCT(dependent)) as exposed
"""
visible = session.run(query).to_df()        
visible

Unnamed: 0,module,exposed


In [33]:
query="""
    MATCH (currentModule:Domain:Module)<-[:BELONGS_TO]-(type:Type)
    WHERE NOT (type:Type)<-[:DEPENDS_ON]-(:Type)-[:BELONGS_TO]->(:Domain:Module)
    RETURN currentModule.name as module, COUNT(DISTINCT(type)) as hidden
"""
visible = session.run(query).to_df()        
visible

Unnamed: 0,module,hidden


In [34]:
query="""
MATCH (currentModule:Domain:Module)<-[:BELONGS_TO]-(type:Type)
OPTIONAL MATCH (type)<-[:DEPENDS_ON]-(dependency:Type)-[:BELONGS_TO]->(otherModule:Domain:Module)
WITH
    currentModule,
    COUNT(DISTINCT dependency) as exposed,
    SIZE((currentModule)<-[:BELONGS_TO]-(:Type)) as closed,
    COUNT(type) as numberOfTypes
RETURN DISTINCT(currentModule.name) as module, SUM(exposed) as visible, SUM(closed) as hidden, SUM(closed) * 1.0 / COUNT(numberOfTypes)*1.0
""" 
visible = session.run(query).to_df()        
visible

Unnamed: 0,module,visible,hidden,SUM(closed) * 1.0 / COUNT(numberOfTypes)*1.0


## Explore alternative modularization

In [35]:
module_options.head()

Unnamed: 0,fqn,lines,ratio,base_module
0,org.springframework.samples.petclinic.model.Na...,18,0.8,model
1,org.springframework.samples.petclinic.model.Sp...,7,1.0,model
2,org.springframework.samples.petclinic.model.Pe...,7,1.0,model
3,org.springframework.samples.petclinic.model.Vets,16,0.0,model
4,org.springframework.samples.petclinic.model.Visit,43,1.0,model


In [36]:
domain_parts = ["Owner", "Pet", "Visit", "Vet", "Specialty", "Clinic"]

for domain_part in domain_parts:
    module_options.loc[module_options['fqn'].str.contains(domain_part), 'domain_part'] = domain_part

module_options.head()

Unnamed: 0,fqn,lines,ratio,base_module,domain_part
0,org.springframework.samples.petclinic.model.Na...,18,0.8,model,
1,org.springframework.samples.petclinic.model.Sp...,7,1.0,model,Specialty
2,org.springframework.samples.petclinic.model.Pe...,7,1.0,model,Pet
3,org.springframework.samples.petclinic.model.Vets,16,0.0,model,Vet
4,org.springframework.samples.petclinic.model.Visit,43,1.0,model,Visit


In [37]:
domain_part_mapping = {
    "Visit" : "Checkup",
    "Pet" : "Patient",
    "Owner" : "Patient",
    "Vet" : "Doctor",
    "Specialty" : "Doctor"
} 
    
module_options['domain'] = module_options['domain_part'].map(domain_part_mapping).fillna("Framework")
module_options.head()

Unnamed: 0,fqn,lines,ratio,base_module,domain_part,domain
0,org.springframework.samples.petclinic.model.Na...,18,0.8,model,,Framework
1,org.springframework.samples.petclinic.model.Sp...,7,1.0,model,Specialty,Doctor
2,org.springframework.samples.petclinic.model.Pe...,7,1.0,model,Pet,Patient
3,org.springframework.samples.petclinic.model.Vets,16,0.0,model,Vet,Doctor
4,org.springframework.samples.petclinic.model.Visit,43,1.0,model,Visit,Checkup


In [38]:
query="""
    UNWIND {data} as module
    MATCH (t:Type {fqn : module.fqn})
    MERGE (t)-[:BELONGS_TO]->(m)
    SET 
        m:Domain:Module,
        m.name = module.domain
    RETURN t.fqn as fqn, m.name
"""

session.run(query, data=module_options.to_dict(orient='records')).to_df().head()

Unnamed: 0,fqn,m.name
0,org.springframework.samples.petclinic.model.Na...,Framework
1,org.springframework.samples.petclinic.model.Sp...,Doctor
2,org.springframework.samples.petclinic.model.Pe...,Patient
3,org.springframework.samples.petclinic.model.Vets,Doctor
4,org.springframework.samples.petclinic.model.Visit,Checkup


### Add base module dependencies to graph

In [39]:
query = """
    MATCH (m1:Domain:Module)<-[:BELONGS_TO]-(t1:Type)<-[:DEPENDS_ON]-(t2:Type)-[:BELONGS_TO]->(m2:Domain:Module)
    MERGE (m2)-[:USES]->(m1)
    RETURN DISTINCT(m2.name) as module, m1.name as dependent_module, COUNT(t2) as dependencies, SUM(t2.lines) as lines
"""
domain_module_dependencies = session.run(query).to_df()
domain_module_dependencies.head()

Unnamed: 0,module,dependent_module,dependencies,lines
0,Doctor,Doctor,6,213
1,Checkup,Patient,7,303
2,Patient,Patient,23,1458
3,Patient,Framework,5,345
4,Patient,Checkup,2,167


In [40]:
query="""
MATCH (t1:Type)-[:BELONGS_TO]->(m1:Domain:Module)
WITH m1, t1
MATCH (t1)-[:DEPENDS_ON]->(t2:Type)
WHERE NOT (t2)-[:BELONGS_TO]->(m1)
WITH m1, t1, count(DISTINCT t2) AS totalTypes,
     size(()-[:DEPENDS_ON]->(t1)) AS fanIn,
     size((t1)-[:DEPENDS_ON]->()) AS fanOut
RETURN m1.name AS moduleName, 
       count(DISTINCT t1) AS totalTypesInModule, 
       sum(fanIn) AS totalFanIn, 
       sum(fanOut) AS totalFanOut

"""
session.run(query).to_df()

Unnamed: 0,moduleName,totalTypesInModule,totalFanIn,totalFanOut
0,Doctor,6,11,13
1,Checkup,6,13,18
2,Patient,13,42,38
3,Framework,1,0,10


In [41]:
query="""
    MATCH (t:Type)-[:BELONGS_TO]->(m:Domain:Module)
    RETURN m.name as module_name, count(t) as classes
"""

session.run(query).to_df().head()

Unnamed: 0,module_name,classes
0,Doctor,6
1,Checkup,6
2,Framework,4
3,Patient,13


### Add base module dependencies to graph

In [42]:
query = """
    MATCH (m1:Domain:Module)<-[:BELONGS_TO]-(t1:Type)<-[:DEPENDS_ON]-(t2:Type)-[:BELONGS_TO]->(m2:Domain:Module)
    RETURN DISTINCT(m2.name) as module, m1.name as dependent_module, COUNT(t2) as dependencies
"""
domain_module_dependencies = session.run(query).to_df()
json_data = domain_module_dependencies.to_dict(orient='split')['data']
with open ( "output/chord-diagram.json", mode='w') as json_file:
    json_file.write(json.dumps(json_data, indent=3))
json_data

[['Doctor', 'Doctor', 6],
 ['Checkup', 'Patient', 7],
 ['Patient', 'Patient', 23],
 ['Patient', 'Framework', 5],
 ['Patient', 'Checkup', 2],
 ['Doctor', 'Framework', 3],
 ['Framework', 'Patient', 3],
 ['Framework', 'Doctor', 1],
 ['Framework', 'Checkup', 1],
 ['Checkup', 'Checkup', 7]]

### Prepare results for dependency analysis

In [43]:
query="""
MATCH (m:Domain:Module)-[:DEPENDS_ON]->(m_dep:Domain:Module)
RETURN m.name as name, COLLECT(DISTINCT m_dep.name) as imports
"""

json_data = session.run(query).to_df().to_json(orient="records")
print(json_data[:200])

[]


### Visualize dependencies

In [44]:
with open ( "output/hierarchical-edge-bundling.json", mode='w') as json_file:
    json_file.write(json_data)

### Show link for visualization
<a href="vis/hierarchical-edge-bundling.html">hierarchical-edge-bundling.html</a>