# Analyse der Webanwendung "PetClinic"  
   
   

## _Priorisierung von Umbauarbeiten nach Nutzungsgrad_


## Einlesen der Nutzungsstatistiken

In [None]:
import pandas as pd

# Read the JaCoCo production-coverage CSV for Spring PetClinic and narrow it
# down to the class identifier columns plus the covered/missed line counters.
coverage_csv = "../../notebooks/datasets/jacoco_production_coverage_spring_petclinic.csv"
coverage_columns = ['PACKAGE', 'CLASS', 'LINE_COVERED', 'LINE_MISSED']
coverage = pd.read_csv(coverage_csv)[coverage_columns]
coverage.head()

### Berechnung zusätzlicher Messwerte und Schlüssel

In [None]:
# Derive the per-class measures and the join key used for the graph import.
covered_lines = coverage['LINE_COVERED']
total_lines = covered_lines + coverage['LINE_MISSED']

# Total number of lines (covered + missed) per class.
coverage['lines'] = total_lines
# Share of covered lines per class.
# NOTE(review): a class with zero total lines yields NaN here (0 / 0) - confirm
# whether such rows can occur in the export.
coverage['ratio'] = covered_lines / total_lines
# Fully qualified name "<package>.<class>", matched against Type.fqn later on.
coverage['fqn'] = coverage['PACKAGE'] + "." + coverage['CLASS']
coverage[['fqn', 'ratio']].head()

### Laden der Daten in die Graphdatenbank

In [None]:
import py2neo
# Open the graph database connection; no connection arguments are given, so
# py2neo's defaults apply.
graph = py2neo.Graph()

# Attach a coverage measure to every Type node whose fqn matches a CSV row.
#
# * `$coverage_data` is the current Cypher parameter syntax; the legacy
#   `{coverage_data}` form is rejected by Neo4j 4.0 and later.
# * The labels are part of the MERGE pattern so that only an existing
#   :Measure:Coverage node is reused. With an unlabeled `(m)` the MERGE would
#   match *any* node already reachable via HAS_MEASURE and overwrite it with
#   the coverage label and ratio.
# * Re-running the cell updates the ratio of the existing node instead of
#   creating a second one.
query = """
    UNWIND $coverage_data AS coverage
    MATCH (t:Type {fqn : coverage.fqn})
    MERGE (t)-[:HAS_MEASURE]->(m:Measure:Coverage)
    SET m.ratio = coverage.ratio
    RETURN t.fqn AS fqn, m.ratio AS ratio
"""
# One dict per CSV row; the query only reads the `fqn` and `ratio` entries.
coverage_dict = coverage.to_dict(orient='records')
result = graph.run(query, coverage_data=coverage_dict).data()
pd.DataFrame(result).head()

## Aggregation der Messwerte nach Subdomänen

In [None]:
# Aggregate changes, coverage, bugs and size per subdomain.
#
# The first part counts distinct types, changes and bugs per subdomain name.
# Coverage and size are then computed once per type:
# * Averaging `co.ratio` directly over the matched rows would weight each type
#   by its number of changes and bugs, because every (type, change, bug)
#   combination is a separate row.
# * `SUM(DISTINCT t.lastMethodLineNumber)` would de-duplicate by value and
#   silently drop types that happen to share the same line number.
# Only types that have at least one change and one coverage measure are
# considered; bugs are optional.
query = """
MATCH 
  (t:Type)-[:BELONGS_TO]->(s:Subdomain),
  (t)-[:HAS_CHANGE]->(ch:Change),
  (t)-[:HAS_MEASURE]->(:Coverage)
OPTIONAL MATCH
  (t)-[:HAS_BUG]->(b:BugInstance)
WITH
  s.name AS ASubdomain,
  COLLECT(DISTINCT t) AS types,
  COUNT(DISTINCT ch) AS Changes,
  COUNT(DISTINCT b) AS Bugs
UNWIND types AS t
MATCH
  (t)-[:HAS_MEASURE]->(co:Coverage)
WITH
  ASubdomain, Changes, Bugs, t,
  AVG(co.ratio) AS typeCoverage
RETURN 
  ASubdomain,
  COUNT(t) AS Types,
  Changes,
  AVG(typeCoverage) AS Coverage,
  Bugs,
  SUM(t.lastMethodLineNumber) AS Lines
ORDER BY Coverage ASC, Bugs DESC
"""

### Ergebnisse nach Subdomänen

In [None]:
# Execute the aggregation query and turn the returned records into a table.
subdomain_records = graph.run(query).data()
result = pd.DataFrame(subdomain_records)
result

### Umbenennung nach geläufigen Begriffen

In [None]:
# Translate the raw metric names into the portfolio vocabulary and index the
# table by subdomain; `result` itself is left untouched.
portfolio_terms = {
    "Changes": "Investment",
    "Coverage": "Utilization",
    "Lines": "Size",
}
plot_data = (
    result
    .set_index('ASubdomain')
    .rename(columns=portfolio_terms)
)
plot_data

In [None]:
%matplotlib inline
from ausi.portfolio import plot_diagram

### Vier-Felder-Matrix zur Priorisierung nach Subdomänen

In [None]:
# Draw the prioritisation matrix for the subdomains from `plot_data`.
# NOTE(review): the three strings are column names of `plot_data`; presumably they
# select x-axis, y-axis and bubble size - confirm against ausi.portfolio.plot_diagram.
# The trailing semicolon keeps the notebook from echoing the call's return value.
plot_diagram(plot_data, 'Investment', 'Utilization', 'Size');

## Aggregation der Messwerte nach technischen Aspekten

In [None]:
# Aggregate investment (changes), utilization (coverage), bugs and size per
# technical aspect.
#
# The first part counts distinct types, changes and bugs per aspect name.
# Utilization and size are then computed once per type:
# * Averaging `co.ratio` directly over the matched rows would weight each type
#   by its number of changes and bugs, because every (type, change, bug)
#   combination is a separate row.
# * `SUM(DISTINCT t.lastMethodLineNumber)` would de-duplicate by value and
#   silently drop types that happen to share the same line number.
# Only types that have at least one change and one coverage measure are
# considered; bugs are optional.
query = """
MATCH 
  (t:Type)-[:IS_A]->(ta:TechnicalAspect),
  (t)-[:HAS_CHANGE]->(ch:Change),
  (t)-[:HAS_MEASURE]->(:Coverage)
OPTIONAL MATCH
  (t)-[:HAS_BUG]->(b:BugInstance)
WITH
  ta.name AS ATechnicalAspect,
  COLLECT(DISTINCT t) AS types,
  COUNT(DISTINCT ch) AS Investment,
  COUNT(DISTINCT b) AS Bugs
UNWIND types AS t
MATCH
  (t)-[:HAS_MEASURE]->(co:Coverage)
WITH
  ATechnicalAspect, Investment, Bugs, t,
  AVG(co.ratio) AS typeUtilization
RETURN 
  ATechnicalAspect,
  COUNT(t) AS Types,
  Investment,
  AVG(typeUtilization) AS Utilization,
  Bugs,
  SUM(t.lastMethodLineNumber) AS Size
ORDER BY Utilization ASC, Bugs DESC
"""

### Ergebnisse nach technischen Aspekten

In [None]:
# Execute the aggregation query and index the resulting table by aspect name.
aspect_records = graph.run(query).data()
result = pd.DataFrame(aspect_records).set_index('ATechnicalAspect')
result

### Vier-Felder-Matrix zur Priorisierung nach technischen Aspekten

In [None]:
# Draw the prioritisation matrix for the technical aspects from `result`.
# NOTE(review): the three strings are column names of `result`; presumably they
# select x-axis, y-axis and bubble size - confirm against ausi.portfolio.plot_diagram.
# The trailing semicolon keeps the notebook from echoing the call's return value.
plot_diagram(result, 'Investment', 'Utilization', 'Size');

## Ende Demo