# Code Hotspot-Analyse

## Datensammlung

### Metrik 1: Anzahl der Zeilen pro Quellcodedatei

In [None]:
# Count the lines of every *.java file found below spring-framework-petclinic/
# and store the result in lines.txt. `wc -l` prints "<count> <path>" per file;
# `head -n-1` drops the last output line, which is wc's "total" summary when wc
# is invoked once with several files.
# NOTE(review): presumably breaks for paths containing whitespace (xargs
# word-splitting) -- confirm such paths cannot occur in the repository.
!find spring-framework-petclinic/ -name '*.java' | xargs wc -l | head -n-1 > lines.txt

In [None]:
import pandas as pd

# Load the `wc -l` output (whitespace-separated: line count, then file path),
# remove the checkout directory name from the paths and index by file path.
lines = (
    pd.read_csv("lines.txt", sep=r'\s+', names=['lines', 'filepath'])
    .assign(filepath=lambda frame: frame['filepath'].str.replace("spring-framework-petclinic/", ""))
    .set_index('filepath')
)
# Last expression of the cell: preview of the first rows.
lines.head()

### Metrik 2: Änderungsinformationen

In [None]:
# Write the file names touched by each commit (one path per line; the commit
# header itself is suppressed by the empty --pretty=format:) to changes.txt,
# restricted to Java files.
# The pathspec is quoted so that the shell passes the literal pattern to git
# instead of expanding *.java against the notebook's working directory.
!git -C spring-framework-petclinic log --name-only --pretty=format: -- '*.java' > changes.txt

In [None]:
# Every row read from changes.txt is one file path reported by the git log.
each_change_per_file = pd.read_csv("changes.txt", names=["filepath"])
# Counting how often each path occurs yields the number of changes per file.
changes = each_change_per_file['filepath'].value_counts().to_frame(name='changes')
# Last expression of the cell: preview of the first rows.
changes.head()

### Metrik 3: Hotspots

In [None]:
# Attach the line counts to the change counts via the shared file-path index
# (left join on `changes`), then discard every row with a missing value, i.e.
# files that have change information but no line count.
hotspots = changes.join(lines, how="left").dropna(axis=0, how="any")
# Last expression of the cell: preview of the first rows.
hotspots.head()

## Erste Auswertungen

Zeige die Top 10 der meistgeänderten Dateien an.

In [None]:
# The ten files with the highest change count, reduced to that one column.
hotspots[['changes']].sort_values(by="changes", ascending=False).head(10)

Zeige die Top 10 der größten Dateien an.

In [None]:
# The ten files with the highest line count, reduced to that one column.
hotspots[['lines']].sort_values(by="lines", ascending=False).head(10)

Berechne die Hotspots und zeige die Top 10 an.

In [None]:
# Scale both metrics relative to their respective maximum value.
hotspots["rel_changes"] = hotspots["changes"].div(hotspots["changes"].max())
hotspots["rel_lines"] = hotspots["lines"].div(hotspots["lines"].max())
# The hotspot score is the sum of the two relative metrics.
hotspots["rel_hotspot"] = hotspots["rel_changes"].add(hotspots["rel_lines"])
# Last expression of the cell: the ten highest hotspot scores.
hotspots[["rel_hotspot"]].sort_values(by="rel_hotspot", ascending=False).head(10)

## Visualisierung

Hinweis: Die einzelnen Code-Blöcke sind nur Implementierungsdetails zur Ausgabe der Visualisierung und daher nicht leserelevant.

In [None]:
from matplotlib import cm
from matplotlib.colors import rgb2hex
import json
import os


# Prepare the data for the visualization
def create_plot_data(df, color_column_name, size_column_name, seperator):
    """Derive the per-file plotting attributes from *df*.

    Args:
        df: DataFrame indexed by file path.
        color_column_name: Column of *df* whose values drive the color.
        size_column_name: Column of *df* that is copied to ``size``.
        seperator: String that separates the path segments in the index.

    Returns:
        A new DataFrame with the index of *df* and the columns
        ``value_for_color``, ``ratio_for_color`` (value divided by the column
        maximum), ``color`` (hex string taken from matplotlib's ``coolwarm``
        colormap at that ratio), ``size``, ``path`` and ``name`` (index entry
        split at its last separator) and ``path_list`` (``path`` split at
        every separator).
    """
    color_values = df[color_column_name]
    color_ratios = color_values / color_values.max()
    # NOTE(review): presumably every index entry must contain the separator,
    # otherwise the split yields a single element for two target columns.
    path_and_name = df.index.str.rsplit(seperator, n=1).to_list()

    result = pd.DataFrame(index=df.index)
    result['value_for_color'] = color_values
    result['ratio_for_color'] = color_ratios
    result['color'] = result['ratio_for_color'].apply(
        lambda ratio: rgb2hex(cm.coolwarm(ratio))
    )
    result['size'] = df[size_column_name]
    result[['path', 'name']] = path_and_name
    result['path_list'] = result['path'].str.split(seperator)
    return result


# Convert to the D3 data format
def create_flare_json(df):
    """Build a nested ``name``/``children`` tree from the rows of *df*.

    Every row has to provide ``path_list`` (the directory names from the top
    level downwards), ``name``, ``size``, ``value_for_color`` and ``color``.
    Directory nodes are dicts of the form ``{'name': ..., 'children': [...]}``
    and are reused by all rows that share the same path prefix. Each row adds
    one leaf ``{'name': '<name> (<size> [<value_for_color>])', 'size': ...,
    'color': ...}`` below its deepest directory node.

    Returns:
        The root dict, named ``'flare'``.
    """
    root = {'name': 'flare', 'children': []}

    for _, row in df.iterrows():
        # Walk down one level per path segment, creating missing nodes.
        siblings = root['children']
        for segment in row['path_list']:
            for candidate in siblings:
                if candidate.get('name', '') == segment:
                    node = candidate
                    break
            else:
                # No sibling carries this name yet.
                node = {'name': segment, 'children': []}
                siblings.append(node)
            siblings = node['children']

        leaf = {
            'name': f"{row['name']} ({row['size']} [{row['value_for_color']}])",
            'size': row['size'],
            'color': row['color'],
        }
        siblings.append(leaf)

    return root


# Feed the template with data
def create_hotspot_file(hotspots, color_column_name, size_column_name, separator):
    """Render the hotspot visualization into ``hotspots.html``.

    The hierarchy built by ``create_flare_json(create_plot_data(...))`` is
    serialized as JSON and substituted for the ``###FLARE_JSON###``
    placeholder of ``d3/template_hierarchical_d3_inline.html``. The result is
    written to ``hotspots.html`` in the current working directory. Template
    and output are read and written as UTF-8.

    Args:
        hotspots: DataFrame indexed by file path.
        color_column_name: Column that determines the color of an entry.
        size_column_name: Column that determines the size of an entry.
        separator: String that separates the path segments in the index.

    Raises:
        OSError: If the template cannot be read or the output not written.
        TypeError: If the hierarchy contains a value that is not JSON
            serializable.
    """
    def _to_builtin(value):
        # NumPy scalars offer .item() to obtain the equivalent Python scalar;
        # anything else is reported as not serializable.
        if hasattr(value, "item"):
            return value.item()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    json_data = create_flare_json(
        create_plot_data(hotspots, color_column_name, size_column_name, separator)
    )
    # Serialize as real JSON: str() of a dict is a Python repr, which is not
    # guaranteed to be valid JavaScript (e.g. `nan`, `None`, `True`).
    flare_json = json.dumps(json_data, default=_to_builtin)

    with open("d3/template_hierarchical_d3_inline.html", encoding="utf-8") as html_template:
        html = html_template.read().replace("###FLARE_JSON###", flare_json)

    # NOTE(review): the "output" directory is created but the file is written
    # to the working directory -- confirm which location is intended.
    os.makedirs("output", exist_ok=True)
    with open('hotspots.html', mode='w', encoding="utf-8") as html_out:
        html_out.write(html)

### Erzeuge Hotspot-Visualisierung

In [None]:
# Color entries by their change count, size them by their line count and
# split the file paths at "/".
create_hotspot_file(
    hotspots,
    color_column_name="changes",
    size_column_name="lines",
    separator="/",
)