In [None]:
import os
import json
import pandas as pd
import numpy as np
from numpy import std, mean, sqrt
from scipy.stats import ttest_ind, mode, skew, kurtosistest
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Display floats with thousands separators and two decimals in rendered frames.
pd.set_option("display.float_format", "{:,.2f}".format)

In [None]:
# Directory that receives the PNG exports written by the show_* helpers.
img_path = "temp"
# Create it up front so the image export does not fail on a fresh checkout
# where the directory does not exist yet.
os.makedirs(img_path, exist_ok=True)

# Columns that are compared between the two data sets.
fields = ("sampleCount", "throughput", "errorPct", "meanResTime")

In [None]:
def get_all_subdirs(dir):
    """Yield the "Total" entry of ``statistics.json`` for each sub-directory of `dir`.

    Only immediate sub-directories are inspected. They are visited in sorted
    name order so the yielded sequence is reproducible between runs
    (``os.listdir`` returns entries in arbitrary order). A sub-directory that
    has no ``statistics.json`` is skipped silently.

    Args:
        dir: Path of the directory whose immediate sub-directories are scanned.

    Yields:
        The value stored under the top-level ``"Total"`` key of each
        ``statistics.json`` file.

    Raises:
        KeyError: If a ``statistics.json`` has no ``"Total"`` key.
        json.JSONDecodeError: If a ``statistics.json`` is not valid JSON.
    """
    for name in sorted(os.listdir(dir)):
        sub_path = os.path.join(dir, name)
        if not os.path.isdir(sub_path):
            continue
        try:
            with open(os.path.join(sub_path, "statistics.json"), "r", encoding="utf-8") as file:
                stats = json.load(file)
        except FileNotFoundError:
            # Best effort: a sub-directory without a report is ignored.
            continue
        yield stats["Total"]

In [None]:
def cohen_d(x, y):
    """Return Cohen's d of `x` relative to `y` using the pooled standard deviation.

    The difference of the two means is divided by the pooled sample standard
    deviation, where each sample variance (ddof=1) is weighted by its degrees
    of freedom.
    """
    size_x, size_y = len(x), len(y)
    weighted_var_x = (size_x - 1) * std(x, ddof=1) ** 2
    weighted_var_y = (size_y - 1) * std(y, ddof=1) ** 2
    pooled_sd = sqrt((weighted_var_x + weighted_var_y) / (size_x + size_y - 2))
    return (mean(x) - mean(y)) / pooled_sd

In [None]:
# Ask for the directory that holds the "java_outs" and "py_outs" sub-directories.
# Surrounding whitespace (easily picked up when pasting) is removed and a
# leading "~" is expanded, so the path can be used directly with os.path.join.
path_to_outputs = os.path.expanduser(input("insert path to outputs directory").strip())

In [None]:
# java: one row per sub-directory of <outputs>/java_outs that has a statistics.json
path_to_java = os.path.join(path_to_outputs, "java_outs")
java_df = pd.DataFrame(list(get_all_subdirs(path_to_java)))
java_df.head()



In [None]:
# python: one row per sub-directory of <outputs>/py_outs that has a statistics.json
path_to_python = os.path.join(path_to_outputs, "py_outs")
python_df = pd.DataFrame(list(get_all_subdirs(path_to_python)))
python_df.head()

In [None]:
def show_table(field):
    """Show and save a one-row table comparing `field` between the two data sets.

    Runs ``ttest_ind`` (default settings) and ``cohen_d`` on
    ``python_df[field]`` versus ``java_df[field]``, renders the result as a
    plotly table, writes it to ``<img_path>/<field>-table.png`` and displays it.

    Args:
        field: Name of a column present in both ``python_df`` and ``java_df``.
    """
    py_values = python_df[field]
    java_values = java_df[field]

    # Only the p-value is reported; the t statistic itself is not displayed.
    _, pval = ttest_ind(py_values, java_values)
    es = cohen_d(py_values, java_values)

    # The first cell of the value row is the mean of the two sample sizes
    # (integer division), shown under the field-name header.
    result_mat = [
        [field, 'p-value', 'effect size'],
        [(len(py_values) + len(java_values)) // 2, round(pval, 3), round(es, 3)]
    ]

    swt_table = ff.create_table(result_mat)
    swt_table['data'][0].colorscale = [[0, '#2a3f5f'], [1, '#fffffb']]
    swt_table['layout']['height'] = 100

    # Enlarge the text of every table cell.
    for i in range(len(swt_table.layout.annotations)):
        swt_table.layout.annotations[i].font.size = 20

    swt_table.update_layout(title=field)
    png_name = os.path.join(img_path, f"{field}-table.png")
    swt_table.write_image(png_name)
    swt_table.show()

def show_histograms(field):
    """Plot the `field` values of both data sets as histograms.

    The figure holds one histogram trace per data set (python first, then
    java), is titled with `field`, written to
    ``<img_path>/<field>-histogram.png`` and then displayed.
    """
    histogram_fig = go.Figure(
        data=[
            go.Histogram(x=python_df[field], name='python'),
            go.Histogram(x=java_df[field], name='java'),
        ]
    )
    histogram_fig.update_layout(title=field)
    histogram_fig.write_image(os.path.join(img_path, f"{field}-histogram.png"))
    histogram_fig.show()

def show_box(field):
    """Plot the `field` values of both data sets as side-by-side box plots.

    The figure is titled with `field`, written to
    ``<img_path>/<field>-box.png`` and then displayed.
    """
    box_fig = go.Figure()
    # Same trace order as the other plots: python first, then java.
    for label, frame in (("python", python_df), ("java", java_df)):
        box_fig.add_trace(go.Box(y=frame[field], name=label))
    box_fig.update_layout(title=field)
    box_fig.write_image(os.path.join(img_path, f"{field}-box.png"))
    box_fig.show()

def show_diff_mods(field):
    """Placeholder that currently does nothing with `field` and returns None."""
    return None
def plot(field):
    """Produce every visual for `field`: summary table, histograms, then box plots."""
    for render in (show_table, show_histograms, show_box):
        render(field)


In [None]:
# Render the table, histograms and box plot for each configured field.
for f in fields:
    plot(f)

In [None]:
def draw_correlation_matrix(df):
    """Display an annotated heatmap of the pairwise correlations of `df`'s columns.

    Only numeric and boolean columns take part. Non-numeric columns are
    dropped before the correlation is computed, because ``DataFrame.corr``
    raises on them in pandas versions where ``numeric_only`` defaults to False.

    Args:
        df: DataFrame whose numeric columns are correlated (pandas' default
            correlation method).
    """
    df_corr = df.select_dtypes(include=["number", "bool"]).corr()

    x = list(df_corr.columns)
    y = list(df_corr.index)
    z = np.array(df_corr)

    fig = ff.create_annotated_heatmap(
        z,
        x=x,
        y=y,
        # Cell labels are the coefficients rounded to two decimals.
        annotation_text=np.around(z, decimals=2),
        hoverinfo='z',
        colorscale=px.colors.diverging.RdBu,
        # Pin the colour scale to the full correlation range.
        zmin=-1,
        zmax=1
        )
    fig.show()



In [None]:
# Correlation heatmap for the data loaded from "py_outs".
draw_correlation_matrix(python_df)

In [None]:
# Correlation heatmap for the data loaded from "java_outs".
draw_correlation_matrix(java_df)