In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide display defaults: wide, short figures with a grid, and
# two-decimal rendering of floats in DataFrame output.
plt.rcParams.update({
    "figure.figsize": (16, 3),
    "axes.grid": True,
})
pd.set_option("display.float_format", '{:.2f}'.format)

# Both exports are semicolon-separated. The first column becomes the index and
# the 'timestamp' column is parsed as datetimes.
df = pd.read_csv("../csv/global_returns.csv", sep=';', parse_dates=['timestamp'], index_col=0)
ds = pd.read_csv("../csv/session.csv", sep=';', parse_dates=['timestamp'], index_col=0)

# Reusable DataFrame.query() filter: drops QueueConsumer.run(), WebServer.serve()
# and every function whose name contains "pref".
# NOTE(review): the variable name suggests these are long-running calls that
# would dominate elapsed-time statistics - confirm.
no_long_run_qery = '(name != "QueueConsumer.run()") and (name != "WebServer.serve()") and (~name.str.contains("pref"))'

In [None]:
# Friendly thread labels, keyed by the module used to recognise each thread.
names_dict = {
    'als.stack': 'stacker',
    'als.model.params': 'post-processor',
    'als.io.output': "image saver",
    'als.io.network': 'web server',
    'als.io.input': 'file watcher',
}

# For every module: the distinct non-main threads that have rows in that module.
modules_thread_group = (
    df.loc[df["thread"] != 'MainThread']
    .groupby('module')["thread"]
    .unique()
)

# Relabel the first thread listed for each known module with its friendly name.
# Modules that have no non-main-thread rows are skipped.
for module_key, label in names_dict.items():
    if module_key not in modules_thread_group:
        continue
    raw_thread = modules_thread_group[module_key][0]
    df.loc[df["thread"] == raw_thread, 'thread'] = label

In [None]:
# Find the pre-processing worker: the first non-main thread (in order of first
# appearance) that has rows from both 'als.io.input' and 'als.processing',
# and relabel it in df.
threads = df[df.thread != 'MainThread']

# Group once instead of rebuilding the groupby on every iteration. sort=False
# yields groups in first-appearance order, matching the order that iterating
# over threads.thread.unique() would give.
for current, thread_rows in threads.groupby('thread', sort=False):
    used_modules = set(thread_rows.module.unique())
    if {'als.io.input', 'als.processing'} <= used_modules:
        df.loc[df.thread == current, "thread"] = "pre-processor"
        break  # only the first matching thread is relabelled

In [None]:
# Row counts of the two loaded tables, right-aligned in an 8-character column.
n_returns, n_session_points = len(df), len(ds)

print("Available data")
print("==============")
print(f"{n_returns:>8} function returns")
print(f"{n_session_points:>8} session datapoints")

In [None]:
# First and last index values of the returns table, reported as the run bounds.
# NOTE(review): this assumes the rows are stored in chronological order - confirm.
start_time, stop_time = df.index[0], df.index[-1]
print(f"ALS start time {start_time!s}")
print(f"ALS stop time  {stop_time!s}")

In [None]:
# Keep only the 'memory' datapoints and scale the values down by 1024 * 1024.
# NOTE(review): presumably 'value' is in bytes, given the "MB" axis label - confirm.
mem = (
    ds.loc[ds["type"] == 'memory']
    .assign(value=lambda frame: frame["value"] / 1024 / 1024)
)

mem.plot(
    y="value",
    title="Available memory",
    xlabel='',
    ylabel="MB",
    legend=False,
    figsize=(16, 4),
)
plt.show()

In [None]:
# Number of rows (recorded function returns) per module, most frequent first.
module_calls = df["module"].value_counts()
module_calls.plot.bar(title="Function calls by module")
plt.show()

In [None]:
# Number of recorded calls per thread. The Series is named explicitly so the
# column is called 'calls' on every pandas version: value_counts() names its
# result after the source column before pandas 2.0 and 'count' from 2.0 on,
# so renaming a 'thread' column afterwards silently matches nothing there.
data_c = df['thread'].value_counts().rename('calls').to_frame()

# Per-thread totals of the numeric columns, with the functions excluded by
# no_long_run_qery left out. numeric_only is passed by keyword: the first
# positional argument of GroupBy.sum() is that flag, not a column name.
data_t = df.query(no_long_run_qery).groupby(by=['thread']).sum(numeric_only=True)

# Side-by-side table aligned on the thread index, displayed with threads as columns.
whole = pd.concat([data_c, data_t], axis=1)
whole.transpose()

In [None]:
# Share of recorded calls per thread. explode needs one offset per plotted row,
# so it is sized from the frame actually being plotted.
whole.plot(
    kind="pie",
    y='calls',
    title="Function calls by thread",
    figsize=(6, 6),
    ylabel="",
    legend=False,
    explode=[0.1] * len(whole),
)
# Render the figure here so the cell does not echo the Axes repr.
plt.show()

In [None]:
# Share of summed 'elapsed' per thread, each wedge annotated with its percentage
# and pushed slightly outwards.
wedge_offsets = [0.1] * len(data_t)
data_t.plot(
    kind="pie",
    y='elapsed',
    title="elapsed time by thread",
    autopct='%1.1f%%',
    explode=wedge_offsets,
    legend=False,
    ylabel='',
    figsize=(6, 6),
)
plt.show()

### Activity flow by module

In [None]:
# One marker per row of df: timestamp on x, module on y, coloured by thread.
fig, ax = plt.subplots(figsize=(16, 6))
sns.scatterplot(data=df, x='timestamp', y='module', hue='thread', ax=ax)
ax.set_title("Activity flow by module")
plt.show()

### Activity flow by thread

In [None]:
# One marker per row of df: timestamp on x, thread on y, coloured by module.
fig, ax = plt.subplots(figsize=(16, 6))
sns.scatterplot(data=df, x='timestamp', y='thread', hue='module', ax=ax)
ax.set_title("Activity flow by thread")
plt.show()

### Thread / Module affinity

In [None]:
# Bubble chart of which threads have rows in which modules. Marker size follows
# each row's 'elapsed' value; functions excluded by no_long_run_qery are left out.
fig, ax = plt.subplots(figsize=(16, 6))

data = df.query(no_long_run_qery)
sns.scatterplot(
    data=data,
    x='thread',
    y='module',
    size='elapsed',
    sizes=(50, 600),
    legend=False,  # documented value for "no legend"; an empty string only works by being falsy
    ax=ax,
)
ax.set_title("Thread / Module coupling")

plt.show()

### 20 most called functions

In [None]:
# Row count per function name, most frequent first; show the first 20.
call_counts = df["name"].value_counts()
call_counts.iloc[:20]

### 20 longest function execution times on the main thread

In [None]:
# Rows recorded on the main thread, ordered by 'elapsed' descending; show the first 20.
main_thread_calls = df.loc[df["thread"] == "MainThread"]
main_thread_calls.sort_values("elapsed", ascending=False).iloc[:20]

### Top 20 functions by mean execution time

In [None]:
# Mean of the numeric columns per function name, ranked by mean 'elapsed';
# the 20 highest are drawn as a bar chart. Functions excluded by
# no_long_run_qery are left out.
data = df.query(no_long_run_qery)

# numeric_only=True is needed on pandas >= 2.0, where GroupBy.mean() no longer
# drops text columns (e.g. 'module', 'thread') silently and raises TypeError.
(
    data.groupby('name')
    .mean(numeric_only=True)
    .sort_values(by=['elapsed'], ascending=False)
    .head(20)
    .plot(kind='bar')
)
plt.show()