In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.patches as patches
import seaborn as sns
import numpy as np
import os
import glob 
import re
from collections import defaultdict
from scipy import stats
import subprocess
from math import floor, ceil
import requests

# Apply matplotlib's built-in 'ggplot' style sheet to every figure created below.
plt.style.use('ggplot')

In [None]:
def interactiveCountWindows(N, WS, WA):
    """Enumerate every complete sliding window over N tuples and print it.

    Brute-force enumeration: prints one line per window in the form
    ``<counter>: <start> - <end>`` and finally ``Counter = <count>``.

    Parameters
    ----------
    N : number of tuples available.
    WS : window size.
    WA : window slide (advance); must be positive.

    Raises
    ------
    ValueError
        If ``WA`` is not positive: the start index would never move forward
        and the enumeration would not terminate.
    """
    if WA <= 0:
        raise ValueError(f'WA must be positive, got {WA}')
    windowIndex = 0
    windowCounter = 0
    # A window is counted only if it fits entirely inside the N tuples.
    while (windowIndex+WS) <= N:
        print(f'{windowCounter}: {windowIndex} - {windowIndex+WS}')
        windowIndex += WA
        windowCounter += 1
    print(f'Counter = {windowCounter}')

def countWindows(N, WS, WA):
    """Closed-form number of complete sliding windows over N tuples.

    Parameters
    ----------
    N : number of tuples available.
    WS : window size.
    WA : window slide; may be a scalar or a NumPy array (evaluated elementwise).

    Returns
    -------
    ``(N - WS) // WA + 1``, clamped at 0 so that a stream shorter than one
    window yields zero windows instead of a negative count. Scalar inputs
    return a plain Python number; array inputs return an array.
    """
    windows = ((N-WS)//WA) + 1
    if np.ndim(windows) == 0:
        # Scalar path: keep Python numbers as Python numbers.
        return max(windows, 0)
    return np.maximum(windows, 0)
    
# Regular GeneaLog
def countGLPointers(N, WS, WA):
    """Pointer count for the regular GeneaLog representation.

    N: Total aggregate provenance tuples alive
    WS: Window size
    WA: Window Slide

    Returns ``min(WS * windows, N) + 2 * windows`` where ``windows`` is
    ``countWindows(N, WS, WA)``; works elementwise when WA is an array.
    """
    numWindows = countWindows(N, WS, WA)
    # Per-tuple term is capped at N; each window adds two more pointers.
    tuplePointers = np.minimum(WS*numWindows, N)
    return tuplePointers + 2 * numWindows

# GeneaLog with Lists
def countGLLSPointers(N, WS, WA):
    """Pointer count for the list-based GeneaLog representation.

    Every window contributes ``WS + 1`` pointers, so the total is
    ``(WS + 1) * countWindows(N, WS, WA)``.
    """
    pointersPerWindow = WS + 1
    return pointersPerWindow * countWindows(N, WS, WA)

def testRelation(WS, WA):
    if WA <= WS:
        return (WS+1)/(WA+2) 
    else:
        return (WS + 1)/(WS+2)

In [None]:
# Compare the pointer counts of both representations over a range of slides,
# normalised by the largest regular-GeneaLog value.
N = 1e6
WS = 100
# N is a float, so cast the upper bound to int before handing it to randint.
K = np.random.randint(1, int(N//WS))
assert K*WS <= N, 'K is too large!'
# Slides from 2% of the window size up to (but excluding) twice the window size.
WA = np.arange(floor(0.02*WS), 2*WS, 1)
fig, ax = plt.subplots()

waws = WA/WS
glValues = countGLPointers(N, WS, WA)
normValue = np.max(glValues)
gllsValues = countGLLSPointers(N, WS, WA)
# testRelation only accepts scalars, so evaluate it once per slide value.
approxValues = glValues * np.array([testRelation(WS, iwa) for iwa in WA])
ax.plot(waws, glValues / normValue, label='GL Pointer')
ax.plot(waws, gllsValues / normValue, label='GL List')
ax.plot(waws, approxValues / normValue, label='Analytical Approximation')
ax.legend()
ax.set_yscale('log')
# The x values are slide / size (waws), so label the axis accordingly.
ax.set_xlabel('$WA/WS$')
ax.set_ylabel('Relative Memory')
ax.set_title('Memory Consumption Comparison')
fig.savefig('memcmp.pdf')
plt.show()

In [None]:
# Sweep stream sizes and window sizes, recording the list/pointer memory ratio
# for five slide values per window size.
dataframes = []
tupleNumbers = [1e4, 1e5, 1e6]
# Window sizes do not depend on N, so build them once: 1, 10, ..., 10^4.
windowSizes = [floor(x) for x in np.logspace(0, 4, 5)]
for N in tupleNumbers:
    for WS in windowSizes:
        WA = np.linspace(floor(0.05*WS), WS, 5)
        # floor(0.05*WS) is 0 for small windows; a zero slide would divide by
        # zero inside countWindows and produce NaN ratios, so drop it.
        WA = WA[WA > 0]
        R = countGLLSPointers(N, WS, WA)/countGLPointers(N, WS, WA)
        data = pd.DataFrame({'N': np.repeat(N, WA.shape[0]),
                    'WS': np.repeat(WS, WA.shape[0]),
                   'WA/WS': np.round(WA/WS, 3),
                   'Ratio': R
                  })
        dataframes.append(data)
df = pd.concat(dataframes, ignore_index=True)

In [None]:
# Bar chart of the memory ratio per relative slide: one facet row per N,
# one bar colour per window size.
g = sns.catplot(
    data=df,
    kind='bar',
    x='WA/WS',
    y='Ratio',
    hue='WS',
    row='N',
)
# Rotate the x tick labels so the slide values stay readable.
g.fig.autofmt_xdate()
# ax.set_title('Memory Ratio: $\\frac{Arrays}{PointersList}$')

In [None]:
# Parameters of the simplified window-pointer model plotted in this cell.
REF_SIZE = 1  # tuple size, bytes
N = 1000  # number of tuples
m = N // 10  # one tenth of N; passed to gl/lst as their first argument
# Slide values: from 10% of m up to (excluding) m, in steps of 5.
s = pd.Series(np.arange(floor(0.1 * m), m, 5))

def gl(m, s, n=None):
    """Return ``(n - m) / s + n`` for first argument m and slide s.

    Parameters
    ----------
    m : value subtracted from n in the numerator.
    s : slide; scalar or pandas Series (evaluated elementwise).
    n : number of tuples. Defaults to the notebook-level ``N`` at call time,
        which keeps existing ``gl(m, s)`` calls working; pass it explicitly to
        avoid depending on whichever cell last assigned ``N``.
    """
    if n is None:
        n = N
    return ((n-m)/s) + n

def lst(m, s, n=None):
    """Return ``(n - m) / s * m`` for first argument m and slide s.

    Parameters
    ----------
    m : value subtracted from n and used as the final multiplier.
    s : slide; scalar or pandas Series (evaluated elementwise).
    n : number of tuples. Defaults to the notebook-level ``N`` at call time,
        which keeps existing ``lst(m, s)`` calls working; pass it explicitly
        to avoid depending on whichever cell last assigned ``N``.
    """
    if n is None:
        n = N
    return ((n-m)/s) * m

# Plot both models against the slide expressed as a fraction of m.
x = s/m
glVal = gl(m, s)
lstVal = lst(m, s)
fig, ax = plt.subplots()
ax.plot(x, glVal)
ax.plot(x, lstVal)
# x is the ratio s/m (0.1 ... <1), not a percentage, so label it as a fraction.
ax.set(xlabel=r'Window Slide (fraction of window size)', ylabel=r'Relative Memory Consumption')
ax.legend(['Genealog', 'List'])
# Derive the tuple count in the title from N so the two cannot drift apart.
ax.set_title(f'Memory Consumption of window pointers for {N} in-memory tuples')
plt.show()


In [None]:
# List the task managers known to the local REST endpoint and map id -> path.
# NOTE(review): presumably a Flink REST API on port 9000 — confirm.
# A timeout stops the cell from hanging forever when the service is down, and
# raise_for_status reports HTTP errors directly instead of as a later KeyError.
taskManagersResponse = requests.get('http://localhost:9000/taskmanagers', timeout=10)
taskManagersResponse.raise_for_status()
taskManagers = taskManagersResponse.json()
ids = {tm['id']: tm['path'] for tm in taskManagers['taskmanagers']}
ids

In [None]:
# For every task manager, print its non-heap JVM memory metric and the host
# part of its path (`re` is already imported in the notebook's first cell).
# The host part is whatever sits between '//' and the following '/'.
hostPattern = re.compile(r'.*\/\/([^\/]+)\/.*')
for key, path in ids.items():
    # Bounded wait so one unresponsive endpoint cannot block the whole loop.
    print(requests.get(f'http://localhost:9000/taskmanagers/{key}/metrics?get=Status.JVM.Memory.NonHeap.Used', timeout=10).json())
    hostMatch = hostPattern.match(path)
    # Fall back to the raw path when it has no '//host/' section rather than
    # failing with AttributeError on a None match.
    print(hostMatch.group(1) if hostMatch else path)
    