In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
import scipy
import scipy.stats
from statsmodels.stats.multitest import multipletests


def filter_functions(callstacks, names):
  """Restrict every callstack to the functions in `names`, dropping short ones.

  Each callstack is reduced to the entries that are contained in `names`
  (original order kept). Reduced callstacks with fewer than two entries are
  discarded from the returned list.
  """
  kept = []
  for callstack in callstacks:
    reduced = [function for function in callstack if function in names]
    if len(reduced) > 1:
      kept.append(reduced)
  return kept

class Capture:
  """Sampled callstacks of one capture, bucketed by the frame they fall into.

  Attributes:
    frame_mean: mean frame time exactly as passed to the constructor.
    frame_var: the constructor's `frame_var` divided by 1e12, i.e. on the same
      scale as avg_frame_time(), which divides `frame_mean` by 1e6.
    frame_ends: array of frame end timestamps used as bucket boundaries.
      # assumes ascending order -- TODO confirm for callers other than
      # load_capture_with_frametrack
    callstack_csv: 2-D array of samples; column 1 holds the timestamp and
      column 2 the '/'-separated callstack. Rows whose timestamp is not
      greater than `init_end_time` are dropped, the rest sorted by timestamp.
    frame_index_to_callstacks: dict frame_index -> list of callstacks, each
      callstack being a list of function names.
    total_callstacks: number of callstacks bucketed at construction time. It
      is NOT recomputed by filter_functions(), so rates stay relative to the
      unfiltered sample count.
  """

  def __init__(self, callstack_csv, frame_mean, frame_var, frame_ends, init_end_time): 
    """Drop the initialization period, sort the samples and bucket them.

    `init_end_time` is used to throw out the initialization period, when the
    benchmark has not been started yet but the frame time is excessively long.
    """
    self.frame_mean = frame_mean
    self.frame_var = frame_var / 1e6 / 1e6
    self.frame_ends = frame_ends

    past_init = callstack_csv[:, 1] > init_end_time
    samples = callstack_csv[past_init, :]
    self.callstack_csv = samples[samples[:, 1].argsort()]  # sort by timestamp

    self.frame_index_to_callstacks = self.get_frame_index_to_callstacks()
    # Stored privately and exposed through the `total_callstacks` property:
    # a plain attribute of that name used to shadow the method of the same
    # name, which made the method impossible to call on an instance.
    self._total_callstacks = sum(len(s) for s in self.frame_index_to_callstacks.values())

  @property
  def total_callstacks(self):
    """Number of callstacks that were bucketed into frames at construction."""
    return self._total_callstacks

  @total_callstacks.setter
  def total_callstacks(self, value):
    # Kept writable because it used to be a plain instance attribute.
    self._total_callstacks = value

  def filter_functions(self, names):
    """Apply the module-level filter_functions() to every frame's callstacks."""
    filtered = {}
    for frame_index, callstacks in self.frame_index_to_callstacks.items():
      filtered[frame_index] = filter_functions(callstacks, names)
    self.frame_index_to_callstacks = filtered

  # returns a dict frame_index -> List[Callstack]
  def get_frame_index_to_callstacks(self): 
    """Assign each sample to the first frame whose end is >= its timestamp.

    Samples that come after the last frame end are dropped. Frames that
    receive no sample still get an (empty) entry once they have been reached.
    """
    frame_count = len(self.frame_ends)
    if frame_count == 0:
      # No frame boundaries: nothing can be attributed to a frame.
      return {}

    frame_index = 0
    frame_end = self.frame_ends[frame_index]
    frame_index_to_callstacks = {frame_index: []}
    callstack_names = [s.split('/') for s in self.callstack_csv[:, 2]]
    timestamps = self.callstack_csv[:, 1]
    for timestamp, callstack in zip(timestamps, callstack_names):
      # Advance to the frame that contains this timestamp.
      while timestamp > frame_end:
        if frame_index + 1 == frame_count:
          return frame_index_to_callstacks
        frame_index += 1
        frame_end = self.frame_ends[frame_index]
        frame_index_to_callstacks[frame_index] = []
      frame_index_to_callstacks[frame_index].append(callstack)
    return frame_index_to_callstacks

  def get_exclusive_count(self, function_name):
    """Count the callstacks whose first entry is `function_name`."""
    return sum(
      1
      for callstacks in self.frame_index_to_callstacks.values()
      for callstack in callstacks
      if len(callstack) > 0 and callstack[0] == function_name
    )

  def avg_frame_time(self):
    """Return `frame_mean` divided by 1e6."""
    return self.frame_mean / 1e6

  def get_exclusive_rate(self):
    """Map each function to (callstacks it heads) / total_callstacks.

    Returns a defaultdict that yields 0 for functions that head no callstack.
    """
    name_to_hits = {}
    for callstacks in self.frame_index_to_callstacks.values():
      for callstack in callstacks:
        if not callstack:
          # Same guard as get_exclusive_count(): an empty callstack has no head.
          continue
        head = callstack[0]
        name_to_hits[head] = name_to_hits.get(head, 0) + 1
    total_callstacks = self.total_callstacks
    return defaultdict(lambda : 0, {name: hits / total_callstacks for name, hits in name_to_hits.items()})

  def time_per_frame(self): 
    """Map each function to avg_frame_time() * its exclusive rate (default 0)."""
    mean_frame_time = self.avg_frame_time()
    return defaultdict(lambda : 0, {name: mean_frame_time * rate for name, rate in self.get_exclusive_rate().items()})

  def function_names(self):
    """Return the distinct function names over all callstacks (no fixed order)."""
    distinct = set()
    for callstacks in self.frame_index_to_callstacks.values():
      for callstack in callstacks:
        distinct.update(callstack)
    return list(distinct)


In [2]:
# Thread-state table: read without a header row, discard columns 0 and 1,
# and keep the remaining columns as a plain array.
states = (
  pd.read_csv("~/cumbia_threadstates/mainthreadstates1.csv", header=None)
  .drop(columns=[0, 1])
  .to_numpy()
)

# Frame-track table as a plain array.
frametrack_csv = pd.read_csv('~/cumbia_threadstates/frametrack1.csv').to_numpy()

In [3]:
def intersection(start1, end1, start2, end2):
  """Length of the overlap between [start1, end1] and [start2, end2].

  Returns 0 when either interval starts after the other one ends.
  """
  disjoint = start2 > end1 or start1 > end2
  if disjoint:
    return 0
  overlap_start = max(start1, start2)
  overlap_end = min(end1, end2)
  return overlap_end - overlap_start

def get_running_durations(start, end, states):
  """Total overlap of [start, end] with the interval (row[0], row[1]) of every row in `states`."""
  return sum(intersection(start, end, row[0], row[1]) for row in states)


# Per frame: 1 - (overlap of the span [column 2, column 3] with `states`) / column 4.
durations = [
  1 - get_running_durations(frame[2], frame[3], states) / frame[4]
  for frame in frametrack_csv
]

KeyboardInterrupt: 

In [2]:
def active_frame_time(callstack_csv, frametrack_csv, sampling_interval):
  """Estimate per-frame active time from the number of samples in each frame.

  For every row of `frametrack_csv` the samples whose timestamp (column 1 of
  `callstack_csv`) lies strictly between the row's column 2 and column 3 are
  counted and multiplied by `sampling_interval`.

  Prints the per-frame values divided by 1e6 together with the frame count,
  and returns (mean of the per-frame values, their variance / frame count).
  """
  sample_times = callstack_csv[:, 1]
  per_frame = np.array([
    np.sum(np.logical_and(frame[2] < sample_times, sample_times < frame[3])) * sampling_interval
    for frame in frametrack_csv
  ])
  frame_count = per_frame.shape[0]
  print(per_frame/1e6, frame_count)
  return per_frame.mean(), per_frame.var() / frame_count


In [14]:
def load_capture_with_frametrack(callstack_csv_path, frametrack_csv_path, max_frametime, thread_running_path=None, sampling_interval = 2e5):
  """Load a callstack CSV and a frametrack CSV and build a Capture from them.

  Args:
    callstack_csv_path: CSV of samples; column 1 is read as the timestamp.
    frametrack_csv_path: CSV of frames; column 2 is read as the frame start,
      column 3 as the frame end and column 4 as the frame duration.
    max_frametime: frames whose duration exceeds this are treated as part of
      the initialization period. Everything up to and including the LAST such
      frame is excluded. If no frame exceeds it, nothing is excluded.
    thread_running_path: optional CSV path.
      NOTE(review): the file is read but its contents are not used yet.
    sampling_interval: passed to active_frame_time(); its square is also added
      to the frame variance handed to Capture.
      # presumably to account for sampling resolution -- TODO confirm

  Returns:
    A Capture whose frame mean/variance come from active_frame_time().

  Side effect: prints the offset between the first kept frame's start and the
  earliest sample timestamp.
  """
  frametrack_csv = pd.read_csv(frametrack_csv_path).to_numpy()
  frametrack_csv = frametrack_csv[frametrack_csv[:, 3].argsort()] # sort by end
  callstack_csv = pd.read_csv(callstack_csv_path).to_numpy()

  start = np.min(callstack_csv[:, 1])
  frame_durations = frametrack_csv[:, 4]
  frame_ends = frametrack_csv[:, 3]

  init_frames = (frame_durations > max_frametime).nonzero()[0]
  if init_frames.size > 0:
    last_init_frame = init_frames.max()
    init_end_time = frametrack_csv[last_init_frame, 3]
    # NOTE(review): frame_ends keeps the last init frame's end (slice starts at
    # last_init_frame, not last_init_frame + 1), so frame index 0 of the
    # resulting Capture never receives a sample. Kept as in the original.
    frame_ends = frame_ends[last_init_frame:]
    frametrack_csv = frametrack_csv[(last_init_frame+1):, :]
  else:
    # No over-long frame: previously np.max() on an empty selection raised
    # ValueError. Keep every frame and every sample instead.
    init_end_time = -np.inf

  print(frametrack_csv[0][2] - start)

  if thread_running_path is not None:
    # NOTE(review): loaded but currently unused.
    thread_states = pd.read_csv(thread_running_path)

  # frame_mean = frame_durations.mean()
  # frame_var = frame_durations.var() / frame_durations[(last_init_frame+1):].shape[0]

  frame_mean, frame_var = active_frame_time(callstack_csv, frametrack_csv, sampling_interval)

  return Capture(callstack_csv, frame_mean, frame_var + sampling_interval**2, frame_ends, init_end_time)

In [4]:
# capture1 -- slow
def test(capture1, rates1, capture2, rates2, name):
  n1 = capture1.total_callstacks
  n2 = capture2.total_callstacks
  p1 = rates1[name] 
  p2 = rates2[name]  
  f1 = capture1.avg_frame_time()
  f2 = capture2.avg_frame_time()

  stat = f1*p1 - f2*p2
  var_p1 = p1 * (1 - p1) / n1 
  var_p2 = p2 * (1 - p2) / n2
  var_f1 = capture1.frame_var
  var_f2 = capture2.frame_var 

  var1 = var_f1 * var_p1 + var_p1 * f1**2 + var_f1 * p1**2
  var2 = var_f2 * var_p2 + var_p2 * f2**2 + var_f2 * p2**2
  stat = stat / np.sqrt(var1 + var2)
  
  p = scipy.stats.norm().cdf(stat)
  if (np.isnan(p)): 
    return 1.0
  return min(p, 1-p) * 2

In [5]:
def get_idle_intervals(callstack, threshold = 20e5):
  """Find gaps between consecutive sample timestamps that exceed `threshold`.

  Args:
    callstack: 2-D array whose column 1 holds the sample timestamps (any order).
    threshold: minimum difference between two consecutive (sorted) timestamps
      for the gap to be reported.

  Returns:
    Array of shape (k, 2) with one [gap_start, gap_end] row per gap. When no
    gap is found the shape is (0, 2), so column indexing works either way.
  """
  times = np.sort(callstack[:,1])
  intervals = [
    [earlier, later]
    for earlier, later in zip(times[:-1], times[1:])
    if later - earlier > threshold
  ]
  # np.array([]) alone would have shape (0,); force two columns.
  return np.array(intervals).reshape(-1, 2)

In [17]:
# capture1 = load_cumbia_capture_with_results_json("~/cumbia_vsync_callstack1_thread.csv", '/usr/local/google/home/avanesov/result_vsync1.json')
# capture2 = load_cumbia_capture_with_results_json("~/cumbia_vsync_callstack2_thread.csv", '/usr/local/google/home/avanesov/result_vsync2.json')
# capture1 = load_capture_with_frametrack("~/cumbia_callstack_thread4k1.csv", "~/cumbia_frametrack4k1.csv",  85*1e6)
# capture2 = load_capture_with_frametrack("~/cumbia_callstack_thread4k2.csv", "~/cumbia_frametrack4k2.csv",  85*1e6)
capture1 = load_capture_with_frametrack("~/wincum/win_fhd_callstack.csv", "~/wincum/win_fhd_frametrack.csv",   85*1e6)
capture2 = load_capture_with_frametrack("~/wincum/win_fhd_callstack1.csv", "~/wincum/win_fhd_frametrack1.csv",   85*1e6)
# capture2 = load_capture_with_frametrack("~/ggpcumpackage/fhd2_callstack.csv", "~/ggpcumpackage/fhd2_frametrack.csv",   85*1e6)
# capture1 = load_capture_with_frametrack( "~/ggpcumpackage/4k1_callstack.csv", "~/ggpcumpackage/4k1_frametrack.csv",  85*1e6)
# capture2 = load_capture_with_frametrack( "~/ggpcumpackage/4k2_callstack.csv", "~/ggpcumpackage/4k2_frametrack.csv",  85*1e6)


# Only functions seen in BOTH captures can be compared.
names1 = capture1.function_names()
names2 = capture2.function_names()
names = set(names1).intersection(set(names2))
# Drop the "???" placeholder name. discard() instead of remove(): a capture
# pair without any "???" entry must not abort the cell with KeyError.
names.discard("???")
capture1.filter_functions(names)
capture2.filter_functions(names)

13403585600
[21.  19.4 15.8 ...  5.4  5.4  0. ] 2117
6982903600
[16.2 19.6 13.4 ...  4.8  4.4  0. ] 2172


In [18]:
# Show both captures' frame_mean and the square root of capture1's frame_var.
# frame_mean is stored unscaled, whereas Capture.__init__ divides frame_var by
# 1e12, so the two kinds of value are on different scales.
capture1.frame_mean, capture2.frame_mean, np.sqrt(capture1.frame_var)

(11137647.61454889, 10810865.56169429, 0.21664852501670367)

In [19]:
# Exclusive rate per function for each capture.
rate1, rate2 = capture1.get_exclusive_rate(), capture2.get_exclusive_rate()

# One p-value per shared function, then a multiple-testing correction
# (multipletests with its default method and alpha).
pvalues = [(fn, test(capture1, rate1, capture2, rate2, fn)) for fn in names]
reject, corrected, _, _ = multipletests([p for _fn, p in pvalues])

# Names in the same order as `reject`, so the boolean mask selects the rejected ones.
namesnp = np.array([fn for fn, _p in pvalues])
rejected_names = namesnp[np.array(reject)]


  stat = stat / np.sqrt(var1 + var2)
  np.log1p(-pvals))


In [20]:
# Corrected p-values that are below 0.05.
corrected[corrected < 0.05]

array([], dtype=float64)

In [11]:
# (number of rejected hypotheses, number of hypotheses tested)
np.sum(reject), len(reject)

(59, 108)

In [57]:
# Show frame_mean (unscaled) and sqrt(frame_var) for both captures; frame_var
# was divided by 1e12 in Capture.__init__, so the scales differ.
capture1.frame_mean, capture2.frame_mean, np.sqrt(capture1.frame_var), np.sqrt(capture2.frame_var)

(14346241.290795745,
 14219258.720930232,
 0.2150544339702751,
 0.21604799848295317)

In [58]:
# One tuple per rejected function:
#   (name, rate in capture1, rate in capture2,
#    frame_mean * rate for capture1, frame_mean * rate for capture2,
#    ratio of the capture2 product to the capture1 product)
[(name, rate1[name], rate2[name], capture1.frame_mean*rate1[name], capture2.frame_mean*rate2[name], capture2.frame_mean/capture1.frame_mean*rate2[name]/rate1[name])  for name in rejected_names]

[('eva::graphics::rendering::rgba_blend_shader_inputs::update(float)',
  0.0004952871133441581,
  0.0008678517096678681,
  7105.508436256994,
  12340.207991169045,
  1.7367100611971915),
 ('vk::entry::vkCmdBindVertexBuffers(VkCommandBuffer_T*, unsigned int, unsigned int, VkBuffer_T* const*, unsigned long const*)',
  0.0016186187106195685,
  0.0024861398976955985,
  23221.094580345023,
  35351.066421760734,
  1.5223686506011156),
 ('eva::concurrent::thread_aware_task_sink::execute_task(bool)',
  0.005861748516691686,
  0.004181003236576376,
  84094.05860642297,
  59450.76673392615,
  0.7069556127879097)]

In [None]:
# Sort ascending by p-value, in place.
# NOTE: `reject`, `corrected` and `namesnp` were built from the pre-sort order
# of `pvalues`, so after this they no longer line up with it index-for-index.
pvalues.sort(key = lambda x : x[1]) 


In [None]:
# Spot-check the test on a single function. `names` is a set, which has no
# defined order, so list(names)[10] is an arbitrary element.
name = list(names)[10]
test(capture1, rate1, capture2, rate2, name)

In [None]:
# Display the list of (function name, p-value) pairs.
pvalues

In [None]:
# Take the function of the first `pvalues` entry and show its rate in each
# capture. It is the smallest p-value only if `pvalues` has been sorted first.
problem_name= pvalues[0][0]
rate1[problem_name], rate2[problem_name]

In [None]:
# Re-run the test for that single function.
test(capture1, rate1, capture2, rate2, problem_name)

In [None]:
# Number of rejected functions that are also in names_with_high_overhead.
# NOTE: names_with_high_overhead is only assigned in a later cell of this
# notebook; on a fresh kernel this raises NameError unless that cell ran first.
len(set(rejected_names).intersection(names_with_high_overhead))

In [None]:
# Display names_with_high_overhead (assigned in a later cell of this notebook,
# so that cell has to be executed before this one).
names_with_high_overhead

In [None]:
def overhead_per_function(capture1, capture2, function_names=None):
  """Share of the frame-time difference attributed to each function.

  For every function the difference of its time_per_frame() between the two
  captures is divided by the difference of the captures' avg_frame_time().

  Args:
    capture1, capture2: objects providing avg_frame_time() and time_per_frame().
    function_names: iterable of function names to evaluate. Defaults to the
      notebook-level `names`, which is what this function always used before.

  Returns:
    List of (name, share) tuples in the iteration order of `function_names`.

  Note: the shares are undefined when both captures have the same average
  frame time (division by zero).
  """
  if function_names is None:
    function_names = names

  frame_time_delta = capture1.avg_frame_time() - capture2.avg_frame_time()

  time_per_frame1 = capture1.time_per_frame()
  time_per_frame2 = capture2.time_per_frame()

  return [
    (name, (time_per_frame1[name] - time_per_frame2[name]) / frame_time_delta)
    for name in function_names
  ]

# Rank functions by their share of the frame-time difference, largest first.
overheads = sorted(overhead_per_function(capture1, capture2), key=lambda entry: -entry[1])
overheads_dict = {fn: share for fn, share in overheads}
# Functions whose share exceeds 0.01.
names_with_high_overhead = [fn for fn in names if overheads_dict[fn] > 0.01]

In [None]:
# Column 4 of two frametrack CSVs, one array per file.
frametrack1_path, frametrack2_path = '~/cumbia_frametrack8.csv', '~/cumbia_frametrack9.csv'
frametrack1, frametrack2 = (
  pd.read_csv(csv_path).to_numpy()[:, 4]
  for csv_path in (frametrack1_path, frametrack2_path)
)

In [None]:
# Display column 4 of the first frametrack CSV (the column that
# load_capture_with_frametrack reads as the frame durations).
frametrack1

In [None]:
import scipy
from scipy.stats import ttest_ind

In [None]:
# Two-sample t-test on the two column-4 arrays; equal_var=False selects the
# variant that does not assume equal variances (Welch's t-test).
ttest_ind(frametrack1, frametrack2, equal_var=False)

In [None]:
# Mean of column 4 for each of the two frametrack CSVs.
frametrack1.mean(), frametrack2.mean()