In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import os

# Tick-count threshold; none of the visible cells read it.
# NOTE(review): presumably tick deltas below this value count as measurement
# noise -- confirm before relying on it.
noise_floor = 500.0

# Clock rate in GHz from which the cycle/time conversion factors are derived.
ghz = 1.4

# Cycles elapsed per unit of time.
cycles_per_microsecond = ghz * 1000
cycles_per_second = ghz * 1000 * 1000 * 1000

# Duration of one cycle, expressed in each unit.
microseconds_per_cycle = 1 / cycles_per_microsecond
seconds_per_cycle = 1 / cycles_per_second

In [None]:
# Trace to analyse and the directory holding the helper binaries that later
# cells invoke (printTraceHeader, profilecvnt).
# The original absolute paths remain the defaults; set DMD_TRACE_FILE and/or
# DMD_TOOLS_PATH in the environment to run the notebook on another machine
# without editing it.
trace_file = os.environ.get("DMD_TRACE_FILE", "/home/ubuntu/work/d_do_test.trace")
tools_path = os.environ.get("DMD_TOOLS_PATH", "/home/ubuntu/d/dmd/src")

def readTraceHeader(file_path) :
    """Read the fixed-size header stored at the very start of a trace file.

    Parameters
    ----------
    file_path : path of the trace file (anything ``np.fromfile`` accepts).

    Returns
    -------
    pandas.DataFrame with one column per header field and one row per header
    read (at most one, because ``count=1``).
    """
    # One 64-bit magic value followed by nine 32-bit fields, in this order and
    # without padding between them.
    int32_fields = (
        'FileVersion',
        'n_records', 'n_phases', 'n_kinds', 'n_symbols',
        'offset_records', 'offset_phases', 'offset_kinds',
        'offset_symbol_info_descriptors',
    )
    layout = [('magic', np.int64)] + [(field, np.int32) for field in int32_fields]
    header_dtype = np.dtype(layout)

    raw_header = np.fromfile(file=file_path, dtype=header_dtype, count=1)
    return pd.DataFrame(raw_header, columns=raw_header.dtype.names)

def readProfileRecords(file_path, h, v1_offset = 48) :
    """Read the fixed-size profile records stored in a converted trace file.

    Parameters
    ----------
    file_path : path of the file holding the records.
    h : trace header; only ``h.n_records`` is read. It may be a plain integer
        or a one-element column such as the one in the DataFrame returned by
        ``readTraceHeader``.
    v1_offset : byte offset, from the start of the file, of the first record.

    Returns
    -------
    pandas.DataFrame with one row per record read and one column per field.
    """
    dt_record_v1 = np.dtype([
        ('begin_ticks', np.int64), ('end_ticks', np.int64),
        ('begin_mem', np.int64), ('end_mem', np.int64),
        ('symbol_id', np.int32), ('kind_id', np.uint16), ('phase_id', np.uint16)
    ])
    # When h is a DataFrame, h.n_records is a one-element Series, not an int;
    # np.fromfile wants an integer count, so take the first (only) value.
    n_records = int(np.asarray(h.n_records).reshape(-1)[0])
    # The context manager closes the handle even if the seek or the read fails.
    with open(file_path, "rb") as f:
        f.seek(v1_offset, os.SEEK_SET)
        record_f = np.fromfile(file=f, dtype=dt_record_v1, count=n_records)
    return pd.DataFrame(record_f, columns=record_f.dtype.names)

def readSelfTimes(file_path, h) :
    """Read (symbol_id, self) pairs from a side file of the trace.

    Parameters
    ----------
    file_path : path of the side file; the notebook passes the ".st" and
        ".sm" companions of the trace file.
    h : trace header; only ``h.n_records`` is read. It may be a plain integer
        or a one-element column of the header DataFrame.

    Returns
    -------
    pandas.DataFrame with columns 'symbol_id' (int32) and 'self' (uint32),
    one row per entry read.
    """
    st = np.dtype([('symbol_id', np.int32), ('self', np.uint32)])
    # When h is a DataFrame, h.n_records is a one-element Series, not an int;
    # np.fromfile wants an integer count, so take the first (only) value.
    n_records = int(np.asarray(h.n_records).reshape(-1)[0])
    selftime = np.fromfile(file=file_path, dtype=st, count=n_records)
    return pd.DataFrame(selftime, columns=st.names)

def readParents(file_path, h) :
    """Read the per-record 'parent' values from a side file of the trace.

    Parameters
    ----------
    file_path : path of the side file; the notebook passes the ".pt"
        companion of the trace file.
    h : trace header; only ``h.n_records`` is read. It may be a plain integer
        or a one-element column of the header DataFrame.

    Returns
    -------
    pandas.DataFrame with a single uint32 column 'parent', one row per entry
    read.
    """
    pt = np.dtype([('parent', np.uint32)])
    # When h is a DataFrame, h.n_records is a one-element Series, not an int;
    # np.fromfile wants an integer count, so take the first (only) value.
    n_records = int(np.asarray(h.n_records).reshape(-1)[0])
    parent = np.fromfile(file=file_path, dtype=pt, count=n_records)
    return pd.DataFrame(parent, columns=pt.names)


In [None]:
# Parse the trace header once; later cells pass `h` to the record readers,
# which use its n_records field. The bare `h` displays the parsed header.
h = readTraceHeader(trace_file)
h

In [None]:
# `!` runs a shell command; assigning it captures the command's output lines
# in a Python list.
traceFileBase =  trace_file
printTraceHeader = tools_path + "/printTraceHeader"
# Captured so later cells can search these lines for the "kinds=" and
# "phases=" entries.
HeaderOutput = !$printTraceHeader $traceFileBase Header
# NOTE(review): presumably these two runs write the ".st"/".sm" and ".pt"
# side files that later cells read -- confirm against the tool.
!$printTraceHeader $traceFileBase OutputSelfStats
!$printTraceHeader $traceFileBase OutputParentTable

In [None]:
%%time
# Run the external profilecvnt tool on the trace, then load the profile
# records from the ".v1" file (the cell magic above must stay on line one).
# NOTE(review): presumably profilecvnt is what writes trace_file + ".v1" --
# confirm against the tool.
profileconvert = tools_path + "/profilecvnt"
!$profileconvert $trace_file
records = readProfileRecords(trace_file + ".v1", h)
#records[records['begin_mem'] != records['end_mem']]

In [None]:
# Short alias only: `r` and `records` name the same DataFrame, so the columns
# that later cells add to `r` also appear on `records`.
r = records 

In [None]:
# Take the first captured header line that mentions "kinds=" and split it on
# '#'; later cells index the resulting list with kind_id - 1.
kinds_line = [line for line in HeaderOutput if "kinds=" in line][0]
kinds = kinds_line.split('#')

In [None]:
# Take the first captured header line that mentions "phases=" and split it on
# '#'; later cells index the resulting list with phase_id - 1.
phases_line = [line for line in HeaderOutput if "phases=" in line][0]
phases = phases_line.split('#')
phases

In [None]:

# Inclusive cost of each record: the end reading minus the begin reading.
r['taken_ticks'] = r['end_ticks'].sub(r['begin_ticks'])
# 'self' values read from the ".st" side file, matched to records by row label.
r['self_ticks'] = readSelfTimes(trace_file + ".st", h).loc[:, 'self']
r['mem_taken'] = r['end_mem'].sub(r['begin_mem'])
# Same reader and layout, this time applied to the ".sm" side file.
r['self_mem'] = readSelfTimes(trace_file + ".sm", h).loc[:, 'self']
# 'parent' values read from the ".pt" side file.
r['parent'] = readParents(trace_file + ".pt", h).loc[:, 'parent']

In [None]:
def SwapIndex(s) :
    """Return a Series whose index holds the values of ``s`` and whose values
    hold the index labels of ``s`` (index and values trade places)."""
    former_labels = s.index.values
    return pd.Series(data=former_labels, index=s)

# Fraction of all records per kind_id, largest first, re-keyed by a
# (kind_id, matching `kinds` entry) pair.
kind_share = (r['kind_id'].value_counts() / len(r)).sort_values(ascending = False)
kind_labels = SwapIndex(kind_share).apply(lambda kind_id: (kind_id, kinds[kind_id-1]))
SwapIndex(kind_labels)
#kinds[5 - 1]
#SwapIndex(r[r['self_mem'] > 1000].sort_values('self_mem', ascending=False).head(12)['phase_id'].value_counts()).apply(lambda x : phases[x-1])

In [None]:
#SwapIndex(SwapIndex((r['phase_id'].value_counts() / len(r)).sort_values(ascending = False)).apply(lambda x: phases[x-1])).plot(kind='bar', figsize=(21,21)) 
# Fraction of all records per phase_id, largest first, re-keyed by a
# (phase_id, matching `phases` entry) pair.
phase_share = (r['phase_id'].value_counts() / len(r)).sort_values(ascending = False)
phase_labels = SwapIndex(phase_share).apply(lambda phase_id: (phase_id, phases[phase_id-1]))
SwapIndex(phase_labels)


In [None]:
# Median and mean of the inclusive ticks, then of the self ticks; the closing
# tuple is what the cell displays.
taken_ticks_median, taken_ticks_avg = r['taken_ticks'].median(), r['taken_ticks'].mean()
self_ticks_median, self_ticks_avg = r['self_ticks'].median(), r['self_ticks'].mean()
(taken_ticks_median, taken_ticks_avg, self_ticks_median, self_ticks_avg)

In [None]:
# Keep the records whose self_mem exceeds 20, largest first. reset_index()
# renumbers the rows and keeps the previous labels in an 'index' column.
filtered_r = (
    r.loc[r['self_mem'].gt(20)]
    .sort_values(['self_mem'], ascending = False)
    .reset_index()
)
max_self_mem = filtered_r['self_mem'].max()
# Count the kept records per phase_id, then re-key the counts by the matching
# `phases` entry so the bars carry those labels.
phase_counts = filtered_r['phase_id'].value_counts()
plot_r = SwapIndex(SwapIndex(phase_counts).apply(lambda phase_id : phases[phase_id-1]))
#plot_r = filtered_r.head(13000).tail(7000).reset_index()['mem_taken'] #.to_csv("outlier_corrected_subset_of_symbols_taking_the_most_selftime.csv")
plt.style.use('ggplot')
plot_r.plot(kind="barh", figsize=(24,24),  legend = False)
#r.head()

In [None]:
# Saves pyplot's current figure. This cell creates no figure itself, so it
# relies on the figure from the preceding plotting cell still being current.
# NOTE(review): the name is appended straight onto trace_file with no
# separator -- confirm that this is the intended output file name.
plt.savefig(trace_file + "phases in comparision.png")

In [None]:
# Plot begin_ticks for every record, in row order.
# The filtered_r lookup that used to open this cell is gone: it was not the
# cell's last expression, so its result was never displayed or stored.
f = r['begin_ticks'].plot(legend=True, figsize = (12, 12), title = "Cummulative times of symbols")
f.set_xlabel("Symbol/Phase")
f.set_ylabel("Time per Symbol/Phase")
#f.yscale('linear')
#f.xscale('linear')
#plt.savefig(trace_file + "time_vs_memory.png")
# Bare name as the final expression so the cell output shows the Axes object.
f


In [None]:
# Number of rows in filtered_r, i.e. the records kept by the self_mem > 20
# filter in the plotting cell above.
len(filtered_r)