In [None]:
%matplotlib inline

import common_libs.utilities as ut
import pandas as pd
import scipy.special
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Connection handle passed to the pd.read_sql calls in the cells below.
# NOTE(review): presumably a database connection configured inside
# common_libs.utilities -- confirm which database/schema it points at.
cnx = ut.create_connection()

In [None]:
# Load the rows of deep_perm_times and deep_perms, each keyed by its own row id.
times_query = 'SELECT time_id, code_id, time FROM deep_perm_times'
perms_query = 'SELECT perm_id, code_id, code_intel FROM deep_perms'

times = pd.read_sql(times_query, cnx).set_index('time_id')
perms = pd.read_sql(perms_query, cnx).set_index('perm_id')

In [None]:
def get_stats(time_df):
    """Broadcast per-``code_id`` timing statistics back onto every timing row.

    Parameters
    ----------
    time_df : pandas.DataFrame
        Needs ``code_id`` and ``time`` columns; its index supplies the
        ``time_id`` labels of the result.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``time_df`` (same order), indexed by ``time_id``,
        with columns ``median``, ``mean``, ``std`` and ``count`` computed over
        all rows that share that row's ``code_id``.
    """
    summary_funcs = ['median', 'mean', 'std', 'count']
    per_code = time_df.groupby('code_id').agg({'time': summary_funcs})['time']
    # Looking up the (repeating) code_id column by label yields one statistics
    # row per timing row; the timing row ids are then attached positionally.
    return (
        per_code
        .loc[time_df['code_id']]
        .assign(time_id=time_df.index)
        .set_index('time_id')
    )

In [None]:
# Pass 1: score every sample against its code_id's median, in units of that
# code_id's standard deviation, and keep only samples scoring below 2.
# Samples whose score is NaN fail the comparison and are dropped as well.
stats = get_stats(times)
median_score = (times['time'] - stats['median']) / stats['std']
real_times = times[median_score.abs() < 2]

# Pass 2: recompute the statistics on the surviving samples, then express each
# sample's distance from its code_id mean in raw units and in std units.
real_stats = get_stats(real_times)
time_diffs = real_times['time'] - real_stats['mean']
z_scores = time_diffs / real_stats['std']

In [None]:
# Distribution of each retained sample's deviation from its code_id mean,
# in raw time units; NaN entries are removed before plotting.
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11 -- confirm the
# seaborn version this notebook is pinned to.
plt.title('Time variability distribution plot -- absolute cycle count')
sns.distplot(time_diffs.dropna().values)

In [None]:
# Distribution of the same deviations expressed in units of the per-code_id
# standard deviation; NaN entries are removed before plotting.
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11 -- confirm the
# seaborn version this notebook is pinned to.
plt.title('Time variability distribution plot -- Z score')
sns.distplot(z_scores.dropna().values)

In [None]:
# Per-code_id summary tables, computed once on all samples and once on the
# outlier-pruned samples, using one shared aggregation spec.
summary_spec = {'time': ['median', 'mean', 'std', 'count']}
grouped_stats = times.groupby('code_id').agg(summary_spec)
grouped_real_stats = real_times.groupby('code_id').agg(summary_spec)

def print_times_of_code_id(code_id):
    """Print a four-part text report for one ``code_id``.

    The report shows, in order: the ``code_intel`` text of the first ``perms``
    row with this ``code_id``, its row in ``grouped_stats``, its row in
    ``grouped_real_stats``, and its ``time`` values from ``times`` sorted
    ascending. All four frames are read from the notebook's global namespace.

    Parameters
    ----------
    code_id
        Value matched against the ``code_id`` column of ``perms``/``times`` and
        used as a row label in the two grouped-statistics frames.
    """
    rule = '-' * 80
    # Each section is a heading plus a deferred lookup, so a section's lookup
    # only runs once the earlier sections have already been printed.
    sections = (
        ('Basic block',
         lambda: perms[perms['code_id'] == code_id]['code_intel'].iloc[0]),
        ('\nFull Stats',
         lambda: grouped_stats.loc[code_id]),
        ('\nOutlier-pruned Stats',
         lambda: grouped_real_stats.loc[code_id]),
        ('\nTimes',
         lambda: times[times['code_id'] == code_id].sort_values('time')['time']),
    )
    for heading, fetch in sections:
        print(heading)
        print(rule)
        print(fetch())

# Rank samples by |z-score| (largest first), map them back to their code_id,
# and report the code_id found at the chosen position of that ranking.
most_variable_code_id_idx = 1
ranked_time_ids = z_scores.abs().sort_values(ascending=False).index
most_variable_code_ids = times.loc[ranked_time_ids]['code_id']
selected_code_id = most_variable_code_ids.iloc[most_variable_code_id_idx]
print_times_of_code_id(selected_code_id)

In [None]:
# Z-score distribution once more, drawn without a title; NaN entries are
# removed before plotting.
sns.distplot(z_scores.dropna().values)

In [None]:
# Z-score of every outlier-pruned sample relative to the mean and standard
# deviation of its code_id, built only from names defined in earlier cells so
# the cell runs on a fresh kernel.
z_scores = (real_times['time'] - real_stats['mean']) / real_stats['std']

In [None]:
# Rows of `times` whose distance from their code_id median is below 2 standard
# deviations. A boolean mask is required here: a float Series is not a valid
# row selector. The per-row median/std come from `stats` (get_stats(times)).
times[((times['time'] - stats['median']) / stats['std']).abs() < 2]

In [None]:
# n_perms: number of rows per code_id in the `perms` table.
# n_instrs: number of newline characters in code_intel (length of the text
# minus its length with newlines removed).
# NOTE(review): that equals the instruction count only if every instruction is
# newline-terminated -- confirm against the `code` table contents.
# NOTE: this rebinds the name `perms`, which print_times_of_code_id reads for
# its `code_intel` column; that function no longer works after this cell runs.
perm_count_sql = 'SELECT code_id, count(1) as n_perms FROM perms GROUP BY code_id'
instr_count_sql = 'SELECT code_id, (LENGTH(code_intel) - LENGTH(REPLACE(code_intel, "\n", ""))) as n_instrs FROM code'

perms = pd.read_sql(perm_count_sql, cnx).set_index('code_id')
code = pd.read_sql(instr_count_sql, cnx).set_index('code_id')

In [None]:
# Keep only code_ids present in both tables, then divide the number of stored
# permutations by n_instrs! (the number of orderings of n_instrs items).
joined = perms.join(code, how='inner')
possible_orderings = joined['n_instrs'].apply(scipy.special.factorial)
utilization = joined['n_perms'] / possible_orderings

In [None]:
# Bin the utilization ratios and convert the bin counts to percentages of the
# sample (bars sum to 100), so the values match the "Percentage" y-axis label.
hist, bins = np.histogram(utilization)
hist = 100.0 * hist / hist.sum()

# Bars are centred on each bin and drawn at 90% of the bin width to leave gaps.
width = 0.9 * (bins[1] - bins[0])
center = (bins[:-1] + bins[1:]) / 2

plt.bar(center, hist, align='center', width=width)
plt.title('Parallel permutation utilization distribution')
plt.xlabel(r'Ratio of utilization $\left(\frac{\#\it{permutations}}{\#\it{instructions}!}\right)$', fontsize=18)
plt.ylabel('Percentage of examples in random sample')
plt.show()

In [None]:
# Load the `times` rows with kind "actual" and arch 1, keyed by (code_id, time_id).
# NOTE: this rebinds the name `times`, which print_times_of_code_id reads as a
# frame with a `code_id` column; that function no longer works after this cell.
actual_times_sql = 'SELECT time_id, code_id, time FROM times WHERE kind="actual" AND arch=1'
times = pd.read_sql(actual_times_sql, cnx).set_index(['code_id', 'time_id'])

In [None]:
# Mean and standard deviation of `time` per code_id, plus their ratio
# (std divided by mean), displayed with the largest ratios first.
time_agg = times.groupby('code_id').agg({'time': ['mean', 'std']})
per_code_mean = time_agg[('time', 'mean')]
per_code_std = time_agg[('time', 'std')]
time_agg['std/mean'] = per_code_std / per_code_mean
time_agg.sort_values('std/mean', ascending=False)

In [None]:
# Display the samples ordered by their (code_id, time_id) index. sort_values
# expects column labels, so ordering by the index needs sort_index instead.
times.sort_index()

In [None]:
# Drop the time_id index level so rows are keyed by code_id alone. The guard
# makes re-running this cell a no-op instead of an error on the missing level.
if 'time_id' in times.index.names:
    times = times.reset_index('time_id', drop=True)

In [None]:
# Deviation of every sample from the mean of its code_id. The 'time' column is
# selected so the subtraction aligns on the code_id index; subtracting the
# Series from the whole DataFrame would match its code_id labels against the
# column names and produce nothing but NaN.
times['time'] - time_agg['time']['mean']

In [None]:
# Deviation of every sample from the mean of its code_id. The 'time' column is
# selected so the subtraction aligns on the code_id index; subtracting the
# Series from the whole DataFrame would match its code_id labels against the
# column names and produce nothing but NaN.
times['time'] - time_agg['time']['mean']

In [None]:
# Collect each code_id's samples into one array, standardise the arrays with
# the per-code_id mean and std, then flatten everything into a single vector.
samples_by_code = times.groupby('code_id').apply(lambda grp: np.array(grp.time))
centered_by_code = samples_by_code - time_agg['time']['mean']
scaled_by_code = centered_by_code / time_agg['time']['std']
zscores = np.concatenate(scaled_by_code.values)

# Infinite scores are replaced by 0; NaN scores are discarded.
zscores[np.isinf(zscores)] = 0
zscores = zscores[~np.isnan(zscores)]

In [None]:
# Distribution of the z-scores strictly between -5 and 5, with the histogram
# bins spanning exactly that range.
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11 -- confirm the
# seaborn version this notebook is pinned to.
sns.distplot(zscores[(zscores > -5) & (zscores < 5)], hist_kws={'range': [-5, 5]})
# Render the figure; an argument-less plt.plot() draws nothing and only echoes [].
plt.show()

In [None]:
# Density-normalised histogram of the z-scores over [-3, 3] in 50 bins.
# `density=True` is the supported spelling of the removed `normed=True` keyword.
_ = plt.hist(zscores, bins=50, range=(-3, 3), density=True)

In [None]:
# Every sample divided by the standard deviation of its code_id. `times` is
# keyed by code_id and has no 'code_id' column, so the division relies on
# index alignment with time_agg rather than a per-row lookup.
times['time'] / time_agg['time']['std']