In [None]:
%matplotlib notebook

import random

import numpy as np
import pandas as pd
import scipy
import scipy.spatial.distance
import scipy.stats
import seaborn as sns
import sklearn
import sklearn.linear_model
import sklearn.preprocessing
import torch
import tqdm
from matplotlib import pyplot as plt
from scipy.stats import linregress

import common_libs.utilities as ut
import data.data_cost as dt
import models.graph_models as md
import models.losses as ls
import models.train as tr

In [None]:
# Read the whole `times` table into a DataFrame indexed by its `time_id` column.
cnx = ut.create_connection()
times = pd.read_sql(sql='SELECT * FROM times', con=cnx)
times = times.set_index('time_id')

In [None]:
# Load one dataset per timing file; all three share the same embedding file.
data_haswell, data_skylake, data_nehalem = (
    dt.load_dataset('../inputs/embeddings/code_delim.emb', timing_file)
    for timing_file in (
        '../saved/time_haswell_0113.data',
        '../saved/time_skylake_0113.data',
        '../saved/time_nehalem_0113.data',
    )
)

In [None]:
# For each dataset, turn `raw_data` into a frame and keep the `time` column keyed by `code_id`.
haswell_times, skylake_times, nehalem_times = (
    pd.DataFrame(
        dataset.raw_data,
        columns=['code_token', 'time', 'code_intel', 'code_id'],
    ).set_index('code_id')['time']
    for dataset in (data_haswell, data_skylake, data_nehalem)
)

In [None]:
# Mean number of instructions per block in `data_haswell`: first over the blocks that
# also have an 'iaca' timing for arch 1, then over every block.
# NOTE(review): `kind_times_of_arch` is defined in a later cell; run that cell first.
iaca_test_code_ids = set(kind_times_of_arch('iaca', 1).index)
haswell_code_ids = set(d.code_id for d in data_haswell.data)

# The numerator filters on the same id set the denominator counts. It previously used
# `iaca_code_ids`, a different set that is only created further down in the notebook.
print(float(sum(len(d.block.instrs) for d in data_haswell.data if d.code_id in iaca_test_code_ids)) / len(haswell_code_ids & iaca_test_code_ids))
print(float(sum(len(d.block.instrs) for d in data_haswell.data)) / len(haswell_code_ids))

In [None]:
# Distinct code ids present in the first dataset.
q = {d.code_id for d in data_haswell.data}

In [None]:
# For every datum of every dataset, call remove_edges() and then linearize_edges()
# on its block (both mutate the block in place, judging by the discarded return values).
# NOTE(review): the loop variables `data` and `datum` remain bound at notebook scope after
# this cell. `get_model` reads a global named `data`, so it appears to pick up whichever
# dataset was iterated last here — confirm this is intended.
for data in [data_haswell, data_skylake, data_nehalem]:
    for datum in data.data:
        datum.block.remove_edges()
        datum.block.linearize_edges()

In [None]:
# Code ids that have a measured time in all three series.
legal_code_ids = set(haswell_times.index).intersection(
    skylake_times.index,
    nehalem_times.index,
)

In [None]:
def get_model(model_name):
    """Build a GraphNN and restore its state from the checkpoint at `model_name`.

    The checkpoint is loaded through a `tr.Train` wrapper; the wrapper itself is
    discarded and only the network is returned.

    NOTE(review): `data` is not a parameter. It is looked up in the notebook's global
    namespace at call time, so the dataset handed to the trainer depends on which
    cells ran before this one.
    """
    net = md.GraphNN(256, 256, 1, False, False, True)
    net.set_learnable_embedding('none', 1337)
    tr.Train(net, data, tr.PredictionType.REGRESSION, ls.mse_loss, 1).load_checkpoint(model_name)
    return net

# Restore the three trained checkpoints.
haswell_model, skylake_model, nehalem_model = (
    get_model(checkpoint)
    for checkpoint in (
        '../saved/paper_haswell_lstm_sgd-6_trained.mdl',
        '../saved/paper_skylake_lstm_sgd-6_trained.mdl',
        '../saved/paper_nehalem_lstm_sgd-6_trained.mdl',
    )
)

In [None]:
# Cosine distance between element 0 of the `linear` layer weights of the two models.
scipy.spatial.distance.cosine(
    skylake_model.linear.weight.detach().numpy()[0],
    nehalem_model.linear.weight.detach().numpy()[0]
)

In [None]:
# Show element 0 of the `linear` layer weights of `skylake_model` as a NumPy array.
skylake_model.linear.weight.detach().numpy()[0]

In [None]:
def get_preds_of(data, model_name):
    """Run the checkpoint `model_name` over `data.test` and collect its outputs.

    Returns a DataFrame with a single `pred` column, indexed by each datum's
    `code_id` (index name `code_id`), in iteration order of `data.test`.
    """
    net = get_model(model_name)
    code_ids, preds = [], []
    for sample in tqdm.tqdm(data.test):
        code_ids.append(sample.code_id)
        output = net.forward(sample)
        preds.append(output.item())
        # Called after every forward pass, as in the per-sample loop elsewhere in the notebook.
        net.remove_refs(sample)
    frame = pd.DataFrame(preds, index=code_ids, columns=['pred'])
    return frame.rename_axis('code_id')

In [None]:
# Test-set predictions of each checkpoint on its own dataset.
haswell_preds, skylake_preds, nehalem_preds = (
    get_preds_of(dataset, checkpoint)
    for (dataset, checkpoint) in (
        (data_haswell, '../saved/paper_haswell_lstm_sgd-6_trained.mdl'),
        (data_skylake, '../saved/paper_skylake_lstm_sgd-6_trained.mdl'),
        (data_nehalem, '../saved/paper_nehalem_lstm_sgd-6_trained.mdl'),
    )
)

In [None]:
# Pick 1000 random blocks that have an 'iaca' entry; the next cell times the model on them.
# NOTE(review): `iaca_code_ids` is assigned in a later cell, so that cell has to run first.
model = get_model('../saved/paper_haswell_lstm_sgd-6_trained.mdl')
# `random.sample` requires a sequence (sets are rejected from Python 3.11 on), so the
# id set is materialised as a list first. The stray trailing `0` was a syntax error.
sampled_code_ids = random.sample(list(iaca_code_ids), 1000)
# Membership is tested against a set so the filter stays linear in the dataset size.
sampled_code_id_set = set(sampled_code_ids)
codes = [d for d in data_haswell.data if d.code_id in sampled_code_id_set]

In [None]:
# ','.join(map(str, (c.code_id for c in codes)))

In [None]:
%%timeit

# Timed body: one forward pass per sampled block, followed by remove_refs on the datum.
# The prediction value itself is discarded.
for datum in codes:
    model.forward(datum).item()
    model.remove_refs(datum)

In [None]:
import pickle

# NOTE(review): pickle.load can execute arbitrary code, so only load files you trust.
# The absolute path is machine-specific; consider making it configurable.
with open('/home/ithemal/ithemal/iaca-lin64/1552421033.04/pickleout', 'rb') as f:
    m = pickle.load(f)
# Re-key the loaded mapping with int keys.
m = {int(k): m[k] for k in m}

In [None]:
# Mean absolute relative error of the arch-1 'iaca' times against `haswell_times`,
# restricted to the code ids that are keys of `m`.
z = kind_times_of_arch('iaca', 1).loc[list(m.keys())]
(z - haswell_times).abs().div(haswell_times).mean()

In [None]:
# Same expression as the end of the previous cell: mean absolute relative error of `z`
# against `haswell_times` (uses whatever `z` currently holds).
((z - haswell_times).abs() / haswell_times).mean()

In [None]:
# Cross-architecture check: run the Haswell checkpoint on the Nehalem test set.
haswell_train_nehalem_preds = get_preds_of(data_nehalem, '../saved/paper_haswell_lstm_sgd-6_trained.mdl')
# haswell_train_skylake_preds = get_preds_of(data_haswell, '../saved/paper_haswell_lstm_sgd-6_trained.mdl')

# skylake_preds = get_preds_of(data_skylake, '../saved/paper_skylake_lstm_sgd-6_trained.mdl')
# nehalem_preds = get_preds_of(data_nehalem, '../saved/paper_nehalem_lstm_sgd-6_trained.mdl')

In [None]:
# Compare the Haswell-trained and the Nehalem-trained model on Nehalem ground truth:
# mean absolute relative error, then Spearman rank correlation on the shared ids.
# `.loc` does not accept a set as an indexer in current pandas, so the shared ids are
# stored as a list (the same list is used on both sides, keeping the rows paired).
idxs = list(set(haswell_train_nehalem_preds.index) & set(nehalem_times.index))
print(((haswell_train_nehalem_preds['pred'] - nehalem_times).abs() / nehalem_times).dropna().mean())
print(scipy.stats.spearmanr(haswell_train_nehalem_preds['pred'].loc[idxs], nehalem_times.loc[idxs]))

print(((nehalem_preds['pred'] - nehalem_times).abs() / nehalem_times).dropna().mean())
print(scipy.stats.spearmanr(nehalem_preds['pred'].loc[idxs], nehalem_times.loc[idxs]))

In [None]:
# Average instruction count of the blocks that have both a `haswell_times` entry
# and an arch-1 'iaca' time.
z = set(haswell_times.index).intersection(kind_times_of_arch('iaca', 1).index)
q = [d.block.instrs for d in data_haswell.data if d.code_id in z]
float(sum(len(instrs) for instrs in q)) / len(q)

In [None]:
def kind_times_of_arch(kind, arch):
    """Return the mean `time` per `code_id` for one (kind, arch) combination.

    Reads the notebook-level `times` frame, keeps the rows whose `kind` and `arch`
    columns equal the arguments, and averages `time` within each `code_id`.

    Returns a Series named `time`, indexed by `code_id`.
    """
    selected = times[(times['kind'] == kind) & (times['arch'] == arch)]
    # Select the column before aggregating: averaging the whole frame would also try to
    # average the string-valued `kind` column, which recent pandas versions refuse to do.
    return selected.groupby('code_id')['time'].mean()

In [None]:
def merge_sort(li, c):
    """Merge-sort `li` and count its inversions.

    Args:
        li: list of mutually comparable items. It is not modified.
        c: one-element list used as a mutable counter; the number of inversions
           (pairs i < j with li[i] > li[j]) is added to c[0].

    Returns:
        A sorted list. Lists shorter than two elements are returned as-is.
    """
    if len(li) < 2:
        return li
    # Floor division: a float midpoint is not a valid slice index on Python 3.
    m = len(li) // 2
    # The sorted halves must be kept: the inversion count made while merging is only
    # correct when both inputs of `merge` are already sorted.
    left = merge_sort(li[:m], c)
    right = merge_sort(li[m:], c)
    return merge(left, right, c)

def merge(l, r, c):
    """Merge two sorted lists and count the cross inversions between them.

    Args:
        l: sorted left list (not modified).
        r: sorted right list (not modified).
        c: one-element list; c[0] is increased by the number of pairs (x in l,
           y in r) with x > y.

    Returns:
        The merged, sorted list.
    """
    result = []
    i = j = 0
    while i < len(l) and j < len(r):
        # `<=` keeps the merge stable and means equal items are not counted as inverted.
        if l[i] <= r[j]:
            result.append(l[i])
            i += 1
        else:
            # r[j] is smaller than every element still pending on the left.
            c[0] += len(l) - i
            result.append(r[j])
            j += 1
    result.extend(l[i:])
    result.extend(r[j:])
    return result

def inversions_of_preds(preds, truth):
    """Score how far the ordering of `preds` is from the ordering of `truth`.

    Every key is replaced by its rank in the sorted `truth` series, the ranks are
    listed in the order given by sorting `preds`, and the inversions of that list
    are counted via `merge_sort`. The count is divided by len(truth)**2/2 and
    returned as a string with three decimals.
    """
    rank_in_truth = dict(
        (key, position) for (position, key) in enumerate(truth.sort_values().index)
    )
    ranks_in_pred_order = [rank_in_truth[key] for key in preds.sort_values().index]
    counter = [0]
    merge_sort(ranks_in_pred_order, counter)
    normaliser = len(truth)**2/2
    return '{:.3f}'.format(float(counter[0]) / normaliser)

def inversions_of_arch(archname, archno, arch_preds, arch_times):
    """Print Spearman, Pearson and inversion scores of IACA, LLVM and Ithemal.

    Args:
        archname: label used in the printed lines.
        archno: `arch` value passed to `kind_times_of_arch`.
        arch_preds: frame with a `pred` column indexed by code id.
        arch_times: series of reference times indexed by code id.

    Only code ids present in all three prediction sources are compared.
    """
    inv_iacas = kind_times_of_arch('iaca', archno)
    inv_llvms = kind_times_of_arch('llvm', archno)
    inv_preds = arch_preds['pred']
    # `.loc` does not accept a set as an indexer in current pandas, so the shared ids
    # are materialised as a list. The same list indexes both operands, keeping rows paired.
    inv_keys = list(set(inv_iacas.index) & set(inv_llvms.index) & set(inv_preds.index))
    for (corrname, corrfun) in (
        ('spearman', scipy.stats.spearmanr),
        ('pearson', scipy.stats.pearsonr),
        ('%inversions', inversions_of_preds),
    ):
        for (methodname, methoddf) in (
            ('IACA', inv_iacas),
            ('LLVM', inv_llvms),
            ('Ithemal', inv_preds),
        ):
            print('{} {} {}: {}'.format(
                archname,
                methodname,
                corrname,
                corrfun(methoddf.loc[inv_keys], arch_times.loc[inv_keys])
            ))
# Report the scores for Haswell (arch 1); the other two architectures are disabled.
inversions_of_arch('Haswell', 1, haswell_preds, haswell_times)
# inversions_of_arch('Skylake', 2, skylake_preds, skylake_times)
# inversions_of_arch('Nehalem', 3, nehalem_preds, nehalem_times)

In [None]:
def get_dataset(dataset, n_opcodes=1260):
    """Build an opcode-count feature matrix and a target vector.

    Args:
        dataset: sized iterable of items exposing `y` and `block.instrs`, where
            every instruction has an integer `opcode`.
        n_opcodes: number of feature columns; opcodes are used directly as column
            indices. Defaults to 1260, the width that was previously hard-coded.

    Returns:
        (X, y): X is a float32 array of shape (len(dataset), n_opcodes) whose entry
        [row, op] counts how often opcode `op` occurs in that item's block; y holds
        each item's `y`.

    Raises:
        IndexError: if an opcode is not smaller than `n_opcodes`.
    """
    X = np.zeros((len(dataset), n_opcodes), dtype=np.float32)
    y = np.empty(len(dataset))
    for (idx, d) in enumerate(tqdm.tqdm(dataset)):
        y[idx] = d.y
        for instr in d.block.instrs:
            X[idx, instr.opcode] += 1
    return X, y

In [None]:
# Baseline: ordinary least squares on the opcode-count features of the Nehalem training split.
X_train, y_train = get_dataset(data_nehalem.train)
lr = sklearn.linear_model.LinearRegression()
lr = lr.fit(X_train, y_train)

In [None]:
# Same opcode-count features for the Nehalem test split.
X_test, y_test = get_dataset(data_nehalem.test)

In [None]:
# Predict the test targets with the linear baseline and show the Spearman
# rank correlation between targets and predictions.
y_pred = lr.predict(X_test)
scipy.stats.spearmanr(y_test, y_pred)

In [None]:
# Mean absolute relative error of the linear baseline on the test split.
(np.abs(y_pred - y_test) / y_test).mean()

In [None]:
# Align IACA, LLVM and model predictions with the measured Haswell times on the
# code ids shared by all three prediction sources.
inv_iacas = kind_times_of_arch('iaca', 1)
inv_llvms = kind_times_of_arch('llvm', 1)
inv_preds = haswell_preds['pred']
# `.loc` does not accept a set as an indexer in current pandas; a single list also
# guarantees that the four series end up in the same row order.
inv_keys = list(set(inv_iacas.index) & set(inv_llvms.index) & set(inv_preds.index))
arch_times = haswell_times.loc[inv_keys]
inv_iacas = inv_iacas.loc[inv_keys]
inv_llvms = inv_llvms.loc[inv_keys]
inv_preds = inv_preds.loc[inv_keys]

# Scatter of IACA prediction against measured time, with a dashed least-squares line.
# NOTE(review): `m` and `b` overwrite any earlier notebook variables of the same name.
plt.figure()
plt.title('IACA linear correlation')
plt.xlabel('Actual')
plt.ylabel('IACA Prediction')
plt.scatter(arch_times, inv_iacas)
m,b = np.polyfit(arch_times, inv_iacas, 1)
plt.plot(np.linspace(0, arch_times.max()), np.linspace(0, arch_times.max()) * m + b, '--')
plt.show()

In [None]:
# Scatter of LLVM prediction against measured time, with a dashed least-squares line.
# Uses `arch_times` and `inv_llvms` as prepared in the IACA cell; `m` and `b` are overwritten.
plt.figure()
plt.title('LLVM linear correlation')
plt.xlabel('Actual')
plt.ylabel('LLVM Prediction')
plt.scatter(arch_times, inv_llvms)
m,b = np.polyfit(arch_times, inv_llvms, 1)
plt.plot(np.linspace(0, arch_times.max()), np.linspace(0, arch_times.max()) * m + b, '--')
plt.show()

In [None]:
# Scatter of the model's prediction against measured time, with a dashed least-squares line.
# Uses `arch_times` and `inv_preds` as prepared in the IACA cell; `m` and `b` are overwritten.
# NOTE(review): unlike the two cells above, this one does not call plt.show().
plt.figure()


plt.title('Ithemal linear correlation')
plt.xlabel('Actual')
plt.ylabel('Ithemal Prediction')
plt.scatter(arch_times, inv_preds)
m,b = np.polyfit(arch_times, inv_preds, 1)
plt.plot(np.linspace(0, arch_times.max()), np.linspace(0, arch_times.max()) * m + b, '--')

In [None]:
# Fraction of Haswell test blocks on which the model's absolute error is strictly
# smaller than both the LLVM and the IACA absolute error.
# The ids are kept as a list because `.loc` does not accept a set in current pandas.
test_set_idxs = list(set(inv_preds.index) & set(d.code_id for d in data_haswell.test))
cdiff = np.abs(inv_preds.loc[test_set_idxs] - arch_times.loc[test_set_idxs])
sum(
    (cdiff < np.abs(inv_llvms.loc[test_set_idxs] - arch_times.loc[test_set_idxs])) & 
    (cdiff < np.abs(inv_iacas.loc[test_set_idxs] - arch_times.loc[test_set_idxs]))
) / float(len(test_set_idxs))

In [None]:
# Spearman rank correlation between the measured times of each pair of series,
# on the code ids present in all three.
# A list is used because `.loc` does not accept a set in current pandas; sharing the
# one list keeps the paired series in the same row order.
time_key = list(set(haswell_times.index) & set(skylake_times.index) & set(nehalem_times.index))
print('haswell -- skylake: {}'.format(scipy.stats.spearmanr(haswell_times.loc[time_key], skylake_times.loc[time_key])))
print('skylake -- nehalem: {}'.format(scipy.stats.spearmanr(skylake_times.loc[time_key], nehalem_times.loc[time_key])))
print('haswell -- nehalem: {}'.format(scipy.stats.spearmanr(haswell_times.loc[time_key], nehalem_times.loc[time_key])))

In [None]:
# Density of the mean 'actual' time per code id, keeping only means within [20, 10000].
# `time` is selected before averaging: averaging the whole frame would also try to
# average the string-valued `kind` column, which recent pandas versions refuse to do.
mean_times = times[times['kind'] == 'actual'].groupby('code_id')['time'].mean()
legal_actual_times = mean_times[(mean_times >= 20) & (mean_times <= 10000)].sort_values()
ax = sns.kdeplot(legal_actual_times, shade=True)
_ = ax.set_title('Original time PDF')

In [None]:
# Density of the log times after min-max scaling them into [-1, 1].
log_data = np.log(legal_actual_times)
scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(-1, 1))
log_column = log_data.values.reshape(-1, 1)
scaled_log_data = scaler.fit_transform(log_column).reshape(-1)
ax = sns.kdeplot(scaled_log_data, shade=True)
_ = ax.set_title('Log-Scaled time PDF')

In [None]:
# Pass the log times as a single row to sklearn's `normalize` and flatten the result.
# NOTE(review): this displays the full array; consider slicing before showing it.
sklearn.preprocessing.normalize(np.log(legal_actual_times).values.reshape(1, -1)).reshape(-1)

In [None]:
# Instruction count of every block in `data_haswell`, in ascending order.
block_lens = np.sort(np.array([len(d.block.instrs) for d in data_haswell.data]))

In [None]:
# Code ids with at least one 'iaca' row, and the block with the most instructions among them.
iaca_code_ids = set(times.loc[times['kind'] == 'iaca', 'code_id'])
largest_iaca_block = max(
    (datum for datum in data_haswell.data if datum.code_id in iaca_code_ids),
    key=lambda datum: len(datum.block.instrs)
)

In [None]:
# Instruction count of the largest block that has an 'iaca' row.
len(largest_iaca_block.block.instrs)

In [None]:
# Share of `data_haswell` entries whose code id has an 'iaca' row.
float(sum(1 for d in data_haswell.data if d.code_id in iaca_code_ids)) / len(data_haswell.data)

In [None]:
# Cumulative kernel density estimate of the log block lengths.
sns.kdeplot(np.log(block_lens), cumulative=True)

In [None]:
class Mdl(torch.nn.Module):
    """Toy network for checking how `torch.no_grad()` affects gradient flow.

    A shared hidden layer `l0` feeds two heads: `l1` is evaluated with gradient
    tracking disabled, `l2` is evaluated normally, and their outputs are added.
    """

    def __init__(self):
        super(Mdl, self).__init__()
        self.l0 = torch.nn.Linear(1, 10)
        self.l1 = torch.nn.Linear(10, 1)
        self.l2 = torch.nn.Linear(10, 1)

    def forward(self, x):
        """Return l1(h) + l2(h) with h = relu(l0(x)); only the l2 branch tracks gradients."""
        a = self.l0(x)
        b = torch.nn.functional.relu(a)
        with torch.no_grad():
            c = self.l1(b)
        # Both heads read the hidden activation `b`. The previous code passed `z`, which is
        # not defined in this method and silently resolved to an unrelated notebook global.
        d = c + self.l2(b)
        return d

In [None]:
# Total number of elements across the trainable parameters of `model`.
model_parameters = (p for p in model.parameters() if p.requires_grad)
params = sum(np.prod(p.size()) for p in model_parameters)
params

In [None]:
# Gradient-flow experiment on the toy `Mdl` network.
# NOTE(review): `m`, `z` and `q` overwrite notebook variables of the same names used by other cells.
m = Mdl()
o = torch.optim.SGD(m.parameters(), lr=1)
o.zero_grad()

# 1) One forward/backward pass through Mdl.forward, then print the weight gradients
#    of the three layers.
m(torch.ones(1)).backward()

print(m.l2.weight.grad)
print(m.l1.weight.grad)
print(m.l0.weight.grad)
print('')

# 2) Print the same gradients again right after zero_grad().
o.zero_grad()
print(m.l2.weight.grad)
print(m.l1.weight.grad)
print(m.l0.weight.grad)
print('')

# 3) Redo the computation by hand: l1 is evaluated under no_grad, l2 with tracking.
#    Unlike Mdl.forward, no ReLU is applied to the output of l0 here.
z = m.l0(torch.ones(1))
with torch.no_grad():
    res = m.l1(z)
q = res + m.l2(z)
q.backward()


print(m.l2.weight.grad)
print(m.l1.weight.grad)
print(m.l0.weight.grad)



In [None]:
# Code ids that have a measured time, a model prediction, an IACA time and an LLVM time.
iaca_haswell = kind_times_of_arch('iaca', 1)
llvm_haswell = kind_times_of_arch('llvm', 1)
ks = set(haswell_times.index).intersection(
    haswell_preds['pred'].index,
    iaca_haswell.index,
    llvm_haswell.index,
)

In [None]:
# For each time threshold i, the fraction of blocks with measured time below i on which
# the model's absolute error is strictly smaller than both IACA's and LLVM's.
# Ids are passed to `.loc` as lists because current pandas does not accept sets there.
q = []
for i in range(50, 1050, 100):
    ks2 = list(set(haswell_times.loc[list(ks)].loc[haswell_times < i].index))
    actual = haswell_times.loc[ks2]
    ithemal_err = np.abs(actual - haswell_preds['pred'].loc[ks2])
    iaca_err = np.abs(actual - iaca_haswell.loc[ks2])
    llvm_err = np.abs(actual - llvm_haswell.loc[ks2])
    z = ((ithemal_err < iaca_err) & (ithemal_err < llvm_err)).sum() / float(len(ks2))
    q.append((i, z))

In [None]:
# Show the (threshold, fraction) pairs collected in the previous cell.
q