In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
filename = './model_sparsity/decode_bc_tile8_th0.90_layer_sparsity.txt'
exp_list = []
with open(filename, 'r') as sparsity_file:
    lines = sparsity_file.readlines()
    for line in lines:
        if line.startswith('module name: '):
            experiment = {'model': line.split('.')[1],
                          'layer_num': int(line.split('.')[2]),
                          'module': line.split('.')[4]}
        elif line.startswith('module tiles:'):
            experiment['tiles'] = int(float(line.split(': ')[1]))
        elif line.startswith('module pruned tiles'):
            experiment['pruned_tiles'] = int(line.split('tiles ')[1])
        elif line.startswith('module: tiles pruning'):
            exp_list.append(experiment)

df_sparsity = pd.DataFrame(exp_list)
df_sparsity



Unnamed: 0,model,layer_num,module,tiles,pruned_tiles
0,encoders,0,linear_q,1024,269
1,encoders,0,linear_k,1024,277
2,encoders,0,linear_v,1024,396
3,encoders,0,linear_out,1024,376
4,encoders,0,w_1,8192,2741
...,...,...,...,...,...
103,decoders,5,linear_k,1024,256
104,decoders,5,linear_v,1024,215
105,decoders,5,linear_out,1024,218
106,decoders,5,w_1,8192,1756


In [3]:
df = pd.read_csv('./output/load_skip_data.csv', sep=',')
df.head()

Unnamed: 0.1,Unnamed: 0,model,layer,sim_seconds
0,0,Decoder sparsity 0,Query1,0.037954
1,1,Decoder sparsity 0,Key1,0.037817
2,2,Decoder sparsity 0,Value1,0.038081
3,3,Decoder sparsity 0,Transpose1,0.005051
4,4,Decoder sparsity 0,QKT1,0.097844


In [4]:
df['sparsity'] = df['model'].apply(lambda x: int(x.split(' ')[-1]))
df['encoder_decoder'] = df['model'].apply(lambda x: x.split(' ')[0].lower())
MHA_layers = ['Query', 'Key', 'Value', 'Transpose', 'QKT', 'Pre-Softmax', 'Score', 'Post-Softmax']
df['MHA'] = df['layer'].apply(lambda x: x[:-1] in MHA_layers)
df['Head'] = df[df['MHA']]['layer'].apply(lambda x: int(x[-1]))
df['module'] = df['layer'].apply(lambda x: x[:-1] if x[:-1] in MHA_layers else x)
df


Unnamed: 0.1,Unnamed: 0,model,layer,sim_seconds,sparsity,encoder_decoder,MHA,Head,module
0,0,Decoder sparsity 0,Query1,0.037954,0,decoder,True,1.0,Query
1,1,Decoder sparsity 0,Key1,0.037817,0,decoder,True,1.0,Key
2,2,Decoder sparsity 0,Value1,0.038081,0,decoder,True,1.0,Value
3,3,Decoder sparsity 0,Transpose1,0.005051,0,decoder,True,1.0,Transpose
4,4,Decoder sparsity 0,QKT1,0.097844,0,decoder,True,1.0,QKT
...,...,...,...,...,...,...,...,...,...
755,755,Encoder sparsity 35,Projection,0.491314,35,encoder,False,,Projection
756,756,Encoder sparsity 35,Add/Norm0,0.081558,35,encoder,False,,Add/Norm0
757,757,Encoder sparsity 35,FF0,5.003215,35,encoder,False,,FF0
758,758,Encoder sparsity 35,FF1,4.885143,35,encoder,False,,FF1


In [5]:
df.groupby(['encoder_decoder', 'sparsity', 'module'])['sim_seconds'].mean()

encoder_decoder  sparsity  module   
decoder          0         Add/Norm0    0.013406
                           Add/Norm1    0.004681
                           FF0          1.484674
                           FF1          1.436320
                           Key          0.038578
                                          ...   
encoder          45        QKT          5.670132
                           Query        0.118722
                           Score        5.420628
                           Transpose    0.023022
                           Value        0.113639
Name: sim_seconds, Length: 280, dtype: float64

In [6]:
prunable_layers = ["Query", "Key", "Value", "Projection", "FF0", "FF1"]
df['prunable'] = df['module'].apply(lambda x: x in prunable_layers)
df[df['prunable'] == False].groupby(['encoder_decoder','module'])['sim_seconds'].mean()

encoder_decoder  module      
decoder          Add/Norm0       0.013970
                 Add/Norm1       0.005252
                 Nan             0.004935
                 Post-Softmax    0.013637
                 Pre-Softmax     0.031004
                 QKT             0.100075
                 Score           0.099798
                 Transpose       0.005845
encoder          Add/Norm0       0.071373
                 Add/Norm1       0.028173
                 Nan             0.026618
                 Post-Softmax    0.309046
                 Pre-Softmax     0.686921
                 QKT             5.663451
                 Score           5.406620
                 Transpose       0.030193
Name: sim_seconds, dtype: float64

In [27]:
encoder_non_prunable = 0.71373 * 2 + 4* (0.309046 + 0.686921 + 5.6634 + 5.4066 + 0.03)
decoder_non_prunable = 0.01397 * 3 + 8 * (0.013637 + 0.031004 + 0.100075 + 0.099798  + 0.0058)
df_prunable = df[df['prunable'] == True].groupby(['encoder_decoder', 'sparsity', 'module'])['sim_seconds'].mean().reset_index()
df_prunable.head()

Unnamed: 0,encoder_decoder,sparsity,module,sim_seconds
0,decoder,0,FF0,1.484674
1,decoder,0,FF1,1.43632
2,decoder,0,Key,0.038578
3,decoder,0,Projection,0.144371
4,decoder,0,Query,0.038302


In [16]:
name_dict = {'linear_q': 'Query',
             'linear_v': 'Value',
             'linear_k': 'Key',
             'linear_out': 'Projection',
             'w_1': 'FF0',
             'w_2': 'FF1',
             'encoders': 'encoder',
             'decoders': 'decoder'}
df_sparsity = df_sparsity.replace(name_dict)
df_sparsity['sparsity'] = (df_sparsity['pruned_tiles'] / df_sparsity['tiles']) * 100
df_sparsity['sparsity'] = df_sparsity['sparsity'].apply(lambda  x: min(5* round(x/5), 45))
df_sparsity.head()

Unnamed: 0,model,layer_num,module,tiles,pruned_tiles,sparsity
0,encoder,0,Query,1024,269,25
1,encoder,0,Key,1024,277,25
2,encoder,0,Value,1024,396,40
3,encoder,0,Projection,1024,376,35
4,encoder,0,FF0,8192,2741,35


In [18]:
df_prunable_time = pd.merge(left=df_sparsity, right=df_prunable, left_on=['model', 'module', 'sparsity'], right_on=['encoder_decoder', 'module', 'sparsity'], how='left')
df_prunable_time['MHA'] = df_prunable_time['module'].apply(lambda x: x in ['Query', 'Key', 'Value'])
df_prunable_time.loc[df_prunable_time['MHA'] * (df_prunable_time['encoder_decoder'] == 'encoder'), 'sim_seconds'] *= 4
df_prunable_time.loc[df_prunable_time['MHA'] * (df_prunable_time['encoder_decoder'] == 'decoder'), 'sim_seconds'] *= 8
df_prunable_time

Unnamed: 0,model,layer_num,module,tiles,pruned_tiles,sparsity,encoder_decoder,sim_seconds,MHA
0,encoder,0,Query,1024,269,25,encoder,0.645922,True
1,encoder,0,Key,1024,277,25,encoder,0.637129,True
2,encoder,0,Value,1024,396,40,encoder,0.496785,True
3,encoder,0,Projection,1024,376,35,encoder,0.491314,False
4,encoder,0,FF0,8192,2741,35,encoder,5.003215,False
...,...,...,...,...,...,...,...,...,...
103,decoder,5,Key,1024,256,25,decoder,0.030983,True
104,decoder,5,Value,1024,215,20,decoder,0.031192,True
105,decoder,5,Projection,1024,218,20,decoder,0.115717,False
106,decoder,5,FF0,8192,1756,20,decoder,1.205852,False


In [19]:
df_prunable_time.groupby('encoder_decoder')['sim_seconds'].sum()

encoder_decoder
decoder     14.888418
encoder    159.828436
Name: sim_seconds, dtype: float64

In [30]:
total_prunable_time = df_prunable_time['sim_seconds'].sum()
total_non_prunable_time = 12* encoder_non_prunable + 6* decoder_non_prunable
total_prunable_time, 12*encoder_non_prunable, 6*decoder_non_prunable, total_non_prunable_time

(174.71685375, 597.735936, 12.266532000000002, 610.002468)

In [26]:
df_baseline = df_prunable[df_prunable['sparsity'] == 0].copy()
df_baseline['MHA'] = df_baseline['module'].apply(lambda x: x in ['Query', 'Key', 'Value'])
df_baseline.loc[df_baseline['MHA'] * (df_baseline['encoder_decoder'] == 'encoder'), 'sim_seconds'] *= 4
df_baseline.loc[df_baseline['MHA'] * (df_baseline['encoder_decoder'] == 'decoder'), 'sim_seconds'] *= 8
baseline_prunable_time = df_baseline.groupby('encoder_decoder')['sim_seconds'].sum()
# total_baseline_prunable_time = baseline_prunable_time.loc['encoder'] * 12 + baseline_prunable_time.loc['decoder'] * 6
baseline_prunable_time.loc['encoder'] * 12, baseline_prunable_time.loc['decoder'] * 6

(214.08962400000001, 23.919354000000002)

In [12]:
(total_prunable_time + total_non_prunable_time) / (total_baseline_prunable_time + total_non_prunable_time), total_prunable_time/ total_baseline_prunable_time

(0.9275846800678093, 0.7416834164099856)