In [1]:
import numpy as np
import pandas as pd
import pickle
import psutil
import random as rn
import tensorflow as tf

In [2]:
import keras
from keras import metrics
from keras import regularizers
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.optimizers import Adam, RMSprop
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint, History 
from keras import backend as K

Using TensorFlow backend.


In [3]:
# Load the pickled bundle that holds the train/validation/test splits and the
# tuned hyper-parameters unpacked further down.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
complete_data_path = 'lime_analysis_data/complete_data.pkl'
with open(complete_data_path, mode='rb') as handle:
    complete_data = pickle.load(handle)

In [6]:
# Load the pickled results dictionary; its 'intensity_df' entry is analysed below.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
lime_results_path = 'lime_analysis_data/old/results.pkl'
with open(lime_results_path, mode='rb') as handle:
    lime_dict = pickle.load(handle)

In [7]:
# Work on a private (deep) copy so the loaded dictionary entry stays untouched.
intensity_df = lime_dict['intensity_df'].copy(deep=True)

# Analysis of Intensity df

In [8]:
# Promote the first row of the raw intensity table to column labels.
# The frame is rebuilt from lime_dict on every run, so re-executing this cell
# is idempotent (it no longer strips one more data row each time it is run).
_intensity_raw = lime_dict['intensity_df']
header = _intensity_raw.iloc[0]
intensity_df = _intensity_raw.iloc[1:].copy()
intensity_df.columns = header

In [9]:
# Transpose so that each former column label becomes a row.
intensity_df_trans = intensity_df.transpose()

In [10]:
# Per-row total of absolute values, stored as an extra column used for ranking.
abs_intensity_totals = intensity_df_trans.abs().sum(axis=1)
intensity_df_trans['sum_of_intensities'] = abs_intensity_totals

In [11]:
# Order rows from the largest to the smallest 'sum_of_intensities'.
intensity_df_trans = intensity_df_trans.sort_values('sum_of_intensities', ascending=False)

# List of columns dict

In [13]:
# Maps a selection-strategy name to the list of column labels it keeps.
list_of_cols = dict()

# Drop features whose intensities are all zero

In [14]:
# Separate working copy for the zero-intensity filter.
intensity_df_trans_0 = intensity_df_trans.copy(deep=True)

In [15]:
# Keep only the rows that contain at least one non-zero entry.
has_nonzero = intensity_df_trans_0.ne(0).any(axis=1)
intensity_df_trans_0 = intensity_df_trans_0[has_nonzero]

In [16]:
# Row labels that survived the zero filter become the 'drop_0_columns' selection.
list_of_cols['drop_0_columns'] = [*intensity_df_trans_0.index]

# Drop the lowest-ranked n% of features

In [17]:
# Separate working copy for the percentage-based selections.
intensity_df_trans_n = intensity_df_trans.copy(deep=True)

In [18]:
# For n = 5, 10, ..., 95: keep the first ceil((100 - n)% of rows) labels of
# intensity_df_trans_n, i.e. drop the last n% of rows in its current order
# (the frame was sorted by descending 'sum_of_intensities' before being copied).
total_features = intensity_df_trans_n.shape[0]
for n in range(5, 100, 5):
    # Integer ceiling of total_features * (100 - n) / 100. Avoids the float
    # round-trip of np.ceil(rows * (1 - n/100)), which can be off by one on
    # exact multiples and breaks entirely under integer division.
    n_keep = -((-total_features * (100 - n)) // 100)
    list_of_cols['drop_{}_pct_columns'.format(n)] = list(intensity_df_trans_n.index[:n_keep])

# Training with selected columns

In [22]:
# Pull the feature splits, target splits and hyper-parameters out of the bundle.
x_train, x_test, x_val = (complete_data[name] for name in ('xtrain', 'xtest', 'xval'))
y_train, y_test, y_val = (complete_data[name] for name in ('ytrain', 'ytest', 'yval'))
param_dict = complete_data['params_dict']

In [23]:
# Display the hyper-parameters that are later passed to create_model.
param_dict

{'activation': 'relu',
 'batch_size': 32,
 'dropout1': 0.5099647106824179,
 'dropout2': 0.5614162810051894,
 'early_stop_rounds': 30,
 'num_layers': 'two_hidden',
 'optimizer': 'rmsprop',
 'units1': 512,
 'units2': 128}

In [24]:
def create_model(x_train_temp, y_train_temp, x_valid_temp, y_valid_temp, params, epochs=500):
    """Build, compile and fit a feed-forward regression network.

    Architecture: one hidden block (Dense -> Activation -> Dropout), an
    optional second hidden block when ``params['num_layers'] == 'two_hidden'``,
    then a single output unit with linear activation. The model is compiled
    with MSE loss (also reported as a metric) and fitted with early stopping
    on ``val_loss``.

    Parameters
    ----------
    x_train_temp, y_train_temp
        Training features and targets. ``x_train_temp.shape[1]`` defines the
        input width of the first Dense layer.
    x_valid_temp, y_valid_temp
        Validation features and targets, passed to ``fit`` as
        ``validation_data`` and monitored by the early-stopping callback.
    params : dict
        Hyper-parameters. Keys read: 'units1', 'activation', 'dropout1',
        'num_layers', 'optimizer', 'early_stop_rounds', 'batch_size', and,
        when 'num_layers' is 'two_hidden', 'units2' and 'dropout2'.
    epochs : int, optional
        Upper bound on the number of training epochs. Defaults to 500, the
        value that used to be hard-coded.

    Returns
    -------
    The fitted ``Sequential`` model.

    NOTE(review): early stopping is configured without restoring the best
    weights, so the returned model presumably carries the weights of the last
    epoch that ran -- confirm against the installed Keras version.
    """
    model = Sequential()

    # First hidden block; the input width follows the number of feature columns.
    model.add(Dense(params['units1'], input_shape=(x_train_temp.shape[1],)))
    model.add(Activation(params['activation']))
    model.add(Dropout(params['dropout1']))

    # Optional second hidden block.
    if params['num_layers'] == 'two_hidden':
        model.add(Dense(params['units2']))
        model.add(Activation(params['activation']))
        model.add(Dropout(params['dropout2']))

    # Single-unit linear output for regression.
    model.add(Dense(1))
    model.add(Activation('linear'))

    model.compile(loss='mse', metrics=['mse'],
                  optimizer=params['optimizer'])

    early_stop = EarlyStopping(monitor='val_loss', patience=params['early_stop_rounds'])

    # No explicit History callback: fit() records and returns one on its own,
    # and the history object was never used here.
    model.fit(x_train_temp, y_train_temp,
              batch_size=params['batch_size'],
              epochs=epochs,
              callbacks=[early_stop],
              verbose=0,
              validation_data=(x_valid_temp, y_valid_temp))
    return model

In [19]:
import os

# NOTE: PYTHONHASHSEED is read when the interpreter starts, so assigning it
# here does not change hashing in this already-running kernel; it only
# propagates to child processes. Export it before launching Jupyter for the
# full effect.
os.environ['PYTHONHASHSEED'] = '0'

# Force TensorFlow to use a single thread; multiple threads are a potential
# source of non-reproducible results.
# For further details, see: https://stackoverflow.com/questions/42022950/which-seeds-have-to-be-set-where-to-realize-100-reproducibility-of-training-res
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)


def _reset_backend_state():
    """Drop the current Keras graph/session and reseed every RNG.

    Called before each model is trained so that every column subset starts
    from the same random state (results do not depend on loop position) and
    so that graphs from previously trained models are released instead of
    accumulating in one session.
    """
    K.clear_session()
    # NumPy-generated random numbers.
    np.random.seed(42)
    # Core Python random numbers.
    rn.seed(12345)
    # Graph-level seed for the (fresh) default TensorFlow graph.
    # For further details, see: https://www.tensorflow.org/api_docs/python/tf/set_random_seed
    tf.set_random_seed(1234)
    session = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    K.set_session(session)
    return session


# Maps selection name -> [validation RMSE, test RMSE].
resultant_dict = {}

for num, (key, cols) in enumerate(list_of_cols.items()):
    print(num, key, len(cols))
    sess = _reset_backend_state()
    x_train_temp = x_train[cols]
    y_train_temp = y_train
    x_valid_temp = x_val[cols]
    y_valid_temp = y_val
    x_test_temp = x_test[cols]
    y_test_temp = y_test
    result_model = create_model(x_train_temp, y_train_temp, x_valid_temp, y_valid_temp, param_dict)
    # evaluate() returns [loss, mse]; the loss is MSE, so its square root is the RMSE.
    val_rmse = result_model.evaluate(x_valid_temp, y_valid_temp, verbose=0)[0] ** 0.5
    test_rmse = result_model.evaluate(x_test_temp, y_test_temp, verbose=0)[0] ** 0.5
    resultant_dict[key] = [val_rmse, test_rmse]

0 drop_0_columns 371
1 drop_5_pct_columns 377
2 drop_10_pct_columns 357
3 drop_15_pct_columns 337
4 drop_20_pct_columns 317
5 drop_25_pct_columns 297
6 drop_30_pct_columns 278
7 drop_35_pct_columns 258
8 drop_40_pct_columns 238
9 drop_45_pct_columns 218
10 drop_50_pct_columns 198
11 drop_55_pct_columns 179
12 drop_60_pct_columns 159
13 drop_65_pct_columns 139
14 drop_70_pct_columns 119
15 drop_75_pct_columns 99
16 drop_80_pct_columns 80
17 drop_85_pct_columns 60
18 drop_90_pct_columns 40
19 drop_95_pct_columns 20


In [25]:
# Baseline: train on the full feature set and evaluate on the full-width
# validation/test splits. Evaluation must use x_val / x_test directly; the
# *_temp variables left over from the selection loop only hold the last
# column subset and do not match this model's input width.
result_model = create_model(x_train, y_train, x_val, y_val, param_dict)
val_rmse = result_model.evaluate(x_val, y_val, verbose=0)[0] ** 0.5
test_rmse = result_model.evaluate(x_test, y_test, verbose=0)[0] ** 0.5

InternalError: Blas GEMM launch failed : a.shape=(32, 396), b.shape=(396, 512), m=32, n=512, k=396
	 [[Node: dense_1/MatMul = MatMul[T=DT_FLOAT, _class=["loc:@training/RMSprop/gradients/dense_1/MatMul_grad/MatMul_1"], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_dense_1_input_0_0/_37, dense_1/kernel/read)]]
	 [[Node: loss/mul/_61 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_462_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

In [29]:
# Ratio of a hard-coded figure to the mean of the test targets.
# NOTE(review): 38330.33 looks like an RMSE pasted from an earlier run -- confirm
# its origin and replace the literal with the variable it came from.
38330.328318492604/y_test.mean()

0.2129588231677604

In [28]:
# Ratio of a hard-coded figure to the mean of the test targets.
# NOTE(review): 24127.92 looks like an RMSE pasted from an earlier run -- confirm
# its origin and replace the literal with the variable it came from.
24127.92324998959/y_test.mean()

0.13405192092552098

In [31]:
# Display {selection name: [validation RMSE, test RMSE]} collected by the training loop.
resultant_dict

{'drop_0_columns': [38180.88478590409, 32393.07764326738],
 'drop_10_pct_columns': [36459.43771446801, 31903.99248604833],
 'drop_15_pct_columns': [42274.43589656063, 36352.379856202286],
 'drop_20_pct_columns': [40299.35055179614, 34750.36465303494],
 'drop_25_pct_columns': [40247.72049128779, 35580.4434797131],
 'drop_30_pct_columns': [42975.193574738056, 37285.77155742258],
 'drop_35_pct_columns': [33855.85560395516, 30954.215814223746],
 'drop_40_pct_columns': [37790.79240092294, 31889.044670221487],
 'drop_45_pct_columns': [43307.91395831355, 36679.70514520964],
 'drop_50_pct_columns': [42635.660823403174, 36845.25526914961],
 'drop_55_pct_columns': [40493.28333235628, 35966.76706010722],
 'drop_5_pct_columns': [43523.11594828978, 37387.8444726261],
 'drop_60_pct_columns': [40399.35709969857, 35828.73778622339],
 'drop_65_pct_columns': [45669.438143671316, 38380.02587042423],
 'drop_70_pct_columns': [41786.93932764655, 36145.94987202858],
 'drop_75_pct_columns': [43501.05412912229

In [34]:
# One row per selection strategy, one column per entry of its result list.
result_lime_analysis_1 = pd.DataFrame(resultant_dict).transpose()

In [35]:
# Label the two result columns in the order they were stored: validation, then test.
rmse_column_names = ['valid_rmse', 'test_rmse']
result_lime_analysis_1.columns = rmse_column_names

In [40]:
# Persist the RMSE table next to the other analysis artefacts.
result_lime_analysis_1.to_csv(path_or_buf='lime_analysis_data/result_lime_analysis_1.csv')

# No clear pattern: RMSE neither consistently increases nor decreases as low-intensity (unimportant) features are dropped.