In [1]:
import numpy as np
from tensorflow.compat import v1 as tf
import random
import os

import re
from extract_tweets import clean_up_text

import pickle

SEED = 0

np.random.seed(SEED)

def getSeed():
    return np.random.randint(1, 2**32) - 1

tf.set_random_seed(getSeed())
tf.random_normal_initializer(getSeed())
tf.random_uniform_initializer(getSeed())

random.seed(getSeed())

os.environ['PYTHONHASHSEED'] =  str(getSeed())
os.environ['CUDA_VISIBLE_DEVICES'] = str(getSeed())
os.environ['TF_CUDNN_USE_AUTOTUNE'] = str(getSeed())

session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
tf.keras.backend.set_session(sess)

In [2]:
import pandas as pd
from copy import deepcopy

with open('Downloads/training.1600000.processed.noemoticon.csv', 'rb') as fopen:
    file = fopen.readlines()

file1 = deepcopy(file)    
file1 = [str(line.decode("latin-1")).split('"') for line in file1]

df = pd.DataFrame(file1)[[1,5,9,11]]
df.columns = ['target', 'date', 'user', 'text']

In [3]:
df['target'].unique(),df['date'].unique(),df['user'].unique(),df['text'].unique()

(array(['0', '4'], dtype=object),
 array(['Mon Apr 06 22:19:45 PDT 2009', 'Mon Apr 06 22:19:49 PDT 2009',
        'Mon Apr 06 22:19:53 PDT 2009', ...,
        'Tue Jun 16 08:38:58 PDT 2009', 'Tue Jun 16 08:39:00 PDT 2009',
        'Tue Jun 16 08:40:50 PDT 2009'], dtype=object),
 array(['_TheSpecialOne_', 'scotthamilton', 'mattycus', ..., 'EvolveTom',
        'AmandaMarie1028', 'bpbabe'], dtype=object),
 array(["@switchfoot http://twitpic.com/2y1zl - Awww, that's a bummer.  You shoulda got David Carr of Third Day to do it. ;D",
        "is upset that he can't update his Facebook by texting it... and might cry as a result  School today also. Blah!",
        '@Kenichan I dived many times for the ball. Managed to save 50%  The rest go out of bounds',
        ..., 'Are you ready for your MoJo Makeover? Ask me for details ',
        'Happy 38th Birthday to my boo of alll time!!! Tupac Amaru Shakur ',
        'happy #charitytuesday @theNSPCC @SparksCharity @SpeakingUpH4H '],
       dtype=obje

In [4]:
TOTAL_SAMPLE_SPACE = np.ceil(1529881 / 0.9 * 0.03268228051)

inds = np.arange(len(file))
np.random.shuffle(inds)
file = np.array(file)[inds]

X_raw = np.empty(len(file), dtype='object')
y_raw = np.empty(len(file), dtype='int')
j = 0

for i,line in enumerate(file):
    
    ls = str(line.decode("latin-1")).split('"')
    
    text = ls[-2]
    
    if ('http://' not in text) and ('https://' not in text):
        
        text = text.lower()
        text = re.sub('&', ' <and> ', text)
        text = re.sub('amp;', ' ', text)
        text = re.sub("'n'", ' <and> ', text)
        text = re.sub(' and$', ' <and> ', text)
        text = re.sub('^and ', ' <and> ', text)
        text = re.sub(' and ', ' <and> ', text)
        text = re.sub('â€¦', ' <ellipsis> ', text)
        text = re.sub('\.\.\.', ' <ellipsis> ', text)
        text = re.sub('@[a-z0-9]+', ' <handle> ', text)
        text = re.sub('#[a-z0-9]+', ' <hashtag> ', text)
        text = re.sub('@', ' <at> ', text)
        text = re.sub('#', ' <pound> ', text)
        text = re.sub('[\!]', ' <exclam> ', text)
        text = re.sub('[\?]', ' <quest> ', text)
        text = re.sub('[^\w\s\'\"\<\>]', ' <punct> ', text)
        text = clean_up_text(text)
        
        X_raw[j] = text
        y_raw[j] = int(ls[1]) / 4
        j += 1
    if j == TOTAL_SAMPLE_SPACE: break
    
X_raw = X_raw[:j]
y_raw = y_raw[:j]

with open('X_training.processed.noemoticon.csv', 'wb') as fp:
    pickle.dump(X_raw, fp)
with open('y_training.processed.noemoticon.csv', 'wb') as fp:
    pickle.dump(y_raw, fp)

X_raw[:5], y_raw[:5], np.unique(y_raw), len(y_raw)

(array(['<handle> the weekend is nigh',
        "omg <exclam> i'm so keen on going to laser runner over the long weekend <exclam> it looks freaking awesome",
        'had a totally epic conversation with at the grotto just now',
        'summer i love adam brody', '<handle> i cant go either'],
       dtype=object),
 array([1, 1, 1, 1, 0]),
 array([0, 1]),
 55556)

In [5]:
from collections import Counter
import numpy as np
import pickle
with open('X_training.processed.noemoticon.csv', 'rb') as fp:
    X_raw = [tweet for tweet in pickle.load(fp)]
with open('y_training.processed.noemoticon.csv', 'rb') as fp:
    y_raw = pickle.load(fp)

batch_size = 64

def split_tr_val_te(X):
    percent_tr = 0.9
    te_size = batch_size

    tr_size = batch_size*int(np.ceil(percent_tr*len(X)/batch_size))
    val_size = len(X) - tr_size - te_size
    assert(val_size > 0)

    X_val = X[:val_size]
    X_tr = X[val_size:-te_size]
    X_te = X[-te_size:]
    
    return X_tr, X_val, X_te

X_raw_tr, X_raw_val, X_raw_te = split_tr_val_te(X_raw)
y_raw_tr, y_raw_val, y_raw_te = split_tr_val_te(y_raw)

In [6]:
def split_tweets_into_words(X):
    return [[word for word in tweet.split(' ')] for tweet in X]

X_raw_tr = split_tweets_into_words(X_raw_tr)
X_raw_val = split_tweets_into_words(X_raw_val)
X_raw_te = split_tweets_into_words(X_raw_te)

(X_raw_tr[0], X_raw_val[0], X_raw_te[0],
 np.unique(y_raw_tr), np.unique(y_raw_val), np.unique(y_raw_te))

(['ugh',
  '<exclam>',
  'i',
  'rolled',
  'out',
  'of',
  'bed',
  'this',
  'morning',
  '<and>',
  'kelp',
  'rolling',
  'til',
  'i',
  'hit',
  'the',
  'wall',
  '<exclam>',
  'my',
  'head',
  'still',
  'hurt',
  'then',
  'i',
  'got',
  'water',
  'to',
  'drank',
  '<and>',
  'missed',
  'my',
  'face'],
 ['<handle>', 'the', 'weekend', 'is', 'nigh'],
 ['anyways',
  'sorry',
  'twitters',
  '<punct>',
  'goodmorning',
  'the',
  'weathers',
  'nice',
  'here',
  'already',
  '<punct>',
  'i',
  'wanna',
  'go',
  'be',
  'a',
  'beach',
  'bum',
  '<punct>',
  'lol',
  '<punct>'],
 array([0, 1]),
 array([0, 1]),
 array([0, 1]))

In [7]:
# get word counts and vocabulary size
runonsentence = ' '.join([' '.join(tweet) for tweet in X_raw_tr]+
                         [' '.join(tweet) for tweet in X_raw_val]+
                         [' '.join(tweet) for tweet in X_raw_te]).split(' ')

i = 0
j = 0
INCREMENT = 1000000
words = set()
while len(runonsentence[j:]) > 0:
    words = words.union(runonsentence[j:j+INCREMENT])
    i += 1
    j = INCREMENT * i
words = list(words)
words[:5], len(words)

(['beckkyy', 'helpe', 'replace', 'mv', 'finishing'], 40926)

In [8]:
def get_counts_adv(X):
    d1 = dict(zip(words,[0]*len(words)))
    d2 = dict(zip(words,[0]*len(words)))
    d3 = dict(zip(words,[0]*len(words)))
    
    for x in X:
        for w in x:
            d1[w] += 1
            
    m1 = np.mean(list(d1.values()))
    sd1 = np.std(list(d1.values()))
    print(m1,sd1)
    
    for x in X:
        d = Counter()
        for w in x:
            d[w] = 1
        for w in d:
            d2[w] += d[w]
            
    m2 = np.mean(list(d2.values()))
    sd2 = np.std(list(d2.values()))
    print(m2,sd2)
    
    for w in set(w for x in X for w in x):
        d3[w] = 0.4*(d1[w] - m1) / sd1 + 0.6*(d2[w] - m2) / sd2

    return d3

wc_tr = get_counts_adv(X_raw_tr)
# wc_val = get_counts_adv(X_raw_val)
# wc_te = get_counts_adv(X_raw_te)

# for w in words:
#     if wc_tr[w]*wc_val[w]*wc_te[w]==0:
#         del wc_tr[w]
#         del wc_val[w]
#         del wc_te[w]

wordcounts = [len(x) for x in X_raw_tr]
max_words = max(wordcounts)
mean_words = int(np.mean(wordcounts))

def get_most_frequent_keys(d,n):
    a = np.array(list(d.values()),dtype='float')
    N = min(n,len(a))
    p = min(a[np.argpartition(a,-N)][-N:])
    inds = a >= p
    d = dict(np.array(list(d.items()))[inds])
    for k in d:
        d[k] = float(d[k])
    return d

vocab = get_most_frequent_keys(wc_tr, len(words)//10)
print(len(vocab))

def get_counts(i):
    d = Counter()
    for j in i:
        d[j] += 1
    return d

runonsentence = [w if w in vocab else '<UNK>' for w in runonsentence]
word_counts = get_counts(runonsentence)
    
#assert(min(list(word_counts.values())) > 1)

vocabulary_size = len(word_counts)

vocabulary_size, word_counts['<UNK>'], max_words, mean_words

19.316571372721498 468.057692628304
16.90030787274593 295.54582030197133
5111


(5112, 89561, 289, 15)

In [9]:
T=np.array([(0,len(x)) for x in X_raw_tr]+
           [(1,len(x)) for x in X_raw_val]+
           [(2,len(x)) for x in X_raw_te])
T=np.vstack([T.T,np.arange(len(T))]).T
tr=len(X_raw_tr)
val=len(X_raw_val)
T[:,2][tr:] -= tr
T[:,2][tr+val:] -= val

i = 0
for t in T:
    for j in range(t[1]):
        if t[0] == 0:
            X_raw_tr[t[2]][j] = runonsentence[i]
        elif t[0] == 1:
            X_raw_val[t[2]][j] = runonsentence[i]
        else:
            X_raw_te[t[2]][j] = runonsentence[i]
        i += 1

In [10]:
from tensorflow.keras.preprocessing import sequence
# from tensorflow.keras.utils import to_categorical

word2id = {word:i for i, word in enumerate(word_counts)}

def convert_to_id(X_raw):
    return [[word2id[word] for word in tweet] for tweet in X_raw]

n_classes = len(set(y_raw_tr))
assert(n_classes > 1)

def convert_to_one_hot(y):
#     return enc.transform(np.array(y_raw).reshape(-1,1)).toarray()
    return y if n_classes == 2 else to_categorical(y, num_classes=n_classes)

X_tr = sequence.pad_sequences(convert_to_id(X_raw_tr), maxlen=max_words)
X_val = sequence.pad_sequences(convert_to_id(X_raw_val), maxlen=max_words)
X_te = sequence.pad_sequences(convert_to_id(X_raw_te), maxlen=max_words)

y_tr = convert_to_one_hot(y_raw_tr)
y_val = convert_to_one_hot(y_raw_val)
y_te = convert_to_one_hot(y_raw_te)

X_tr[:5], y_tr[:5]

(array([[ 0,  0,  0, ..., 22, 14, 23],
        [ 0,  0,  0, ..., 32, 33, 34],
        [ 0,  0,  0, ..., 44, 45, 34],
        [ 0,  0,  0, ..., 54, 55, 56],
        [ 0,  0,  0, ..., 67, 73,  1]], dtype=int32),
 array([0, 1, 1, 1, 0]))

In [11]:
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced',
                                                  classes=[0,1],
                                                  y=y_tr)
# class_weights = class_weight.compute_class_weight('balanced',
#                                                   classes=np.arange(len(y_tr[0]))-1,
#                                                   y=_y_tr)

print(np.sum([y_tr==0])*class_weights[0]-np.sum(y_tr[y_tr==1])*class_weights[1])

class_weights = {i:w for i,w in zip(range(len(y_tr)),class_weights)}

class_weights

0.0


{0: 0.987841465340281, 1: 1.012461563359767}

In [12]:
embedding_size = 32

lstm_units = int(np.sqrt(vocabulary_size * embedding_size) / 4)

num_samples = len(X_tr)

dense_units = num_samples // 100

units_a = lstm_units
units_b_backward = lstm_units // 2
units_b_forward = lstm_units - units_b_backward
units_c = lstm_units
units_d = dense_units

dropout_dense = 0.2
dropout_lstm = 1 - np.sqrt(1 - dropout_dense)
dropout_bidir = 1 - np.sqrt(1 - dropout_lstm)

num_classes = len(class_weights)

units_a, units_b_backward, units_b_forward, units_c, units_d

(101, 50, 51, 101, 500)

In [13]:
from tensorflow.keras.initializers import RandomUniform, GlorotUniform, Orthogonal
from tensorflow.keras import Model
from tensorflow.keras.layers import (
    Input, Embedding, LSTM, Bidirectional, Dense,
    Activation, BatchNormalization, Dropout
)
from tensorflow.keras.regularizers import l1_l2

L2 = l1_l2(l1=0, l2=5e-11)
L1L2 = l1_l2(l1=1e-10, l2=5e-11)

def lstm(units, return_sequences=True, dropout=dropout_lstm, go_backwards=False,
         regularizer=L1L2, activation='tanh', recurrent_activation='sigmoid'):
    return LSTM(units=units, return_sequences=return_sequences,
                dropout=dropout, recurrent_dropout=dropout,
                kernel_regularizer=regularizer, bias_regularizer=regularizer,
                kernel_initializer=GlorotUniform(seed=getSeed()),
                recurrent_initializer=Orthogonal(seed=getSeed()),
                activation=activation, recurrent_activation=recurrent_activation,
                go_backwards=go_backwards)

inputs = Input(batch_input_shape=(None, None))

x = Embedding(vocabulary_size, embedding_size, input_length=max_words,
              embeddings_initializer=RandomUniform(seed=getSeed()),
              embeddings_regularizer=L2)(inputs)

x = lstm(units_a)(x)

forward_layer  = lstm(units_b_forward, dropout=dropout_bidir, regularizer=L2)
backward_layer = lstm(units_b_backward, dropout=dropout_bidir, go_backwards=True)
x = Bidirectional(layer=forward_layer, backward_layer=backward_layer)(x)

x = lstm(units_c, return_sequences=False)(x)

x = Dense(units=units_d, kernel_regularizer=L1L2, bias_regularizer=L1L2,
          kernel_initializer=GlorotUniform(seed=getSeed())
         )(x)

x = BatchNormalization(gamma_regularizer=L2, beta_regularizer=L2)(x)

x = Activation('relu')(x)

x = Dropout(dropout_dense,seed=getSeed())(x)

outputs = Dense(units=1 if num_classes == 2 else num_classes,
                activation='sigmoid' if num_classes == 2 else 'softmax',
                kernel_initializer=GlorotUniform(seed=getSeed()))(x)

model=Model(inputs=inputs, outputs=outputs)

model.summary()


Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None)]            0         
_________________________________________________________________
embedding (Embedding)        (None, None, 32)          163584    
_________________________________________________________________
lstm (LSTM)                  (None, None, 101)         54136     
_________________________________________________________________
bidirectional (Bidirectional (None, None, 101)         61612     
_________________________________________________________________
lstm_3 (LSTM)                (None, 101)               82012     
_________________________________________________________________
dense (Dense)                (None, 500)               51000     
_________________________________________________________________
batch_normalization (BatchNo (None, 500)              

In [14]:
loss = 'binary_crossentropy' if num_classes == 2 else 'sparse_categorical_crossentropy'
model.compile(loss=loss, optimizer='adam', metrics='accuracy')

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint
filepath = 'temp.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1,
                             save_best_only=True, mode='max')
num_epochs = 100
print(num_samples)
model.fit(X_tr,y_tr, validation_data=(X_val,y_val),
          batch_size=batch_size, epochs=num_epochs,
          class_weight=class_weights,
          callbacks=[checkpoint],
          shuffle=False)

50048
Epoch 1/100




 27/782 [>.............................] - ETA: 26:28 - loss: 0.6658 - accuracy: 0.5712

In [None]:
model.load_weights(filepath)

In [None]:
model_loss, model_acc = model.evaluate(X_val, y_val, batch_size=batch_size, verbose=1)

yhat_val = np.squeeze(model.predict(X_val, batch_size=batch_size, verbose=1))

y_true = y_val    # [[0, 1], [0, 0]]
y_pred = yhat_val # [[0.6, 0.4], [0.4, 0.6]]

def get_batch_matrix(y):
    y_matrix = []
    for i in range(int(np.ceil(len(y) / batch_size))):
        x = list(y[i*batch_size:(i+1)*batch_size])
        if len(x) < batch_size:
            x = x + ([np.nan] * (batch_size - len(x)))
        y_matrix.append(tf.cast(x, tf.float32))
    return tf.cast(y_matrix, tf.float32)
y_true_matrix = get_batch_matrix(y_true)
y_pred_matrix = get_batch_matrix(y_pred)

model_loss, model_acc, yhat_val

In [None]:
import tensorflow.keras.backend as K
from tensorflow.keras.losses import Reduction

calc_acc = 0
for i in range(len(y_true)):
    calc_acc += np.mean((np.array(y_pred[i]) > 0.5) * (np.array(y_true[i]) == 1) + \
                        (np.array(y_pred[i]) <= 0.5) * (np.array(y_true[i]) == 0))
calc_acc = calc_acc / len(y_true)

assert(calc_acc == K.mean(K.round(y_pred)==y_true))

calc_acc - model_acc

In [None]:
y_pred_stable = K.clip(tf.cast(y_pred, tf.float32), K.epsilon(), 1-K.epsilon())
logits = np.log(y_pred_stable / (1 - y_pred_stable))
kbce_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True,
                                          label_smoothing=K.epsilon(),
                                          reduction=Reduction.SUM_OVER_BATCH_SIZE,
                                          name='binary_crossentropy'
                                         )(y_true, logits)
kbce_logits = K.mean(kbce_logits)

logits[y_true] *= class_weights[1]
logits[~y_true] *= class_weights[0]
kbce_logits2 = tf.keras.losses.BinaryCrossentropy(from_logits=True,
                                          label_smoothing=K.epsilon(),
                                          reduction=Reduction.SUM_OVER_BATCH_SIZE,
                                          name='binary_crossentropy'
                                         )(y_true, logits)
kbce_logits2 = K.mean(kbce_logits2)

kbce = tf.keras.losses.BinaryCrossentropy(from_logits=False,
                                          label_smoothing=K.epsilon(),
                                          reduction=Reduction.SUM_OVER_BATCH_SIZE,
                                          name='binary_crossentropy'
                                         )(y_true, y_pred)
kbce = K.mean(kbce)
kbce_logits,kbce_logits2,kbce

In [None]:
def wbce(Y, P):
    logloss = []
    scewl_man = []
    scewl_tf = []
    for i in range(len(Y)):
        Y_batch = tf.cast(Y[i], tf.float32)
        P_batch = K.clip(tf.cast(P[i], tf.float32), K.epsilon(), 1-K.epsilon())
        Z_batch = K.log(P_batch / (1 - P_batch))
        logloss.append(-np.nanmean(Y_batch*K.log(P_batch)+(1-Y_batch)*K.log(1-P_batch)))
        scewl_man.append(np.nanmean((1-Y_batch)*Z_batch+K.log(1+K.exp(-K.abs(Z_batch)))+K.maximum(-Z_batch,0)))
        scewl_tf.append(np.nanmean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y_batch,
                                                                       logits=Z_batch)))
    kmean = lambda x: K.mean(tf.cast(x, tf.float32))
    print((kmean(logloss) - kmean(scewl_man)))
    print((kmean(logloss) - kmean(scewl_tf)))
    return kmean(logloss)

logloss = wbce(y_true_matrix, y_pred_matrix)
logloss - kbce, logloss

In [None]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score
confusion_matrix(y_true, np.round(y_pred)), \
precision_score(y_true, np.round(y_pred)), recall_score(y_true, np.round(y_pred))

In [None]:
np.mean(np.round(y_val))

In [None]:
def binary_crossentropy(y_true, y_pred, sample_weight=1):
    if len(y_pred.shape)==1:
        y_pred = np.atleast_2d(y_pred).T
    y_pred = [max(min(pred[0], 1-K.epsilon()), K.epsilon()) for pred in y_pred]
    y_true,y_pred,sample_weight = force_2d_shape([y_true,y_pred,sample_weight])

    logits = np.log(y_pred) - np.log(1-y_pred) # sigmoid inverse
    neg_abs_logits = -np.abs(logits)
    relu_logits    = (logits > 0)*logits

    loss_vec = relu_logits - logits*y_true + np.log(1 + np.exp(neg_abs_logits))
    return np.mean(sample_weight*loss_vec)

def force_2d_shape(arr_list):
    for arr_idx, arr in enumerate(arr_list):
        if len(np.array(arr).shape) != 2:
            arr_list[arr_idx] = np.atleast_2d(arr).T
    return arr_list

def l1l2_weight_loss(model):
    l1l2_loss = 0
    for layer in model.layers:
        if 'layer' in layer.__dict__ or 'cell' in layer.__dict__:
            l1l2_loss += l1l2_rnn_loss(layer)
            continue

        if 'kernel_regularizer' in layer.__dict__ or \
           'bias_regularizer'   in layer.__dict__:
            l1l2_lambda_k, l1l2_lambda_bias = [0,0], [0,0] # defaults
            if layer.__dict__['kernel_regularizer'] is not None:
                l1l2_lambda_k = list(layer.kernel_regularizer.__dict__.values())
            if layer.__dict__['bias_regularizer']   is not None:
                l1l2_lambda_bias = list(layer.bias_regularizer.__dict__.values())

            l1l2_loss += compute_l1l2_losses(layer, l1l2_lambda_k + l1l2_lambda_bias)

        if 'embeddings_regularizer' in layer.__dict__:
            l1l2_lambda_e = [0,0]
            if layer.__dict__['embeddings_regularizer'] is not None:
                l1l2_lambda_e = list(layer.embeddings_regularizer.__dict__.values())
                
            l1l2_loss += compute_l1l2_losses(layer, l1l2_lambda_e)

        if 'gamma_regularizer' in layer.__dict__ or \
           'beta_regularizer'  in layer.__dict__:
            l1l2_lambda_g, l1l2_lambda_beta = [0,0], [0,0]
            if layer.__dict__['gamma_regularizer'] is not None:
                l1l2_lambda_g = list(layer.gamma_regularizer.__dict__.values())
            if layer.__dict__['beta_regularizer']  is not None:
                l1l2_lambda_beta = list(layer.beta_regularizer.__dict__.values())

            l1l2_loss += compute_l1l2_losses(layer, l1l2_lambda_g + l1l2_lambda_beta)
            
    return l1l2_loss

def l1l2_rnn_loss(layer):
    l1l2_loss = 0
    if 'backward_layer' in layer.__dict__:
        forward_layer = layer.forward_layer
        backward_layer = layer.backward_layer
        bidirectional = True
    else:
        forward_layer = layer
        bidirectional = False
        
    l1l2_loss += _l1l2_rnn_loss(forward_layer, bidirectional)
    if bidirectional:
        l1l2_loss += _l1l2_rnn_loss(backward_layer, bidirectional)

    return l1l2_loss  

def _l1l2_rnn_loss(layer, bidirectional):
    ldict = layer.cell.__dict__

    if 'kernel_regularizer'    in ldict or \
       'recurrent_regularizer' in ldict or \
       'bias_regularizer'      in ldict:
        l1l2_lambda_k, l1l2_lambda_r, l1l2_lambda_bias = [0,0], [0,0], [0,0]
        if ldict['kernel_regularizer']    is not None:
            l1l2_lambda_k = list(layer.kernel_regularizer.__dict__.values())
        if ldict['recurrent_regularizer'] is not None:
            l1l2_lambda_r = list(layer.recurrent_regularizer.__dict__.values())
        if ldict['bias_regularizer']      is not None:
            l1l2_lambda_bias = list(layer.bias_regularizer.__dict__.values())

        all_lambda = l1l2_lambda_k + l1l2_lambda_r + l1l2_lambda_bias
        return compute_l1l2_losses(layer, all_lambda, bidirectional)
    else:
        return 0

def compute_l1l2_losses(layer, l1l2_lambda, bidirectional=False):
    print(layer)
    print(set([x if 'regularizer' in x else '' for x in layer.__dict__]))
    
    l1l2_loss = 0
    if any([(_lambda != 0) for _lambda in l1l2_lambda]):
        W = layer.get_weights()
        if len(W) == 0: return 0
        idx_incr = len(W)//2 # accounts for 'use_bias'

        for idx,_lambda in enumerate(l1l2_lambda):
            if _lambda != 0:
                print(idx,_lambda)
                _pow = 2**(idx % 2) # 1 if idx is even (l1), 2 if odd (l2)
                l1l2_loss += _lambda*np.sum(np.abs(W[idx//2])**_pow)
                print(l1l2_loss)
                
#                 if bidirectional:
#                     l1l2_loss += _lambda*np.sum(
#                                 np.abs(W[idx//2 + idx_incr])**_pow)
#                     print(l1l2_loss)
                    
    return l1l2_loss

In [None]:
sample_weights = np.array([class_weights[label] for label in y_val])
custom_loss  = binary_crossentropy(y_true, y_pred,
                                   sample_weight=sample_weights)
print(custom_loss/kbce)
custom_loss = kbce
reg_losses = l1l2_weight_loss(model)
print(reg_losses/custom_loss)
custom_loss += reg_losses
reg_losses/model_loss, custom_loss/model_loss-1