In [None]:
import tensorflow as tf
import tensorflow.keras as tfk
import tensorflow.keras.backend as K
import numpy as np
from tqdm.notebook import tqdm
from sklearn.metrics import *


In [None]:
# Load the training arrays. np.load on an .npz returns a lazily-read archive
# that holds the file open, so read both arrays inside a `with` block to make
# sure the handle is closed afterwards.
with np.load('../input/bugrep/bug_repair_train.npz') as train_npz:
    X = train_npz['X']
    Y = train_npz['Y']

# The second axis of X is used as the model's input length.
max_len = X.shape[1]

In [None]:
# Choose a distribution strategy: a TPU strategy when a TPU cluster can be
# resolved, otherwise whatever strategy TensorFlow currently has in effect.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # A ValueError from the resolver is treated as "no TPU available".
    tpu = None

if not tpu:
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the cluster and initialise the TPU system before building
    # the strategy object.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
def categorical_crossentropyx(trues, preds):
    """Cross-entropy between integer class ids and predicted distributions.

    Args:
        trues: Tensor of class ids, one per position. It is cast to int32
            here, so ids delivered with a floating dtype are accepted too.
        preds: Tensor of class probabilities (not logits) whose last axis is
            the class axis.

    Returns:
        Tensor of per-position losses, one value per entry of ``trues``.
    """
    # The sparse variant consumes the ids directly. This avoids building a
    # one-hot tensor as large as ``preds`` on every step and removes the
    # hard-coded class count: the number of classes is taken from ``preds``.
    # NOTE(review): ids outside [0, num_classes) used to one-hot to an
    # all-zero row (loss 0); the sparse op does not tolerate them. Confirm
    # the labels are always in range.
    labels = tf.cast(trues, tf.int32)
    return tf.keras.losses.sparse_categorical_crossentropy(labels, preds)

In [None]:
# Build and compile the network inside the strategy scope so its variables
# are created under the selected distribution strategy.
with strategy.scope():
    vocab_size = 50265   # used for both the embedding input and the softmax output
    n_filters = 512      # embedding width and per-branch conv filter count

    input1 = tfk.layers.Input(shape=(max_len, ), name='input')
    x = tfk.layers.Embedding(input_dim=vocab_size, output_dim=n_filters, name='embds')(input1)

    # Parallel same-padded convolutions over the embeddings, one per kernel
    # width; the raw embeddings are kept as an extra branch.
    branches = [x]
    for idx, kernel_width in enumerate((11, 7, 3, 1), start=1):
        conv = tfk.layers.Conv1D(
            n_filters,
            kernel_width,
            strides=1,
            padding='same',
            activation='relu',
            name='conv{}'.format(idx),
        )
        branches.append(conv(x))

    features = tfk.layers.concatenate(branches, axis=-1, name='concat')

    # Per-position dense layer followed by a per-position softmax over the
    # vocabulary.
    hidden = tfk.layers.TimeDistributed(
        tfk.layers.Dense(2056, activation='relu'),
        name='td',
    )(features)
    output = tfk.layers.TimeDistributed(
        tfk.layers.Dense(vocab_size, activation='softmax'),
        name='output',
    )(hidden)

    model = tfk.models.Model(input1, output)
    model.compile(
        optimizer='adam',
        loss=categorical_crossentropyx,
        metrics=['acc'],
    )

    model.summary()

In [None]:
# Train on the full training arrays for 10 epochs with a batch size of 8.
model.fit(
    X,
    Y,
    batch_size=8,
    epochs=10,
    verbose=1,
)

In [None]:
# Write the trained weights to 'm1.h5' (path is relative to the working
# directory).
model.save_weights('m1.h5')
# To reuse previously saved weights instead of retraining, uncomment:
# model.load_weights('m1.h5')

In [None]:
# Load the held-out arrays. As with any .npz archive, np.load keeps the file
# open until it is closed, so read the arrays inside a `with` block.
with np.load('../input/bugrep/bug_repair_test.npz') as test_npz:
    X_ = test_npz['X']
    Y_ = test_npz['Y']


In [None]:
# Predict the test set in small chunks and keep only the argmax ids, so the
# full probability tensor for the whole test set never has to be held at once.
chunk_size = 4

# Stepping with range(0, len, chunk_size) visits every row exactly once and
# never produces an empty slice. The previous index arithmetic issued a final
# predict() call on an empty array whenever len(X_) was a multiple of 4.
pred_chunks = []
for start in tqdm(range(0, len(X_), chunk_size)):
    chunk_probs = model.predict(X_[start:start + chunk_size], batch_size=chunk_size)
    pred_chunks.append(np.argmax(chunk_probs, axis=-1))

# Concatenate once at the end instead of np.append inside the loop, which
# re-copies the accumulated array on every iteration.
if pred_chunks:
    preds = np.concatenate(pred_chunks, axis=0)
else:
    # No test rows: keep a well-formed empty result with the per-row length.
    preds = np.empty((0,) + tuple(X_.shape[1:]), dtype=np.int64)


In [None]:
# Position-level accuracy over the test set, skipping every position whose
# target id is 0, 1 or 2.
# NOTE(review): ids 0/1/2 are presumably special tokens (start/pad/end) —
# confirm against the tokenizer that produced the data.
targets_flat = Y_.flatten()
preds_flat = np.array(preds).flatten()

# Build the mask once from the unfiltered targets and apply it to both arrays.
keep = ~np.isin(targets_flat, (0, 1, 2))
y_p = preds_flat[keep]
y_t = targets_flat[keep]

print(accuracy_score(y_t, y_p))
