In [None]:
import tensorflow as tf
import tensorflow.keras as tfk
import tensorflow.keras.backend as K
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from sklearn.metrics import *


In [None]:
# Open both dataset archives; each one is read for an 'x' array and a 'y' array.
tmp1 = np.load('../input/semproject/abc.npz')
tmp2 = np.load('../input/semproject/xyz.npz')

# Stack the two archives along the sample axis (axis 0).
X = np.concatenate((tmp1['x'], tmp2['x']), axis=0)
Y = np.concatenate((tmp1['y'], tmp2['y']), axis=0)

# Size of X's second axis; later used as the model's input length.
max_len = X.shape[1]

In [None]:
# Keep only the classes that occur more than 20 times and remap their labels
# to consecutive class indices 0..K-1.
# NOTE: this cell overwrites X and Y, so it is not idempotent. Re-run the
# loading cell before re-running it; otherwise y_dict would be rebuilt from
# labels that have already been remapped.
a, b = np.unique(Y, return_counts=True)  # a: sorted distinct labels, b: their counts
y_dict = {v: i for i, v in enumerate(a[b > 20])}  # original label -> class index

# Vectorised membership test instead of a per-element Python `in` check.
keep_mask = np.isin(Y, a[b > 20])
X = X[keep_mask]
Y = np.array([y_dict[i] for i in Y[keep_mask]])

# A fixed seed makes the 70/30 split, and every number computed from it,
# reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, train_size=0.7, random_state=42
)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

In [None]:
# Choose the distribution strategy: a TPUStrategy when a TPU cluster resolver
# can be created, otherwise the result of tf.distribute.get_strategy().
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # The resolver could not be set up; continue without a TPU.
    tpu = None

if not tpu:
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the cluster and initialise the TPU system before the
    # strategy object is built on top of the resolver.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
def categorical_crossentropyx(trues, preds):
    """Categorical cross-entropy for targets given as class indices.

    Column 0 of ``trues`` is one-hot encoded to ``len(y_dict)`` classes and
    the encoded targets are scored against ``preds``.

    Args:
        trues: 2-D tensor whose first column holds the class indices.
        preds: predicted class probabilities, one row per sample.

    Returns:
        The result of ``tf.metrics.categorical_crossentropy`` on the one-hot
        targets and ``preds``.
    """
    num_classes = len(y_dict)
    class_ids = trues[:, 0]
    one_hot_targets = tf.one_hot(class_ids, depth=num_classes)
    return tf.metrics.categorical_crossentropy(one_hot_targets, preds)

In [None]:
# Build and compile the classifier inside the strategy scope so that its
# variables (and the metric object) are created under that strategy.
with strategy.scope():
    input1 = tfk.layers.Input(shape=(max_len, ), name='input')

    # input_dim=50265 is presumably the tokenizer's vocabulary size — TODO confirm.
    x = tfk.layers.Embedding(input_dim=50265, output_dim=512, name='embds')(input1)

    # Four parallel convolutions over the embeddings with kernel widths
    # 11, 7, 3 and 1. padding='same' with strides=1 keeps the sequence length,
    # so the branches can be concatenated with the embeddings on the last axis.
    x1 = tfk.layers.Conv1D(256, 11, strides=1, padding='same', activation='relu', name='conv1')(x)
    x2 = tfk.layers.Conv1D(256, 7, strides=1, padding='same', activation='relu', name='conv2')(x)
    x3 = tfk.layers.Conv1D(256, 3, strides=1, padding='same', activation='relu', name='conv3')(x)
    x4 = tfk.layers.Conv1D(256, 1, strides=1, padding='same', activation='relu', name='conv4')(x)

    x = tfk.layers.concatenate([x, x1, x2, x3, x4], axis=-1, name='concat')

    # Per-position projection to 128 units, then flatten for the classifier head.
    # A separate name is used so `model` only ever refers to the Keras Model.
    features = tfk.layers.TimeDistributed(tfk.layers.Dense(128, activation='relu'), name='td')(x)
    features = tfk.layers.Flatten()(features)

    output = tfk.layers.Dense(len(y_dict), activation='softmax', name='output')(features)

    model = tfk.models.Model(input1, output)

    # Targets are integer class indices (the loss one-hot encodes them), so the
    # accuracy metric must be the sparse variant. The 'acc' shortcut leaves that
    # choice to Keras' inference, which for a custom loss is not guaranteed to
    # pick sparse accuracy in every TF 2.x release. The metric is named 'acc'
    # so log and history keys stay the same.
    model.compile(
        loss=categorical_crossentropyx,
        metrics=[tfk.metrics.SparseCategoricalAccuracy(name='acc')],
        optimizer='adam',
    )

    model.summary()

In [None]:
# Train on the training split: 10 epochs, batches of 64, progress bar on.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    verbose=1,
)

In [None]:
# Persist the trained weights to 'm1.h5' in the working directory.
model.save_weights(filepath='m1.h5')

In [None]:
# Class-probability predictions for the held-out split, one row per sample.
preds = model.predict(
    x=x_test,
    batch_size=64,
    verbose=1,
)

In [None]:
# Score the held-out split in terms of the original labels.
# y_dict maps original label -> class index; y_dict_ is its inverse.
y_dict_ = {v: i for i, v in y_dict.items()}

# One shared mask for targets and predictions so the two arrays stay aligned.
# (The original iterated over an undefined name `Y_`, which raised NameError;
# the sequence being evaluated is y_test.)
known = np.array([label in y_dict_ for label in y_test], dtype=bool)
pred_idx = np.argmax(preds, axis=-1)  # most probable class index per sample

y_t = np.array([y_dict_[label] for label in y_test[known]])
y_p = np.array([y_dict_[idx] for idx in pred_idx[known]])
accuracy_score(y_t, y_p)
