# **Setup**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!cp '/content/drive/MyDrive/Code Review /Train.csv' .
!cp '/content/drive/MyDrive/Code Review /Test.csv' .

# **Libraries**

In [None]:
import pandas as pd 
import numpy as np 

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit

import cloudpickle
from google_drive_downloader import GoogleDriveDownloader as gdd

import warnings
warnings.filterwarnings("ignore")

In [None]:
import tensorflow as tf
import keras
from keras.layers import Concatenate , Input ,concatenate
from keras.models import Sequential , Model
from keras.layers.recurrent import LSTM, GRU
from keras.layers.embeddings import Embedding
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.layers import Dense, Activation, Flatten, Convolution1D, Dropout
from keras.layers import GlobalMaxPooling1D, Conv1D, MaxPooling1D, Flatten, Bidirectional, SpatialDropout1D
from keras.preprocessing import sequence, text
from keras.callbacks import EarlyStopping
from keras.metrics import Accuracy

# **Load Data**

In [None]:
train = pd.read_csv('Train.csv') 
test = pd.read_csv('Test.csv') 

In [None]:
LABEL = 'Label'

* ## The vecotrizer was generated from The Notebook ***Distilation.ipynb***

In [16]:
file_id = '1--fYtP9-svx6-j22HQmAeG7EDxYL1ci7'

destination = '/content/vectorizer.txt'
gdd.download_file_from_google_drive(file_id, destination)

Downloading 1--fYtP9-svx6-j22HQmAeG7EDxYL1ci7 into /content/vectorizer.txt... Done.


# **Utils**

In [24]:
def CUSTOM_ANN_CNN(shape1) :
  # define two sets of inputs
  input_1 = tf.keras.layers.Input(shape = (shape1),name='20_Feats')

  input_2 = tf.keras.layers.Input(shape = (shape1, 1),name='20_Feats_Conv')
  
  # create Convolution Layer 
  head_conv = Convolution1D(filters=128, kernel_size=1, input_shape=(shape1, 1),name='Conv_Layer')(input_2)
  head_conv = Activation('relu',name='Conv_Activation')(head_conv)
  head_conv = Flatten()(head_conv)
  
  # create a simple 2 layers for input_1
  # Layer 1 -- input_1
  head_1 = tf.keras.layers.BatchNormalization(name='20_Feat_BatchNormalization_1')(input_1)
  head_1 = tf.keras.layers.Dense(256, activation = "relu",name='20_Feats_Layer_1')(head_1)
  head_1 = tf.keras.layers.Dropout(0.15,name='20_Feats_DropOut_1')(head_1)
  # Layer 2 -- input_1
  head_1 = tf.keras.layers.BatchNormalization(name='20_Feats_BatchNormalization_2')(head_1)
  head_1 = tf.keras.layers.Dense(128, activation = "relu",name='20_Feats_Output_Layer_2')(head_1)
  out_1 = tf.keras.layers.Dropout(0.4,name='20_Feats_Output_DropOut_2')(head_1)

  # combine the output vector of the two branches
  input_3 = tf.keras.layers.Concatenate(name='Concatenate_Outputs')([out_1,head_conv])
  
  # create a Fully conected Layers
  head_3 = tf.keras.layers.BatchNormalization(name='Outputs_Features_BatchNormalization')(input_3)
  head_3 = tf.keras.layers.Dense(160, "relu",name='FC_Layer_1')(head_3)
  head_3 = tf.keras.layers.Dropout(0.3,name='FC_DropOut_1')(head_3)
  head_3 = tf.keras.layers.Dense(80, "relu",name='FC_Layer_2')(head_3)
  head_3 =  tf.keras.layers.Dropout(0.4,name='FC_DropOut_2')(head_3)
  
  output = tf.keras.layers.Dense(20, activation = "softmax",name='FC_Output')(head_3)
  
  model = tf.keras.models.Model(inputs = [input_1,input_2], outputs = output)
  model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=[tf.keras.metrics.CategoricalAccuracy()])
  print("hhrhhr", model)
  return model 

In [18]:
# Function to seed everything
import random , os
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    tf.random.set_seed(seed)

# **Training**

In [25]:
# custom vectorizer from my brother

vectorizer = cloudpickle.load(open('/content/drive/MyDrive/Code Review /vectorizer_v2.pkl', 'rb'))
X_train = vectorizer.predict(train['Text'])
X_test = vectorizer.predict(test['Text'])

# reshape test data For Conv Layers Input
X_test_r = np.zeros((len(X_test), 20, 1))
X_test_r[:, :, 0] = X_test[:, :]

y_train = train[LABEL].copy()
n_labels = train[LABEL].unique().shape[0]

# we need to binarize the labels for the neural net
from sklearn.preprocessing import  LabelEncoder
LE = LabelEncoder()
ytrain_enc = pd.get_dummies(y_train).values
TARGETS = pd.get_dummies(y_train).columns

y_oof = np.zeros([X_train.shape[0], n_labels])
y_test = np.zeros([X_test.shape[0], n_labels])

i = 0
metrics = list()
apply_aug = False

[[ 1.32852865e-02  8.03018472e-02  4.10254681e-02 ...  6.11506047e-03
   1.50053538e-02  9.12577048e-03]
 [ 1.46054121e-03  3.67957733e-03  4.41602116e-02 ...  1.37401380e-03
   2.78805320e-02 -6.06420693e-04]
 [ 2.51955480e-03  3.33941469e-03  4.13555597e-02 ...  8.26857857e-03
   5.98691371e-03  3.35857628e-03]
 ...
 [ 2.09020959e-02  3.56145483e-02  5.73942266e-02 ...  2.31643513e-02
   2.41675301e-02  2.03038722e-02]
 [ 1.74409845e-02  7.54672089e-02  1.43266177e-02 ...  5.89139705e-03
   2.00834832e-02  4.13165273e-02]
 [ 1.04815187e-03  6.13718522e-03  7.53259437e-02 ...  2.46917771e-03
  -2.42929163e-03  1.14359006e-05]]


In [26]:
n_splits = 5
kf = StratifiedKFold(n_splits=n_splits, random_state=47, shuffle=True)


for tr_idx, val_idx in kf.split(X_train, y_train):

    X_tr, X_vl = X_train[tr_idx, :], X_train[val_idx, :]
    y_tr, y_vl = ytrain_enc[tr_idx], ytrain_enc[val_idx]
    y_train_, y_vld_ = y_train[tr_idx], y_train[val_idx]

    # reshape train data For Conv Layers Input
    X_train_r = np.zeros((len(X_tr), 20, 1))
    X_train_r[:, :, 0] = X_tr[:, :]

    # reshape validation data For Conv Layers Input
    X_valid_r = np.zeros((len(X_vl), 20, 1))
    X_valid_r[:, :, 0] = X_vl[:, :]
    
    # 1 CNN ANN
    seed_everything(seed=1)
    model_cnn_nn = CUSTOM_ANN_CNN(shape1=20)
    model_cnn_nn.fit([X_tr,X_train_r], y=y_tr, batch_size=8, epochs=8, verbose=0,
                     validation_data=([X_vl,X_valid_r], y_vl))
    y_pred_cnn_nn = model_cnn_nn.predict([X_vl,X_valid_r])
    test_pred_cnn_nn = model_cnn_nn.predict([X_test,X_test_r])
    
    
    # 2 - MLP
    model1 = MLPClassifier(250, random_state=47) 
    model1.fit(X_tr, y_train_)
    y_pred1    = model1.predict_proba(X_vl)
    test_pred1 = model1.predict_proba(X_test)
    
    y_pred = (y_pred1+y_pred_cnn_nn ) / 2.0
    y_oof[val_idx, :] = y_pred
    y_tr, y_vl = y_train.iloc[tr_idx], y_train.iloc[val_idx]
    metric = accuracy_score(y_vl, TARGETS[np.argmax(y_pred,axis=1)])
    print("fold #{} val_accuracy: {}".format(i, metric))
    
    i += 1
    test_pred = (test_pred1 + test_pred_cnn_nn ) / 2.0
    y_test += test_pred / n_splits
    metrics.append(metric)
    
metrics = np.array(metrics).mean()
print(f'Full accuracy {metrics}') 

The following Variables were used a Lambda layer's call (tf.compat.v1.nn.conv1d_1), but
are not present in its tracked objects:
  <tf.Variable 'Conv_Layer/kernel:0' shape=(1, 1, 128) dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
The following Variables were used a Lambda layer's call (tf.nn.bias_add_1), but
are not present in its tracked objects:
  <tf.Variable 'Conv_Layer/bias:0' shape=(128,) dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.


ValueError: ignored

In [None]:
# Prepare submission
y_sub = np.argmax(y_test, 1)
y_sub = pd.DataFrame({LABEL: y_sub})
class_ = TARGETS
y_sub[LABEL] = y_sub[LABEL].apply(lambda x: class_[x])
sub = test[['ID']]
sub['LABEL'] = y_sub[LABEL]
sub.head()

In [None]:
sub.to_csv(f'Final_submission.csv', index=False)