In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets,transforms
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from functools import partial
import tensorflow as tf

In [None]:
import tensorflow as tf
from tensorflow.python.ops import nn
from tensorflow.python.keras import activations, regularizers, initializers, constraints, engine
from tensorflow.python.keras.utils import conv_utils
from tensorflow.python.keras.layers import Layer, deserialize, Conv1D
from tensorflow.python.keras import backend as K
from tensorflow.python.ops import array_ops


__all__ = ['KernelConv2D']


class KernelConv2D(Layer):
    """2D convolution layer (e.g. spatial convolution over images).
    NOTE(review): despite the name and the 2D wording below, `__init__` sets
    `self.rank = 1`, so the layer declares 3D inputs (`ndim = rank + 2`) and
    `call` pads a single length axis before handing the input and the
    flattened kernel to `kernel_function`. The 2D / 4D statements in this
    docstring still need to be reconciled with that implementation.
    This layer creates a convolution kernel that is convolved
    with the layer input to produce a tensor of
    outputs. If `use_bias` is True,
    a bias vector is created and added to the outputs. Finally, if
    `activation` is not `None`, it is applied to the outputs as well.
    When using this layer as the first layer in a model,
    provide the keyword argument `input_shape`
    (tuple of integers, does not include the sample axis),
    e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures
    in `data_format="channels_last"`.
    # Arguments
        filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution).
        kernel_size: An integer or tuple/list of 2 integers, specifying the
            height and width of the 2D convolution window.
            Can be a single integer to specify the same value for
            all spatial dimensions.
        kernel_function: A layer takes the columnized feature and the kernel as its inputs.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution
            along the height and width.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.
        padding: one of `"valid"` or `"same"` (case-insensitive).
            Note that `"same"` is slightly inconsistent across backends with
            `strides` != 1, as described
            [here](https://github.com/keras-team/keras/pull/9473#issuecomment-372166860)
        data_format: A string,
            one of `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs.
            `"channels_last"` corresponds to inputs with shape
            `(batch, height, width, channels)` while `"channels_first"`
            corresponds to inputs with shape
            `(batch, channels, height, width)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "channels_last".
        dilation_rate: an integer or tuple/list of 2 integers, specifying
            the dilation rate to use for dilated convolution.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Currently, specifying any `dilation_rate` value != 1 is
            incompatible with specifying any stride value != 1.
        activation: Activation function to use
            (see [activations](../activations.md)).
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix.
        bias_initializer: Initializer for the bias vector.
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix.
        bias_regularizer: Regularizer function applied to the bias vector.
        activity_regularizer: Regularizer function applied to
            the output of the layer (its "activation").
        kernel_constraint: Constraint function applied to the kernel matrix.
        bias_constraint: Constraint function applied to the bias vector.
    # Input shape
        4D tensor with shape:
        `(batch, channels, rows, cols)`
        if `data_format` is `"channels_first"`
        or 4D tensor with shape:
        `(batch, rows, cols, channels)`
        if `data_format` is `"channels_last"`.
    # Output shape
        4D tensor with shape:
        `(batch, filters, new_rows, new_cols)`
        if `data_format` is `"channels_first"`
        or 4D tensor with shape:
        `(batch, new_rows, new_cols, filters)`
        if `data_format` is `"channels_last"`.
        `rows` and `cols` values might have changed due to padding.
    """
    def __init__(self, filters,
                 kernel_size,
                 kernel_function,
                 strides=1,
                 padding='valid',
                 data_format=None,
                 dilation_rate=1,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(KernelConv2D, self).__init__(**kwargs)
        # Number of spatial axes; kernel_size / strides / dilation_rate are
        # all normalised to tuples of this length below.
        self.rank = 1
        self.filters = filters
        # Layer that is later called with [inputs, flattened kernel] in `call`.
        self.kernel_function = kernel_function
        self.kernel_size = conv_utils.normalize_tuple(kernel_size, self.rank, 'kernel_size')
        self.strides = conv_utils.normalize_tuple(strides, self.rank, 'strides')
        self.padding = conv_utils.normalize_padding(padding)
        self.data_format = conv_utils.normalize_data_format(data_format)
        self.dilation_rate = conv_utils.normalize_tuple(dilation_rate, self.rank, 'dilation_rate')
        # String / config identifiers are resolved to Keras objects here.
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        # With rank 1 this declares 3D inputs.
        self.input_spec = engine.base_layer.InputSpec(ndim=self.rank + 2)

        # Weights are created in `build`.
        self.kernel = self.bias = None

    def build(self, input_shape):
        """Create the kernel (and optional bias) weights and build `kernel_function`."""
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = -1
        # NOTE(review): `.dims[...].value` requires `input_shape` to expose a
        # TensorShape-like `dims` attribute; a plain tuple would not work here.
        if input_shape.dims[channel_axis].value is None:
            raise ValueError('The channel dimension of the inputs should be defined. Found `None`.')
        input_dim = int(input_shape[channel_axis])
        # kernel_size is a 1-tuple, so the kernel is (kernel_size, input_dim, filters).
        kernel_shape = self.kernel_size + (input_dim, self.filters)

        self.kernel = self.add_weight(
            shape=kernel_shape,
            initializer=self.kernel_initializer,
            name='kernel',
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                shape=(self.filters,),
                initializer=self.bias_initializer,
                name='bias',
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
            )
        else:
            self.bias = None

        # Tighten the input spec now that the channel count is known.
        self.input_spec = engine.base_layer.InputSpec(
            ndim=self.rank + 2,
            axes={channel_axis: input_dim},
        )
        # The kernel function is built explicitly with the input and kernel shapes.
        self.kernel_function.build([input_shape, kernel_shape])
        super(KernelConv2D, self).build(input_shape)

    def _compute_causal_padding(self):
      """Return a 3-axis padding spec that left-pads only the length axis."""
      # Pad amount: dilation * (kernel_size - 1), i.e. 0 when kernel_size == 1.
      left_pad = self.dilation_rate[0] * (self.kernel_size[0] - 1)
      if self.data_format == 'channels_last':
        causal_padding = [[0, 0], [left_pad, 0], [0, 0]]
      else:
        causal_padding = [[0, 0], [0, 0], [left_pad, 0]]
      return causal_padding

    def call(self, inputs):
        """Pad the input, apply `kernel_function`, then the optional bias and activation."""
        # Data-format string in the form passed to `nn.bias_add` below.
        data_format = conv_utils.convert_data_format(self.data_format, self.rank + 2)
        # NOTE(review): private backend helper named for conv2d inputs; the
        # returned `tf_data_format` is not used anywhere in this method.
        inputs, tf_data_format = K._preprocess_conv2d_input(inputs, self.data_format)

        # Disabled patch extraction, kept as a bare string literal (it is
        # evaluated and discarded, so it has no effect).
        '''inputs = tf.compat.v1.extract_image_patches(
            inputs,
            ksizes=(1,) + K.int_shape(self.kernel)[:2] + (1,),
            strides=(1,) + self.strides + (1,) + (1,) ,
            rates=(1,) + self.dilation_rate + (1,) + (1,),
            padding=self.padding.upper(),
        )'''
        # Causal left-padding is always applied; `self.padding` and
        # `self.strides` are not consulted in this method.
        inputs = array_ops.pad(inputs, self._compute_causal_padding())

        # Flatten every kernel axis except the last into one: (-1, filters).
        kernel = K.reshape(self.kernel, (-1, self.filters))
        outputs = self.kernel_function([inputs, kernel])

        if self.data_format == 'channels_first':
            # NOTE(review): (0, 1, 2) is the identity permutation, so this
            # leaves the axis order unchanged — confirm whether (0, 2, 1) was intended.
            outputs = K.permute_dimensions(outputs, (0, 1, 2))

        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias, data_format=data_format)

        if self.activation is not None:
            outputs = self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        """Compute the output shape from `padding`, `strides` and `dilation_rate`.

        NOTE(review): `call` pads causally and never applies `self.strides`
        or `self.padding`, so the lengths computed here may not match the
        tensor `call` actually returns — confirm. Falls through (returning
        None) if `data_format` is neither of the two handled values.
        """
        if self.data_format == 'channels_last':
            # Spatial axes sit between the batch axis and the channel axis.
            space = input_shape[1:-1]
            new_space = []
            for i in range(len(space)):
                new_dim = conv_utils.conv_output_length(
                    space[i],
                    self.kernel_size[i],
                    padding=self.padding,
                    stride=self.strides[i],
                    dilation=self.dilation_rate[i])
                new_space.append(new_dim)
            return (input_shape[0],) + tuple(new_space) + (self.filters,)
        if self.data_format == 'channels_first':
            # Spatial axes follow the batch and channel axes.
            space = input_shape[2:]
            new_space = []
            for i in range(len(space)):
                new_dim = conv_utils.conv_output_length(
                    space[i],
                    self.kernel_size[i],
                    padding=self.padding,
                    stride=self.strides[i],
                    dilation=self.dilation_rate[i])
                new_space.append(new_dim)
            return (input_shape[0], self.filters) + tuple(new_space)

    def get_config(self):
        """Return the layer config, nesting the kernel function as class name + config."""
        config = {
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            # Stored in the {'class_name', 'config'} form that `from_config`
            # passes to `deserialize`.
            'kernel_function': {
                'class_name': self.kernel_function.__class__.__name__,
                'config': self.kernel_function.get_config(),
            },
            'strides': self.strides,
            'padding': self.padding,
            'data_format': self.data_format,
            'dilation_rate': self.dilation_rate,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
            'activity_regularizer': regularizers.serialize(self.activity_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint)
        }
        base_config = super(KernelConv2D, self).get_config()
        # Keys from `config` override same-named keys from the base config.
        return dict(list(base_config.items()) + list(config.items()))

    @classmethod
    def from_config(cls, config, custom_objects=None):
        """Rebuild the layer, deserialising the nested kernel function first.

        Note that `config` is modified in place: its 'kernel_function' entry
        is popped and replaced by the deserialised layer.
        """
        config['kernel_function'] = deserialize(
            config.pop('kernel_function'),
            custom_objects=custom_objects,
        )
        return cls(**config)

In [None]:
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Flatten, Dense,AveragePooling1D,GRU

In [None]:
class GaussianKernel(Layer):
    """Kernel-function layer with a Gaussian response.

    Called with a two-element list ``[features, kernel]`` and returns
    ``exp(-gamma * sum((features[..., None] - kernel) ** 2, axis=-2))``.

    # Arguments
        gamma: Multiplier applied to the summed squared differences.
    """

    def __init__(self, gamma, **kwargs):
        super(GaussianKernel, self).__init__(**kwargs)
        self.gamma = gamma

    def call(self, inputs, **kwargs):
        features = inputs[0]
        kernel = inputs[1]
        # A trailing axis is appended to the features before subtracting the kernel.
        expanded = K.expand_dims(features, axis=-1)
        squared_distance = K.sum(K.square(expanded - kernel), axis=-2)
        return K.exp(-self.gamma * squared_distance)

    def get_config(self):
        # Start from the base layer config and overlay this layer's own entry.
        merged = dict(super(GaussianKernel, self).get_config())
        merged.update({'gamma': self.gamma})
        return merged
class PolynomialKernel(Layer):
    """Kernel-function layer computing ``(dot(inputs[0], inputs[1]) + c) ** p``.

    # Arguments
        p: Exponent applied to the shifted dot product.
        c: Constant offset used when `trainable_c` is False. It is always
            the value reported by `get_config`.
        trainable_c: If True, `build` replaces `self.c` with a scalar weight
            created from `initializer`; the numeric `c` argument is then not
            used as the starting value.
        initializer: Initializer for the trainable offset.
        regularizer: Regularizer for the trainable offset.
        constraint: Constraint for the trainable offset.
    """

    def __init__(self, p,
                 c=0.0,
                 trainable_c=False,
                 initializer='zeros',
                 regularizer=None,
                 constraint=None,
                 **kwargs):
        super(PolynomialKernel, self).__init__(**kwargs)
        self.p = p
        self.c = c
        # Keep the constructor value: `self.c` may be swapped for a weight in `build`.
        self.oc = c
        self.trainable_c = trainable_c
        self.initializer = initializers.get(initializer)
        self.regularizer = regularizers.get(regularizer)
        self.constraint = constraints.get(constraint)

    def build(self, input_shape):
        if self.trainable_c:
            # Scalar (shape ()) weight that takes the place of the constant offset.
            self.c = self.add_weight(
                shape=(),
                initializer=self.initializer,
                regularizer=self.regularizer,
                constraint=self.constraint,
                name='{}_c'.format(self.name),
            )
        super(PolynomialKernel, self).build(input_shape)

    def call(self, inputs, **kwargs):
        return (K.dot(inputs[0], inputs[1]) + self.c) ** self.p

    def get_config(self):
        config = {
            'p': self.p,
            # Report the original constructor value, not the weight tensor.
            'c': self.oc,
            'trainable_c': self.trainable_c,
            'initializer': initializers.serialize(self.initializer),
            'regularizer': regularizers.serialize(self.regularizer),
            # Fixed: the constraint was serialised through the `initializers`
            # module; use the matching `constraints` serializer.
            'constraint': constraints.serialize(self.constraint),
        }
        base_config = super(PolynomialKernel, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


In [None]:
def squash(x, axis=-1):
    """Rescale ``x`` by ``sqrt(n) / (0.5 + n)``.

    ``n`` is the sum of squares of ``x`` along ``axis`` (dimension kept)
    plus ``K.epsilon()``.
    """
    squared_norm = K.sum(K.square(x), axis, keepdims=True)
    squared_norm = squared_norm + K.epsilon()
    return (K.sqrt(squared_norm) / (0.5 + squared_norm)) * x


# Define our own softmax function instead of using K.softmax,
# because K.softmax cannot take an axis argument.
def softmax(x, axis=-1):
    """Softmax of ``x`` along ``axis``, computed on values shifted by the per-axis maximum."""
    shifted = x - K.max(x, axis=axis, keepdims=True)
    exponentials = K.exp(shifted)
    normaliser = K.sum(exponentials, axis=axis, keepdims=True)
    return exponentials / normaliser


# define the margin loss like hinge loss
def margin_loss(y_true, y_pred):
    """Margin loss summed over the last axis.

    Entries where ``y_true`` is 1 are penalised by ``relu(1 - margin - y_pred) ** 2``;
    the remaining entries by ``lamb * relu(y_pred - margin) ** 2``.
    """
    lamb = 0.5
    margin = 0.1
    positive_term = y_true * K.square(K.relu(1 - margin - y_pred))
    negative_term = lamb * (1 - y_true) * K.square(K.relu(y_pred - margin))
    return K.sum(positive_term + negative_term, axis=-1)


class Capsule(Layer):
    """A Capsule implementation in pure Keras.
    There are two versions of Capsule.
    One is like a dense layer (for fixed-shape input),
    and the other is like a time-distributed dense layer (for variable-length input).

    The input shape of Capsule must be (batch_size,
                                        input_num_capsule,
                                        input_dim_capsule
                                       )
    and the output shape is (batch_size,
                             num_capsule,
                             dim_capsule
                            )

    Capsule implementation is from https://github.com/bojone/Capsule/
    Capsule Paper: https://arxiv.org/abs/1710.09829
    """

    def __init__(self,
                 num_capsule,
                 dim_capsule,
                 routings=3,
                 share_weights=True,
                 activation='squash',
                 **kwargs):
        super(Capsule, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.share_weights = share_weights
        # 'squash' selects the module-level `squash` function; any other
        # value is resolved through the Keras activations registry.
        if activation == 'squash':
            self.activation = squash
        else:
            self.activation = activations.get(activation)

    def build(self, input_shape):
        # NOTE(review): unlike the other layers in this notebook, this method
        # does not call `super().build(input_shape)` — confirm that is intended.
        input_dim_capsule = input_shape[-1]
        if self.share_weights:
            # One kernel shared by every input capsule (leading axis of size 1).
            self.kernel = self.add_weight(
                name='capsule_kernel',
                shape=(1, input_dim_capsule,
                       self.num_capsule * self.dim_capsule),
                initializer='glorot_uniform',
                trainable=True)
        else:
            # One kernel slice per input capsule; needs a known input_num_capsule.
            input_num_capsule = input_shape[-2]
            self.kernel = self.add_weight(
                name='capsule_kernel',
                shape=(input_num_capsule, input_dim_capsule,
                       self.num_capsule * self.dim_capsule),
                initializer='glorot_uniform',
                trainable=True)

    def call(self, inputs):
        """Following the routing algorithm from Hinton's paper,
        but replace b = b + <u,v> with b = <u,v>.

        This change can improve the feature representation of Capsule.

        However, you can replace
            b = K.batch_dot(outputs, hat_inputs, [2, 3])
        with
            b += K.batch_dot(outputs, hat_inputs, [2, 3])
        to realize a standard routing.
        """

        # Project every input capsule to num_capsule * dim_capsule features.
        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        # Split the projected features per output capsule, then move the
        # output-capsule axis ahead of the input-capsule axis:
        # (batch, num_capsule, input_num_capsule, dim_capsule).
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        # Routing logits start at zero, one per (output capsule, input capsule) pair.
        b = K.zeros_like(hat_inputs[:, :, :, 0])
        for i in range(self.routings):
            # Normalise the logits across axis 1 (the output-capsule axis).
            c = softmax(b, 1)
            o = self.activation(K.batch_dot(c, hat_inputs, [2, 2]))
            # The logits are refreshed on every iteration except the last.
            if i < self.routings - 1:
                b = K.batch_dot(o, hat_inputs, [2, 3])
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)

        return o

    def compute_output_shape(self, input_shape):
        # NOTE(review): the model summary recorded later in this notebook lists
        # a 4D output for this layer, which differs from the 3D shape declared
        # here — confirm against the backend's `batch_dot` behaviour.
        return (None, self.num_capsule, self.dim_capsule)

In [None]:
pip install transformers



In [None]:
import numpy as np
import pandas as pd
import torch
import transformers  as ppb 
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

In [None]:
model_class, tokenizer_class, pretrained_weights = (ppb.DistilBertModel, ppb.DistilBertTokenizer, 'distilbert-base-uncased')

# DistilBERT is used here because BERT requires more RAM than is available in Colab.
# To use BERT instead, uncomment the next line and comment out the assignment above.
#model_class, tokenizer_class, pretrained_weights = (ppb.BertModel, ppb.BertTokenizer, 'bert-base-uncased')

# Load the pretrained tokenizer and model for the selected weights.
tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
model = model_class.from_pretrained(pretrained_weights)
# Uncomment to run the model on the GPU.
#model.cuda()

In [None]:
#Importing the dataset
import pandas as pd
 
# NOTE(review): absolute path under /content; the CSV must exist at exactly
# this location for the cell to run.
dataset=pd.read_csv('/content/1000_tweets_2_user_new_non-bot.csv')
#dataset.iloc[:,0:1].fillna('other', inplace=True)
# Second column as a NumPy array. `y` is not referenced by any later cell
# visible in this notebook.
y=dataset.iloc[:,1].values

In [None]:
dataset.shape

(2000, 2)

In [None]:
# Take the first 980 rows of each consecutive 1000-row block (rows 0-979 and
# 1000-1979) as training pairs: column 0 goes to X_train, column 1 to y_train.
X_train, y_train = [], []
for block_start in (0, 1000):
    for row in range(block_start, block_start + 980):
        X_train.append(dataset.iloc[row, 0])
        y_train.append(dataset.iloc[row, 1])

In [None]:
# Hold out the last 20 rows of each consecutive 1000-row block (rows 980-999
# and 1980-1999) as test pairs: column 0 goes to X_test, column 1 to y_test.
X_test, y_test = [], []
for block_start in (980, 1980):
    for row in range(block_start, block_start + 20):
        X_test.append(dataset.iloc[row, 0])
        y_test.append(dataset.iloc[row, 1])

In [None]:
# Round-trip each Python list through a single-column DataFrame to obtain a
# 2-D NumPy array, so features and labels can be concatenated column-wise.
X_train = pd.DataFrame(X_train).values
X_test = pd.DataFrame(X_test).values
y_train = pd.DataFrame(y_train).values
y_test = pd.DataFrame(y_test).values

In [None]:
train=np.concatenate((X_train,y_train),axis=1)
test=np.concatenate((X_test,y_test),axis=1)

In [None]:
# Seed NumPy's global RNG so the in-place row shuffles (and everything that
# depends on row order afterwards) are reproducible on Restart & Run All.
np.random.seed(0)
np.random.shuffle(train)
np.random.shuffle(test)

In [None]:
train=pd.DataFrame(train)
test=pd.DataFrame(test)

In [None]:
X_test.shape

(40, 1)

In [None]:
y_test

array([[71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [71166537],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064],
       [18123064]])

In [None]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

In [None]:
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer,PorterStemmer
from nltk.corpus import stopwords
import re
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer() 

def preprocess(sentence):
  """Normalise one piece of text and return the surviving words joined by spaces.

  Steps: cast to ``str`` and lower-case, drop the literal ``{html}`` marker,
  apply the tag pattern, strip ``http...`` URLs and digit runs, tokenise on
  ``\\w+``, then keep tokens longer than two characters that are not English
  stop words.

  The original also stemmed and lemmatised the tokens but discarded the
  result; that dead work is removed and the returned text is unchanged.
  """
  sentence = str(sentence).lower()
  sentence = sentence.replace('{html}', "")
  # NOTE(review): this pattern only matches a tag immediately followed by the
  # literal characters "#@"; '<.*?>' may have been intended — confirm before
  # changing, since that would alter the cleaned text.
  cleanr = re.compile('<.*?>#@')
  cleantext = re.sub(cleanr, '', sentence)
  rem_url = re.sub(r'http\S+', '', cleantext)
  rem_num = re.sub('[0-9]+', '', rem_url)
  tokens = RegexpTokenizer(r'\w+').tokenize(rem_num)
  # Load the stop-word list once per call (the original reloaded it for every
  # token) and use a set for constant-time membership tests.
  stop_words = set(stopwords.words('english'))
  filtered_words = [w for w in tokens if len(w) > 2 and w not in stop_words]
  return " ".join(filtered_words)


# Clean the text column (column 0) of both splits with `preprocess`.
train.iloc[:, 0] = train.iloc[:, 0].map(preprocess)
test.iloc[:, 0] = test.iloc[:, 0].map(preprocess)

In [None]:
X_train=train.iloc[:,0]
X_test=test.iloc[:,0]
y_train=train.iloc[:,1]
y_test=test.iloc[:,1]

In [None]:
y_test

[51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 51964081,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,
 90078731,

In [None]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
# Map the raw training labels to integer class ids, then convert those ids
# to dummy variables (i.e. one-hot encoded rows).
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(y_train)
y_train = np_utils.to_categorical(encoded_Y)

In [None]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
# Reuse the encoder fitted on the training labels (previous cell) instead of
# fitting a second one on the test labels: this keeps class ids identical
# across splits and raises if the test set contains a label never seen in
# training.
encoded_Y = encoder.transform(y_test)
# Fix the one-hot width to the number of training classes, so it cannot
# shrink when a class is absent from the test split.
y_test = np_utils.to_categorical(encoded_Y, num_classes=len(encoder.classes_))

In [None]:
y_train

array([[0., 1.],
       [1., 0.],
       [0., 1.],
       ...,
       [1., 0.],
       [0., 1.],
       [1., 0.]], dtype=float32)

In [None]:
y_test

array([[0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [None]:
y_train.shape

(1960, 2)

In [None]:
X_train=pd.DataFrame(X_train)

In [None]:
tokenized = X_train.iloc[:,0].apply((lambda x: tokenizer.encode(str(x), add_special_tokens=True)))

In [None]:

# Right-pad every token-id list with zeros up to the longest sequence length.
max_len = max((len(ids) for ids in tokenized.values), default=0)
padded = np.array([ids + [0] * (max_len - len(ids)) for ids in tokenized.values])

In [None]:
# Convert the padded token ids to a torch tensor
input_ids = torch.tensor(np.array(padded))


In [None]:
#to set the paddings to zero and rest to 1
attention_mask = np.where(padded != 0, 1, 0)
attention_mask.shape

(1960, 77)

In [None]:
input_ids = (torch.tensor(padded))
attention_mask = (torch.tensor(attention_mask))

In [None]:
with torch.no_grad():
    last_hidden_states_train = model(input_ids,attention_mask)
    

In [None]:
X_train=last_hidden_states_train[0].numpy()


In [None]:
X_train.shape

(1960, 77, 768)

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_eval, y_train, y_eval = train_test_split(last_hidden_states_train[0].numpy(),y_train, test_size = 0.05, random_state = 0)

In [None]:
X_train

array([[[-9.36981291e-02,  2.27245018e-02,  7.05619305e-02, ...,
         -1.36370838e-01,  2.12881833e-01,  3.43180418e-01],
        [ 1.07830115e-01,  2.04664588e-01, -1.65369362e-01, ...,
         -6.50881827e-02,  4.73617852e-01,  1.52222201e-01],
        [-1.66791171e-01,  1.30414575e-01,  2.72202075e-01, ...,
          1.95829552e-02, -2.45518342e-01, -5.60819805e-01],
        ...,
        [ 6.28547743e-02,  4.42062430e-02,  1.98746622e-01, ...,
          2.17413763e-03, -1.16380632e-01,  1.62709370e-01],
        [ 9.43285897e-02, -5.66063933e-02,  1.50588332e-02, ...,
         -2.19485201e-02, -1.53976813e-01,  2.01601833e-01],
        [-6.33209944e-03, -1.23350672e-01,  1.96025103e-01, ...,
         -9.41810906e-02, -2.17934832e-01,  1.28147349e-01]],

       [[ 9.05493349e-02, -1.89240023e-01,  1.81339130e-01, ...,
         -1.00806184e-01,  2.46982887e-01,  4.44152653e-01],
        [ 3.38109821e-01,  1.10043384e-01,  4.33619171e-01, ...,
         -2.00119346e-01,  4.42347199e

In [None]:
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint
from keras.layers import Input, Dense, Dropout, Flatten
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=3)  

In [None]:
gru_len = 128
Routings = 5
Num_capsule = 10
Dim_capsule = 16
dropout_p = 0.3
rate_drop_dense = 0.3

In [None]:

# Sequence classifier over per-token embeddings of shape (77, 768):
# kernel convolution -> Conv1D -> average pooling -> capsule -> dense softmax.
# NOTE(review): the hyper-parameter constants defined in the previous cell
# (gru_len, Routings, Num_capsule, Dim_capsule, ...) are not referenced here;
# the values below are literals.
classifier = Sequential()
# Polynomial kernel of degree 2 with a trainable offset; kernel_size=1, so
# each position is handled independently.
classifier.add(KernelConv2D(
    input_shape=(77,768),
    filters=128,
    kernel_size=1,
    kernel_function=PolynomialKernel(p=2, trainable_c=True),
))
classifier.add(Conv1D(filters=32, kernel_size=3, activation='relu'))
#classifier.add(Conv1D(filters=16, kernel_size=3, activation='relu'))
classifier.add(AveragePooling1D(3))
# A single output capsule of dimension 64 with one routing iteration.
classifier.add(Capsule(num_capsule=1 ,dim_capsule=64, routings=1,share_weights=True))
classifier.add(Flatten())
# Two output units, matching the two-column one-hot labels.
classifier.add(Dense(units=2 , activation='softmax'))
classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
classifier.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
kernel_conv2d_20 (KernelConv (None, 77, 128)           98433     
_________________________________________________________________
conv1d_10 (Conv1D)           (None, 75, 32)            12320     
_________________________________________________________________
average_pooling1d_18 (Averag (None, 25, 32)            0         
_________________________________________________________________
capsule_18 (Capsule)         (None, 1, 1, 64)          2048      
_________________________________________________________________
flatten_18 (Flatten)         (None, 64)                0         
_________________________________________________________________
dense_18 (Dense)             (None, 2)                 130       
Total params: 112,931
Trainable params: 112,931
Non-trainable params: 0
_______________________________________________

In [None]:
# Train for 3 epochs, validating on the held-out eval split.
# NOTE(review): the EarlyStopping callback `es` created in an earlier cell is
# not passed here (there is no `callbacks=` argument), so it has no effect.
classifier.fit(np.array(X_train),np.array(y_train),batch_size=128,epochs=3,validation_data=(np.array(X_eval),np.array(y_eval)))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fc5e97c6b70>

In [None]:
# For the first 20 entries of `X_train1`: embed the text, predict its class,
# derive a per-position heat map from the first layer's activations and append
# a colour-coded HTML rendering of the words to heatmap_kervolution_bot.html.
# NOTE(review): `X_train1`, `K1`, `math` and `HTML` are not defined or imported
# in any cell visible in this notebook; this cell depends on hidden kernel state.
# NOTE(review): the file handle is closed only if the loop finishes without
# raising (no `with` block).
file=open("heatmap_kervolution_bot.html","w")
for i in range(0,20):
  type_here=[]
  type_here.append(X_train1[i])
  typr_here=pd.DataFrame(type_here)
  tokenized = typr_here.iloc[:,0].apply((lambda x: tokenizer.encode(str(x), add_special_tokens=True)))
  # Pads to a hard-coded length of 77 (the length the classifier was declared with).
  padded = np.array([i + [0]*(77-len(i)) for i in tokenized.values])
  input_ids = torch.tensor(np.array(padded))
  attention_mask = np.where(padded != 0, 1, 0)
  # `input_ids` is rebuilt here from the same array (redundant with the line above).
  input_ids = (torch.tensor(padded))
  attention_mask = (torch.tensor(attention_mask))
  with torch.no_grad():
    last_hidden_states_test = model(input_ids,attention_mask)
  y_pred = classifier.predict(last_hidden_states_test[0].numpy())
  Xtst=last_hidden_states_test[0]
  class_idx = np.argmax(y_pred[0]) #not needed in this case as only two classes
  class_output = classifier.output[:, class_idx]
  # NOTE(review): hard-coded layer name taken from one particular model build
  # (it matches the recorded summary); verify it exists in the current model.
  last_conv_layer = classifier.get_layer("kernel_conv2d_20")
  grads = K1.gradients(class_output, last_conv_layer.output)[0]
  pooled_grads = K1.mean(grads)
  iterate = K1.function([classifier.input], [pooled_grads, last_conv_layer.output[0]])
  # `pooled_grads_value` is fetched but not used below: the heat map is the
  # plain channel mean of the layer output, not a gradient-weighted one.
  pooled_grads_value, conv_layer_output_value = iterate([Xtst])
  heatmap = np.mean(conv_layer_output_value, axis=-1)
  heatmap = np.maximum(heatmap,0)
  # NOTE(review): divides by zero if every value was clipped to 0 above.
  heatmap /= np.max(heatmap)#normalise values in the prediction
  # NOTE(review): 36 is an unexplained constant — confirm what it represents.
  norm_len = 36/last_conv_layer.output_shape[1]
  html = ""
  # Author ids are hard-coded; column 0 of the prediction selects between them.
  if y_pred[0][0]>0.5:
    pred = '90078731'
  else:
    pred = '51964081'
  html += "<span><h3>Based on the description, the model believes that text belongs to {} author ".format(pred)
  html += "<small><br>Confidence: {:.0f}%<br><br></small></h3></span>".format(abs(((y_pred[0][0]*100)-50)*2))
  # `i` is rebound to each word here, shadowing the outer loop index for the
  # rest of the iteration.
  for j,i in enumerate(type_here[0].split()):
    html += "<span style='background-color:rgba({},0,15,{})'>{} </span>".format(heatmap[math.floor(j/norm_len)]*255,heatmap[math.floor(j/norm_len)]-0.3,i)
  file.write(html)
file.close()
# Only the HTML built in the last loop iteration is displayed; the file holds all 20.
HTML(html)   


In [None]:

X_test=pd.DataFrame(X_test)
tokenized = X_test.iloc[:,0].apply((lambda x: tokenizer.encode(str(x), add_special_tokens=True)))

In [None]:

# Right-pad every token-id list with zeros up to the longest test sequence.
# The original computed `max_len` but then padded to a hard-coded 51, which
# leaves ragged rows (and a failing array/tensor conversion) as soon as any
# sequence is longer than 51 tokens; the training cell already pads to max_len.
# NOTE(review): the classifier was declared with input length 77 — confirm
# whether the test sequences should be padded to that length instead.
max_len = max((len(ids) for ids in tokenized.values), default=0)
padded = np.array([ids + [0] * (max_len - len(ids)) for ids in tokenized.values])

In [None]:
# Convert the padded token ids to a torch tensor
input_ids = torch.tensor(np.array(padded))

In [None]:
# Convert the padded token ids to a torch tensor (duplicate of the previous cell)
input_ids = torch.tensor(np.array(padded))

In [None]:
# Mask is 0 wherever `padded` holds the padding id 0 and 1 everywhere else.
attention_mask = np.where(padded == 0, 0, 1)
attention_mask.shape

(250, 51)

In [None]:
# Tensor versions of the padded id matrix and of its mask, built in one step.
input_ids, attention_mask = torch.tensor(padded), torch.tensor(attention_mask)

In [None]:
# Forward pass with autograd disabled; `model` receives the id tensor and the
# mask tensor positionally, in that order.
with torch.no_grad():
    last_hidden_states_test = model(input_ids,attention_mask)

In [None]:
# Shape of the first element of the model output.
last_hidden_states_test[0].shape

torch.Size([250, 51, 768])

In [None]:
# First element of the model output as a NumPy array.
# NOTE(review): this rebinds X_test, which an earlier cell wrapped in a DataFrame and
# tokenized from, so that tokenization cell cannot be re-run after this one.
X_test=last_hidden_states_test[0].numpy()

In [None]:
# Run the classifier on the extracted features.
y_pred =  classifier.predict(X_test)

In [None]:
# Display the raw classifier output.
y_pred

array([[0.01956765, 0.00745   , 0.03455538, ..., 0.00793293, 0.00837867,
        0.0050374 ],
       [0.00535491, 0.05970927, 0.01223011, ..., 0.02090672, 0.00927505,
        0.00465346],
       [0.00195544, 0.0022589 , 0.00247086, ..., 0.00280461, 0.00441738,
        0.00767668],
       ...,
       [0.26415133, 0.03437058, 0.01567418, ..., 0.00672165, 0.0059067 ,
        0.01181264],
       [0.01686945, 0.01717686, 0.02739022, ..., 0.01563231, 0.00881726,
        0.0091142 ],
       [0.01239913, 0.00452661, 0.01392127, ..., 0.00327289, 0.00521482,
        0.00444992]], dtype=float32)

In [None]:

# Turn every row into 0/1 flags: 1 where the entry equals the row's largest
# non-zero value, 0 elsewhere.
y_pred = pd.DataFrame(y_pred)
row_peak = y_pred.where(y_pred != 0).max(axis=1)
y_pred = y_pred.eq(row_peak, axis=0).astype(int).values

In [None]:
# Same 0/1 flagging for the reference labels: 1 where the entry equals the
# row's largest non-zero value, 0 elsewhere.
y_test = pd.DataFrame(y_test)
row_peak = y_test.where(y_test != 0).max(axis=1)
y_test = y_test.eq(row_peak, axis=0).astype(int).values

In [None]:

# Class index of every row of the 0/1 matrix y_test.
# Exactly one label is produced per row, so `result` always has len(y_test)
# entries. Scanning for every 1 instead would add nothing for a row without a 1
# and several indices for a row with more than one, silently shifting the
# labels against `predicted`.
_true_flags = np.asarray(y_test) == 1
if not _true_flags.any(axis=1).all():
  raise ValueError("y_test contains a row without any entry equal to 1")
# argmax on the boolean flags returns the first column holding a 1.
result = _true_flags.argmax(axis=1).tolist()


In [None]:
# Class index of every row of the 0/1 matrix y_pred.
# Exactly one label is produced per row, so `predicted` always has len(y_pred)
# entries. Scanning for every 1 instead would add several indices for a row
# with tied maxima and nothing for a row without a 1, silently shifting the
# labels against `result`.
_pred_flags = np.asarray(y_pred) == 1
if not _pred_flags.any(axis=1).all():
  raise ValueError("y_pred contains a row without any entry equal to 1")
# argmax on the boolean flags returns the first column holding a 1 (ties -> lowest index).
predicted = _pred_flags.argmax(axis=1).tolist()



In [None]:


# Reference labels on the first line, predicted labels on the second.
print(result, predicted, sep="\n")

[9, 30, 39, 18, 40, 8, 28, 4, 9, 43, 49, 13, 42, 6, 35, 48, 16, 44, 35, 21, 24, 23, 3, 20, 19, 12, 2, 22, 9, 37, 20, 17, 43, 48, 26, 14, 9, 3, 2, 9, 16, 49, 47, 13, 11, 48, 19, 45, 47, 17, 35, 27, 31, 14, 11, 19, 43, 22, 33, 13, 17, 37, 36, 29, 10, 10, 44, 18, 12, 4, 31, 41, 18, 41, 46, 38, 29, 37, 39, 40, 40, 0, 0, 7, 37, 44, 46, 17, 41, 34, 11, 32, 33, 38, 24, 4, 20, 34, 22, 38, 34, 33, 5, 41, 15, 3, 2, 15, 16, 11, 35, 48, 37, 22, 12, 25, 49, 43, 30, 7, 36, 8, 45, 6, 40, 42, 38, 3, 15, 29, 29, 5, 8, 47, 32, 3, 2, 18, 26, 31, 30, 13, 10, 31, 33, 26, 26, 17, 5, 16, 21, 27, 45, 31, 1, 23, 38, 13, 25, 0, 49, 36, 34, 20, 41, 21, 1, 32, 7, 19, 10, 16, 19, 8, 15, 4, 47, 4, 39, 1, 7, 42, 25, 24, 46, 36, 30, 28, 39, 23, 44, 25, 23, 21, 0, 1, 1, 28, 25, 21, 33, 14, 10, 36, 47, 8, 42, 30, 28, 35, 2, 11, 27, 32, 32, 24, 27, 44, 12, 7, 6, 43, 6, 12, 6, 45, 14, 42, 34, 29, 22, 48, 23, 26, 45, 46, 28, 18, 39, 5, 46, 20, 5, 40, 49, 15, 27, 0, 14, 24]
[8, 15, 39, 42, 40, 4, 28, 4, 38, 43, 49, 0, 40, 

In [None]:
# Shape of the 0/1 reference-label matrix.
y_test.shape

(250, 50)

In [None]:
from sklearn.metrics import confusion_matrix

# `result` supplies the reference labels, `predicted` the model's labels.
cm = confusion_matrix(y_true=result, y_pred=predicted)

cm

array([[2, 1, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 1, 2, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 5, 0],
       [0, 0, 0, ..., 0, 0, 5]])

In [None]:

from sklearn.metrics import accuracy_score, classification_report

# Print the confusion matrix, the overall accuracy and the per-class report.
print('Confusion Matrix :')
print(cm) 
print('Accuracy Score :',accuracy_score(result, predicted)) 
print('Report : ')
# Some classes are never predicted, which makes their precision undefined.
# zero_division=0 reports those as 0.00 explicitly instead of emitting a
# warning on every run.
print(classification_report(result, predicted, zero_division=0)) 

Confusion Matrix :
[[2 1 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 1 2 ... 0 0 0]
 ...
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 5 0]
 [0 0 0 ... 0 0 5]]
Accuracy Score : 0.328
Report : 
              precision    recall  f1-score   support

           0       0.22      0.40      0.29         5
           1       0.00      0.00      0.00         5
           2       0.18      0.40      0.25         5
           3       0.20      0.20      0.20         5
           4       0.50      0.40      0.44         5
           5       0.00      0.00      0.00         5
           6       0.33      0.20      0.25         5
           7       0.56      1.00      0.71         5
           8       0.00      0.00      0.00         5
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00         5
          11       0.40      0.80      0.53         5
          12       0.00      0.00      0.00         5
          13       0.40      0.40      0.40         5
          14     

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Pick 20 consecutive rows from each of two windows of `dataset`
# (rows 980-999 and 1980-1999); column 0 goes to X_train1, column 1 to y_train1.
sample_rows = [row for start in (980, 1980) for row in range(start, start + 20)]
X_train1 = [dataset.iloc[row, 0] for row in sample_rows]
y_train1 = [dataset.iloc[row, 1] for row in sample_rows]

In [None]:
from html import escape  # imported by name: `html` is used below for the markup string

# Build a word-level activation heat-map for the selected sample(s), save the
# markup to heatmap_kervolution_bot.html and display the last one rendered.
# The context manager closes the file even when a step fails; the markup is
# written so the file is not left truncated and empty.
with open("heatmap_kervolution_bot.html","w") as file:
  for sample_idx in range(0,1):
    type_here=[]
    type_here.append(X_train1[sample_idx])
    typr_here=pd.DataFrame(type_here)
    tokenized = typr_here.iloc[:,0].apply((lambda x: tokenizer.encode(str(x), add_special_tokens=True)))
    # Right-pad each id list with zeros to a fixed width of 82.
    padded = np.array([ids + [0]*(82-len(ids)) for ids in tokenized.values])
    # Mask is 1 for real ids and 0 for padding.
    attention_mask = torch.tensor(np.where(padded != 0, 1, 0))
    input_ids = torch.tensor(padded)
    with torch.no_grad():
      last_hidden_states_test = model(input_ids,attention_mask)
    Xtst=last_hidden_states_test[0]
    y_pred = classifier.predict(Xtst.numpy())
    # Index of the highest-scoring class; selects which output column is differentiated.
    class_idx = np.argmax(y_pred[0])
    class_output = classifier.output[:, class_idx]
    last_conv_layer = classifier.get_layer("kernel_conv2d_8")
    grads = K1.gradients(class_output, last_conv_layer.output)[0]
    pooled_grads = K1.mean(grads)
    iterate = K1.function([classifier.input], [pooled_grads, last_conv_layer.output[0]])
    # NOTE(review): pooled_grads_value is fetched but never applied to the
    # activations, so the map below is activation-only -- confirm intent.
    # The features are fed as a NumPy array rather than as a torch tensor.
    pooled_grads_value, conv_layer_output_value = iterate([Xtst.numpy()])
    heatmap = np.mean(conv_layer_output_value, axis=-1)
    heatmap = np.maximum(heatmap,0)
    peak = np.max(heatmap)
    if peak > 0:  # an all-zero map would otherwise be divided by zero and turn into NaN
      heatmap /= peak
    # Number of words mapped onto one heat-map position.
    # NOTE(review): 36 is a hard-coded constant -- presumably an expected word count; confirm.
    norm_len = 36/last_conv_layer.output_shape[1]
    last_bin = len(heatmap)-1
    html = ""
    if y_pred[0][0]>0.5:
      pred = '90078731'
    else:
      pred = '51964081'
    html += "<span><h3>Based on the description, the model believes that text belongs to {} author ".format(pred)
    html += "<small><br>Confidence: {:.0f}%<br><br></small></h3></span>".format(abs(((y_pred[0][0]*100)-50)*2))
    for j,word in enumerate(str(type_here[0]).split()):
      # Clamp the position so texts longer than 36 words reuse the last bin
      # instead of indexing past the end of the map.
      weight = heatmap[min(math.floor(j/norm_len), last_bin)]
      # The word comes from the dataset, so escape it before embedding it in markup.
      html += "<span style='background-color:rgba({},0,15,{})'>{} </span>".format(weight*255,weight-0.3,escape(word))
    file.write(html)
HTML(html)   



In [None]:
# Single hand-written sample text, kept both as a list and as a one-cell frame.
type_here = ['Now playing: Hermes House Band - Country roads. Tune in: http://stream.laut.fm/eurodance.m3u\n']
typr_here = pd.DataFrame(type_here)

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from IPython.display import HTML
from sklearn.model_selection import train_test_split
from numpy import array
from numpy import argmax
from keras.utils import to_categorical
from keras.models import Model
from keras import backend as K1
from keras.models import Sequential
from keras import layers
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer


# Show full cell contents instead of truncating long text columns.
# None is the supported "no limit" value; -1 is the legacy spelling, kept only
# as a fallback for pandas versions that reject None.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)



In [None]:
# Encode every entry of the first column of `typr_here` with the tokenizer
# (entries are cast to str first; special tokens are included).
tokenized = typr_here.iloc[:, 0].apply(
    lambda text: tokenizer.encode(str(text), add_special_tokens=True)
)

In [None]:

# Length of the longest encoded sequence (0 when there are no sequences).
max_len = max((len(ids) for ids in tokenized.values), default=0)

# Every row is right-padded with zeros to a fixed width of 82 ids.
# A longer sequence would get no padding at all and leave the rows ragged,
# so fail early with a clear message instead of building a broken array.
if max_len > 82:
    raise ValueError(
        "longest encoded sequence has {} ids, which exceeds the fixed padding width of 82".format(max_len)
    )

padded = np.array([ids + [0]*(82-len(ids)) for ids in tokenized.values])

In [None]:

# Convert the zero-padded id matrix to a torch tensor.
input_ids = torch.tensor(np.array(padded))

In [None]:
# Mask is 0 wherever `padded` holds the padding id 0 and 1 everywhere else.
attention_mask = np.where(padded == 0, 0, 1)
attention_mask.shape

(1, 82)

In [None]:
# Tensor versions of the padded id matrix and of its mask, built in one step.
input_ids, attention_mask = torch.tensor(padded), torch.tensor(attention_mask)

In [None]:
# Forward pass with autograd disabled; `model` receives the id tensor and the
# mask tensor positionally, in that order.
with torch.no_grad():
    last_hidden_states_test = model(input_ids,attention_mask)

In [None]:
# Run the classifier on the first element of the model output, converted to NumPy.
y_pred = classifier.predict(last_hidden_states_test[0].numpy())

In [None]:
# Display the classifier output for the sample.
y_pred

array([[0.03259752, 0.96740246]], dtype=float32)

In [None]:
# Keep the first element of the model output (still a torch tensor here) for the gradient step.
Xtst=last_hidden_states_test[0]

In [None]:
# Index of the highest-scoring class for the sample; it selects the output column used below.
class_idx = np.argmax(y_pred[0])
# Symbolic score of that class for every input row.
class_output = classifier.output[:, class_idx]
# Layer whose output is inspected, looked up by its name in the classifier.
last_conv_layer = classifier.get_layer("kernel_conv2d_5")

In [None]:
# Gradient of the selected class score with respect to the chosen layer's output.
grads = K1.gradients(class_output, last_conv_layer.output)[0]
# No axis is given, so the mean is taken over every element of `grads`.
pooled_grads = K1.mean(grads)
# Backend function mapping the classifier input to the averaged gradient and
# the layer output of the first sample.
iterate = K1.function([classifier.input], [pooled_grads, last_conv_layer.output[0]])
# NOTE(review): pooled_grads_value is not used by the heat-map cells that follow, so the
# map is built from activations only -- confirm whether gradient weighting was intended.
# NOTE(review): Xtst is the tensor returned by the torch model; presumably the backend
# converts it to an array -- consider passing Xtst.numpy() explicitly.
pooled_grads_value, conv_layer_output_value = iterate([Xtst])

In [None]:

# Average the layer output over its last axis and drop negative values.
heatmap = np.mean(conv_layer_output_value, axis=-1)
heatmap = np.maximum(heatmap,0)
# Scale so the largest value becomes 1. An all-zero map is left untouched,
# because dividing it by its maximum (0) would fill it with NaN.
peak = np.max(heatmap)
if peak > 0:
    heatmap /= peak

In [None]:
# Display the normalised heat-map values.
heatmap

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.04559674, 0.        , 0.        , 0.        , 0.08412105,
       0.        , 0.00601779, 0.        , 0.07689551, 0.07597765,
       0.        , 0.05312858, 0.6212722 , 0.7219944 , 0.        ,
       0.25001332, 0.0473242 , 0.4543909 , 0.        , 0.        ,
       0.7650527 , 0.        , 0.06201762, 0.        , 0.        ,
       0.40358615, 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.00117577, 0.03919426, 0.1703591 , 0.04175245,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.02942058, 0.11977235, 0.15400

In [None]:
# Number of words mapped onto one heat-map position (the rendering cell indexes the
# map with floor(word_index / norm_len)).
# NOTE(review): 36 is a hard-coded constant -- presumably an expected word count; confirm.
norm_len = 36/last_conv_layer.output_shape[1]

In [None]:
from html import escape  # imported by name: `html` is used below for the markup string

# Render the sample as HTML: a headline with the predicted author id and a
# confidence figure, followed by one coloured <span> per word.
# The first output value decides which of the two author ids is reported.
pred = '90078731' if y_pred[0][0]>0.5 else '15401533'
html = ""
html += "<span><h3>Based on the description, the model believes that text belongs to {} author ".format(pred)
html += "<small><br>Confidence: {:.0f}%<br><br></small></h3></span>".format(abs(((y_pred[0][0]*100)-50)*2))
last_bin = len(heatmap)-1
for j,word in enumerate(str(type_here[0]).split()):
  # Clamp the position so texts longer than 36 words reuse the last bin
  # instead of indexing past the end of the map.
  weight = heatmap[min(math.floor(j/norm_len), last_bin)]
  # Escape the word so characters such as < or & cannot break the markup.
  html += "<span style='background-color:rgba({},0,150,{})'>{} </span>".format(weight*255,weight-0.3,escape(word))

HTML(html)
