# Sentiment Analysis

Download the Foursquare annotated comments in Brazilian Portuguese: https://www.kaggle.com/thaisalmeida/tips-foursquare/version/1

Place the files in subfolder 'docs/'

In [None]:
#!wget files if using Google Colab
!wget -q https://raw.githubusercontent.com/douglas125/TextClassification/master/preProcessing.py
!wget -q https://raw.githubusercontent.com/douglas125/TextClassification/master/Embeddings.py
!wget -q https://raw.githubusercontent.com/douglas125/TextClassification/master/requirements.txt
!pip install -r requirements.txt


#move CSVs to docs/ folder
from google.colab import files
files.upload()

!mkdir docs
!mv *.csv docs/
!ls

In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import preProcessing

from sklearn.model_selection import RandomizedSearchCV
pd.set_option('max_colwidth',150)

In [2]:
# Load the scenario-1 training split from the docs/ subfolder and preview its first rows.
df = pd.read_csv('docs/tips_scenario1_train.csv')
df.head(16)

Unnamed: 0,texto,rotulo
0,"A comida é deliciosa, mas pedi limonada suiça e me disseram que hoje estavam todos muito ocupados e que ninguém conseguiria me atender....melhor i...",-1.0
1,"A partir desta sexta feira dia 11 começam a abrir para jantar mas corre pois é só até as 22 hrs e no domingo dia das mães, estarão aberto durante ...",0.0
2,Joint burguer e brewdog,0.0
3,Agora de segunda a sexta o Habanero vai abrir no almoço com pratos mexicanos e tradicionais!,0.0
4,"Experimente o drink ""Dona Diabla"". Muito bom!",1.0
5,Nova senha do Wifi: 1129508219,0.0
6,Wi-fi 1129508219,0.0
7,"Adoramos a pizza carbonara e a paulistana. Não surpreendeu tanto, mas vale a pena por resgatar o tradicionalismo. Dica @Gourmet_For",1.0
8,"O diferencial desse Burger King é que você mesmo serve o refrigerante, e a vontade!",1.0
9,Unico defeito estacionamento pago!,-1.0


In [3]:
preProcessing.clean_text('Este é um teste de 354 números! Mas que: "interessante".')

'este é um teste de 000 números ! mas que : interessante .'

In [4]:
preProcessing.splitWithPunctuation('mas que: "legal"')

['mas', 'que', ':', '"', 'legal', '"']

In [5]:
df.shape

(1714, 2)

# Baseline: Bag of words

In [14]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
import numpy as np
# Replaces missing values with 0 in every column, so a row with a missing 'rotulo'
# ends up labelled 0. NOTE(review): confirm that is the intended label for such rows.
df = df.fillna(0)

In [15]:
# Labels stay as they are; every comment is cast to str and normalised with clean_text.
categs = df['rotulo'].tolist()
texts = [preProcessing.clean_text(rawText) for rawText in df['texto'].astype(str).tolist()]

In [16]:
# Hold out 10% of the samples for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(texts, categs, test_size=0.1, random_state=42)

In [17]:
# Bag-of-words features: the vocabulary is learned from the training texts only and then
# reused to encode the test texts. Accents are stripped by the vectorizer; lowercasing is
# turned off here (presumably because clean_text already lowercases — verify).
countVec = CountVectorizer(max_features=4700, lowercase=False, strip_accents='unicode')
vectTexts_train = countVec.fit_transform(X_train)
vectTexts_test = countVec.transform(X_test)

In [18]:
countVec.vocabulary_

{'achei': 72,
 'comida': 960,
 'bem': 498,
 'mediocre': 2670,
 'prato': 3380,
 'com': 929,
 'muitas': 2845,
 'coisas': 915,
 'mas': 2647,
 'nada': 2878,
 'sabor': 3782,
 'nao': 2885,
 'vale': 4367,
 'que': 3523,
 'custa': 1173,
 'picburguer': 3272,
 'americano': 220,
 'sempre': 3883,
 'muito': 2847,
 'caro': 720,
 'pelo': 3191,
 'tamanho': 4121,
 'fomos': 1889,
 'em': 1476,
 'dois': 1416,
 'gastamos': 2006,
 '00': 0,
 'para': 3105,
 'fast': 1781,
 'food': 1891,
 'de': 1190,
 'normal': 2941,
 'foi': 1879,
 'ojo': 2993,
 'del': 1231,
 'bifefantastico': 515,
 'torta': 4263,
 'bacalhau': 406,
 'maravilhosa': 2628,
 'saladona': 3811,
 'opcao': 3017,
 'por': 3340,
 'reais': 3588,
 'no': 2930,
 'almoco': 190,
 'servico': 3927,
 'gentil': 2030,
 'unico': 4352,
 'problema': 3446,
 'demora': 1253,
 'desnecessario': 1302,
 'colocar': 926,
 'todos': 4235,
 'os': 3039,
 'funcionarios': 1974,
 'trabalhar': 4276,
 'dia': 1336,
 'deveriam': 1329,
 'ter': 4176,
 'dado': 1176,
 'folga': 1881,
 'pois': 3

In [19]:
np.argmax(countVec.transform(['experimente', 'achei', 'caro']), axis=1)

matrix([[1723],
        [  72],
        [ 720]])

In [20]:
vectTexts_train

<1542x4551 sparse matrix of type '<class 'numpy.int64'>'
	with 25320 stored elements in Compressed Sparse Row format>

In [21]:
# Baseline classifier: multinomial Naive Bayes fitted on the bag-of-words counts.
mnb = MultinomialNB()
mnb.fit(vectTexts_train, y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [22]:
mnb.score(vectTexts_train, y_train)

0.9111543450064851

In [23]:
mnb.score(vectTexts_test, y_test)

0.7790697674418605

In [24]:
# Search space for the Naive Bayes baseline: smoothing strength and whether class
# priors are learned from the data.
mnbParams = {
    'alpha': [0.001, 0.1, 1, 10, 100],
    'fit_prior': [True, False],
}
# The space holds 10 combinations in total. random_state pins the candidate sampling
# so that repeated runs of this cell are reproducible.
mnbRSCV = RandomizedSearchCV(mnb, mnbParams, verbose=1, return_train_score=True, random_state=42)
mnbRSCV.fit(vectTexts_train, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    0.1s finished


RandomizedSearchCV(cv=None, error_score='raise',
          estimator=MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True),
          fit_params=None, iid=True, n_iter=10, n_jobs=1,
          param_distributions={'alpha': [0.001, 0.1, 1, 10, 100], 'fit_prior': [True, False]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring=None, verbose=1)

In [25]:
pd.DataFrame(mnbRSCV.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_fit_prior,param_alpha,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
0,0.003998,0.003266,0.001334,0.001887,True,0.001,"{'fit_prior': True, 'alpha': 0.001}",0.753398,0.776699,0.757812,0.762646,0.010113,3,0.988315,0.986368,0.990291,0.988325,0.001602
1,0.002666,0.001885,0.001334,0.001887,False,0.001,"{'fit_prior': False, 'alpha': 0.001}",0.745631,0.761165,0.757812,0.754864,0.00668,4,0.988315,0.984421,0.98835,0.987029,0.001844
2,0.003999,2e-06,0.001332,0.001884,True,0.1,"{'fit_prior': True, 'alpha': 0.1}",0.745631,0.763107,0.703125,0.737354,0.025167,5,0.979552,0.980526,0.986408,0.982162,0.003028
3,0.003998,2e-06,0.0,0.0,False,0.1,"{'fit_prior': False, 'alpha': 0.1}",0.702913,0.714563,0.664062,0.693904,0.021572,7,0.9815,0.976631,0.980583,0.979571,0.002112
4,0.003999,2e-06,0.0,0.0,True,1.0,"{'fit_prior': True, 'alpha': 1}",0.8,0.8,0.791016,0.797017,0.004231,1,0.914314,0.906524,0.909709,0.910182,0.003198
5,0.005332,0.001886,0.0,0.0,False,1.0,"{'fit_prior': False, 'alpha': 1}",0.782524,0.8,0.771484,0.784695,0.011737,2,0.929893,0.927945,0.930097,0.929312,0.00097
6,0.005328,0.001886,0.0,0.0,True,10.0,"{'fit_prior': True, 'alpha': 10}",0.68932,0.687379,0.693359,0.690013,0.002489,8,0.704966,0.697176,0.692233,0.698125,0.005241
7,0.001333,0.001886,0.0,0.0,False,10.0,"{'fit_prior': False, 'alpha': 10}",0.702913,0.706796,0.714844,0.708171,0.004965,6,0.740019,0.72444,0.72233,0.72893,0.007889
8,0.00521,0.007368,0.0,0.0,True,100.0,"{'fit_prior': True, 'alpha': 100}",0.681553,0.681553,0.683594,0.682231,0.000961,10,0.682571,0.682571,0.681553,0.682232,0.00048
9,0.005208,0.007365,0.0,0.0,False,100.0,"{'fit_prior': False, 'alpha': 100}",0.685437,0.685437,0.689453,0.68677,0.001891,9,0.697176,0.688413,0.68932,0.691636,0.003935


In [26]:
mnbRSCV.best_estimator_.score(vectTexts_test, y_test)

0.7790697674418605

# Word Embedding Class

In [32]:
from Embeddings import WordEmbeddingBR, splitWithPunctuation
import numpy as np

In [33]:
WordEmbeddingBR.downloadNILCEmbeddings()
WordEmbeddingBR.getAvailableEmbeddings()

['cbow50_fasttext', 'cbow50_wang2vec', 'glove50']

In [34]:
wee = WordEmbeddingBR('cbow50_wang2vec')

Reading embedding file: cbow50_wang2vec.zip


934967it [01:08, 13550.89it/s]


In [26]:
classifiers = wee.TrainBaselineClassifiers(X_train, y_train, n_iter=4)

Fitting Support Vector Machine...
Fitting 3 folds for each of 8 candidates, totalling 24 fits
[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]

[Parallel(n_jobs=1)]: Done  24 out of  24 | elapsed:    7.9s finished


[LibSVM]Fitting Gradient Boosted Tree...
Fitting 3 folds for each of 4 candidates, totalling 12 fits
      Iter       Train Loss   Remaining Time 
         1         658.5195            6.26s
         2         495.4146            6.12s
         3         380.5770            6.32s
         4         297.1064            6.83s
         5         243.7370            6.74s
         6         212.2517            6.74s
         7         179.5781            6.47s
         8         152.4104            6.27s
         9         132.1538            6.12s
        10         115.4280            5.95s
        20          37.9122            4.85s
        30          13.3994            4.19s
        40           5.1836            3.59s
        50           1.8979            3.04s
        60           0.8089            2.58s
        70           0.3634            2.07s
        80           0.2790            1.48s
        90           0.2790            0.95s
       100           0.2790            0.52

[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:  1.4min finished


      Iter       Train Loss   Remaining Time 
         1        1046.0737            7.97s
         2         848.6318            7.89s
         3         705.4500            7.73s
         4         598.5587            7.87s
         5         497.5283            7.63s
         6         436.0509            7.43s
         7         381.8273            7.55s
         8         335.0726            7.40s
         9         299.2200            7.25s
        10         270.3118            7.29s
        20         105.2294            6.11s
        30          52.3799            5.49s
        40          26.7224            4.86s
        50          15.5067            4.24s
        60          10.4264            3.51s
        70           7.7755            2.80s
        80           6.2265            2.13s
        90           5.3279            1.50s
       100           4.8589            0.89s


In [28]:
wee.TestBaselineClassifiers(X_train, y_train, classifiers)

{'SVM': 0.9980544747081712, 'GradientBoostingClassifier': 0.9980544747081712}

In [27]:
wee.TestBaselineClassifiers(X_test, y_test, classifiers)

{'SVM': 0.7732558139534884, 'GradientBoostingClassifier': 0.7558139534883721}

In [89]:
# NOTE(review): `svmRSCV` is not defined in any cell visible in this notebook, so this line
# raises NameError on a fresh Restart & Run All — TODO restore the cell that builds it
# or drop this one.
svmRSCV.best_estimator_.score(vectTexts_test, y_test)

0.8081395348837209

# RNN

Base: Bidirectional Attention Flow for Machine Comprehension https://arxiv.org/abs/1611.01603

TODO: Write a data generator and a model that is compatible with scikit-learn's RandomizedSearchCV

Inputs to the model:

- Integer codes of each word
- Integer codes of each character of each word
- Pretrained embeddings for each word

## Step 1: Encode characters

For the character embedding layer, we will use all printable ASCII characters (`string.printable`) plus a padding token (`PAD`).

In [1]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels
from sklearn.metrics import euclidean_distances

##############
#Dictionaries
##############

#for all dicts, add 1 to the looked-up code so that 0 stays reserved for entries that are not found
from sklearn.feature_extraction.text import CountVectorizer
import string

# Character dictionary: each printable ASCII character, followed by the 'PAD' token, is
# mapped to its position in that sequence. Space and newline get no entry, so their
# positions are simply left unused.
allchars = {
    symbol: position
    for position, symbol in enumerate(list(string.printable) + ['PAD'])
    if symbol not in (' ', '\n')
}

def extractVocabulary(textSet, maxWords = 3000):
    """
    Builds a token -> integer code dictionary from a list of texts.

    textSet - list of texts; accent removal and lowercasing should already have been
              applied, because the vectorizer is configured to do neither
    maxWords - maximum number of words kept by the vectorizer

    Returns the vectorizer vocabulary extended with one entry per character of
    string.punctuation, numbered consecutively after the word codes.
    """
    vectorizer = CountVectorizer(max_features=maxWords, lowercase=False, strip_accents=None)
    vectorizer.fit(textSet)

    vocab = vectorizer.vocabulary_
    firstPunctCode = len(vocab)
    # punctuation marks take the codes that follow the last word code
    vocab.update(
        (mark, firstPunctCode + offset)
        for offset, mark in enumerate(string.punctuation)
    )
    return vocab

def sentence2code(sentence, vocabulary, embClass = None):
    """
    Converts a sentence to char embedding codes, word embedding codes and embeddings

    sentence - list of words in the sentence, usually from preProcessing.clean_text().split(' ')
    vocabulary - dict mapping each known word to its integer code
    embClass - a class that implements method encodeWord and has property embDim (embedding dimension)

    Returns a tuple (wordCodes, charCodes, wordEmbeddings):
      wordCodes      - 1-D array with vocabulary code + 1 per word; 0 means "not in vocabulary"
      charCodes      - int array of shape (n_words, longest word length) with allchars code + 1
                       per character; 0 means "unknown character" and allchars['PAD'] + 1
                       fills the positions after the end of shorter words
      wordEmbeddings - array of shape (n_words, embClass.embDim), or None when embClass is None

    An empty sentence yields empty arrays instead of raising.
    """

    assert isinstance(sentence, list), 'sentence should be a list of words'

    #sentence: the +1 shift keeps 0 free for out-of-vocabulary words
    sentCode = [vocabulary.get(w,-1)+1 for w in sentence]

    #characters
    sent_len = len(sentence)
    # default=0 keeps an empty sentence from crashing the max()
    maxwlen = max((len(w) for w in sentence), default=0)
    # PAD goes through the same +1 shift as real characters; unshifted, it would share
    # its code with the shifted code of the last character in string.printable
    padCode = allchars['PAD'] + 1
    charCodes = np.full((sent_len, maxwlen), padCode, dtype=int)

    for i, word in enumerate(sentence):
        charCodes[i, 0:len(word)] = [allchars.get(cc, -1)+1 for cc in word]

    wordEmbeddings = None
    if embClass is not None:
        wordEmbeddings = np.zeros((sent_len, embClass.embDim))
        for i, word in enumerate(sentence):
            wordEmbeddings[i] = embClass.encodeWord(word)

    return np.array(sentCode), charCodes, wordEmbeddings


##############
#Keras models
##############
#change LSTM to CuDNNLSTM
from keras.layers import CuDNNLSTM as LSTM
from keras.layers import Input, Embedding, Conv2D, Lambda, Concatenate, Bidirectional, TimeDistributed
from keras.models import Model, load_model
from keras import backend as K

def createCharEncoder(charDictSize, embSize, nFiltersNGram=16, filterSize = 5):
    """
    Creates a character encoder. Receives the integer code of the character.

    charDictSize - Length of dictionary of characters
    embSize - Embedding size
    nFiltersNGram - number of filters of the first convolution
    filterSize - kernel length, along the character axis, of both convolutions

    Returns a Keras Model named 'CharEncoder' that takes a sequence of character codes
    and outputs the final state of a bidirectional LSTM with embSize units per direction.
    """
    inp = Input((None, ))

    embedded = Embedding(charDictSize, embSize)(inp)

    # add a trailing channel axis so the embedded sequence can go through Conv2D
    embedded = Lambda(lambda x: K.expand_dims(x))(embedded)
    # both convolutions honour filterSize (the first one used to hard-code 5)
    ngram = Conv2D(nFiltersNGram, kernel_size = (filterSize,1), padding='same', activation='relu')(embedded)
    ngram = Conv2D(1, kernel_size = (filterSize,1), padding='same', activation=None)(ngram)

    # drop the single channel again before the recurrent layer
    ngram = Lambda(lambda x: K.squeeze(x, axis=3))(ngram)
    ngram = Bidirectional(LSTM(embSize))(ngram)

    output = ngram

    model = Model(inputs=[inp], outputs=[output], name='CharEncoder')
    return model

def createDocEncoder(dictSize, embSize, nFiltersWordGram = 10, filterSize = 5, embDim = None):
    """
    Creates a document encoder. Receives the integer code of the words.

    dictSize - Length of word dictionary
    embSize - Embedding size
    nFiltersWordGram - number of filters of the first convolution
    filterSize - kernel length, along the word axis, of both convolutions
    embDim - dimension of the pretrained word embeddings; when given, the model takes
             them as a second input and concatenates them with the learned embedding

    Returns a Keras Model named 'WordEncoder' (or 'WordEncoderWithPreEmb' when embDim
    is given) that outputs one bidirectional-LSTM state per word.
    """
    wordCodes = Input((None, ))
    modelInputs = [wordCodes]

    features = Embedding(dictSize, embSize)(wordCodes)

    # optionally append the pretrained vectors to the learned ones
    usePreTrained = embDim is not None
    if usePreTrained:
        preTrainedEmb = Input((None, embDim))
        modelInputs.append(preTrainedEmb)
        features = Concatenate()([features, preTrainedEmb])

    # trailing channel axis in, two convolutions, channel axis out
    features = Lambda(lambda t: K.expand_dims(t))(features)
    features = Conv2D(nFiltersWordGram, kernel_size = (filterSize,1), padding='same', activation='relu')(features)
    features = Conv2D(1, kernel_size = (filterSize,1), padding='same', activation=None)(features)
    features = Lambda(lambda t: K.squeeze(t, axis=3))(features)

    encoded = Bidirectional(LSTM(embSize//2, return_sequences=True))(features)

    modelName = 'WordEncoderWithPreEmb' if usePreTrained else 'WordEncoder'
    return Model(inputs=modelInputs, outputs=[encoded], name=modelName)


##############################
#Scikit learn compatible model
##############################



class BiDirAttModel(BaseEstimator, ClassifierMixin):
    """
    Scikit-learn compatible classifier.

    As written, fit() only validates and stores the training data, and predict()
    returns the label of the stored sample closest to each query row. No neural
    model is trained in this class.
    """

    def __init__(self, charEmbeddingDim = 'auto'):
        """
        Initializes classifier

        charEmbeddingDim - desired character embedding dimension or sqrt(len(allchars)) if 'auto'
        """
        self.charEmbeddingDim = charEmbeddingDim

    def fit(self, X, y):
        """
        Resolves the character embedding dimension and stores the training set.

        X - training samples, validated by check_X_y
        y - target labels

        Returns self.
        """
        # 'auto' resolves to the integer part of the square root of the character dictionary size
        wantsAuto = self.charEmbeddingDim == 'auto'
        self.charEmbeddingDim_ = int(np.sqrt(len(allchars))) if wantsAuto else self.charEmbeddingDim

        # validate shapes, then remember the labels seen and the data itself
        X, y = check_X_y(X, y)
        self.classes_ = unique_labels(y)
        self.X_ = X
        self.y_ = y
        return self

    def predict(self, X):
        """
        Predicts, for each row of X, the label of the nearest stored training row
        (Euclidean distance).
        """
        # fails unless fit() has stored X_ and y_
        check_is_fitted(self, ['X_', 'y_'])

        X = check_array(X)

        distances = euclidean_distances(X, self.X_)
        return self.y_[np.argmin(distances, axis=1)]

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [83]:
X_train[8]

'bons cortes grelhados , especialmente o galeto . buffet de saladas bom , com destaque para os legumes grelhados . atendimento rápido e pratos idem . preço mais caro , mas vale pela comida deliciosa .'

In [84]:
vocab = extractVocabulary(X_train, maxWords=10000,)

In [85]:
len(X_train[8].split(' '))

36

In [86]:
sentence2code(X_train[8].split(' '), vocab, embClass=wee)[2]

array([[ 0.476257, -0.258659, -0.091403, ..., -0.017869, -0.636823,
         0.397816],
       [ 0.53667 , -0.677689, -0.02436 , ...,  0.62127 ,  0.219901,
         0.541482],
       [-0.71601 , -0.611201,  0.515311, ...,  0.061538,  0.031443,
         0.326067],
       ...,
       [-0.643728, -0.183329,  0.17764 , ..., -0.214589,  0.809557,
        -0.426949],
       [-0.07863 , -0.374874, -0.634451, ..., -0.425417,  0.110938,
        -0.018094],
       [-0.185625, -0.287698, -0.240053, ..., -0.028405,  0.298558,
        -0.006506]])

# Keras prototyping

In [12]:
# plot_model is imported here because no earlier cell brings it into scope; without
# this line the cell raises NameError when the notebook is run top to bottom.
from keras.utils import plot_model

# Prototype: word encoder with a 4000-word dictionary, 200-d learned embedding and
# 50-d pretrained embeddings as a second input.
m=createDocEncoder(4000,200, embDim=50)
m.summary()
plot_model(m, to_file='wordEncoder.png')

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           (None, None)         0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, None, 200)    800000      input_10[0][0]                   
__________________________________________________________________________________________________
input_11 (InputLayer)           (None, None, 50)     0                                            
__________________________________________________________________________________________________
concatenate_4 (Concatenate)     (None, None, 250)    0           embedding_5[0][0]                
                                                                 input_11[0][0]                   
__________

In [13]:
m = createCharEncoder(100, 16)
m.summary()
plot_model(m, to_file='characterEncoder.png')

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        (None, None)              0         
_________________________________________________________________
embedding_6 (Embedding)      (None, None, 16)          1600      
_________________________________________________________________
lambda_11 (Lambda)           (None, None, 16, 1)       0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, None, 16, 16)      96        
_________________________________________________________________
conv2d_12 (Conv2D)           (None, None, 16, 1)       81        
_________________________________________________________________
lambda_12 (Lambda)           (None, None, 16)          0         
_________________________________________________________________
bidirectional_6 (Bidirection (None, 32)                4352      
Total para

In [4]:
def createBiDirAttModel(charDictSize, dictSize,
                        charEmbSize=16, nFiltersNGram=16, charfilterSize = 5, #character params
                        wordEmbSize=128, nFiltersWordGram = 10, wordfilterSize = 5, preTrainedEmbDim = None): #word params
    """
    Creates the combined encoder: per-word features from the document encoder
    concatenated with per-word features from the character encoder.

    charDictSize, charEmbSize, nFiltersNGram, charfilterSize - forwarded to createCharEncoder
    dictSize, wordEmbSize, nFiltersWordGram, wordfilterSize - forwarded to createDocEncoder
    preTrainedEmbDim - dimension of the pretrained word embeddings, or None to use
                       only the learned word embedding

    Returns a Keras Model whose inputs are [word codes, character codes] followed by
    the pretrained embeddings when preTrainedEmbDim is given.
    """

    # characters: the char encoder is applied to every word of the sentence
    inputChars = Input((None, None))
    cFeatLayer = createCharEncoder(charDictSize, charEmbSize, nFiltersNGram, charfilterSize)
    charFeats = TimeDistributed(cFeatLayer)(inputChars)

    # words
    inputWords = Input((None, ))
    if preTrainedEmbDim is not None:
        preTrainedEmb = Input((None, preTrainedEmbDim))
        wordEncoded = createDocEncoder(dictSize, wordEmbSize, nFiltersWordGram,
                                       wordfilterSize, preTrainedEmbDim)([inputWords, preTrainedEmb])
    else:
        # feed the word input defined above (this branch used to reference an undefined name)
        wordEncoded = createDocEncoder(dictSize, wordEmbSize, nFiltersWordGram,
                                       wordfilterSize, preTrainedEmbDim)(inputWords)

    output = Concatenate()([wordEncoded, charFeats])
    if preTrainedEmbDim is None:
        # the output depends on inputChars as well, so it has to be declared as a model input
        model = Model(inputs=[inputWords, inputChars], outputs=[output], name='BiAttEnc')
    else:
        model = Model(inputs=[inputWords, inputChars, preTrainedEmb], outputs=[output], name='BiAttEncWithPretrainedEmb')

    return model

In [5]:
m = createBiDirAttModel(16, 3000, preTrainedEmbDim=50)
m.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
input_7 (InputLayer)            (None, None, 50)     0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, None, None)   0                                            
__________________________________________________________________________________________________
WordEncoderWithPreEmb (Model)   (None, None, 128)    509039      input_6[0][0]                    
                                                                 input_7[0][0]                    
__________

In [8]:
import os

# Location of the Graphviz binaries on the author's Windows machine.
# NOTE(review): machine-specific path — adjust it for your own installation.
GRAPHVIZ_BIN = 'C:/Program Files (x86)/Graphviz2.38/bin/'

# Append only when the folder exists and is not already listed, so re-running the
# cell does not keep growing PATH.
if os.path.isdir(GRAPHVIZ_BIN) and GRAPHVIZ_BIN not in os.environ["PATH"].split(os.pathsep):
    os.environ["PATH"] += os.pathsep + GRAPHVIZ_BIN

In [10]:
os.environ["PATH"]

'C:\\Users\\dougl_000\\Anaconda3;C:\\Users\\dougl_000\\Anaconda3\\Library\\mingw-w64\\bin;C:\\Users\\dougl_000\\Anaconda3\\Library\\usr\\bin;C:\\Users\\dougl_000\\Anaconda3\\Library\\bin;C:\\Users\\dougl_000\\Anaconda3\\Scripts;C:\\Users\\dougl_000\\Anaconda3\\Library\\bin;C:\\ProgramData\\Oracle\\Java\\javapath;C:\\WINDOWS\\system32;C:\\WINDOWS;C:\\WINDOWS\\System32\\Wbem;C:\\WINDOWS\\System32\\WindowsPowerShell\\v1.0\\;C:\\Program Files (x86)\\GtkSharp\\2.12\\bin;C:\\Program Files\\SafeNet\\Authentication\\SAC\\x32;C:\\Program Files\\SafeNet\\Authentication\\SAC\\x64;C:\\Program Files (x86)\\Windows Live\\Shared;C:\\Program Files (x86)\\Skype\\Phone\\;C:\\Program Files\\dotnet\\;C:\\Program Files\\Microsoft SQL Server\\130\\Tools\\Binn\\;C:\\WINDOWS\\System32\\OpenSSH\\;C:\\Program Files\\doxygen\\bin;C:\\Program Files\\Git\\cmd;C:\\Users\\dougl_000\\AppData\\Local\\Microsoft\\WindowsApps;;C:\\Users\\dougl_000\\Anaconda3\\lib\\site-packages\\numpy\\.libs;C:/Program Files (x86)/Graphv

In [11]:
from keras.utils import plot_model

plot_model(m, to_file='BidirAtt.png')

In [7]:
!pip install graphviz



In [125]:
# NOTE(review): `m` is rebound by several cells above, so this result depends on the
# execution order. The single integer array passed here does not match the three-input
# model built by createBiDirAttModel(..., preTrainedEmbDim=50); presumably this cell ran
# against an earlier prototype — TODO confirm or remove.
m.predict(np.array([[0,2,5,6,1],[0,2,5,1,99]])).shape

(2, 5, 16, 16)

In [80]:
# Runs scikit-learn's estimator conformance checks against BiDirAttModel.
# NOTE(review): the class itself is passed here; recent scikit-learn releases expect an
# instance instead (check_estimator(BiDirAttModel())) — TODO confirm against the
# scikit-learn version pinned in requirements.txt.
from sklearn.utils.estimator_checks import check_estimator
from sklearn.svm import LinearSVC
check_estimator(BiDirAttModel) 

10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
