### Packages

In [1]:
import sys
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.models import load_model
from keras_metrics import KerasMetrics
from keras.callbacks import EarlyStopping
from keras import optimizers
import numpy as np
import random
import pickle
import decoding_ngrams as dc
from Files import nodes
import os
import utility as ut
import livelossplot
sys.path.append('../')

Using TensorFlow backend.


### 1. Load Data

In [2]:
path = 'Corpus/train_corpus.csv'
X,Y = ut.load_ngrams(path)
batch = len(X)
batch

325

#### 1.2 Verify length of unique verbs

In [9]:
path = 'Corpus/train_corpus.csv'
X,Y = ut.load_ngrams(path, verbose = True)

AttributeError: module 'utility' has no attribute 'load_ngrams'

### 2. Fit Neural Net (Optional)

In [65]:
# 1. Define Model
model = Sequential()
model.add(Dense(1060, input_shape=(1060,), activation='sigmoid'))


# 2. Compile model
keras_metrics = KerasMetrics()
model.compile( 
    optimizer='adam', 
    loss='mean_squared_error',
    metrics = [ keras_metrics.fbeta_score,
               keras_metrics.recall,
               keras_metrics.precision]
            )

# 3. Fit model
stopper = EarlyStopping(monitor='fbeta_score', min_delta=0.00005, patience=50, verbose=1, mode='max')
model.fit(X,Y,epochs = 400, batch_size=batch,verbose=False,callbacks= [stopper])

# 4. Evaluate model
scores = model.evaluate(X, Y)
print("\n%s: %.2f%% \n%s: %.2f%% \n%s: %.2f%% \n%s: %.2f%%" % (model.metrics_names[0], scores[0]*100,
                                                               model.metrics_names[1], scores[1]*100, 
                                                               model.metrics_names[2], scores[2]*100,
                                                               model.metrics_names[3], scores[3]*100 ))

# 5. Save model
model.save('Models/NgramModels/second_test')

loss: 2.71% 
fbeta_score: 92.49% 
recall: 86.15% 
precision: 100.00%


2. Load Trained Model

In [24]:
keras_metrics = KerasMetrics()
model = load_model('Models/NgramModels/second_test',custom_objects={'fbeta_score': keras_metrics.fbeta_score,
                                                     'recall': keras_metrics.recall,
                                                     'precision': keras_metrics.precision})

### 3. Check Results
Test the prediction for some verbs.

The test set can be found here [here](https://github.com/beatrizalbiero/MsResearch/blob/master/WickelfeaturesProject/Corpus/test_corpus.csv).

## Results:

In [3]:
from ngrams_nodes import activation

In [8]:
with open('Corpus/test_corpus.csv', 'r') as f:
    testRaw = f.read().split('\n')
test = list()
for item in testRaw:
    test.append(item.split(','))
test.pop(-1)

['']

In [17]:
def pipeline(verbs, model):
    from ngrams_nodes import activation
    from tqdm import tqdm_notebook as tqdm
    import numpy as np
    import pandas as pd
    from decoding_ngrams import decoding
    
    # filtering the input values
    _input = list()
    for row in verbs:
        _input.append(row[1])
        
    #filtering the expected values
    _output = list()
    for row in verbs:
        _output.append(row[3])
        
    #coding the input values    
    test_list = list()
    for i in _input:
        coding = activation(i)
        test_list.append(coding)
    test_list = np.array(test_list)
    prediction = model.predict(test_list)
    #Create pandas 
    df = pd.DataFrame()
    df["prediction"] = []
    df["expected"] = []
    
    #comparing the expected values with predictions    
    accuracy = 0.0
    result = list()
    for i,item in enumerate(tqdm(prediction)):
        x = decoding(item)
        result.append(x)
        if x == _output[i]:
            accuracy += 1
        
    df["prediction"] = result
    df["expected"] = _output
    
    print("accuracy: " + str(accuracy/len(_output)))
    
    return df
    

## Mean Squared Errors

In [25]:
resultados_mean_squared_errors = pipeline(test,model)


accuracy: 0.15384615384615385


In [26]:
resultados_mean_squared_errors

Unnamed: 0,prediction,expected
0,#insendeiu#,#odeiu#
1,#hemedeiu#,#medeiu#
2,#ensensensen,#bOiu#
3,#andeviar#,#bOtu#
4,#tOmu#,#tOku#
5,#sOku#,#fOku#
6,#Oru#,#xOru#
7,#lansuruu#,#nOtu#
8,#sortu#,#surtu#
9,#digu#,#digu#


In [27]:
resultados_mean_squared_errors.to_csv("Files/Results/mean_squared_errors_results.csv",sep=';')

## Binary Cross-Entropy

In [4]:
# 1. Define Model
model_binary = Sequential()
model_binary.add(Dense(1060, input_shape=(1060,), activation='sigmoid'))


# 2. Compile model
keras_metrics = KerasMetrics()
model_binary.compile( 
    optimizer='adam', 
    loss='binary_crossentropy',
    metrics = [ keras_metrics.fbeta_score,
               keras_metrics.recall,
               keras_metrics.precision]
            )

# 3. Fit model
stopper = EarlyStopping(monitor='fbeta_score', min_delta=0.00005, patience=50, verbose=1, mode='max')
model_binary.fit(X,Y,epochs = 400, batch_size=batch,verbose=False,callbacks= [stopper])

# 4. Evaluate model
scores = model_binary.evaluate(X, Y)
print("\n%s: %.2f%% \n%s: %.2f%% \n%s: %.2f%% \n%s: %.2f%%" % (model_binary.metrics_names[0], scores[0]*100,
                                                               model_binary.metrics_names[1], scores[1]*100, 
                                                               model_binary.metrics_names[2], scores[2]*100,
                                                               model_binary.metrics_names[3], scores[3]*100 ))

# 5. Save model
model_binary.save('Models/NgramModels/binary_cross_entropy_test1')

loss: 14.08% 
fbeta_score: 88.50% 
recall: 79.62% 
precision: 100.00%


In [18]:
resultados_binary = pipeline(test,model_binary)


accuracy: 0.15384615384615385


In [20]:
resultados_binary

Unnamed: 0,prediction,expected
0,#insendeiu#,#odeiu#
1,#hemedeiu#,#medeiu#
2,#duka#,#bOiu#
3,#puluu#,#bOtu#
4,#tOmurxugulg,#tOku#
5,#le#,#fOku#
6,#xovu#,#xOru#
7,#notealava#,#nOtu#
8,#sortu#,#surtu#
9,#digu#,#digu#


In [23]:
resultados_binary.to_csv("Files/Results/binary_cross_entropy_results.csv",sep=';')