In [1]:
# Train model and make predictions
import numpy as np
import pandas as pd
from keras.models import Sequential, model_from_json
from keras.layers import Dense
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

Using TensorFlow backend.


In [2]:
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# load dataset
iris = datasets.load_iris()
X, Y, labels = iris.data, iris.target, iris.target_names
X = preprocessing.scale(X)

In [4]:
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

In [5]:
# convert integers to dummy variables (i.e. one hot encoded)
y = np_utils.to_categorical(encoded_Y)

In [6]:
encoded_Y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, encoded_Y, test_size=0.25, random_state=42)

In [45]:
def build_model():
    model = Sequential()
    model.add(Dense(40, input_dim=4, kernel_initializer='normal', activation='relu'))
    model.add(Dense(3, kernel_initializer='normal', activation='sigmoid'))
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    return model

In [61]:
estimator = KerasClassifier(build_fn=build_model, epochs=50, batch_size=5, verbose=2)

In [62]:
estimator.fit(X_train, y_train)

Epoch 1/50
0s - loss: 1.0897 - acc: 0.5804
Epoch 2/50
0s - loss: 1.0559 - acc: 0.7143
Epoch 3/50
0s - loss: 1.0029 - acc: 0.7143
Epoch 4/50
0s - loss: 0.9237 - acc: 0.7143
Epoch 5/50
0s - loss: 0.8256 - acc: 0.6964
Epoch 6/50
0s - loss: 0.7218 - acc: 0.6964
Epoch 7/50
0s - loss: 0.6371 - acc: 0.6875
Epoch 8/50
0s - loss: 0.5764 - acc: 0.6964
Epoch 9/50
0s - loss: 0.5349 - acc: 0.6964
Epoch 10/50
0s - loss: 0.5036 - acc: 0.7411
Epoch 11/50
0s - loss: 0.4795 - acc: 0.7768
Epoch 12/50
0s - loss: 0.4577 - acc: 0.7857
Epoch 13/50
0s - loss: 0.4395 - acc: 0.8036
Epoch 14/50
0s - loss: 0.4214 - acc: 0.8125
Epoch 15/50
0s - loss: 0.4047 - acc: 0.8304
Epoch 16/50
0s - loss: 0.3889 - acc: 0.8393
Epoch 17/50
0s - loss: 0.3741 - acc: 0.8304
Epoch 18/50
0s - loss: 0.3608 - acc: 0.8304
Epoch 19/50
0s - loss: 0.3444 - acc: 0.8393
Epoch 20/50
0s - loss: 0.3314 - acc: 0.8482
Epoch 21/50
0s - loss: 0.3189 - acc: 0.8661
Epoch 22/50
0s - loss: 0.3076 - acc: 0.8750
Epoch 23/50
0s - loss: 0.2956 - acc: 0.89

<keras.callbacks.History at 0x2210fa2e978>

In [63]:
y_predicted = estimator.predict(X_test)

In [64]:
accuracy_score(y_test, y_predicted)

1.0

In [65]:
print(classification_report(y_test, y_predicted))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00        15
          1       1.00      1.00      1.00        11
          2       1.00      1.00      1.00        12

avg / total       1.00      1.00      1.00        38



# NLTK test

In [14]:
import nltk
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer

In [15]:
lemmatizer = WordNetLemmatizer()

In [16]:
sentence = u'Good afternoon, ladies and gentlemen and welcome to the Acxiom Fiscal 2017 Third Quarter Earnings Call. At this time, all participants are in a listen-only mode. Later, we will conduct a question-and-answer session, and instructions will be given at that time. [Operator Instructions]. As a reminder, this conference call is being recorded. I would now like to turn the call over to your host, Mrs. Lauren Dillard, Senior Director of Investor Relations.'

In [17]:
v_words = [lemmatizer.lemmatize(t.lower(), 'v') for t in word_tokenize(sentence)]

In [18]:
words = [lemmatizer.lemmatize(t.lower()) for t in word_tokenize(sentence)]

In [66]:
words_sum = np.array([v_words, words])

In [68]:
pd.DataFrame(words_sum.transpose(), columns=['V_Words', 'Words'])

Unnamed: 0,V_Words,Words
0,good,good
1,afternoon,afternoon
2,",",","
3,ladies,lady
4,and,and
5,gentlemen,gentleman
6,and,and
7,welcome,welcome
8,to,to
9,the,the
