In [25]:
from nltk.corpus import gutenberg, stopwords
import nltk
from nltk.corpus import stopwords
# Keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Flatten, LSTM, Conv1D, MaxPooling1D, Dropout, Activation, SpatialDropout1D
from keras.layers.embeddings import Embedding
from keras.utils import np_utils

# Others
import string
import numpy as np
import pandas as pd

from sklearn.manifold import TSNE
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

import re
import os

# model imports
from gensim.models.ldamulticore import LdaMulticore
from gensim.models.word2vec import Word2Vec
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
# hyperparameter training imports
from sklearn.model_selection import GridSearchCV

# set seed for reproducibility
np.random.seed(0)



In [7]:
all_files = os.listdir("book/")

In [8]:
arr = []
one_book = []
content = ''
path = 'book/'
for b in all_files[0:80]:
    one_book = []
    path = 'book/'
    one_book.append(b.split('___')[0])
    one_book.append(b)
    
    path += b
    with open(path, 'r') as myfile:
        content = myfile.read().replace('\n', ' ')
    one_book.append(content)
    arr.append(one_book)

In [9]:
columns = ['author', 'file_name', 'text']
df = pd.DataFrame(arr, columns=columns)

In [10]:
df.shape

(80, 3)

In [11]:
df.head()

Unnamed: 0,author,file_name,text
0,Abraham Lincoln,Abraham Lincoln___Lincoln Letters.txt,My dear father:-- Your letter of the 7th was ...
1,Abraham Lincoln,Abraham Lincoln___Lincoln's First Inaugural Ad...,Fellow citizens of the United States: in comp...
2,Abraham Lincoln,Abraham Lincoln___Lincoln's Gettysburg Address...,"Four score and seven years ago, our fathers br..."
3,Abraham Lincoln,"Abraham Lincoln___Lincoln's Inaugurals, Addres...",INTRODUCTION The facts of Lincoln's early lif...
4,Abraham Lincoln,Abraham Lincoln___Lincoln's Second Inaugural A...,Fellow countrymen: At this second appearing t...


In [12]:
def get_data_of_file(row):
    file_name = row.file_name
    text = row.text
    
    text = re.sub(r"[^A-Za-z0-9^,!.\/'+-=]", " ", text)
    text = re.sub(r"what's", "what is ", text)
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r",", " ", text)
    text = re.sub(r"\.", " ", text)
    text = re.sub(r"!", " ! ", text)
    text = re.sub(r"\/", " ", text)
    text = re.sub(r"\^", " ^ ", text)
    text = re.sub(r"\+", " + ", text)
    text = re.sub(r"\-", " - ", text)
    text = re.sub(r"\=", " = ", text)
    text = re.sub(r"'", " ", text)
    text = re.sub(r"(\d+)(k)", r"\g<1>000", text)
    text = re.sub(r":", " : ", text)
    text = re.sub(r" e g ", " eg ", text)
    text = re.sub(r" b g ", " bg ", text)
    text = re.sub(r" u s ", " american ", text)
    text = re.sub(r"\0s", "0", text)
    text = re.sub(r" 9 11 ", "911", text)
    text = re.sub(r"e - mail", "email", text)
    text = re.sub(r"j k", "jk", text)
    text = re.sub(r"\s{2,}", " ", text)
    
    row['text'] = text
    return row
df = df.apply(get_data_of_file, axis=1)

In [13]:
df.head()

Unnamed: 0,author,file_name,text
0,Abraham Lincoln,Abraham Lincoln___Lincoln Letters.txt,My dear father : - - Your letter of the 7th wa...
1,Abraham Lincoln,Abraham Lincoln___Lincoln's First Inaugural Ad...,Fellow citizens of the United States : in comp...
2,Abraham Lincoln,Abraham Lincoln___Lincoln's Gettysburg Address...,Four score and seven years ago our fathers bro...
3,Abraham Lincoln,"Abraham Lincoln___Lincoln's Inaugurals, Addres...",INTRODUCTION The facts of Lincoln early life a...
4,Abraham Lincoln,Abraham Lincoln___Lincoln's Second Inaugural A...,Fellow countrymen : At this second appearing t...


In [14]:
df.shape

(80, 3)

### Word2Vec

In [15]:
def get_good_tokens(sentence):
    replaced_punctation = list(map(lambda token: re.sub('[^0-9A-Za-z!?]+', '', token), sentence))
    removed_punctation = list(filter(lambda token: token, replaced_punctation))
    return removed_punctation

In [16]:
def w2v_preprocessing(df):
    """ All the preprocessing steps for word2vec are done in this function.
    All mutations are done on the dataframe itself. So this function returns
    nothing.
    """
    df['text'] = df.text.str.lower()
    df['document_sentences'] = df.text.str.split('.')  # split texts into individual sentences
    df['tokenized_sentences'] = list(map(lambda sentences:
                                         list(map(nltk.word_tokenize, sentences)),
                                         df.document_sentences))  # tokenize sentences
    df['tokenized_sentences'] = list(map(lambda sentences:
                                         list(map(get_good_tokens, sentences)),
                                         df.tokenized_sentences))  # remove unwanted characters
    df['tokenized_sentences'] = list(map(lambda sentences:
                                         list(filter(lambda lst: lst, sentences)),
                                         df.tokenized_sentences))  # remove empty lists

w2v_preprocessing(df)

In [20]:
df.head()

Unnamed: 0,author,file_name,text,document_sentences,tokenized_sentences
0,Abraham Lincoln,Abraham Lincoln___Lincoln Letters.txt,my dear father : - - your letter of the 7th wa...,[my dear father : - - your letter of the 7th w...,"[[my, dear, father, your, letter, of, the, 7th..."
1,Abraham Lincoln,Abraham Lincoln___Lincoln's First Inaugural Ad...,fellow citizens of the united states : in comp...,[fellow citizens of the united states : in com...,"[[fellow, citizens, of, the, united, states, i..."
2,Abraham Lincoln,Abraham Lincoln___Lincoln's Gettysburg Address...,four score and seven years ago our fathers bro...,[four score and seven years ago our fathers br...,"[[four, score, and, seven, years, ago, our, fa..."
3,Abraham Lincoln,"Abraham Lincoln___Lincoln's Inaugurals, Addres...",introduction the facts of lincoln early life a...,[introduction the facts of lincoln early life ...,"[[introduction, the, facts, of, lincoln, early..."
4,Abraham Lincoln,Abraham Lincoln___Lincoln's Second Inaugural A...,fellow countrymen : at this second appearing t...,[fellow countrymen : at this second appearing ...,"[[fellow, countrymen, at, this, second, appear..."


In [23]:
sentences = []
for sentence_group in df.tokenized_sentences:
    sentences.extend(sentence_group)

print("Number of sentences: {}.".format(len(sentences)))
print("Number of texts: {}.".format(len(df)))

Number of sentences: 80.
Number of texts: 80.


In [26]:
# Set values for various parameters
num_features = 200    # Word vector dimensionality
min_word_count = 3    # Minimum word count
num_workers = 4       # Number of threads to run in parallel
context = 6           # Context window size
downsampling = 1e-3   # Downsample setting for frequent words

# Initialize and train the model
W2Vmodel = Word2Vec(sentences=sentences,
                    sg=1,
                    hs=0,
                    workers=num_workers,
                    size=num_features,
                    min_count=min_word_count,
                    window=context,
                    sample=downsampling,
                    negative=5,
                    iter=6)

In [28]:
def get_w2v_features(w2v_model, sentence_group):
    """ Transform a sentence_group (containing multiple lists
    of words) into a feature vector. It averages out all the
    word vectors of the sentence_group.
    """
    words = np.concatenate(sentence_group)  # words in text
    index2word_set = set(w2v_model.wv.vocab.keys())  # words known to model
    
    featureVec = np.zeros(w2v_model.vector_size, dtype="float32")
    
    # Initialize a counter for number of words in a review
    nwords = 0
    # Loop over each word in the comment and, if it is in the model's vocabulary, add its feature vector to the total
    for word in words:
        if word in index2word_set: 
            featureVec = np.add(featureVec, w2v_model[word])
            nwords += 1.

    # Divide the result by the number of words to get the average
    if nwords > 0:
        featureVec = np.divide(featureVec, nwords)
    return featureVec

df['w2v_features'] = list(map(lambda sen_group:
                                      get_w2v_features(W2Vmodel, sen_group),
                                      df.tokenized_sentences))

  app.launch_new_instance()


In [29]:
df.head()

Unnamed: 0,author,file_name,text,document_sentences,tokenized_sentences,w2v_features
0,Abraham Lincoln,Abraham Lincoln___Lincoln Letters.txt,my dear father : - - your letter of the 7th wa...,[my dear father : - - your letter of the 7th w...,"[[my, dear, father, your, letter, of, the, 7th...","[0.057827342, 0.115951255, -0.07730536, 0.1662..."
1,Abraham Lincoln,Abraham Lincoln___Lincoln's First Inaugural Ad...,fellow citizens of the united states : in comp...,[fellow citizens of the united states : in com...,"[[fellow, citizens, of, the, united, states, i...","[0.031063976, 0.11480296, -0.052323524, 0.1285..."
2,Abraham Lincoln,Abraham Lincoln___Lincoln's Gettysburg Address...,four score and seven years ago our fathers bro...,[four score and seven years ago our fathers br...,"[[four, score, and, seven, years, ago, our, fa...","[0.055926375, 0.0922096, -0.05004997, 0.146663..."
3,Abraham Lincoln,"Abraham Lincoln___Lincoln's Inaugurals, Addres...",introduction the facts of lincoln early life a...,[introduction the facts of lincoln early life ...,"[[introduction, the, facts, of, lincoln, early...","[0.048888393, 0.12212666, -0.059196822, 0.1352..."
4,Abraham Lincoln,Abraham Lincoln___Lincoln's Second Inaugural A...,fellow countrymen : at this second appearing t...,[fellow countrymen : at this second appearing ...,"[[fellow, countrymen, at, this, second, appear...","[0.04082894, 0.12842312, -0.044956774, 0.12697..."


In [30]:
le = LabelEncoder()
df['author_LabelEncoded'] = le.fit_transform(df.author)

In [31]:
df.head(15)

Unnamed: 0,author,file_name,text,document_sentences,tokenized_sentences,w2v_features,author_LabelEncoded
0,Abraham Lincoln,Abraham Lincoln___Lincoln Letters.txt,my dear father : - - your letter of the 7th wa...,[my dear father : - - your letter of the 7th w...,"[[my, dear, father, your, letter, of, the, 7th...","[0.057827342, 0.115951255, -0.07730536, 0.1662...",0
1,Abraham Lincoln,Abraham Lincoln___Lincoln's First Inaugural Ad...,fellow citizens of the united states : in comp...,[fellow citizens of the united states : in com...,"[[fellow, citizens, of, the, united, states, i...","[0.031063976, 0.11480296, -0.052323524, 0.1285...",0
2,Abraham Lincoln,Abraham Lincoln___Lincoln's Gettysburg Address...,four score and seven years ago our fathers bro...,[four score and seven years ago our fathers br...,"[[four, score, and, seven, years, ago, our, fa...","[0.055926375, 0.0922096, -0.05004997, 0.146663...",0
3,Abraham Lincoln,"Abraham Lincoln___Lincoln's Inaugurals, Addres...",introduction the facts of lincoln early life a...,[introduction the facts of lincoln early life ...,"[[introduction, the, facts, of, lincoln, early...","[0.048888393, 0.12212666, -0.059196822, 0.1352...",0
4,Abraham Lincoln,Abraham Lincoln___Lincoln's Second Inaugural A...,fellow countrymen : at this second appearing t...,[fellow countrymen : at this second appearing ...,"[[fellow, countrymen, at, this, second, appear...","[0.04082894, 0.12842312, -0.044956774, 0.12697...",0
5,Abraham Lincoln,Abraham Lincoln___Speeches and Letters of Abra...,no man since washington has become to america...,[ no man since washington has become to americ...,"[[no, man, since, washington, has, become, to,...","[0.045868024, 0.1207288, -0.05844379, 0.141869...",0
6,Abraham Lincoln,Abraham Lincoln___State of the Union Addresses...,fellow - citizens of the senate and house of r...,[fellow - citizens of the senate and house of ...,"[[fellow, citizens, of, the, senate, and, hous...","[0.04522125, 0.1257728, -0.06482354, 0.1125935...",0
7,Abraham Lincoln,Abraham Lincoln___The Emancipation Proclamatio...,by the president of the united states of ameri...,[by the president of the united states of amer...,"[[by, the, president, of, the, united, states,...","[0.062996976, 0.123609036, -0.05132087, 0.1188...",0
8,Abraham Lincoln,Abraham Lincoln___The Life and Public Service ...,the discovery of an unknown address by abraha...,[ the discovery of an unknown address by abrah...,"[[the, discovery, of, an, unknown, address, by...","[0.07539342, 0.13717231, -0.057141304, 0.13539...",0
9,Abraham Lincoln,Abraham Lincoln___The Writings of Abraham Linc...,immediately after lincoln re - election to the...,[immediately after lincoln re - election to th...,"[[immediately, after, lincoln, re, election, t...","[0.051190715, 0.12859058, -0.05722702, 0.13637...",0


In [32]:
X_train, X_test, y_train, y_test = train_test_split(df.w2v_features, df.author_LabelEncoded, random_state = 0)

In [34]:
X_train_w2v = np.array(list(map(np.array, X_train)))
X_test_w2v = np.array(list(map(np.array, X_test)))

In [37]:
clf = SVC(kernel='linear').fit(X_train_w2v, y_train)

In [38]:
y_score = clf.predict(X_test_w2v)

In [39]:
n_right = 0
for i in range(len(y_score)):
    if y_score.tolist()[i] == y_test.tolist()[i]:
        n_right += 1

print("Accuracy: %.2f%%" % ((n_right/float(len(y_test)) * 100)))

Accuracy: 50.00%


In [40]:
print(classification_report(y_test, y_score))

             precision    recall  f1-score   support

          0       0.50      1.00      0.67         3
          3       0.00      0.00      0.00         1
          4       0.00      0.00      0.00         2
          5       0.00      0.00      0.00         2
          6       0.50      0.80      0.62         5
          7       0.50      1.00      0.67         3
          8       0.00      0.00      0.00         4

avg / total       0.28      0.50      0.35        20



  'precision', 'predicted', average, warn_for)


In [41]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(random_state=42)

In [42]:
rfc.fit(X_train_w2v, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=42, verbose=0, warm_start=False)

In [43]:
y_score = clf.predict(X_test_w2v)

In [44]:
print(classification_report(y_test, y_score))

             precision    recall  f1-score   support

          0       0.50      1.00      0.67         3
          3       0.00      0.00      0.00         1
          4       0.00      0.00      0.00         2
          5       0.00      0.00      0.00         2
          6       0.50      0.80      0.62         5
          7       0.50      1.00      0.67         3
          8       0.00      0.00      0.00         4

avg / total       0.28      0.50      0.35        20



  'precision', 'predicted', average, warn_for)


In [12]:
df.head(15)

Unnamed: 0,author,file_name,text,author_LabelEncoded
0,Abraham Lincoln,Abraham Lincoln___Lincoln Letters.txt,LINCOLN LETTERS By Abraham Lincoln Published ...,0
1,Abraham Lincoln,Abraham Lincoln___Lincoln's First Inaugural Ad...,Lincoln First Inaugural Address March 4 1861 ...,0
2,Abraham Lincoln,Abraham Lincoln___Lincoln's Gettysburg Address...,Lincoln Gettysburg Address given November 19 ...,0
3,Abraham Lincoln,"Abraham Lincoln___Lincoln's Inaugurals, Addres...",Longman English Classics LINCOLN S INAUGURALS...,0
4,Abraham Lincoln,Abraham Lincoln___Lincoln's Second Inaugural A...,Lincoln Second Inaugural Address March 4 1865...,0
5,Abraham Lincoln,Abraham Lincoln___Speeches and Letters of Abra...,SPEECHES LETTERS OF ABRAHAM LINCOLN 1832 - 18...,0
6,Abraham Lincoln,Abraham Lincoln___State of the Union Addresses...,State of the Union Address Abraham Lincoln De...,0
7,Abraham Lincoln,Abraham Lincoln___The Emancipation Proclamatio...,THE EMANCIPATION PROCLAMATION : By the Presid...,0
8,Abraham Lincoln,Abraham Lincoln___The Life and Public Service ...,NOTE After lying buried for almost three quar...,0
9,Abraham Lincoln,Abraham Lincoln___The Writings of Abraham Linc...,THE PAPERS AND WRITINGS OF ABRAHAM LINCOLN VO...,0


### TF-IDF

In [45]:
vectorizer = TfidfVectorizer(sublinear_tf=True, use_idf =True, lowercase=True, strip_accents='ascii', stop_words = 'english')

In [46]:
XText_tfidf = vectorizer.fit_transform(df['text'])

In [47]:
XText_tfidf.shape

(80, 70434)

In [48]:
X_train, X_test, y_train, y_test = train_test_split(XText_tfidf, df.author_LabelEncoded, random_state = 0)

In [49]:
clf = MultinomialNB().fit(X_train, y_train)

In [50]:
y_score = clf.predict(X_test)

In [51]:
n_right = 0
for i in range(len(y_score)):
    if y_score.tolist()[i] == y_test.tolist()[i]:
        n_right += 1

print("Accuracy: %.2f%%" % ((n_right/float(len(y_test)) * 100)))

Accuracy: 50.00%


In [52]:
print(classification_report(y_test, y_score))

             precision    recall  f1-score   support

          0       0.60      1.00      0.75         3
          3       0.00      0.00      0.00         1
          4       0.00      0.00      0.00         2
          5       0.00      0.00      0.00         2
          6       0.40      0.80      0.53         5
          7       0.60      1.00      0.75         3
          8       0.00      0.00      0.00         4

avg / total       0.28      0.50      0.36        20



  'precision', 'predicted', average, warn_for)


In [53]:
clf = SVC(kernel='linear').fit(X_train, y_train)

In [54]:
y_score = clf.predict(X_test)

In [55]:
n_right = 0
for i in range(len(y_score)):
    if y_score.tolist()[i] == y_test.tolist()[i]:
        n_right += 1

print("Accuracy: %.2f%%" % ((n_right/float(len(y_test)) * 100)))

Accuracy: 95.00%


In [56]:
print(classification_report(y_test, y_score))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00         3
          3       0.00      0.00      0.00         1
          4       1.00      1.00      1.00         2
          5       1.00      1.00      1.00         2
          6       1.00      1.00      1.00         5
          7       0.75      1.00      0.86         3
          8       1.00      1.00      1.00         4

avg / total       0.91      0.95      0.93        20



  'precision', 'predicted', average, warn_for)


In [25]:
df.head()

Unnamed: 0,author,file_name,text,author_LabelEncoded
0,Abraham Lincoln,Abraham Lincoln___Lincoln Letters.txt,LINCOLN LETTERS By Abraham Lincoln Published ...,0
1,Abraham Lincoln,Abraham Lincoln___Lincoln's First Inaugural Ad...,Lincoln First Inaugural Address March 4 1861 ...,0
2,Abraham Lincoln,Abraham Lincoln___Lincoln's Gettysburg Address...,Lincoln Gettysburg Address given November 19 ...,0
3,Abraham Lincoln,"Abraham Lincoln___Lincoln's Inaugurals, Addres...",Longman English Classics LINCOLN S INAUGURALS...,0
4,Abraham Lincoln,Abraham Lincoln___Lincoln's Second Inaugural A...,Lincoln Second Inaugural Address March 4 1865...,0


### Bag Of Word

In [57]:
from sklearn.feature_extraction.text import CountVectorizer

In [58]:
count_vectorizer = CountVectorizer(stop_words='english')
XText_CountVectorizer = count_vectorizer.fit_transform(df['text'])

In [59]:
XText_CountVectorizer.shape[1]

70434

In [60]:
X_train, X_test, y_train, y_test = train_test_split(XText_CountVectorizer, df.author_LabelEncoded, random_state = 0)

In [61]:
clf = MultinomialNB().fit(X_train, y_train)

In [62]:
y_score = clf.predict(X_test)

In [63]:
n_right = 0
for i in range(len(y_score)):
    if y_score.tolist()[i] == y_test.tolist()[i]:
        n_right += 1

print("Accuracy: %.2f%%" % ((n_right/float(len(y_test)) * 100)))

Accuracy: 95.00%


In [64]:
print(classification_report(y_test, y_score))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00         3
          3       0.00      0.00      0.00         1
          4       1.00      1.00      1.00         2
          5       1.00      1.00      1.00         2
          6       0.83      1.00      0.91         5
          7       1.00      1.00      1.00         3
          8       1.00      1.00      1.00         4

avg / total       0.91      0.95      0.93        20



  'precision', 'predicted', average, warn_for)


In [65]:
clf = SVC(kernel='linear').fit(X_train, y_train)

In [66]:
y_score = clf.predict(X_test)

In [67]:
y_score.shape

(20,)

In [68]:
n_right = 0
for i in range(len(y_score)):
    if y_score.tolist()[i] == y_test.tolist()[i]:
        n_right += 1

print("Accuracy: %.2f%%" % ((n_right/float(len(y_test)) * 100)))

Accuracy: 85.00%


In [69]:
print(classification_report(y_test.tolist(), y_score.tolist()))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00         3
          1       0.00      0.00      0.00         0
          3       0.00      0.00      0.00         1
          4       1.00      1.00      1.00         2
          5       1.00      1.00      1.00         2
          6       0.71      1.00      0.83         5
          7       1.00      1.00      1.00         3
          8       1.00      0.50      0.67         4

avg / total       0.88      0.85      0.84        20



  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


### Word Embedding

In [70]:
### Create sequence
vocabulary_size = XText_CountVectorizer.shape[1]
num_of_author = len(df['author_LabelEncoded'].unique())

tokenizer = Tokenizer(num_words= vocabulary_size)
tokenizer.fit_on_texts(df['text'])
sequences = tokenizer.texts_to_sequences(df['text'])
data = pad_sequences(sequences, maxlen=5000)

In [71]:
X_train, X_test, y_train, y_test = train_test_split(data, np_utils.to_categorical(df['author_LabelEncoded']), random_state = 0)

In [72]:
model = Sequential()
model.add(Embedding(vocabulary_size, 256, input_length = data.shape[1]))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(num_of_author, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, batch_size=5)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x38f21e10>

In [73]:
y_score = model.predict(X_test)
y_score = [[1 if i == max(sc) else 0 for i in sc] for sc in y_score]
n_right = 0
for i in range(len(y_score)):
    if all(y_score[i][j] == np.array(y_test)[i][j] for j in range(len(y_score[i]))):
        n_right += 1

print("Accuracy: %.2f%%" % ((n_right/float(len(y_test)) * 100)))

Accuracy: 25.00%
