In [1]:
import pickle
import os
from gensim.test.utils import datapath, get_tmpfile
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
import gensim
import numpy as np
import pandas as pd
from random import shuffle
from sklearn import svm, model_selection 
import data_analysis.preprocessor_end as pre_processer



In [2]:
def out_pickle(pickle_path, pick_name, variable_name):
    """Pickle ``variable_name`` into the file ``pickle_path + pick_name + ".pkl"``.

    The directory and the file name are joined by plain concatenation, so
    ``pickle_path`` has to end with a path separator already.
    """
    target_file = "".join((pickle_path, pick_name, ".pkl"))
    with open(target_file, "wb") as sink:
        pickle.dump(variable_name, sink)

def in_pickle(pickle_path, pick_name):
    """Load and return the object stored in ``pickle_path + pick_name + ".pkl"``.

    NOTE: unpickling runs code embedded in the file, so only use this on
    pickle files that you created yourself or otherwise trust.
    """
    source_file = "".join((pickle_path, pick_name, ".pkl"))
    with open(source_file, "rb") as source:
        loaded = pickle.load(source)
    return loaded

def glove_file_to_word2vec_model(glove_file_path):
    """Convert a GloVe vectors file to word2vec text format and load the result.

    The converted copy is written to a gensim temp file named "word2vec.txt";
    the vectors loaded from that copy are returned.

    NOTE(review): the input path goes through gensim's ``datapath`` helper, which
    appears to resolve names against gensim's bundled test-data directory --
    confirm that callers pass an absolute path.
    """
    source_path = datapath(glove_file_path)
    converted_path = get_tmpfile("word2vec.txt")

    # Same conversion as the command-line form:
    #   python -m gensim.scripts.glove2word2vec --input <glove_file> --output <w2v_file>
    glove2word2vec(source_path, converted_path)
    vectors = KeyedVectors.load_word2vec_format(converted_path)
    return vectors


def average_word_vectors(words, model, vocabulary, num_features):
    """Return the mean embedding of the in-vocabulary words of a document.

    Parameters
    ----------
    words : iterable of tokens making up the document.
    model : mapping-like object; ``model[word]`` gives the word's vector.
    vocabulary : container of the words for which ``model`` has a vector.
    num_features : length of the embedding vectors.

    Returns
    -------
    numpy.ndarray
        float64 vector of length ``num_features``: the sum of the vectors of all
        words found in ``vocabulary`` divided by how many there were. Words
        outside the vocabulary are ignored; if none match, the zero vector is
        returned.
    """
    total = np.zeros((num_features,), dtype="float64")
    known_words = [token for token in words if token in vocabulary]

    # accumulate one vector at a time, in document order
    for token in known_words:
        total = total + model[token]

    count = float(len(known_words))
    if count:
        total = total / count

    return total

def average(lst):
    """Return the arithmetic mean of ``lst``; an empty ``lst`` raises ZeroDivisionError."""
    total = sum(lst)
    count = len(lst)
    return total / count

In [3]:
word2vec_model = in_pickle("C:/Users/Abdo/Project/pre-trained word2vec models-compiled/",
                               "word2vecModel_glove_6b_300d")

In [4]:
# Read the vocabulary straight from the loaded vectors: on a KeyedVectors object
# the `.wv` alias is deprecated and only returns the object itself.
# NOTE(review): assumes the pickled object is a gensim 3.x KeyedVectors -- confirm.
vocabulary = set(word2vec_model.vocab)

  """Entry point for launching an IPython kernel.


In [6]:
#data_frames = pro.in_pickle("C:/Users/Abdo/Project/pickle_files/","pandas_data_frame")
label_documentVector_dictionary = in_pickle("C:/Users/Abdo/Project/pickle_files/12_21_31/",
                                                "label_documentVector_dictionary_12_21_31")

In [4]:
# key : label
# value: [ (doc_name, doc_vector_presentation), ......  ]
# NOTE(review): this rebinds the name that the preceding cell filled from a pickle;
# run top to bottom it discards the loaded data -- confirm which of the two cells should stay.
label_documentVector_dictionary = {"0_label": [], "1_label": []}

In [44]:
# Turn every movie into a 300-dimensional averaged word vector and file it under its rating label.
# NOTE(review): `data_frames` is not assigned by any active line of this notebook -- confirm where it is loaded.
for _, movie_row in data_frames.iterrows():
    label_documentVector_dictionary[movie_row["rating"]].append(
        (
            movie_row["movie_id"],
            average_word_vectors(movie_row["movie_words"], word2vec_model, vocabulary, 300),
        )
    )

In [46]:
out_pickle("C:/Users/Abdo/Project/pickle_files/13_21_31/",
               "label_documentVector_dictionary_13_21_31", label_documentVector_dictionary)

In [7]:
# [ (doc_vector, label), .....  ]
# it has equal number of 0_label 1_label samples
doc_vector_label_pairs = []

In [8]:
# collect (vector, label) pairs: all "0_label" documents first, then all "1_label" ones
for label in ("0_label", "1_label"):
    doc_vector_label_pairs.extend(
        (movie_info[1], label) for movie_info in label_documentVector_dictionary[label]
    )

In [9]:
# shuffle the array to randomly distribute the 0_label 1_label inside it
# (a single pass already gives a uniformly random order; a second call adds nothing)
shuffle(doc_vector_label_pairs)

In [10]:
# split the shuffled pairs into two parallel lists: x holds the vectors, y the labels
x = [data_pair[0] for data_pair in doc_vector_label_pairs]
y = [data_pair[1] for data_pair in doc_vector_label_pairs]

In [11]:
# change the labeling -> 0_label : 1,  1_label : 0

# 0 data is 75%
# 1 data is 25%
inverse_labeled_y = [1 if element == "0_label" else 0 for element in y]

In [11]:
training_data_percentage = 0.7
training_data_size = int(len(x) * training_data_percentage)

In [5]:
from sklearn.model_selection import cross_validate 
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [53]:
# key: kernel-c-gamma
# value : (corresponding SVM classifier, result_dict)
#   result_dict -> key:scoring type, value: scoring value
svm_classifiers_dict = {}

In [15]:
kernels = ["poly", "rbf", "sigmoid","linear"]
c_values_1 = [0.001, 1, 5] # common between all kernels
gamma_values_1 = [0.001, 1, 5] # for rbf, sigmoid, poly

#c_values_2 = [0.0001, 2, 5, 10] # common between all kernels
#gamma_values_2 = [0.0001, 2, 5, 10] # for rbf, sigmoid, poly

In [6]:
scoring = {'accuracy' : make_scorer(accuracy_score), 
           'precision' : make_scorer(precision_score),
           'recall' : make_scorer(recall_score), 
           'f1_score' : make_scorer(f1_score)}

In [7]:
def test_classifier(clf, x, y, scoring, cv=10):
    """Cross-validate ``clf`` on ``(x, y)`` and return the per-fold scores.

    Parameters
    ----------
    clf : estimator handed to ``model_selection.cross_validate``.
    x, y : the samples and their labels.
    scoring : dict mapping a score name to a scorer.
    cv : number of folds (default 10, the value that used to be hard-coded).

    Returns
    -------
    dict
        The ``cross_validate`` result: ``fit_time``, ``score_time`` and, for every
        name in ``scoring``, ``test_<name>`` and ``train_<name>``, each holding
        one value per fold.
    """
    # Ask for the train scores explicitly: the result tables built further down
    # read the train_* entries, and cross_validate only returns them on request.
    return model_selection.cross_validate(
        estimator=clf, X=x, y=y, cv=cv, scoring=scoring, return_train_score=True
    )

def get_svm_classifiers(kernel, gamma_values, c_values ,svm_classifiers_dict, x, y, scoring,
                        pickle_path="C:/Users/Abdo/Project/pickle_files/13_21_31/"):
    """Cross-validate one SVC per parameter combination and record the results.

    For the "linear" kernel one classifier is built per value in ``c_values``
    (``gamma_values`` is ignored); for any other kernel one is built per
    (C, gamma) pair. Each classifier is evaluated with ``test_classifier`` and
    stored in ``svm_classifiers_dict`` as ``(classifier, results)`` under the key
    "<kernel>-<C>" (linear) or "<kernel>-<C>-<gamma>" (other kernels).

    After all combinations have run, ``svm_classifiers_dict`` is pickled as
    "svm_classifiers_dict.pkl" inside ``pickle_path``, which defaults to the
    directory that used to be hard-coded.

    NOTE(review): scikit-learn's cross-validation fits clones of the estimator,
    so the stored classifier object itself is presumably left unfitted -- confirm
    before calling ``predict`` on it.
    """
    is_linear = kernel == "linear"

    for c in c_values:
        # the linear kernel takes no gamma, so it gets exactly one pass per C
        for gamma in ([None] if is_linear else gamma_values):
            if is_linear:
                classifier_key = kernel+"-"+str(c)
                clf = svm.SVC(kernel=kernel, C=c)
            else:
                classifier_key = kernel+"-"+str(c)+"-"+str(gamma)
                clf = svm.SVC(kernel=kernel, C=c, gamma=gamma)

            results = test_classifier(clf, x, y, scoring)
            svm_classifiers_dict[classifier_key] = (clf, results)

    # pickle out the svm_classifiers_dict
    out_pickle(pickle_path, "svm_classifiers_dict", svm_classifiers_dict)

def test_classifier_on_outside_data(classifer, directory_path, txt_file_name, utf8):
    """Predict the label of one external text file with ``classifer``.

    The file is tokenised by ``pre_processer.preprocess_file``, averaged into a
    single 300-dimensional vector using the notebook-level ``word2vec_model`` and
    ``vocabulary``, and handed to ``classifer.predict`` as a one-sample batch.

    utf8: true if the file is a .txt file, false otherwise
    """
    tokens = pre_processer.preprocess_file(directory_path, txt_file_name, utf8)
    sample_vector = average_word_vectors(tokens, word2vec_model, vocabulary, 300)
    return classifer.predict([sample_vector])


In [28]:
#svm_classifiers_dict = pro.in_pickle("C:/Users/Abdo/Project/pickle_files/11_21_31/","svm_classifiers_dict")

In [8]:
clf = in_pickle("C:/Users/Abdo/Project/pickle_files/","svm_final_classifier")

In [63]:
#clf = svm.SVC(kernel="rbf", gamma=5, C=5)

In [17]:
#clf_results = test_classifier(clf, x, inverse_labeled_y, scoring)

In [64]:
#clf.fit(x, inverse_labeled_y)

SVC(C=5, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=5, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [65]:
prediction = test_classifier_on_outside_data(clf, "C:/Users/Abdo/Project/test_data/","0_label-6.txt", True)
print(prediction)

0_label-6.txt


[1]


In [23]:
out_pickle("C:/Users/Abdo/Project/pickle_files/","svm_final_classifier", clf)

In [59]:
#get_svm_classifiers("linear", gamma_values_1, c_values_1, svm_classifiers_dict, x, inverse_labeled_y, scoring)

In [60]:
#get_svm_classifiers("poly", gamma_values_1, c_values_1, svm_classifiers_dict, x, inverse_labeled_y, scoring)

In [61]:
#get_svm_classifiers("rbf", gamma_values_1, c_values_1, svm_classifiers_dict, x, inverse_labeled_y, scoring)

In [62]:
#get_svm_classifiers("sigmoid", gamma_values_1, c_values_1, svm_classifiers_dict, x, inverse_labeled_y, scoring)

  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)


In [26]:
# key: name of word2vec model
# value: corresponding data frame
classifiers_results_dict = {}

In [17]:
word2vec_models_paths = ["11_21_31", "12_21_31", "13_21_31"]

In [29]:
for word2vec_model_path in word2vec_models_paths:
    # `in_pickle` is the helper defined near the top of this notebook; the old
    # `pro.` prefix named a module that the import cell does not bring in.
    svm_classifiers_dict = in_pickle("C:/Users/Abdo/Project/pickle_files/"+word2vec_model_path+"/","svm_classifiers_dict")

    # key: classifier, test_accuracy, train_accuracy, .....
    # value: [ row1_value, row2_value,  ]
    pandas_dict_svm_classifiers = {"classifier": []}

    # create one empty column per score name found in the classifiers' results
    for classifier in svm_classifiers_dict.keys():
        for result in svm_classifiers_dict[classifier][1]:
            pandas_dict_svm_classifiers[result] = []

    # one row per classifier: its name (linear-1 for example) followed by the
    # average of every score over the cross-validation folds
    for classifier in svm_classifiers_dict.keys():
        pandas_dict_svm_classifiers["classifier"].append(classifier)
        dict_of_folds_results = svm_classifiers_dict[classifier][1]
        for result in dict_of_folds_results:
            pandas_dict_svm_classifiers[result].append(average(dict_of_folds_results[result]))

    # turn the dictionary into a pandas data frame
    classifiers_results_data_frame = pd.DataFrame(pandas_dict_svm_classifiers)
    # drop the timing columns; `axis` is passed by keyword because pandas 2.0
    # no longer accepts it as a positional argument
    classifiers_results_data_frame.drop(["fit_time", "score_time"], axis=1, inplace=True)

    classifiers_results_dict[word2vec_model_path] = classifiers_results_data_frame



In [40]:
#out_pickle("C:/Users/Abdo/Project/pickle_files/","classifiers_results_dict", classifiers_results_dict)

In [3]:
# use the notebook's own `in_pickle`; `pro` is not brought in by the import cell
classifiers_results_dict = in_pickle("C:/Users/Abdo/Project/pickle_files/","classifiers_results_dict")

In [4]:
# use the notebook's own `in_pickle`; `pro` is not brought in by the import cell
classifiers_results_panda_frames_dict = in_pickle("C:/Users/Abdo/Project/pickle_files/","classifiers_results_panda_frames_dict")

In [54]:
# key : word2vec model name (example: 11_21_31)
# value: dict
#        key: kernel name (linear, rbf, ...)
#        value: data frame (c_gamma, test_accuracy, .....)
# NOTE(review): this rebinds the name that the preceding cell loaded from a pickle;
# run top to bottom it throws the loaded frames away -- confirm which cell should stay.
classifiers_results_panda_frames_dict = {}

In [55]:
# key: test_accuracy, ....
# value: [ row1, row2, ....]
# this is meant to be used by the pandas to make data frames of its shape
# (every value stays an empty list, so the dict only serves as a column template)
# NOTE(review): `svm_classifiers_dict` here is whatever an earlier cell last bound
# to that name -- confirm it holds the intended results before running this.
pandas_dict_svm_classifiers = {}
for classifier in svm_classifiers_dict.keys():
    list_of_folds_results = svm_classifiers_dict[classifier][1]
    # skip the time scores
    # (this drops the first two keys by position, so it assumes the two timing
    #  entries come first in the results dict -- TODO confirm)
    for result in list(list_of_folds_results.keys())[2:]:
        pandas_dict_svm_classifiers[result] = []
    # add a key for the c and gamma values
    # (assigned after the score keys; the key order defines the column order of
    #  the data frames built from this template)
    pandas_dict_svm_classifiers["c"] = []
    pandas_dict_svm_classifiers["gamma"] = []

In [56]:
# give every word2vec model its own {kernel: empty data frame} mapping;
# each frame takes its columns from the template in pandas_dict_svm_classifiers
for word2vec_model_name in word2vec_models_paths:
    classifiers_results_panda_frames_dict[word2vec_model_name] = {
        kernel_name: pd.DataFrame(pandas_dict_svm_classifiers) for kernel_name in kernels
    }

In [59]:
# score columns, in the order in which they are written into each per-kernel data frame
# (the c and gamma values are appended after them)
score_columns = [
    "test_accuracy", "train_accuracy",
    "test_precision", "train_precision",
    "test_recall", "train_recall",
    "test_f1_score", "train_f1_score",
]

# for each word2vec model (11_21_31, ...)
for word2vec_model_name in classifiers_results_dict:

    # for each row in that model's data frame
    for index, row in classifiers_results_dict[word2vec_model_name].iterrows():
        # the classifier name is "<kernel>-<c>" or "<kernel>-<c>-<gamma>"
        name_parts = row["classifier"].split("-")
        kernel_name = name_parts[0]
        c = name_parts[1]
        # linear classifiers carry no gamma in their name; 0 is stored instead
        gamma = name_parts[2] if kernel_name != "linear" else 0

        list_to_be_added_to_data_frame = [row[column] for column in score_columns] + [c, gamma]

        # get the data frame corresponding with the word2vec model and kernel
        df = classifiers_results_panda_frames_dict[word2vec_model_name][kernel_name]
        df.loc[index] = list_to_be_added_to_data_frame

In [66]:
out_pickle("C:/Users/Abdo/Project/pickle_files/","classifiers_results_panda_frames_dict", classifiers_results_panda_frames_dict)

In [5]:
# remove the train_accuracy, train_precision, train_recall, train_f1_score columns
# (the loop variable is deliberately not named `word2vec_model`: that would overwrite
#  the loaded embedding model that test_classifier_on_outside_data reads)
for model_name in classifiers_results_panda_frames_dict:
    for kernel in classifiers_results_panda_frames_dict[model_name]:
        classifiers_results_panda_frames_dict[model_name][kernel].drop(
            ["train_accuracy","train_precision", "train_recall", "train_f1_score"], axis=1, inplace=True)

In [6]:
# take the average of test_accuracy, test_precision and test_recall of every row
# and put the value in a new column called averaged_values
# (the loop variable is deliberately not named `word2vec_model`: that would overwrite
#  the loaded embedding model that test_classifier_on_outside_data reads)
for model_name in classifiers_results_panda_frames_dict:
    # for each kernel
    for kernel in classifiers_results_panda_frames_dict[model_name]:
        kernel_frame = classifiers_results_panda_frames_dict[model_name][kernel]
        # one averaged value per row, in row order, added as the new column
        kernel_frame["averaged_values"] = [
            (row["test_accuracy"] + row["test_precision"] + row["test_recall"]) / 3
            for _, row in kernel_frame.iterrows()
        ]

In [7]:
# sort the data frames with respect to the averaged_values, highest first
# (the loop variable is deliberately not named `word2vec_model`: that would overwrite
#  the loaded embedding model that test_classifier_on_outside_data reads)
for model_name in classifiers_results_panda_frames_dict:
    # for each kernel
    for kernel in classifiers_results_panda_frames_dict[model_name]:
        classifiers_results_panda_frames_dict[model_name][kernel].sort_values("averaged_values", inplace=True, ascending=False)

In [36]:
from sklearn.neighbors import KNeighborsClassifier

In [9]:
params = {'n_neighbors':[5,10],
          'leaf_size':[1,5],
          'weights':['uniform', 'distance'],
          'algorithm':['auto', 'ball_tree','kd_tree','brute']}

In [52]:
# key: classifier_name (algorithm-n_neighbors-leaf_size-weights)
# value: (classifier, results)
knn_classifiers_results_dict = {}

# every parameter combination; n_neighbors varies slowest and algorithm fastest
knn_parameter_grid = [
    (n_neighbors, leaf_size, weight, algorithm)
    for n_neighbors in params["n_neighbors"]
    for leaf_size in params["leaf_size"]
    for weight in params["weights"]
    for algorithm in params["algorithm"]
]

for n_neighbors, leaf_size, weight, algorithm in knn_parameter_grid:
    classifier = KNeighborsClassifier(
        n_jobs=-1, n_neighbors=n_neighbors, leaf_size=leaf_size, weights=weight, algorithm=algorithm)
    # name of it: algorithm-n_neighbors-leaf_size-weights
    classifier_name = "-".join(str(part) for part in (algorithm, n_neighbors, leaf_size, weight))
    # store the classifier together with its cross validated results
    knn_classifiers_results_dict[classifier_name] = (
        classifier, test_classifier(classifier, x, inverse_labeled_y, scoring))

In [53]:
#out_pickle("C:/Users/Abdo/Project/pickle_files/","knn_classifiers_results_dict", knn_classifiers_results_dict)

In [3]:
# use the notebook's own `in_pickle`; `pro` is not brought in by the import cell
knn_classifiers_results_dict = in_pickle("C:/Users/Abdo/Project/pickle_files/","knn_classifiers_results_dict")

In [38]:
# key: test_accuracy, .... (columns)
# value: [ row1, row2, ....]  (rows)
knn_classifiers_results_panda_frames_dict = {}

In [39]:
# one empty column per score type (test_accuracy, ....), taken from a reference entry
for score_type in knn_classifiers_results_dict["ball_tree-5-1-uniform"][1]:
    knn_classifiers_results_panda_frames_dict[score_type] = []

# followed by one empty column per classifier parameter
for parameter_column in ("n", "leaf", "weights", "algorithm"):
    knn_classifiers_results_panda_frames_dict[parameter_column] = []

In [40]:
for knn_classifier in knn_classifiers_results_dict:
    # the name is "algorithm-n_neighbors-leaf_size-weights"; each part fills one column of this row
    name_parts = knn_classifier.split("-")
    knn_classifiers_results_panda_frames_dict["n"].append(name_parts[1])
    knn_classifiers_results_panda_frames_dict["algorithm"].append(name_parts[0])
    knn_classifiers_results_panda_frames_dict["leaf"].append(name_parts[2])
    knn_classifiers_results_panda_frames_dict["weights"].append(name_parts[3])

    # test_accuracy, ..... : the average of each score over this classifier's folds
    fold_scores = knn_classifiers_results_dict[knn_classifier][1]
    for score_type in knn_classifiers_results_dict["ball_tree-5-1-uniform"][1]:
        knn_classifiers_results_panda_frames_dict[score_type].append(average(fold_scores[score_type]))



In [41]:
knn_classifiers_results_panda_frames = pd.DataFrame(knn_classifiers_results_panda_frames_dict)

In [42]:
# keep only the cross-validated test scores and the parameter columns; `axis` is
# passed by keyword because pandas 2.0 no longer accepts it as a positional argument
knn_classifiers_results_panda_frames.drop(
    ["fit_time", "score_time", "train_accuracy", "train_precision", "train_recall", "train_f1_score"],
    axis=1, inplace=True)

In [44]:
# averaged value of (test_accuracy, test_precision, test_recall) for every row of the frame
averaged_values = [
    (row["test_accuracy"] + row["test_precision"] + row["test_recall"]) /3
    for index, row in knn_classifiers_results_panda_frames.iterrows()
]
# add the new column
knn_classifiers_results_panda_frames["averaged_values"] = averaged_values

In [46]:
knn_classifiers_results_panda_frames.sort_values("averaged_values", inplace=True, ascending=False)

In [23]:
from keras.models import Sequential
from keras.layers import Dense
import keras
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
import numpy
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [24]:
def create_baseline():
    """Build and compile the baseline network.

    A 300-input, 300-unit ReLU layer feeds a single sigmoid output unit; the
    model is compiled with binary cross-entropy loss, the adam optimizer and
    accuracy as the tracked metric.
    """
    hidden_layer = Dense(300, input_dim=300, kernel_initializer='normal', activation='relu')
    output_layer = Dense(1, kernel_initializer='normal', activation='sigmoid')

    model = Sequential()
    for layer in (hidden_layer, output_layer):
        model.add(layer)

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [25]:
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

In [27]:
# normalize the data
normalized_x = keras.utils.normalize(x, axis=-1, order=2)

In [22]:
# Cross-validate the baseline network over 10 stratified, shuffled folds.
# NOTE(review): the raw `x` is evaluated here; `normalized_x` from the preceding
# cell is not used -- confirm whether that is intended.
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, np.array(x), inverse_labeled_y, cv=kfold)
# report the mean and the standard deviation of the per-fold scores, as percentages
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Results: 80.42% (2.70%)
