In [65]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam, SGD
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score, accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score
import seaborn as sns
%matplotlib inline

In [66]:
# Load the visit-duration dataset from a path relative to the working directory.
df = pd.read_csv('../data/user_visit_duration.csv')

# Extract plain NumPy arrays (via .values) because cross_val_score is fed
# these objects later on.
# X comes from a list-of-columns selection, so it stays 2-D with one feature column.
X = df.loc[:, ['Time (min)']].values
# y is the 1-D target column.
y = df.loc[:, 'Buy'].values

In [67]:
def build_logistic_regression_model():
    """Build and compile a one-unit sigmoid network (logistic regression).

    The network takes a single input feature, is compiled with SGD at a
    learning rate of 0.5 and binary cross-entropy loss, and tracks accuracy.
    A fresh, untrained model is created on every call, which is what lets
    this function be passed as a `build_fn` factory.

    Returns:
        The compiled Sequential model.
    """
    # A single Dense unit with a sigmoid activation on one input feature.
    sigmoid_unit = Dense(1, input_shape=(1,), activation='sigmoid')

    net = Sequential()
    net.add(sigmoid_unit)
    net.compile(
        optimizer=SGD(lr=0.5),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return net

In [68]:
# Fit one model on the full dataset, silently, for 100 epochs.
lrmodel = build_logistic_regression_model()
lrmodel.fit(x=X, y=y, epochs=100, verbose=0)

# NOTE: predictions are made on the same X the model was fitted on, so any
# metric computed from them is an in-sample figure.
y_pred = lrmodel.predict(x=X)

# Threshold the sigmoid outputs at 0.5 to obtain boolean class predictions.
y_class_pred = (y_pred > 0.5)

In [69]:
# Wrap the model factory so scikit-learn utilities can drive it; the wrapper
# receives the builder function itself, not an already-built model.
model = KerasClassifier(
    build_fn=build_logistic_regression_model,
    epochs=100,
    verbose=0,
)

In [70]:
# 5-fold splitter with shuffling; the fixed random_state pins the shuffle.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

In [71]:
# Score the wrapped classifier on each split produced by `cv`.
scores = cross_val_score(estimator=model, X=X, y=y, cv=cv)

In [62]:
# Display the array of scores returned by cross_val_score.
scores

array([ 0.64999998,  0.75      ,  0.75      ,  1.        ,  0.94999999])

In [53]:
# Summarise the cross-validation scores as mean ± standard deviation.
print(
    "The cross validation accuracy is {:0.4f} ± {:0.4f}".format(
        scores.mean(),
        scores.std(),
    )
)

The cross validation accuracy is 0.8116 ± 0.1222


In [55]:
# Raw confusion matrix of the true targets against the thresholded predictions.
confusion_matrix(y_true=y, y_pred=y_class_pred)

array([[41,  9],
       [ 8, 42]])

In [56]:
def pretty_confusion_matrix(y_true, y_pred, labels=["False", "True"]):
    """Return the confusion matrix of `y_true` vs `y_pred` as a labelled DataFrame.

    Args:
        y_true: True target values, passed straight to `confusion_matrix`.
        y_pred: Predicted target values, passed straight to `confusion_matrix`.
        labels: Display names used as the row index. Each column is named
            "Predicted" immediately followed by the label (no separator).
            They are used only for labelling the frame and are not passed
            to `confusion_matrix`.

    Returns:
        A pandas DataFrame holding the confusion-matrix counts.
    """
    counts = confusion_matrix(y_true, y_pred)

    column_names = []
    for label in labels:
        column_names.append("Predicted" + label)

    return pd.DataFrame(counts, index=labels, columns=column_names)

In [57]:
# Same confusion matrix, labelled with the business meaning of each class.
pretty_confusion_matrix(
    y_true=y,
    y_pred=y_class_pred,
    labels=["Not Buy", "Buy"],
)

Unnamed: 0,PredictedNot Buy,PredictedBuy
Not Buy,41,9
Buy,8,42


In [58]:
# Report precision, recall and F1 for the thresholded predictions against
# the true targets, each to three decimals.
print("Precision:\t{:0.3f}".format(precision_score(y_true=y, y_pred=y_class_pred)))
print("Recall:  \t{:0.3f}".format(recall_score(y_true=y, y_pred=y_class_pred)))
print("F1 Score:\t{:0.3f}".format(f1_score(y_true=y, y_pred=y_class_pred)))

Precision:	0.824
Recall:  	0.840
F1 Score:	0.832


In [59]:
# Per-class text report for the thresholded predictions.
print(classification_report(y_true=y, y_pred=y_class_pred))

             precision    recall  f1-score   support

          0       0.84      0.82      0.83        50
          1       0.82      0.84      0.83        50

avg / total       0.83      0.83      0.83       100

