In [8]:
# Libraries including pandas_bokeh
import numpy as np
import pandas as pd
from bokeh.plotting import figure
from bokeh.io import show, output_notebook, export_png
from sklearn.linear_model import LogisticRegression
import pandas_bokeh

# Send Bokeh figures to inline notebook output instead of a standalone file.
output_notebook()
# pandas_bokeh exposes its own output switch; set it as well so that figures
# created through .plot_bokeh() are also rendered inline.
pandas_bokeh.output_notebook()


In [9]:
# Load the food-marketing data, derive a binary Response column, and save a
# side-by-side pair of summary histograms (total spending, response counts).
#
# NOTE(review): the mapping below has no entry for AcceptedCmpOverall == 1, so
# those rows become NaN and are removed by dropna(). Confirm that excluding
# customers with exactly one accepted campaign is intentional.
data = (
    pd.read_csv("data/FoodData/ifood_df.csv")
    .assign(
        Response=lambda frame: frame["AcceptedCmpOverall"].map(
            {0: 0, 2: 1, 3: 1, 4: 1}
        )
    )
    .dropna()
)

# Predictor (total amount spent) and target (0 = reject, 1 = accept) as arrays.
x = data["MntTotal"].to_numpy()
y = data["Response"].to_numpy()

# Options shared by both histograms: static figures with no toolbar, legend,
# or interactive tools, and not displayed at creation time.
_static_hist_options = dict(
    kind="hist",
    toolbar_location=None,
    legend=None,
    show_figure=False,
    panning=False,
    zooming=False,
    hovertool=False,
)

# Spending is rescaled to hundreds of dollars and binned on unit-wide bins.
MntHist = (data["MntTotal"] / 100).plot_bokeh(
    bins=np.arange(0, 25, 1),
    xlabel="100's of $",
    **_static_hist_options,
)
ResHist = data["Response"].plot_bokeh(
    xlabel="Reject/Accept Responses",
    xticks=[0, 1],
    **_static_hist_options,
)

# Lay the two histograms out in one row and write the grid to disk.
K = pandas_bokeh.plot_grid([[MntHist, ResHist]], toolbar_location=None)
export_png(K, filename="../img/FoodDataPlot.png")



'/home/jet08013/GitHub/BookFun/img/FoodDataPlot.png'

In [13]:
# Fit a logistic regression of Response on MntTotal, then compare the fitted
# probability curve with empirical acceptance rates computed in $100-wide bins.
X = x.reshape(-1, 1)  # scikit-learn expects a 2-D (n_samples, n_features) array
L = LogisticRegression()
L.fit(X, y)
P = L.predict_proba(X)[:, 1]  # fitted probability of the second class per row
I = np.argsort(X[:, 0])  # row order that sorts customers by spending

# Bin edges 0, 100, ..., 2400 define 24 bins; each bin is labelled with its
# left edge in dollars.
data["DollarRange"] = pd.cut(
    data["MntTotal"], bins=np.arange(0, 2500, 100), labels=np.arange(0, 2400, 100)
)

# DollarRange is categorical. Pass observed=False explicitly so that empty bins
# are kept (as NaN means / zero counts) and z always has one entry per bin,
# matching the 24 x-positions used in the scatter below. Relying on the
# implicit default is deprecated in recent pandas releases.
_response_by_range = data.groupby("DollarRange", observed=False)["Response"]
z = _response_by_range.mean()  # empirical acceptance rate per bin
n = _response_by_range.count()  # number of customers per bin

# Model probabilities evaluated at 0, 100, ..., 2400 dollars.
u = L.predict_proba(100 * np.arange(0, 25).reshape(-1, 1))

h = figure(toolbar_location=None)
h.xaxis.axis_label = "100's of $ expenditures"
h.yaxis.axis_label = "Probability of Acceptance"
# NOTE(review): each bin's rate is drawn at the bin's left edge (k) rather than
# its midpoint (k + 0.5), so points sit half a unit left of the spending range
# they summarise -- confirm this is the intended presentation.
h.scatter(x=np.arange(0, 24), y=z)
h.line(x=np.arange(0, 25), y=u[:, 1])

show(h)
export_png(h, filename="../img/FoodLogisticFit.png")


'/home/jet08013/GitHub/BookFun/img/FoodLogisticFit.png'