In [None]:
# CLASSIFIERS: logistic regression and SVM classifiers (scikit and statsmodel) with boundary plot - GC example

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, SVR
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn import preprocessing, grid_search
from sklearn.metrics import classification_report
from sklearn.datasets import make_classification, make_blobs, make_gaussian_quantiles
from sklearn.cross_validation import train_test_split
import statsmodels.api as sm


def show_details(thetype, model, X, Y, predictions, a, b, intercept):
    """Print a one-line quality summary for one data split.

    Parameters
    ----------
    thetype : str
        Label printed in front of the summary. The coefficient line is only
        printed when this equals 'Training'.
    model : object
        Fitted estimator. When it exposes ``score``, ``model.score(X, Y)`` is
        reported (NOTE: for regressors such as SVR this is whatever ``score``
        returns, not classification accuracy). Otherwise the share of
        thresholded predictions equal to ``Y`` is reported.
    X, Y : array-like
        Features and true labels of the split.
    predictions : array-like
        Raw model outputs; they are thresholded at 0.5 to obtain class labels.
    a, b, intercept : number, 1-element array or None
        Linear boundary coefficients; pass None when the model has none.
    """
    labels = np.asarray(Y)
    summary = pd.DataFrame(X)
    summary['Yval'] = labels
    summary['predictions'] = (np.asarray(predictions) > 0.5).astype(int)
    n_errors = int((summary.predictions != summary.Yval).sum())

    if hasattr(model, 'score'):
        accuracy = model.score(X, Y)
    else:
        accuracy = (len(summary) - n_errors) / len(summary)

    print('%s Accuracy= %0.4f,' % (thetype, accuracy), end=' ')
    print('Mean= %0.4f, Error Count = %i' % (labels.mean(), n_errors))

    # Compare against None explicitly: a coefficient of exactly 0.0 is a valid
    # value and must not suppress the line the way a truthiness test would.
    has_coefficients = all(value is not None for value in (a, b, intercept))
    if has_coefficients and thetype == 'Training':
        # np.ravel(...)[0] also accepts the 1-element arrays scikit-learn uses
        # for intercept_.
        a, b, intercept = (float(np.ravel(value)[0]) for value in (a, b, intercept))
        print('Coefficients: %0.4f %0.4f, Intercept %0.4f' % (a, b, intercept))


def _linear_boundary(model):
    """Return (a, b, intercept) of a fitted linear model as floats, or three Nones.

    Tries the scikit-learn attributes (``coef_`` / ``intercept_``) first and
    then a statsmodels-style ``params`` vector laid out as
    [intercept, a, b].
    """
    try:
        coefs = np.ravel(model.coef_)
        return float(coefs[0]), float(coefs[1]), float(np.ravel(model.intercept_)[0])
    except (AttributeError, ValueError, IndexError):
        pass
    try:
        params = np.ravel(model.params)
        return float(params[1]), float(params[2]), float(params[0])
    except (AttributeError, IndexError):
        return None, None, None


def plotter(results, Xtrain, Ytrain, Xtest, Ytest, thetitle):
    """Plot the decision regions of a fitted model next to the train and test points.

    Parameters
    ----------
    results : tuple
        ``(model, trainPreds, testPreds)`` as returned by ``classify``.
    Xtrain, Xtest : ndarray with at least two columns
        Only the first two columns are used as plot axes.
    Ytrain, Ytest : array-like
        Labels used to colour the scatter points.
    thetitle : str
        Prefix for the two subplot titles.

    After showing the figure, ``show_details`` is called for both splits.
    """
    model, trainPreds, testPreds = results

    # Plot window: data range of both splits plus a small margin.
    margin = 0.2
    x1plot_min = min(Xtrain[:, 0].min(), Xtest[:, 0].min()) - margin
    x1plot_max = max(Xtrain[:, 0].max(), Xtest[:, 0].max()) + margin
    x2plot_min = min(Xtrain[:, 1].min(), Xtest[:, 1].min()) - margin
    x2plot_max = max(Xtrain[:, 1].max(), Xtest[:, 1].max()) + margin
    X1inc = (x1plot_max - x1plot_min) / 200
    X2inc = (x2plot_max - x2plot_min) / 200
    x1plot, x2plot = np.meshgrid(np.arange(x1plot_min, x1plot_max, X1inc),
                                 np.arange(x2plot_min, x2plot_max, X2inc))
    Xplot = np.c_[x1plot.ravel(), x2plot.ravel()]
    if 'statsmodel' in str(model.__class__):
        # statsmodels results were fitted with an explicit constant column.
        Xplot = sm.add_constant(Xplot, prepend=True)

    # Threshold once; both panels show the same regions.
    regions = (model.predict(Xplot).reshape(x1plot.shape) > 0.5).astype(int)

    a, b, intercept = _linear_boundary(model)
    # Explicit None test (0.0 is a valid coefficient); b == 0 would mean a
    # vertical boundary that cannot be expressed as x2 = f(x1).
    draw_boundary = a is not None and b != 0
    Xs = np.linspace(x1plot.min(), x1plot.max(), 40)

    fig, (ax0, ax1) = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(12, 5))
    panels = ((ax0, 'Training', Xtrain, Ytrain), (ax1, 'Test', Xtest, Ytest))
    for ax, split, points, labels in panels:
        ax.set_title(thetitle + '- ' + split, fontsize=14)
        ax.grid(True)
        ax.contourf(x1plot, x2plot, regions, cmap=plt.cm.Paired, alpha=0.5)
        ax.scatter(points[:, 0], points[:, 1], c=labels, cmap=plt.cm.Set1)
        if draw_boundary:
            ax.plot(Xs, (-intercept - Xs * a) / b, 'k--')
        # Label and clamp every panel, not only the last-created axes.
        ax.set_xlabel('X1')
        ax.set_ylabel('X2')
        ax.set_xlim(x1plot_min, x1plot_max)
        ax.set_ylim(x2plot_min, x2plot_max)
    plt.tight_layout()
    plt.show()

    show_details('Training', model, Xtrain, Ytrain, trainPreds, a, b, intercept)
    show_details('Test', model, Xtest, Ytest, testPreds, a, b, intercept)


def classify(classifier, Xtrain, Xtest, labels=None):
    """Fit ``classifier`` and return it together with its train and test predictions.

    Parameters
    ----------
    classifier : object
        Either a statsmodels model (already constructed with its data; fitted
        with ``fit(disp=False)``) or an estimator fitted with ``fit(X, y)``.
    Xtrain, Xtest : array-like
        Feature matrices to predict on; ``Xtrain`` is also used for fitting
        non-statsmodels estimators.
    labels : array-like, optional
        Training labels. When omitted, the notebook-level ``Ytrain`` is used,
        which keeps existing three-argument calls working.

    Returns
    -------
    tuple
        ``(model, trainPreds, testPreds)``.
    """
    if 'statsmodel' in str(classifier.__class__):
        model = classifier.fit(disp=False)
    else:
        if labels is None:
            labels = Ytrain  # legacy fallback: labels defined at notebook level
        model = classifier.fit(Xtrain, labels)
    return model, model.predict(Xtrain), model.predict(Xtest)
    
# initialise training and test data sets
np.set_printoptions(suppress=True, precision=4, linewidth=105)
X, Y = make_classification(n_samples=1000, n_features=2, n_redundant=0, n_informative=1, n_clusters_per_class=1, random_state=14)
X[:,0] *= 30 # to alter scaling of data (so scaling input b4 in regression shows benefit)
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.5, random_state=0)

# statsmodels needs an explicit constant column for the intercept
Xtrainint = sm.add_constant(Xtrain, prepend = True)
Xtestint = sm.add_constant(Xtest, prepend = True)

# Fit the scaler on the training split only: fitting it on all of X would let
# test-set statistics leak into the preprocessing of the training data.
scaler = preprocessing.StandardScaler().fit(Xtrain)
Xtrainscaled = scaler.transform(Xtrain)
Xtestscaled = scaler.transform(Xtest)

# logistic regression: scikit-learn vs statsmodels
resultsLogR = classify(LogisticRegression(C = 0.1), Xtrain, Xtest)
plotter(resultsLogR, Xtrain, Ytrain, Xtest, Ytest, 'Logistic Regression Scikit')
resultsLogR2 = classify(sm.Logit(Ytrain, Xtrainint), Xtrainint, Xtestint)
plotter(resultsLogR2, Xtrain, Ytrain, Xtest, Ytest, 'Logistic Regression Statsmodel')

# support vector machines, with and without feature scaling
resultsSVC = classify(SVC(C=1), Xtrain, Xtest)
plotter(resultsSVC, Xtrain, Ytrain, Xtest, Ytest, 'SVM Classifier - not scaled')
resultsSVCsc = classify(SVC(C=10), Xtrainscaled, Xtestscaled)
plotter(resultsSVCsc, Xtrainscaled, Ytrain, Xtestscaled, Ytest, 'SVM Classifier - Scaled')
resultsSVR = classify(SVR(C=1), Xtrain, Xtest)
plotter(resultsSVR, Xtrain, Ytrain, Xtest, Ytest, 'SVM Regression - not scaled')
resultsSVRsc = classify(SVR(C=10), Xtrainscaled, Xtestscaled)
plotter(resultsSVRsc, Xtrainscaled, Ytrain, Xtestscaled, Ytest, 'SVM Regression - Scaled')

# models without a C parameter
resultsDtree = classify(DecisionTreeClassifier(), Xtrain, Xtest)
plotter(resultsDtree, Xtrain, Ytrain, Xtest, Ytest, 'Decision Tree Classifier')
resultsGNB = classify(GaussianNB(), Xtrain, Xtest)
plotter(resultsGNB, Xtrain, Ytrain, Xtest, Ytest, 'Gaussian Naive Bayes')

In [None]:
# try many classifiers with many C values and scaled and non-scaled data (Grid Search example)


def gridSearch(parameters):
    """Grid-search ``parameters`` for each previously fitted model, on raw and scaled data.

    For every model taken from the ``results*`` tuples of the first cell, a
    ``GridSearchCV`` is fitted on the non-scaled and then on the scaled
    training split. For each run the best parameters, the per-candidate
    cross-validation scores and a classification report computed on the
    matching test split are printed.

    Parameters
    ----------
    parameters : dict
        Parameter grid passed straight to ``GridSearchCV``.
    """
    splits = (('Non-Scaled', Xtrain, Xtest), ('Scaled', Xtrainscaled, Xtestscaled))
    for model in [resultsLogR[0], resultsSVC[0], resultsSVCsc[0], resultsSVR[0], resultsSVRsc[0]]:
        grid = grid_search.GridSearchCV(model, parameters)
        for split_name, fit_X, eval_X in splits:
            grid.fit(fit_X, Ytrain)
            print("Best parameters set on %s training set for %s are %s:"
                  % (split_name, model.__class__.__name__, grid.best_params_))
            print("Grid scores on training set:")
            for candidate, mean_score, scores in grid.grid_scores_:
                print("%6.3f (+/-%0.3f) for %r" % (mean_score, scores.std() * 2, candidate))
            # The report is computed on the held-out split, so say so.
            print("Detailed classification report on test set:")
            # Threshold at 0.5 so regressors (SVR) can be reported as classes too.
            y_true, y_pred = Ytest, (grid.predict(eval_X) > 0.5).astype(int)
            print(classification_report(y_true, y_pred))
    
        
def result(model, thetype, trainScore, testScore, trainPreds, testPreds):
    """Append one row describing a fitted model to the notebook-level ``dfr`` frame.

    Parameters
    ----------
    model : object
        Fitted estimator; its class name (first 12 characters) and its ``C``
        attribute (NaN when absent) are recorded.
    thetype : str
        Stored in the 'scale' column.
    trainScore, testScore : float
        Scores to record as-is.
    trainPreds, testPreds : array-like
        Class predictions, compared element-wise with the notebook-level
        ``Ytrain`` / ``Ytest`` to count errors.

    The 'rating' column is ``testErrs + trainErrs + |testErrs - trainErrs|``
    (lower is better).
    """
    global dfr
    trainErrors = int(np.sum(np.asarray(Ytrain) != np.asarray(trainPreds)))
    testErrors = int(np.sum(np.asarray(Ytest) != np.asarray(testPreds)))
    rating = testErrors + trainErrors + abs(testErrors - trainErrors)
    C = getattr(model, 'C', np.nan)  # np.NaN alias no longer exists in NumPy 2
    row = pd.DataFrame([{'model': model.__class__.__name__[:12], 'C': C, 'scale': thetype,
                         'trainScore': trainScore, 'testScore': testScore,
                         'trainErrs': trainErrors, 'testErrs': testErrors, 'rating': rating}])
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    # Starting from the row itself avoids concatenating with an empty frame.
    dfr = row if dfr.empty else pd.concat([dfr, row], ignore_index=True)
 
    
def manualSearch(parameters):
    """Fit every candidate model on raw and on scaled data and record each outcome.

    Candidates are LogisticRegression, SVC and SVR for every value in
    ``parameters['C']``, plus one DecisionTreeClassifier and one GaussianNB.
    Each candidate is fitted first on the non-scaled and then on the scaled
    training split; scores and 0.5-thresholded predictions for both splits
    are handed to ``result``.
    """
    candidates = [
        estimator_cls(C=c_value)
        for estimator_cls in (LogisticRegression, SVC, SVR)
        for c_value in parameters['C']
    ]
    candidates += [DecisionTreeClassifier(), GaussianNB()]

    variants = (
        ('not scaled', Xtrain, Xtest),
        ('Scaled', Xtrainscaled, Xtestscaled),
    )

    def as_labels(raw_output):
        # regressors return continuous values; cut them at 0.5
        return (raw_output > 0.5).astype(int)

    for candidate in candidates:
        for label, train_X, test_X in variants:
            fitted = candidate.fit(train_X, Ytrain)
            score_train = fitted.score(train_X, Ytrain)
            score_test = fitted.score(test_X, Ytest)
            labels_train = as_labels(fitted.predict(train_X))
            labels_test = as_labels(fitted.predict(test_X))
            result(fitted, label, score_train, score_test, labels_train, labels_test)


# start from an empty results table; result() adds one row per fitted model
dfr = pd.DataFrame()
params = {'C': [0.01, 0.1, 1, 10, 100, 1000]}
#gridSearch(params)
manualSearch(params)

# fix the column order for display
column_order = ['model', 'C', 'scale', 'trainScore', 'testScore', 'trainErrs', 'testErrs', 'rating']
dfr = dfr[column_order]

# lowest rating wins (ties are all shown)
best_rating = min(dfr.rating)
print('Best Model and Params:  \n', dfr[dfr.rating == best_rating])

# full table: best rating first, then fewest test errors, then largest C
dfr.sort_values(by=['rating', 'testErrs', 'C'], ascending=[True, True, False])

In [None]:
# Generate Data examples - from internet

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_classification, make_blobs, make_gaussian_quantiles

plt.figure(figsize=(13, 13))
plt.subplots_adjust(bottom=.05, top=.9, left=.05, right=.95)
# One shared, unseeded generator: every run draws different data sets.
seed = np.random.RandomState()

plt.subplot(321)
plt.title("One informative feature, one cluster per class", fontsize='small')
# n_informative=1 to match the title (with one informative feature only the
# default two classes are possible when each class has one cluster).
X1, Y1 = make_classification(n_features=2, n_redundant=0, n_informative=1, n_clusters_per_class=1, random_state=seed)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1, cmap=plt.cm.Paired)

plt.subplot(322)
plt.title("Two informative features, one cluster per class", fontsize='small')
X1, Y1 = make_classification(n_features=2, n_redundant=0, n_informative=2, n_clusters_per_class=1, random_state=seed)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1, cmap=plt.cm.Paired)

plt.subplot(323)
plt.title("Two informative features, two clusters per class", fontsize='small')
X2, Y2 = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=seed)
# colour with the labels generated together with X2 (not the stale Y1)
plt.scatter(X2[:, 0], X2[:, 1], marker='o', c=Y2, cmap=plt.cm.Paired)

plt.subplot(324)
plt.title("Multi-class, two informative features, one cluster", fontsize='small')
X1, Y1 = make_classification(n_features=2, n_redundant=0, n_informative=2, n_clusters_per_class=1, n_classes=3, random_state=seed)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1, cmap=plt.cm.Paired)

plt.subplot(325)
plt.title("Three blobs", fontsize='small')
X1, Y1 = make_blobs(n_features=2, centers=3, random_state=seed)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1, cmap=plt.cm.Paired)

plt.subplot(326)
plt.title("Gaussian divided into three quantiles", fontsize='small')
X1, Y1 = make_gaussian_quantiles(n_features=2, n_classes=3, random_state=seed)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1, cmap=plt.cm.Paired)

plt.show()

#plt.figure(figsize=(13, 33))
#plt.subplots_adjust(bottom=.05, top=.9, left=.05, right=.95)
#for i in range(1,30):
#    plt.subplot(10,3,i)
#    plt.title("One informative feature, one cluster per class "+str(i), fontsize='small')
#    X1, Y1 = make_classification(n_features=2, n_redundant=0, n_informative=1, n_clusters_per_class=1, random_state=i)
#    plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1, cmap=plt.cm.Paired)
#plt.show()

In [None]:
#Linear regression - GC example

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
np.set_printoptions(suppress=True, precision=4, linewidth=105)

# synthetic data: y is roughly proportional to x with multiplicative noise
np.random.seed(5)
samples=9
Xlin = pd.DataFrame(np.random.rand(samples,1)) * 10
Ylin = Xlin * (np.random.rand(samples,1)*0.5 + 1) + np.random.rand()*3
# plain 1-D views for the numpy / scipy / matplotlib calls below
x_vals = Xlin[0].to_numpy()
y_vals = Ylin[0].to_numpy()

#scikit linear regression
# (no `normalize` argument: it does not change an ordinary least-squares fit
# and the parameter no longer exists in current scikit-learn)
LinR = LinearRegression()
modelLin = LinR.fit(Xlin, Ylin)
a = float(np.ravel(modelLin.coef_)[0])
intercept = float(np.ravel(modelLin.intercept_)[0])
fn = np.poly1d([a, intercept+0.1])  # add tiny bit to intercept so appears on plot
print('scikit coefficient %0.4f Intercept %0.4f, Accuracy=%.4f' % (a, intercept, modelLin.score(Xlin, Ylin)))

#scipy linear regression
# separate names so the scikit-learn intercept used for plotting is not overwritten
slope, intercept_scipy, r_value, p_value, std_err = stats.linregress(x_vals, y_vals)
print('Scipy linregress, Slope=%.4f, Incpt=%.4f, R-sqd=%.4f, p-value=%.4f, Std Err=%.4f\n' %
     (slope, intercept_scipy, r_value ** 2, p_value, std_err))

plt.figure(figsize=(16,8))
Xs = np.linspace(-1, 10, 100)
plt.xlabel('x'); plt.ylabel('y')
plt.xlim(0, 10); plt.ylim(3, 17); plt.grid(True)
plt.plot(Xs, Xs * a + intercept -0.1, 'r--', label='Linear') # shift slightly so both lines stay visible
plt.plot(Xs, fn(Xs), 'g--', label = 'Linear f(n)')
plt.scatter(x_vals, y_vals)

# try different degrees of polynomial to fit
for degree in range(1,8):
    theta = np.polyfit(x=x_vals, y=y_vals, deg=degree)
    fitted = np.polyval(theta, x_vals)
    plt.plot(Xs, np.polyval(theta, Xs), label='degree %s' % degree)
    # root-mean-square error on the training points, as a plain float
    err = float(np.sqrt(np.mean((fitted - y_vals) ** 2)))
    r2 = r2_score(y_vals, fitted)
    print('Polyfit degree %i, Err %0.3f, R-sqd %0.4f, Theta %s' % (degree, err, r2, theta))

plt.legend(loc=4)
plt.show()

# Linear Regression using statsmodel gives same results
Xlin1 = sm.add_constant(Xlin, prepend=False)
Ylin1 = Ylin
OLS = sm.OLS(Ylin1, Xlin1)
modelOLS = OLS.fit()
print (modelOLS.summary())

In [None]:
# manual, matrix-based linear regression using gradient descent (from Coursera machine learning - Stanford Uni)

import pandas as pd
import numpy as np

def J_cost(X, y, theta):
    """Return the squared-error cost J = 1/(2m) * sum((X.theta - y)^2), rounded to 4 decimals.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix (``np.matrix`` or plain ``ndarray``).
    y : array-like, m values
        Targets; reshaped to a column.
    theta : array-like, n values
        Parameters; reshaped to a column.

    Returns
    -------
    float
    """
    # Work on plain float arrays so the result does not depend on np.matrix
    # semantics ('*' is element-wise for ndarrays).
    design = np.asarray(X, dtype=float)
    m = design.shape[0]
    weights = np.asarray(theta, dtype=float).reshape(-1, 1)
    targets = np.asarray(y, dtype=float).reshape(-1, 1)
    residuals = design.dot(weights) - targets
    J = 1 / (2 * m) * float(np.sum(np.square(residuals)))
    return round(J, 4)

# data file: two unnamed comma-separated columns (feature, target)
filename = 'ex1data1.txt'
df = pd.read_csv(filename, header=None, names=['d1','d2'] )
X = np.matrix(df.d1).T
m, _ = X.shape
# prepend a column of ones so theta[0] acts as the intercept
X = np.matrix(np.concatenate((np.ones((m,1)), X), axis=1))
y = np.matrix(df.d2).T
theta = np.matrix(np.zeros((2,1)))
alpha = 0.01        # learning rate
iterations = 1500
J = []              # cost after each update

# batch gradient descent
for i in range(iterations):
    h =  X * theta                  # current predictions
    e = h - y                       # residuals
    adj = X.T * e * alpha * 1 / m   # scaled gradient step
    theta = theta - adj
    J.append( J_cost(X, y, theta) )

print('Gradient Descent Theta ', str(theta.T))
# calculate linear regression theta using normal equation
theta1 = (X.T * X).I * X.T * y
print('Normal Equation Theta ',theta1.T)

# cross-check with numpy's least-squares polynomial fit; pass 1-D arrays so
# the coefficients come back as scalars (highest degree first)
Xnew = df.d1.values
theta2 = np.polyfit(x=Xnew, y=df.d2.values, deg=1)
print('Numpy polyfit Theta ',theta2[1], theta2[0])

In [None]:
# fitting multiple polynomials using scikit learn pipeline - example from internet

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error
%matplotlib inline

def f(x):
    """Ground-truth curve: one full sine period over the interval [0, 1]."""
    full_turn = 2 * np.pi
    return np.sin(full_turn * x)

# generate points used to plot and display them
x_plot = np.linspace(0, 1, 100)
np.random.seed(9)
n_samples = 100
X = np.random.uniform(0, 1, size=n_samples)[:, np.newaxis]
y = f(X) + np.random.normal(scale=0.3, size=n_samples)[:, np.newaxis]
# only 20% of the points are used for training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8)

ax = plt.gca()
ax.plot(x_plot, f(x_plot), color='green')
ax.scatter(X_train, y_train, s=10)
ax.set_ylim((-2, 2)); ax.set_xlim((0, 1))
ax.set_ylabel('y'); ax.set_xlabel('x')

# fit different polynomials and plot approximations
degrees = list(range(10))
estimators = []   # fitted pipelines, reused for the error curves below
fig, axes = plt.subplots(5, 2, figsize=(15, 15))
for ax, degree in zip(axes.ravel(), degrees):
    est = make_pipeline(PolynomialFeatures(degree), LinearRegression())
    est.fit(X_train, y_train)
    estimators.append(est)
    ax.plot(x_plot, f(x_plot), color='green')
    ax.scatter(X_train, y_train, s=10)
    ax.plot(x_plot, est.predict(x_plot[:, np.newaxis]), color='red', label='degree=%d' % degree)
    ax.set_ylim((-2, 2)); ax.set_xlim((0, 1))
    ax.set_ylabel('y'); ax.set_xlabel('x')
    ax.legend(loc='upper right',fontsize='small')

plt.tight_layout(); plt.show()

# plot train and test error for each degree (models were fitted above, so
# there is no need to fit each of them a second time)
train_error = np.empty(len(degrees)); test_error = np.empty(len(degrees))
for degree, est in zip(degrees, estimators):
    train_error[degree] = mean_squared_error(y_train, est.predict(X_train))
    test_error[degree] = mean_squared_error(y_test, est.predict(X_test))
    print('degree %s train error %0.3f test error %0.4f' % (degree, train_error[degree] , test_error[degree]))

plt.plot(degrees, train_error, color='green', label='train')
plt.plot(degrees, test_error, color='red', label='test')
plt.ylim((0.0, 1e0))
# the axis is linear and the values are raw MSE, so label it accordingly
plt.ylabel('mean squared error'); plt.xlabel('degree')
plt.legend(loc='lower left')
plt.tight_layout(); plt.show()

In [None]:
# comparison of scikit-learn classifiers (from internet)

from sklearn import cross_validation
# Logistic Regression
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
# imports for the individual classifiers compared below
from sklearn import datasets
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC


def fit_and_report(model, dataset):
    """Fit ``model`` on the whole ``dataset`` and print how well it reproduces it.

    Prints the model, then the classification report and the confusion
    matrix of its predictions on the same data it was fitted on, and returns
    the fitted model.
    """
    model.fit(dataset.data, dataset.target)
    print(model)
    expected = dataset.target
    predicted = model.predict(dataset.data)
    # summarize the fit of the model
    print(metrics.classification_report(expected, predicted))
    print(metrics.confusion_matrix(expected, predicted))
    return model


# load the iris dataset once and reuse it for every classifier
dataset = datasets.load_iris()

# Logistic Regression, Gaussian Naive Bayes, k-Nearest Neighbor,
# Decision Tree (CART) and Support Vector Machine, in that order
svc = SVC()
for model in (LogisticRegression(), GaussianNB(), KNeighborsClassifier(), DecisionTreeClassifier(), svc):
    fit_and_report(model, dataset)

#K-fold cross-validation ############################################################################

# We give cross_val_score a model, the entire data set and its "real" values, and the number of folds.
# The SVC is passed explicitly instead of relying on whatever `model` happens to hold.
scores = cross_validation.cross_val_score(svc, dataset.data, dataset.target, cv=5)

# Print the accuracy for each fold and the mean
print('\nK-fold cross validation scores on SVC', scores)
print(scores.mean())

In [None]:
# neural network - MLPClassifier requires scikit-learn >= 0.18, so this cell cannot run on older versions

%matplotlib inline
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier

# two training samples with two features each, one per class
X = [[0., 0.], [1., 1.]]
y = [0, 1]
# the released API names this parameter `solver` and the value 'lbfgs'
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(X, y)

# predict expects a 2-D array: one row per sample
clf.predict([[0., 1.]])

In [None]:
#Single Decision Tree and Random Forest classifiers, with Graphviz output of trees

from sklearn import datasets, tree
from sklearn.ensemble import RandomForestClassifier
from IPython.display import Image, display
from sklearn.externals.six import StringIO  
import pydotplus as pydot

dataset = datasets.load_iris()


def _tree_png(estimator, feature_names):
    """Render one fitted decision tree to PNG bytes via Graphviz."""
    dot_buffer = StringIO()
    tree.export_graphviz(estimator, out_file=dot_buffer, feature_names=feature_names)
    return pydot.graph_from_dot_data(dot_buffer.getvalue()).create_png()


# first try a single decision tree
clf = tree.DecisionTreeClassifier().fit(dataset.data, dataset.target)
print ('prediction=', clf.predict([[3, 1, 4, 2]]))
display(Image(_tree_png(clf, dataset.feature_names)))


# then try a random forest and show the first ten of its trees
clf1 = RandomForestClassifier(n_estimators=2000).fit(dataset.data, dataset.target)
for member in clf1.estimators_[:10]:
    display(Image(_tree_png(member, dataset.feature_names), width=400, height=200))