# Classify data with user input

## Functions

In [2]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn import linear_model
from sklearn import neighbors
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import KFold
from sklearn.cross_validation import cross_val_score as cvs
from pandas_confusion import ConfusionMatrix

def getData(filename):
    ''' Given a pkl filename, read it in as a pandas dataframe and return the transpose (features as column names)
        of the dataframe. '''
    data = pd.read_pickle(filename)
    data = data.T
    return data

def removeAnomalies(data, parted):
    ''' Given a pandas dataframe and a boolean value where zero means use all data and one means use the same
        proportion of data labelled bought and not bought, return the dataframe without anomalies and with the 
        correct proportion for the data. '''
    a = data.loc[data['Bought'] == 1]
    goodBuys = a.loc[a['Average Time'] != 0]
    notBought = data.loc[data['Bought'] == 0]
    if parted:
        data = [goodBuys, notBought[:goodBuys.shape[0] + 1]]
    else: 
        data = [goodBuys, notBought]
    data = pd.concat(data)
    return data

def convertZero(value):
    ''' Given a value, convert the value if it is None to a string zero. Otherwise return the original value. '''
    if value is None:
        return '0'
    else:
        return value

def convertTime(value):
    ''' Convert the value from ms to seconds. '''
    return value/1000
    
def stringToInt(data):
    '''Given the pandas dataframe, convert all the strings into integers by categorizing the strings. Then return the 
        pandas dataframe with the converted strings and a list dat contains lists with which integer corresponds to 
        which string. '''
    attributes = ['Country','Browser Teacher', 'OS Teacher', 'Browser Student', 
                  'OS Student', 'Mobile Teacher', 'Mobile Student']
    # Holds the corresponding numbers of categories
    data[['Average Time']] = data[['Average Time']].apply(convertTime)
    indicesAttributes = []
    for attribute in attributes:
        data[attribute] = data[attribute].apply(convertZero) 
        if (attribute != 'Mobile Teacher') & (attribute != 'Mobile Student'):
            le = preprocessing.LabelEncoder()
            data[attribute] = le.fit_transform(data[attribute])
            indicesAttributes.append(list(le.classes_))
    return (data,indicesAttributes)

def splitData(data):
    ''' Given a pandas dataframe, split the data into the labels, y and the features x. Then return x and y. '''
    dataY = data['Bought']
    dataX = data.ix[:,:-1]
    dataX = dataX[['Amount of Students', 'Average Amount Students', 'Average Grade', 
         'Average Time', 'Country', 'Browser Teacher', 'OS Teacher',
        'Browser Student', 'OS Student']]
    y = np.asarray(dataY.values,dtype="int")
    x = dataX.values
    return(x,y)

def prepData(filename, parted):
    ''' Given a pkl filename and a boolean value where zero means use all data and one means use the same
        proportion of data labelled bought and not bought, return the features and labels without anomalies seperately
        and the list that contains lists that contain which integer corresponds to which string. '''
    data = getData(filename)
    data = removeAnomalies(data, parted)
    (data, indicesAttributes) = stringToInt(data)
    (x,y) = splitData(data)
    return (x, y, indicesAttributes)

def applyML(x,y, algorithm):
    ''' Given the features, labels and algorithm object, apply Machine Learning with that algorithm and return the 
        mean accuracy of the k-fold testing. '''
    n_items = len(x)
    kf = KFold(n_items, 8, shuffle = True, random_state = 4)
    testVals = []
    predYs = []
    for train, test in kf:
        testX = []
        trainX = []
        trainY = []
        for tr in train:
            trainX.append(x[tr])
            trainY.append(y[tr])
        for t in test:
            testX.append(x[t])
            testVals.append(y[t])
        algorithm.fit(trainX, trainY)
        predY = algorithm.predict(testX)
        predYs.extend(predY)
    good = 0
    for j in range(len(predYs)):
        if predYs[j] == testVals[j]:
            good += 1
    accuracy = good/len(predYs)
    return (accuracy, testVals, predYs)

def getAccuracy(MLObject, name, x, y):
    ''' Given the algorithm object, name of the algorithm, the features and the labels, apply Machine Learning with the
        algorithm, print a message about the accuracy and return the accuracy. '''
    accuracy, testVals, predYs = applyML(x,y, MLObject)
    accuracy = round(accuracy*100)
    print(name, accuracy, '%.')
    return (accuracy, testVals, predYs)

def convertVals(testVals):
    newVals = []
    for val in testVals:
        if val == 1:
            newVals.append('Bought')
        elif val == 0:
            newVals.append('Not Bought')
    return newVals

def returnAllAccuracies(names, algorithms, x, y):
    '''Given the list of algorithm names, the list of algorithm objects, the features and labels'''
    accuracies = []
    for i in range(len(algorithms)): 
        accuracy, testVals, predYs = getAccuracy(algorithms[i], names[i], x, y)
        accuracies.append(accuracy)
        testVals = convertVals(testVals)
        predYs = convertVals(predYs)
        confusion_matrix = ConfusionMatrix(testVals, predYs)
        print("\nConfusion matrix:\n%s" % confusion_matrix)
        print("\n")
    return accuracies

def applyMLUserInput(xtrain, ytrain, xtest, algorithm):
    algorithm.fit(xtrain, ytrain)
    # 1 means buying
    predY = algorithm.predict(xtest)
    cols = ['Amount of Students', 'Average Amount Students', 'Average Grade', 'Average Time', 'Country', 
             'Browser Teacher', 'OS Teacher', 'Browser Student', 'OS Student', 'Prediction']
    xtotal = []
    for xNum in range(len(xtest)):
        listX = list(xtest[xNum])
        listX.append(predY[xNum])
        xtotal.append(listX)
    table = [[1 , 2], [3, 4]]
    frame = pd.DataFrame(xtotal)
    frame.columns = cols
    return frame




# User input

In [3]:
# Change files here as necessary
trainFile = 'trialframe.pkl'
testFile = 'trialframe.pkl'

# Apply Machine Learning and show the dataframe with predictions
algorithm = ExtraTreesClassifier(n_estimators=200, max_depth=None, min_samples_split=1, random_state=0)
(xtrain, ytrain, indicesAttributesTrain) = prepData(testFile, 1)
(xtest, ytest, indicesAttributesTest) = prepData(testFile, 0)
newDataFrame = applyMLUserInput(xtrain, ytrain, xtest, algorithm)
newDataFrame

Unnamed: 0,Amount of Students,Average Amount Students,Average Grade,Average Time,Country,Browser Teacher,OS Teacher,Browser Student,OS Student,Prediction
0,12,4.000000,0.000000,8.513000,9,0,0,0,0,1
1,2,2.000000,1.000000,3.274357,9,0,0,0,0,1
2,18,18.000000,1.721311,27.513284,48,0,0,0,0,1
3,37,12.333333,2.882353,37.413076,33,114,7,43,7,1
4,1,1.000000,2.818182,38.278318,33,0,0,0,0,1
5,2,2.000000,2.636364,46.055318,33,0,0,0,0,1
6,15,15.000000,2.057143,55.227600,40,0,0,82,4,1
7,21,21.000000,2.011682,35.908465,33,0,0,0,0,1
8,2,2.000000,2.250000,33.013125,38,0,0,0,0,1
9,1,1.000000,3.000000,35.178000,16,0,0,0,0,1


# Comparison of algorithms with confusion matrices

In [4]:
n_neighbors = 300
# All tested algorithm objects
algorithms = [GaussianNB(), svm.SVC(), svm.LinearSVC(), linear_model.LogisticRegression(), 
              linear_model.SGDClassifier(), DecisionTreeClassifier(max_depth=None, min_samples_split=1, random_state=0),
              neighbors.KNeighborsClassifier(n_neighbors, weights='distance'), RandomForestClassifier(n_estimators=10),
              AdaBoostClassifier(n_estimators=100), ExtraTreesClassifier(n_estimators=200,
                                                                         max_depth=None, min_samples_split=1, 
                                                                         random_state=0)]
namesAlgorithms = ['Gaussian Naive Bayes: ', 'Support Vector Machine: ', 'Linear Vector Machine: ', 
                   'Logistic Regression: ', 'SGD Classifier: ', 'Decision Tree Classifier: ', 'K-Nearest Neighbors: ', 
                   'Random Forest: ', 'AdaBoost Classifier: ', 'Extra Trees Classifier: ']

# 50/50 difference
print("NL-Algorithms when 50/50 data: \n")
(x,y, indicesAttributes) = prepData('trialframe.pkl', 1)
accuracies50 = returnAllAccuracies(namesAlgorithms, algorithms, x, y)

# Complete data
print("\nNL-Algorithms when complete data: \n")
(x,y, indicesAttributes) = prepData('trialframe.pkl', 0)
accuraciesNormal = returnAllAccuracies(namesAlgorithms, algorithms, x, y)

NL-Algorithms when 50/50 data: 

Gaussian Naive Bayes:  70 %.

Confusion matrix:
Predicted   Bought  Not Bought  __all__
Actual                                 
Bought         154          78      232
Not Bought      63         170      233
__all__        217         248      465


Support Vector Machine:  78 %.

Confusion matrix:
Predicted   Bought  Not Bought  __all__
Actual                                 
Bought         232           0      232
Not Bought     100         133      233
__all__        332         133      465


Linear Vector Machine:  75 %.

Confusion matrix:
Predicted   Bought  Not Bought  __all__
Actual                                 
Bought         197          35      232
Not Bought      79         154      233
__all__        276         189      465


Logistic Regression:  81 %.

Confusion matrix:
Predicted   Bought  Not Bought  __all__
Actual                                 
Bought         210          22      232
Not Bought      65         168      233
__all__

# Feature Selection

In [5]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel

features = ['Teacher/Parent', 'Amount of Students', 'Amount Classrooms', 'Average Amount Students', 'Average Grade', 
         'Average Time', 'Average Tries', 'Referral', 'Country', 'Mobile Teacher', 'Browser Teacher', 'OS Teacher', 
         'Mobile Student', 'Browser Student', 'OS Student', 'Amount Logins Teacher', 'Amount Logins Students']

# To see which features are the most important ones
(x,y, indicesAttributes) = prepData('trialframe.pkl', 1)
clf = ExtraTreesClassifier()
clf = clf.fit(x, y)
feat = clf.feature_importances_ 
labeledResults = []
for i in range(len(feat)): 
    labeledResults.append((i, feat[i]))

# Sort on importance 
a = sorted(labeledResults, key = lambda x : x[1], reverse=True)
count = 1
for i in a:
    print(str(count) + ': ' + features[i[0]])
    print(str(i[1]) + '\n')
    count += 1

1: Amount Classrooms
0.408404178622

2: Average Amount Students
0.101625688699

3: Referral
0.0776313599271

4: Average Time
0.0756184517027

5: Teacher/Parent
0.0733884582583

6: Amount of Students
0.0679271710688

7: Average Grade
0.0676780057879

8: Country
0.0669589950326

9: Average Tries
0.0607676909018



# Venn diagrams of data

In [26]:
# Divide buyers and non-buyers
# Also needed for plots 
frame = getData('trialframe.pkl')
frame = removeAnomalies(frame, 1)
frame = convertZero(frame)
frame, att = stringToInt(frame)
newFrame = frame[['Amount of Students', 'Average Amount Students', 'Average Grade', 
                  'Average Time', 'Country', 'Browser Teacher', 'OS Teacher',
                  'Browser Student', 'OS Student', 'Bought']]

buyers = newFrame[newFrame.Bought == 1]
nonBuyers = newFrame[newFrame.Bought == 0]
(xnonBuyers, ynonBuyers) = splitData(nonBuyers)
(xtotal, ytotal) = splitData(newFrame)

In [7]:
from matplotlib import pyplot as plt
import numpy as np
from matplotlib_venn import venn2, venn2_circles

categories = ['Country', 'Browser Teacher', 'OS Teacher', 'Browser Student', 'OS Student']

# Make Venn diagrams and print percentages and categories of these diagrams
for i in range(len(categories)):
    plt.figure(i+1)
    plt.title("Venn Diagram" + categories[i])
    set1 = set(buyers[categories[i]])
    set2 = set(nonBuyers[categories[i]])
    venn2([set1, set2],  set_labels = ('Buyers', 'Non-Buyers'))
    print(categories[i] + ": ")
    nonBuyersCat = set2.difference(set1.intersection(set2))
    nonBuyersSet = list(nonBuyers[categories[i]])
    for j in nonBuyersCat:
        print(att[i][j] + ' ' + str(round(nonBuyersSet.count(j)/len(nonBuyersSet), 2)) + '%')
    print("\n")

plt.show()

Country: 
argentina 0.0%
belgium 0.01%
italy 0.0%
morocco 0.01%


Browser Teacher: 
Firefox 34.0 0.0%
Firefox 39.0 0.0%
Chrome 41.0.2272.118 0.0%
Firefox 40.0 0.02%
Chrome 44.0.2403.157 0.0%
Safari 4.0 0.0%
Chrome 45.0.2454.98 0.0%
Safari 7.0 0.0%
Chrome 46.0.2490.64 0.0%
Safari 7.1.8 0.0%
Safari 8.0.6 0.0%
Safari 8.0.8 0.0%
Chromium 45.0.2454.101 0.0%


OS Teacher: 
Windows XP 0.01%
Linux 0.0%
Windows 8 0.01%


Browser Student: 
Firefox 41.0 0.0%
Firefox 42.0 0.01%
Chrome 45.0.2454.93 0.02%
Chrome 45.0.2454.98 0.01%
Chrome 45.0.2454.99 0.0%
Chrome 46.0.2490.71 0.0%


OS Student: 
Windows Vista 0.0%
Windows XP 0.0%




# Clustering findings

In [25]:
from sklearn.cluster import MeanShift

# Clustering within the non buyers with meanshift
meanshift = MeanShift()
meanshift.fit(xnonBuyers)
labels = meanshift.labels_
labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 1, 0, 1, 1, 5,
       1, 1, 0, 0, 0, 0, 6, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 2, 0, 0,
       0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 3, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 1, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 2, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2,
       1, 0, 1])

In [27]:
from sklearn.cluster import KMeans

# Clustering within the non buyers with kMeans
kmean = KMeans(n_clusters=3)
kmean.fit_predict(xnonBuyers)
labels = kmean.labels_
labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, 1,
       2, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0,
       0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 2, 2, 2,
       2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 2, 2, 0, 0, 0, 2, 0, 2, 0, 2, 2, 2,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, 0, 2,
       0, 2, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 0, 0,
       0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 2,
       0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2,
       0, 0, 2, 0, 0, 0, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2,
       2, 0, 2], dtype=int32)

In [29]:
# Clustering in complete data with meanshift
meanshift = MeanShift()
meanshift.fit(xtotal)
labels = meanshift.labels_
labels

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        1,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  1,
        0,  0,  0,  0,  0,  0,  7,  1,  0,  0,  0,  0,  0,  0,  0,  0,  4,
        0,  0,  0,  3,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0, 13,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  9,  0,  0,  0, 11,
       12,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0, 10,  2,  0,  0,  0,  0,  0,  0, 10,
        0,  0,  1,  0,  0,  0,  0, 10,  0,  0,  3,  0,  0,  0,  0, 10,  0,
        1,  0,  0,  0,  0, 10,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  1,  0, 10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0

In [28]:
# Clustering in complete data with kMeans
kmean = KMeans(n_clusters=2)
kmean.fit_predict(xtotal)
labels = kmean.labels_
labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0,

# Plots of data

In [8]:
# Make a graph of the accuracies
import plotly.plotly as py
# Sign in with your information
username = ''
code = ''
py.sign_in(username, code)
import plotly.graph_objs as go

# Data for 50/50 data
trace1 = go.Bar(
    x=namesAlgorithms,
    y=accuracies50,
    name='50/50 Data'
) # Data for complete data
trace2 = go.Bar(
    x=namesAlgorithms,
    y=accuraciesNormal,
    name='All Data'
)

data = [trace1, trace2]
layout = go.Layout(
    barmode='group'
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-barnew')

## Average values of the features: 
* Amount of Students
* Average Amount Students 
* Average Grade
* Average Tries
* Average Time

In [9]:
features = ['Amount of Students', 'Average Amount Students', 'Average Grade', 'Average Time'] 

bmeans = []
nmeans = []
for feature in features:
    bmeans.append(buyers[feature].mean())
    nmeans.append(nonBuyers[feature].mean())
    
trace1 = go.Bar(x=features, y=bmeans, name='Buyers')
trace2 = go.Bar(x=features, y=nmeans, name='Non-Buyers')
data = [trace1, trace2]
layout = go.Layout(barmode='group')
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-bar')

## Most common values of the features:
* Country
* Browser Teacher
* OS Teacher
* Browser Student
* OS Student

In [10]:
def convertNames(numbers, n):
    ''' Given a list of categories in number form and the index of the category in the attributeslist,
        return the list of categories in word form. '''
    names = []
    for num in numbers:
        names.append(att[n][num])
    return names

def printGraph(n,buy):
    ''' Make a graph of the corresponding input values that indicate a category and if it is bought or not. '''
    if buy == 1:
        values = mostcommonsb[n].reset_index().values
    else:
        values = mostcommonsn[n].reset_index().values
    freq = values.T[1]
    names = convertNames(values.T[0],n)
    trace = go.Bar(x=names, y=freq)
    data = [trace]
    layout = go.Layout(barmode='group')
    fig = go.Figure(data=data, layout=layout)
    return fig

In [12]:
# Get lists of most common values
features2 = ['Country', 'Browser Teacher', 'OS Teacher', 'Browser Student', 'OS Student']

n = 6
mostcommonsb = []
mostcommonsn = []
for feature in features2:
    mostcommonsb.append(buyers[feature].value_counts()[:n])
    mostcommonsn.append(nonBuyers[feature].value_counts()[:n])

In [13]:
# Most common countries: non-buyers
fig = printGraph(0,0)
py.iplot(fig, filename='grouped-bar')

In [14]:
# Most common countries: buyers
fig = printGraph(0,1)
py.iplot(fig, filename='grouped-bar')

In [15]:
# Most Common Browser Teacher: non-buyers
fig = printGraph(1,0)
py.iplot(fig, filename='grouped-bar')

In [16]:
# Most common Browser Teacher: non-buyers
fig = printGraph(1,1)
py.iplot(fig, filename='grouped-bar')

In [17]:
# Most Common OS Teacher: non-buyers
fig = printGraph(2,0)
py.iplot(fig, filename='grouped-bar')

In [18]:
# Most Common OS Teacher: buyers
fig = printGraph(2,1)
py.iplot(fig, filename='grouped-bar')

In [19]:
# Most common Browser Student: non-buyers
fig = printGraph(3,0)
py.iplot(fig, filename='grouped-bar')

In [20]:
# Most common Browser Students: buyers
fig = printGraph(3,1)
py.iplot(fig, filename='grouped-bar')

In [21]:
# Most common OS Students: non-buyers
fig = printGraph(4,0)
py.iplot(fig, filename='grouped-bar')

In [22]:
# Most common OS Student: buyers
fig = printGraph(4,1)
py.iplot(fig, filename='grouped-bar')