In [24]:
#!/usr/bin/python

"""
========
Barchart
========

A bar plot with errorbars and height labels on individual bars
"""""

import numpy as np
import matplotlib.pyplot as plt

from matplotlib.ticker import FuncFormatter
formatter = FuncFormatter(lambda y, pos: "%d%%" % (y))
#ax.yaxis.set_major_formatter(formatter)
plt.figure(figsize=(1.5,1)) # sets size of figure

<matplotlib.figure.Figure at 0x7f367054eef0>

In [25]:
# dictionary to store each study's accuracies and standard deviation values

algorithms = {
    'knn': {},
    'dec_tree': {},
    'naive_bayes': {},
    'svm': {},
    'gaussian': {},
    'ran_forest': {},
    'neural_net': {},
    'adaboost': {},
    'extra_trees': {},
}

# full names for each algorithm
algorithms['knn']['name'] = "KNN"
algorithms['dec_tree']['name'] = "Decision Tree"
algorithms['naive_bayes']['name'] = "Naive Bayes"
algorithms['svm']['name'] = "SVM"
algorithms['gaussian']['name'] = "Gaussian Process"
algorithms['ran_forest']['name'] = "Random Forest"
algorithms['neural_net']['name'] = "Neural Net"
algorithms['adaboost']['name'] = "AdaBoost"
algorithms['extra_trees']['name'] = "Extra Trees Classifier"

# stores studies that were not ran with 9 classifiers
other = {
    'wu': {},
    'yat': {},
}
other['wu']['title'] = 'Wu'
other['yat']['title'] = 'Yatsunenko'

In [26]:
# Classifciation ccuracies for each study (in decimal format)
# full names for each algorithm

# in order of amish, HMP, turnbaugh, new_study
algorithms['knn']['accuracies'] = [0.470502021563, 0.517695200177, 0.689094301221, 0.854166666667]
algorithms['dec_tree']['accuracies'] = [0.580275718778, 0.679468038045, 0.657598371777, 0.85119047619]
algorithms['naive_bayes']['accuracies'] = [0.457659478886, 0.547445255474, 0.549327227499,0.72619047619] 
algorithms['svm']['accuracies'] = [0.530750224618, 0.592402123424, 0.703132066938, 0.880952380952]
algorithms['gaussian']['accuracies'] = [0.5, 0.5, 0.5, 0.5]
algorithms['ran_forest']['accuracies'] = [0.700398697215, 0.639239106392, 0.778459972863,  0.964285714286] 
algorithms['neural_net']['accuracies'] = [0.532827942498, 0.628400796284, 0.750045228403,  0.827380952381] 
algorithms['adaboost']['accuracies'] = [0.637415768194, 0.661634594116, 0.770612844867, 0.91369047619]
algorithms['extra_trees']['accuracies'] = [0.67268081761, 0.606890068569, 0.774061510629, 0.970238095238]


# Wu study accuracies
other['wu']['accuracies'] = [0.592995169082, 0.652657004831, 0.49154589372, 0.728985507246, 0.5, 
                              0.966183574879, 0.711111111111, 0.619082125604]
# Yatsunenko study accuracies
other['yat']['accuracies'] = [0.584343825249, 0.638425925926, 0.485989278752, 0.530306157551, 0.5]

In [27]:
# Standard deviation values for each study (in decimal format)

# in order of amish, HMP, turnbaugh, new_study
algorithms['knn']['std_dev'] = [0.08512, 0.1373923, 0.202611069314, 0.137219]
algorithms['dec_tree']['std_dev'] = [0.136595, 0.198704, 0.115264000084, 0.1383601] 
algorithms['naive_bayes']['std_dev'] = [0.087962, 0.1, 0.0877397345522, 0.14617]
algorithms['svm']['std_dev'] = [0.10546, 0.1711936, 0.202617456781, 0.20495]
algorithms['gaussian']['std_dev'] = [0.0, 0.0, 0.0, 0.0]
algorithms['ran_forest']['std_dev'] = [0.11296, 0.2135, 0.157518127573, 0.0509175]
algorithms['neural_net']['std_dev'] = [0.142749, 0.15766, 0.155300886903, 0.1327533] 
algorithms['adaboost']['std_dev'] = [0.1725878, 0.166027, 0.182318062827,  0.09429]
algorithms['extra_trees']['std_dev'] = [0.12152, 0.1696152, 0.169972989806,  0.0786165]

# Wu study standard deviation values
other['wu']['std_dev'] = [0.28734894159, 0.193476958146, 0.025, 0.18405916923, 0.0, 
                           0.0666666666667, 0.258112809101, 0.314735338552]

# Yatsunenko study standard deviation values
other['yat']['std_dev'] = [0.121632955255, 0.138845548793, 0.0223951604119, 0.146730148321]

In [28]:
'''
# Randomly Generated Accuracies (Most frequent)

# Amish study random accuracies
values['amish']['ran_acc'] = 0.665217

# HMP study random accuracies
values['HMP']['ran_acc'] = 0.81879

# Turnbaugh study random accuracies
values['turnbaugh']['ran_acc'] = 0.76205

# New study random accuracies
values['new_study']['ran_acc'] = 0.5

# Wu study random accuracies
other['wu']['std_dev'] = 0

# Yatsunenko study random accuracies
other['yat']['std_dev'] = 0
'''

"\n# Randomly Generated Accuracies (Most frequent)\n\n# Amish study random accuracies\nvalues['amish']['ran_acc'] = 0.665217\n\n# HMP study random accuracies\nvalues['HMP']['ran_acc'] = 0.81879\n\n# Turnbaugh study random accuracies\nvalues['turnbaugh']['ran_acc'] = 0.76205\n\n# New study random accuracies\nvalues['new_study']['ran_acc'] = 0.5\n\n# Wu study random accuracies\nother['wu']['std_dev'] = 0\n\n# Yatsunenko study random accuracies\nother['yat']['std_dev'] = 0\n"

In [29]:
# Iterates through all the studies for both accuracies and std_dev lists
# in order to obtain percentage values (multiply by 100)

# iterate through each study in dictionary
for alg in algorithms:
    # multiply accuracies in study by 100
    for index, item in enumerate(algorithms[alg]['accuracies']):
        algorithms[alg]['accuracies'][index] *= 100

    #multiply each std_dev value in study by 100
    for index, item in enumerate(algorithms[alg]['std_dev']):
        algorithms[alg]['std_dev'][index] *= 100
        
    
    #values[study_name]['ran_acc'] *= 100

In [30]:
# Plots a figure given a study_name (string) and its corresponding list of accuracies and list of std deviations
# assumes that 9 classifiers are used

def plotFigure():
    N = 9 # number of classifiers
    
    ind = np.arange(N)  # the x locations for the groups
    width = 0.35      # the width of the bars

    # plot bars
    fig, ax = plt.subplots()
    
    rects1 = ax.bar(ind, algorithms['knn']['accuracies'], width, color='#6db7c4', yerr=algorithms['knn']['std_dev'])
    rects2 = ax.bar(ind, algorithms['dec_tree']['accuracies'], width, color='#6db7c4', yerr=algorithms['dec_tree']['std_dev'])
    rects3 = ax.bar(ind, algorithms['naive_bayes']['accuracies'], width, color='#6db7c4', yerr=algorithms['naive_bayes']['std_dev'])
    rects4 = ax.bar(ind, algorithms['svm']['accuracies'], width, color='#6db7c4', yerr=algorithms['svm']['std_dev'])
    rects5 = ax.bar(ind, algorithms['gaussian']['accuracies'], width, color='#6db7c4', yerr=algorithms['gaussian']['std_dev'])
    rects6 = ax.bar(ind, algorithms['ran_forest']['accuracies'], width, color='#6db7c4', yerr=algorithms['ran_forest']['std_dev'])
    rects7 = ax.bar(ind, algorithms['neural_net']['accuracies'], width, color='#6db7c4', yerr=algorithms['neural_net']['std_dev'])
    rects8 = ax.bar(ind, algorithms['adaboost']['accuracies'], width, color='#6db7c4', yerr=algorithms['adaboost']['std_dev'])
    rects9 = ax.bar(ind, algorithms['extra_trees']['accuracies'], width, color='#6db7c4', yerr=algorithms['extra_trees']['std_dev'])
    
    
    ax.legend((rects1[0], rects2[0], rects3[0], rects4[0], rects5[0], rects6[0], rects7[0], rects8[0], rects9[0]), 
              ('Amish', 'HMP', 'Turnbaugh', 'New Study'))
    
    #ax.axhline(y=ran_acc, c="red",linewidth=1,zorder=0)

    # range for y values
    plt.ylim([0,100])
    
    # labels along x axis (classifiers)
    labels = ("KNN", "Decision Tree", "Naive Bayes", "SVM", "Gaussian Process", "Random Forest", 
               "Neural Net", "AdaBoost", "Extra Trees Classifier")

    # add some text for labels, title and axes ticks
    ax.set_ylabel('Classification Accuracies (%)')
    ax.set_title('Classification Accuracies for Each Study')

    # ticks for name of classifier
    plt.xticks(range(4), labels, rotation=35, ha='center')
    ax.set_xticklabels(labels, ha='right')

    # displays the figure
    plt.show()

In [31]:
plotFigure()

ValueError: incompatible sizes: argument 'height' must be length 9 or scalar