diff --git a/datascience/tables.py b/datascience/tables.py index 4516a5827..2f934a5db 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -582,10 +582,82 @@ def annotate(axis, ticks): self._visualize(labels, xticks, overlay, draw, annotate) def barh(self, column_for_categories, overlay=False, **vargs): - """Plot contents as a horizontal bar chart.""" + """Plots horizontal bar charts for the table. + + Each chart is categorized using the values in `column_for_categories` + and one chart is produced for every other column in the table. + A total of n - 1 charts are created where n is the number of columns + in the table. + + Note that the order of the categories in the table is not guaranteed to + be preserved in the bar graph. Ex. `barh` on a table with "a", "b", "c" + as the rows in the `column_for_categories` may not output a bar graph + with the labels in that order. + + Requires every column except for `column_for_categories` to be + numerical. If the columns contain other types, a `ValueError` is + raised. + + Args: + column_for_categories (str): The name to use for the bar chart + categories + + Kwargs: + overlay (bool): If True, creates one chart with n - 1 bars for each + category, one for each column other than `column_for_categories` + (instead of the default behavior of creating n - 1 charts). + Also adds a legend that matches each bar color to its column. + + vargs: Additional arguments that get passed into :func:plt.barh. + See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.barh + for additional arguments that can be passed into vargs. These + include: `linewidth`, `xerr`, `yerr`, and `log`, to name a few. 
+ + Returns: + None + + Raises: + ValueError: The Table contains non-numerical values in columns + other than `column_for_categories` + + >>> furniture_table + Type of furniture | Count + chairs | 6 + tables | 1 + desks | 2 + + >>> furniture_table.barh('Type of furniture') + + + >>> furniture_table.barh('Count') + ValueError: The column 'Type of furniture' contains non-numerical + values. A bar graph cannot be drawn for this table. + + >>> foo_table + Type of furniture | Count | Other col + chairs | 6 | 10 + tables | 1 | 20 + desks | 2 | 30 + + >>> foo_table.barh('Type of furniture') + + + + >>> foo_table.barh('Type of furniture', overlay=True) + + """ options = self.default_options.copy() options.update(vargs) + yticks, labels = self._split(column_for_categories) + for label in labels: + if any(isinstance(cell, np.flexible) for cell in self[label]): + raise ValueError("The column '{0}' contains non-numerical " + "values. A bar graph cannot be drawn for this table." + .format(label)) + index = np.arange(self.num_rows) margin = 0.1 width = 1 - 2 * margin @@ -667,7 +739,48 @@ def pivot_hist(self, pivot_column_label, value_column_label, overlay=False, **va return t def hist(self, overlay=False, **vargs): - """Draw histograms of all columns.""" + """Plots one histogram for each column in the table. + + Requires all columns in the table to contain numerical values only. + If the columns contain other types, a ValueError is raised. + + Kwargs: + overlay (bool): If True, plots 1 chart with all the histograms + overlaid on top of each other (instead of the default behavior of + one histogram for each column in the table). Also adds a legend + that matches each bar color to its column. + + vargs: Additional arguments that get passed into :func:plt.hist. + See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist + for additional arguments that can be passed into vargs. These + include: `bins`, `range`, `normed`, `cumulative`, and + `orientation`, to name a few. 
+ + Returns: + None + + Raises: + ValueError: The Table contains non-numerical values + + >>> table + count | points + 9 | 1 + 3 | 2 + 3 | 2 + 1 | 10 + + >>> table.hist() + + + """ + # Check for non-numerical values and raise a ValueError if any found + # TODO(sam): Is a ValueError the right thing to raise? + for col in self: + if any(isinstance(cell, np.flexible) for cell in self[col]): + raise ValueError("The column '{0}' contains non-numerical " + "values. A histogram cannot be drawn for this table." + .format(col)) + n = len(self) colors = list(itertools.islice(itertools.cycle(('b', 'g', 'r')), n)) if overlay: