From 674f918d8c7a7be524ae50f89167468ed0df77df Mon Sep 17 00:00:00 2001 From: Sam Lau Date: Thu, 3 Sep 2015 18:15:01 -0700 Subject: [PATCH 1/4] Add more documentation to Table.hist Also makes `Table.hist` throw a more helpful error message when the Table contains invalid values (`np.flexible`, which includes strings). Before, the method raised an error about not being able to operate on "flexible types", which is not extremely helpful without knowledge of numpy types. --- datascience/tables.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/datascience/tables.py b/datascience/tables.py index 4516a5827..1fba8c134 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -667,7 +667,35 @@ def pivot_hist(self, pivot_column_label, value_column_label, overlay=False, **va return t def hist(self, overlay=False, **vargs): - """Draw histograms of all columns.""" + """Requires all columns in the table to contain numerical values only. + If the columns contain other types, a ValueError is raised. + + Draw one histogram per column. If the overlay argument is True, a legend + containing the column name is shown on each histogram. + + See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist + for additional arguments that can be passed into vargs. These include: + bins, range, normed, cumulative, and orientation, to name a few. + + >>> table + count | points + 9 | 1 + 3 | 2 + 3 | 2 + 1 | 10 + + >>> table.hist() + + + """ + # Check for non-numerical values and raise a ValueError if any found + # TODO(sam): Is a ValueError the right thing to raise? + for col in self: + if any(isinstance(cell, np.flexible) for cell in self[col]): + raise ValueError("The column '{0}' contains non-numerical " + "values. A histogram cannot be drawn for this table." 
+ .format(col)) + n = len(self) colors = list(itertools.islice(itertools.cycle(('b', 'g', 'r')), n)) if overlay: From 9e9a823d31861c95782b52df554031da5e0ba29f Mon Sep 17 00:00:00 2001 From: Sam Lau Date: Sat, 5 Sep 2015 01:09:25 -0700 Subject: [PATCH 2/4] Add documentation to the barh method As well as raising a ValueError with a helpful error message if we try to graph non-numerical values. --- datascience/tables.py | 76 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/datascience/tables.py b/datascience/tables.py index 1fba8c134..7593a4d9e 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -582,10 +582,82 @@ def annotate(axis, ticks): self._visualize(labels, xticks, overlay, draw, annotate) def barh(self, column_for_categories, overlay=False, **vargs): - """Plot contents as a horizontal bar chart.""" + """Plots horizontal bar charts for the table. + + Each chart is categorized using the values in `column_for_categories` + and one chart is produced for every other column in the table. + A total of n - 1 charts are created where n is the number of columns + in the table. + + Note that the order of the categories in the table is not guaranteed to + be preserved in the bar graph. Ex. `barh` on a table with "a", "b", "c" + as the rows in the `column_for_categories` may not output a bar graph + with the labels in that order. + + Requires every column except for `column_for_categories` to be + numerical. If the columns contain other types, a `ValueError` is + raised. + + Args: + column_for_categories (str): The name to use for the bar chart + categories + + Kwargs: + overlay (bool): If True, creates one chart with n - 1 bars for each + category, one for each column other than `column_for_categories` + (instead of the default behavior of creating n - 1 charts). + Also adds a legend that matches each bar color to its column. + + vargs: Additional arguments that get passed into :func:plt.barh. 
+ See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.barh + for additional arguments that can be passed into vargs. These + include: `linewidth`, `xerr`, `yerr`, and `log`, to name a few. + + Returns: + None + + Raises: + ValueError: The Table contained non-numerical values in columns + other than `column_for_categories` + + >>> furniture_table + Type of furniture | Count + chairs | 6 + tables | 1 + desks | 2 + + >>> furniture_table.barh('Type of furniture') + + + >>> furniture_table.barh('Count') + ValueError: The column 'Type of furniture' contains non-numerical + values. A bar graph cannot be drawn for this table. + + >>> foo_table + Type of furniture | Count | Other col + chairs | 6 | 10 + tables | 1 | 20 + desks | 2 | 30 + + >>> foo_table.barh('Type of furniture') + + + + >>> foo_table.barh('Type of furniture', overlay=True) + + """ options = self.default_options.copy() options.update(vargs) + yticks, labels = self._split(column_for_categories) + for label in labels: + if any(isinstance(cell, np.flexible) for cell in self[label]): + raise ValueError("The column '{0}' contains non-numerical " + "values. A bar graph cannot be drawn for this table." + .format(label)) + index = np.arange(self.num_rows) margin = 0.1 width = 1 - 2 * margin @@ -675,7 +747,7 @@ def hist(self, overlay=False, **vargs): See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist for additional arguments that can be passed into vargs. These include: - bins, range, normed, cumulative, and orientation, to name a few. + bins, range, normed, cumulative, and orientation, to name a few. 
>>> table count | points From 9ed723cb1a91fa8b68e7fb76ffd7a6f55f1c9036 Mon Sep 17 00:00:00 2001 From: Sam Lau Date: Sat, 5 Sep 2015 01:14:10 -0700 Subject: [PATCH 3/4] Make Table.hist documentation a little better --- datascience/tables.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/datascience/tables.py b/datascience/tables.py index 7593a4d9e..07f2c5629 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -617,7 +617,7 @@ def barh(self, column_for_categories, overlay=False, **vargs): None Raises: - ValueError: The Table contained non-numerical values in columns + ValueError: The Table contains non-numerical values in columns other than `column_for_categories` >>> furniture_table @@ -739,15 +739,26 @@ def pivot_hist(self, pivot_column_label, value_column_label, overlay=False, **va return t def hist(self, overlay=False, **vargs): - """Requires all columns in the table to contain numerical values only. + """Plots one histogram for each column in the table. + + Requires all columns in the table to contain numerical values only. If the columns contain other types, a ValueError is raised. - Draw one histogram per column. If the overlay argument is True, a legend - containing the column name is shown on each histogram. + Kwargs: + overlay (bool): If True, adds a legend to each of the plots showing + the column name being plotted. + + vargs: Additional arguments that get passed into :func:plt.hist. + See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist + for additional arguments that can be passed into vargs. These + include: `bins`, `range`, `normed`, `cumulative`, and + `orientation`, to name a few. + + Returns: + None - See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist - for additional arguments that can be passed into vargs. These include: - bins, range, normed, cumulative, and orientation, to name a few. 
+ Raises: + ValueError: The Table contains non-numerical values >>> table count | points From 1698fc0b2f0e71b3fc490b96c03eba3fd259ea51 Mon Sep 17 00:00:00 2001 From: Sam Lau Date: Sat, 5 Sep 2015 01:28:19 -0700 Subject: [PATCH 4/4] Fix erroneous description of overlay arg for hist --- datascience/tables.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datascience/tables.py b/datascience/tables.py index 07f2c5629..2f934a5db 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -745,8 +745,10 @@ def hist(self, overlay=False, **vargs): If the columns contain other types, a ValueError is raised. Kwargs: - overlay (bool): If True, adds a legend to each of the plots showing - the column name being plotted. + overlay (bool): If True, plots 1 chart with all the histograms + overlaid on top of each other (instead of the default behavior of + one histogram for each column in the table). Also adds a legend + that matches each bar color to its column. vargs: Additional arguments that get passed into :func:plt.hist. See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist