From 3717b6794ea62ac2c57044a188105008b346065f Mon Sep 17 00:00:00 2001 From: Sam Lau Date: Sun, 25 Sep 2016 16:50:36 -0700 Subject: [PATCH 1/6] Make table.sample default to `with_replacement=True` --- datascience/tables.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/datascience/tables.py b/datascience/tables.py index 7cbc7fe37..9b0573a2f 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -1170,7 +1170,7 @@ def percentile(self, p): percentiles = [[_util.percentile(p, column)] for column in self.columns] return self._with_columns(percentiles) - def sample(self, k=None, with_replacement=False, weights=None): + def sample(self, k=None, with_replacement=True, weights=None): """Returns a new table where k rows are randomly sampled from the original table. @@ -1179,8 +1179,8 @@ def sample(self, k=None, with_replacement=False, weights=None): sampled. If an integer, k rows from the original table are sampled. - with_replacement (bool): If False (default), samples the rows - without replacement. If True, samples the rows with replacement. + with_replacement (bool): If True (default), samples the rows with + replacement. If False, samples the rows without replacement. weights (list/array or None): If None (default), samples the rows using a uniform random distribution. 
If a list/array is passed @@ -1204,9 +1204,15 @@ def sample(self, k=None, with_replacement=False, weights=None): >>> jobs.sample() # doctest: +SKIP job | wage b | 20 - c | 15 + b | 20 a | 10 d | 8 + >>> jobs.sample(with_replacement=True) # doctest: +SKIP + job | wage + d | 8 + b | 20 + c | 15 + a | 10 >>> jobs.sample(k = 2) # doctest: +SKIP job | wage b | 20 From 315bb63ef22c38d6d8a2cb358d54fa8fb5ad5b98 Mon Sep 17 00:00:00 2001 From: Sam Lau Date: Sun, 25 Sep 2016 17:39:16 -0700 Subject: [PATCH 2/6] Add copy and overlay methods to Map --- datascience/maps.py | 54 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/datascience/maps.py b/datascience/maps.py index 45ab4b3f8..e3c5589f8 100644 --- a/datascience/maps.py +++ b/datascience/maps.py @@ -15,6 +15,8 @@ import functools import random +from .tables import Table + _number = (int, float, np.number) @@ -92,6 +94,13 @@ def __init__(self, features=(), ids=(), width=960, height=500, **kwargs): self._height = height self._attrs.update(kwargs) + def copy(self): + """ + Copies the current Map into a new one and returns it. + """ + return Map(features=self._features, width=self._width, + height=self._height, **self._attrs) + def __getitem__(self, id): return self._features[id] @@ -257,6 +266,51 @@ def color(self, values, ids=(), key_on='feature.id', palette='YlOrBr', **kwargs) colored._folium_map = m return colored + def overlay(self, feature, color='Blue', opacity=0.6): + """ + Overlays ``feature`` on the map. Returns a new Map. + + Args: + ``feature``: a ``Table`` of map features, a list of map features, + a Map, a Region, or a circle marker map table. The features will + be overlaid on the Map with the specified ``color``. + + ``color`` (``str``): Color of feature. Defaults to 'Blue' + + ``opacity`` (``float``): Opacity of overlain feature. Defaults to + 0.6. + + Returns: + A new ``Map`` with the overlain ``feature``. 
+ """ + result = self.copy() + if type(feature) == Table: + # if table of features e.g. Table.from_records(taz_map.features) + if 'feature' in feature: + feature = feature['feature'] + + # if marker table e.g. table with columns: latitudes,longitudes,popup,color,radius + else: + feature = Circle.map_table(feature) + + if type(feature) in [list, np.ndarray]: + for f in feature: + f._attrs['fill_color'] = color + f._attrs['fill_opacity'] = opacity + f.draw_on(result._folium_map) + + elif type(feature) == Map: + for i in range(len(feature._features)): + f = feature._features[i] + f._attrs['fill_color'] = color + f._attrs['fill_opacity'] = opacity + f.draw_on(result._folium_map) + elif type(feature) == Region: + feature._attrs['fill_color'] = color + feature._attrs['fill_opacity'] = opacity + feature.draw_on(result._folium_map) + return result + @classmethod def read_geojson(cls, path_or_json_or_string): """Read a geoJSON string, object, or file. Return a dict of features keyed by ID.""" From 9036addd0115cf792a143a03a543dea07a5dcef6 Mon Sep 17 00:00:00 2001 From: Sam Lau Date: Sun, 25 Sep 2016 17:39:25 -0700 Subject: [PATCH 3/6] Remove table.points from Table --- datascience/tables.py | 11 ----------- docs/tables.rst | 1 - 2 files changed, 12 deletions(-) diff --git a/datascience/tables.py b/datascience/tables.py index 9b0573a2f..ba52bd8a2 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -19,7 +19,6 @@ import pandas import IPython -import datascience.maps as _maps import datascience.formats as _formats import datascience.util as _util from datascience.util import make_array @@ -2155,16 +2154,6 @@ def boxplot(self, **vargs): values = list(columns.values()) plt.boxplot(values, **vargs) - # Deprecated - def points(self, column__lat, column__long, labels=None, colors=None, **kwargs) : - """Draw points from latitude and longitude columns. [Deprecated]""" - warnings.warn("points is deprecated. 
Use Circle.map", FutureWarning) - latitudes = self._get_column(column__lat) - longitudes = self._get_column(column__long) - if labels is not None : labels = self._get_column(labels) - if colors is not None : colors = self._get_column(colors) - return _maps.Circle.map(latitudes, longitudes, labels=labels, colors=colors, **kwargs) - ########### # Support # diff --git a/docs/tables.rst b/docs/tables.rst index 706dfc3e3..53a2dcf3a 100644 --- a/docs/tables.rst +++ b/docs/tables.rst @@ -132,6 +132,5 @@ Visualizations Table.barh Table.pivot_hist Table.hist - Table.points Table.scatter Table.boxplot From e1fb4b1c05c1178c21e33ec16129346daba2d3a8 Mon Sep 17 00:00:00 2001 From: Sam Lau Date: Sun, 25 Sep 2016 17:41:48 -0700 Subject: [PATCH 4/6] Move changelog to README and bump version to 0.8.0 --- CHANGELOG.md | 55 ------------------------------------------ README.md | 29 ++++++++++++++++++++-- datascience/version.py | 2 +- 3 files changed, 28 insertions(+), 58 deletions(-) delete mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index c931caaf8..000000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,55 +0,0 @@ -`datascience` ChangeLog -======================= - -All notable changes to this project will be documented in this file. -This project adheres to [Semantic Versioning](http://semver.org/). - -## [Unreleased] -None yet. - -## v0.7.0 -### Changed -- Added predicates for string comparison: `containing` and `contained_in`. (#231) - -## v0.6.0 -### Changed -- Added `make_array` to make arrays without lists. (#224) -- `Table.select`, `drop`, and `with_columns` now accept variable arguments in addition to lists. (#224) - -## v0.5.3 -### Changed -- Allow charting methods to select particular columns and default to - `overlay=True` (#193) -- Make the `values` arg for `pivot` optional. Counting is the default behavior. - (#191) - -### Removed -- The query manager (`Q` class) is removed since no one used it, ever. 
- -## v0.5.1 -### Added -- New Table interface: with_columns, labels, column, relabeled - -### Changed -- Table.__init__ takes labels as its first argument - -### Deprecated -- Two-argument Table.__init__ -- Table.empty -- Table.from_rows -- Table.from_columns_dict -- Table.__getattr__ -- Table.points -- Table.column_labels renamed to labels -- Table.values renamed to column -- Table.with_relabeling renamed to relabeled - -## v0.4.0 -### Added -- This CHANGELOG file! -- Docs are now up on [readthedocs][rtd]. -- [`util.table_apply` function][table_apply] - -[rtd]: http://datascience.readthedocs.org/en/latest/index.html -[table_apply]: https://github.com/data-8/datascience/blob/f7c11b5132299dab0c75a5862cdab9c5b619c7e5/datascience/util.py#L62-L82 - diff --git a/README.md b/README.md index fde8c9bc0..8c9d2faa6 100644 --- a/README.md +++ b/README.md @@ -3,14 +3,14 @@ A Berkeley library for introductory data science. [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/dsten/datascience?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) -[![Documentation Status](https://readthedocs.org/projects/datascience/badge/?version=v0.5.1)](http://datascience.readthedocs.org/en/v0.5.1/?badge=v0.5.1) +[![Documentation Status](https://readthedocs.org/projects/datascience/badge/?version=master)](http://datascience.readthedocs.org/en/master/?badge=master) *written by Professor [John DeNero](http://denero.org), Professor [David Culler](http://www.cs.berkeley.edu/~culler), [Sam Lau](https://github.com/samlau95), and [Alvin Wan](http://alvinwan.com)* -For an example of usage, see the [Berkeley Data 8 class](http://databears.berkeley.edu/content/csinfostat-c8-foundations-data-science). +For an example of usage, see the [Berkeley Data 8 class](http://data8.org/). 
[![Build Status](https://travis-ci.org/data-8/datascience.svg?branch=master)](https://travis-ci.org/data-8/datascience) [![Coverage Status](https://coveralls.io/repos/dsten/datascience/badge.svg?branch=master&service=github)](https://coveralls.io/github/dsten/datascience?branch=master) @@ -23,6 +23,31 @@ Use `pip`: pip install datascience ``` +## Changelog + +This project adheres to [Semantic Versioning](http://semver.org/). + +### [Unreleased] +None yet. + +### v0.8.0 +**Breaking changes** + +- Change default behavior of `table.sample` to `with_replacement=True` instead + of `False`. (3717b67) + +**Additions** + +- Added `Map.copy`. +- Added `Map.overlay`, which overlays one or more features on a new copy of the Map. + (315bb63e) + +### v0.7.1 +- Remove rogue print from `table.hist` + +### v0.7.0 +- Added predicates for string comparison: `containing` and `contained_in`. (#231) + ## Documentation API reference is at http://data8.org/datascience/ . diff --git a/datascience/version.py b/datascience/version.py index f0788a87a..32a90a3b9 100644 --- a/datascience/version.py +++ b/datascience/version.py @@ -1 +1 @@ -__version__ = '0.7.1' +__version__ = '0.8.0' From e36bb23d134186730279f87066cf163a33580e53 Mon Sep 17 00:00:00 2001 From: Sam Lau Date: Sun, 25 Sep 2016 17:48:57 -0700 Subject: [PATCH 5/6] Attempt to fix tutorial in doc --- PULL_REQUEST_TEMPLATE | 2 +- docs/tables.rst | 2 ++ docs/tutorial.rst | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/PULL_REQUEST_TEMPLATE b/PULL_REQUEST_TEMPLATE index 62c73839e..6aab9459b 100644 --- a/PULL_REQUEST_TEMPLATE +++ b/PULL_REQUEST_TEMPLATE @@ -1,5 +1,5 @@ [ ] Wrote test for feature -[ ] Added note about PR in CHANGELOG.md +[ ] Added changes in the Changelog section in README.md [ ] Bumped version number (delete if unneeded) **Changes proposed:** diff --git a/docs/tables.rst b/docs/tables.rst index 53a2dcf3a..c3adea5fd 100644 --- a/docs/tables.rst +++ b/docs/tables.rst @@ -42,7 +42,9 @@ Creation :toctree: _autosummary 
Table.__init__ + Table.empty Table.from_records + Table.from_columns_dict Table.read_table Table.from_df Table.from_array diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 512b6430c..0c84a9ae9 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -320,7 +320,7 @@ From the text: .. ipython:: python - baby = Table.read_table('http://data8.org/textbook/notebooks/baby.csv') + baby = Table.read_table('https://github.com/data-8/textbook/raw/9aa0a167bc514749338cd7754f2b339fd095ee9b/notebooks/baby.csv') baby # Let's take a peek at the table # Select out columns we want. From 29589a15795edc1fd50e6da6353aafed2faf3bfa Mon Sep 17 00:00:00 2001 From: Sam Lau Date: Sun, 25 Sep 2016 17:56:34 -0700 Subject: [PATCH 6/6] Fix tests --- tests/test_tables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_tables.py b/tests/test_tables.py index 6805047e6..7d4506e74 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -999,7 +999,7 @@ def test_sample_weights_worepl(table): """ iterations, i = 100, 0 while i < iterations: - u = table.sample(table.num_rows) + u = table.sample(table.num_rows, with_replacement=False) assert len(set(u.rows)) == len(u.rows) i += 1 @@ -1010,7 +1010,7 @@ def test_sample_weights_with_none_k(table): """ iterations, i = 100, 0 while i < iterations: - u = table.sample() + u = table.sample(with_replacement=False) assert len(set(u.rows)) == len(u.rows) i += 1