Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 0 additions & 55 deletions CHANGELOG.md

This file was deleted.

2 changes: 1 addition & 1 deletion PULL_REQUEST_TEMPLATE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[ ] Wrote test for feature
[ ] Added note about PR in CHANGELOG.md
[ ] Added changes in the Changelog section in README.md
[ ] Bumped version number (delete if unneeded)

**Changes proposed:**
Expand Down
29 changes: 27 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
A Berkeley library for introductory data science.

[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/dsten/datascience?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
[![Documentation Status](https://readthedocs.org/projects/datascience/badge/?version=v0.5.1)](http://datascience.readthedocs.org/en/v0.5.1/?badge=v0.5.1)
[![Documentation Status](https://readthedocs.org/projects/datascience/badge/?version=master)](http://datascience.readthedocs.org/en/master/?badge=master)


*written by Professor [John DeNero](http://denero.org), Professor
[David Culler](http://www.cs.berkeley.edu/~culler),
[Sam Lau](https://github.com/samlau95), and [Alvin Wan](http://alvinwan.com)*

For an example of usage, see the [Berkeley Data 8 class](http://databears.berkeley.edu/content/csinfostat-c8-foundations-data-science).
For an example of usage, see the [Berkeley Data 8 class](http://data8.org/).

[![Build Status](https://travis-ci.org/data-8/datascience.svg?branch=master)](https://travis-ci.org/data-8/datascience)
[![Coverage Status](https://coveralls.io/repos/dsten/datascience/badge.svg?branch=master&service=github)](https://coveralls.io/github/dsten/datascience?branch=master)
Expand All @@ -23,6 +23,31 @@ Use `pip`:
pip install datascience
```

## Changelog

This project adheres to [Semantic Versioning](http://semver.org/).

### [Unreleased]
None yet.

### v0.8.0
**Breaking changes**

- Changed the default behavior of `table.sample` to `with_replacement=True`
instead of `False`. (3717b67)

**Additions**

- Added `Map.copy`.
- Added `Map.overlay`, which overlays one or more features on a new copy of the Map.
(315bb63e)

### v0.7.1
- Removed rogue print from `table.hist`.

### v0.7.0
- Added predicates for string comparison: `containing` and `contained_in`. (#231)

## Documentation

API reference is at http://data8.org/datascience/ .
Expand Down
54 changes: 54 additions & 0 deletions datascience/maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import functools
import random

from .tables import Table

# Scalar numeric types: built-in ints and floats plus any NumPy number.
_number = (int, float, np.number)


Expand Down Expand Up @@ -92,6 +94,13 @@ def __init__(self, features=(), ids=(), width=960, height=500, **kwargs):
self._height = height
self._attrs.update(kwargs)

def copy(self):
    """Build and return a new ``Map`` from this map's features, size and attributes.

    The feature collection, width, height and extra attributes are handed to
    the ``Map`` constructor as they are; nothing is duplicated here.
    """
    duplicate = Map(
        features=self._features,
        width=self._width,
        height=self._height,
        **self._attrs
    )
    return duplicate

def __getitem__(self, id):
    """Return the feature stored under ``id`` in this map's feature collection."""
    features = self._features
    return features[id]

Expand Down Expand Up @@ -257,6 +266,51 @@ def color(self, values, ids=(), key_on='feature.id', palette='YlOrBr', **kwargs)
colored._folium_map = m
return colored

def overlay(self, feature, color='Blue', opacity=0.6):
    """
    Overlays ``feature`` on the map. Returns a new Map.

    Args:
        ``feature``: a ``Table`` of map features, a list or array of map
            features, a ``Map``, a ``Region``, or a circle marker map
            table. The features will be overlaid on the Map with the
            specified ``color``.

        ``color`` (``str``): Fill color of the feature. Defaults to 'Blue'.

        ``opacity`` (``float``): Fill opacity of the overlaid feature.
            Defaults to 0.6.

    Returns:
        A new ``Map`` (obtained from ``self.copy()``) with ``feature``
        drawn on it. A ``feature`` of any other type is ignored and the
        copy is returned with nothing drawn.

    Note:
        The fill color and opacity are written onto the supplied feature
        objects themselves before they are drawn.
    """
    result = self.copy()

    if isinstance(feature, Table):
        if 'feature' in feature:
            # Table of features, e.g. Table.from_records(taz_map.features).
            feature = feature['feature']
        else:
            # Marker table, e.g. a table with columns:
            # latitudes, longitudes, popup, color, radius.
            feature = Circle.map_table(feature)

    # Normalize every supported input into one iterable of features.
    if isinstance(feature, (list, np.ndarray)):
        to_draw = feature
    elif isinstance(feature, Map):
        stored = feature._features
        # Features may be keyed by arbitrary ids, so walk the stored values
        # instead of indexing by position 0..n-1.
        to_draw = list(stored.values()) if isinstance(stored, dict) else stored
    elif isinstance(feature, Region):
        to_draw = [feature]
    else:
        to_draw = []

    for f in to_draw:
        f._attrs['fill_color'] = color
        f._attrs['fill_opacity'] = opacity
        f.draw_on(result._folium_map)
    return result

@classmethod
def read_geojson(cls, path_or_json_or_string):
"""Read a geoJSON string, object, or file. Return a dict of features keyed by ID."""
Expand Down
25 changes: 10 additions & 15 deletions datascience/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import pandas
import IPython

import datascience.maps as _maps
import datascience.formats as _formats
import datascience.util as _util
from datascience.util import make_array
Expand Down Expand Up @@ -1170,7 +1169,7 @@ def percentile(self, p):
percentiles = [[_util.percentile(p, column)] for column in self.columns]
return self._with_columns(percentiles)

def sample(self, k=None, with_replacement=False, weights=None):
def sample(self, k=None, with_replacement=True, weights=None):
"""Returns a new table where k rows are randomly sampled from the
original table.

Expand All @@ -1179,8 +1178,8 @@ def sample(self, k=None, with_replacement=False, weights=None):
sampled. If an integer, k rows from the original table are
sampled.

with_replacement (bool): If False (default), samples the rows
without replacement. If True, samples the rows with replacement.
with_replacement (bool): If True (default), samples the rows with
replacement. If False, samples the rows without replacement.

weights (list/array or None): If None (default), samples the rows
using a uniform random distribution. If a list/array is passed
Expand All @@ -1204,9 +1203,15 @@ def sample(self, k=None, with_replacement=False, weights=None):
>>> jobs.sample() # doctest: +SKIP
job | wage
b | 20
c | 15
b | 20
a | 10
d | 8
>>> jobs.sample(with_replacement=True) # doctest: +SKIP
job | wage
d | 8
b | 20
c | 15
a | 10
>>> jobs.sample(k = 2) # doctest: +SKIP
job | wage
b | 20
Expand Down Expand Up @@ -2149,16 +2154,6 @@ def boxplot(self, **vargs):
values = list(columns.values())
plt.boxplot(values, **vargs)

# Deprecated
def points(self, column__lat, column__long, labels=None, colors=None, **kwargs):
    """Map points taken from a latitude and a longitude column. [Deprecated]

    Emits a ``FutureWarning`` and hands the work to ``Circle.map``.

    Args:
        column__lat: column to fetch latitudes from, via ``_get_column``.
        column__long: column to fetch longitudes from, via ``_get_column``.
        labels: optional column fetched the same way and passed as
            ``labels``; ``None`` is passed through unchanged.
        colors: optional column fetched the same way and passed as
            ``colors``; ``None`` is passed through unchanged.
        **kwargs: forwarded untouched to ``Circle.map``.

    Returns:
        Whatever ``Circle.map`` returns.
    """
    warnings.warn("points is deprecated. Use Circle.map", FutureWarning)
    lats = self._get_column(column__lat)
    lons = self._get_column(column__long)
    label_values = None if labels is None else self._get_column(labels)
    color_values = None if colors is None else self._get_column(colors)
    return _maps.Circle.map(
        lats, lons, labels=label_values, colors=color_values, **kwargs
    )


###########
# Support #
Expand Down
2 changes: 1 addition & 1 deletion datascience/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.7.1'
__version__ = '0.8.0'
3 changes: 2 additions & 1 deletion docs/tables.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ Creation
:toctree: _autosummary

Table.__init__
Table.empty
Table.from_records
Table.from_columns_dict
Table.read_table
Table.from_df
Table.from_array
Expand Down Expand Up @@ -132,6 +134,5 @@ Visualizations
Table.barh
Table.pivot_hist
Table.hist
Table.points
Table.scatter
Table.boxplot
2 changes: 1 addition & 1 deletion docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ From the text:

.. ipython:: python

baby = Table.read_table('http://data8.org/textbook/notebooks/baby.csv')
baby = Table.read_table('https://github.com/data-8/textbook/raw/9aa0a167bc514749338cd7754f2b339fd095ee9b/notebooks/baby.csv')
baby # Let's take a peek at the table

# Select out columns we want.
Expand Down
4 changes: 2 additions & 2 deletions tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -999,7 +999,7 @@ def test_sample_weights_worepl(table):
"""
iterations, i = 100, 0
while i < iterations:
u = table.sample(table.num_rows)
u = table.sample(table.num_rows, with_replacement=False)
assert len(set(u.rows)) == len(u.rows)
i += 1

Expand All @@ -1010,7 +1010,7 @@ def test_sample_weights_with_none_k(table):
"""
iterations, i = 100, 0
while i < iterations:
u = table.sample()
u = table.sample(with_replacement=False)
assert len(set(u.rows)) == len(u.rows)
i += 1

Expand Down