Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 57 additions & 16 deletions datascience/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,9 @@ def column_index(self, column_label):
return self.column_labels.index(column_label)

def apply(self, fn, column_label):
"""Apply a function to each element of a column."""
return [fn(v) for v in self[column_label]]
"""Returns an array where fn is applied to each element
of a specified column."""
return np.array([fn(v) for v in self[column_label]])

##########
# Modify #
Expand Down Expand Up @@ -458,6 +459,59 @@ def percentile(self, p):
percentiles = [percentile(p, self[column_name]) for column_name in self]
return Table(percentiles, self.column_labels)

def sample(self, k=None, with_replacement=False, weights=None):
"""Returns a new table where k rows are randomly sampled from the
original table.

Kwargs:
k (int or None): If None (default), all the rows in the table are
sampled. If an integer, k rows from the original table are
sampled.

with_replacement (bool): If False (default), samples the rows
without replacement. If True, samples the rows with replacement.

weights (list/array or None): If None (default), samples the rows
using a uniform random distribution. If a list/array is passed
in, it must be the same length as the number of rows in the
table and the values must sum to 1. The rows will then be
sampled according to the probability distribution in
``weights``.

Returns:
A new instance of ``Table``.

>>> foo_table
job | wage
a | 10
b | 20
c | 15
d | 8

>>> foo_table.sample()
job | wage
b | 20
c | 15
a | 10
d | 8

>>> foo_table.sample(k = 2)
job | wage
b | 20
c | 15

>>> foo_table.sample(k = 2, with_replacement = True,
... weights = [0.5, 0.5, 0, 0])
job | wage
a | 10
a | 10

"""
n = self.num_rows
rows = [self.rows[index] for index in
np.random.choice(n, k or n, replace=with_replacement, p=weights)]
return Table.from_rows(rows, self.column_labels)

##################
# Export/Display #
##################
Expand Down Expand Up @@ -540,19 +594,6 @@ def index_by(self, column_or_label):
index.setdefault(key, []).append(row)
return index

def _sample(self, k, with_replacement, weights):
"""Returns list of sampled rows"""
n = self.num_rows
indices = np.random.choice(
n, k or n, replace=with_replacement, p=weights)
return [self.rows[i] for i in indices]

def sample(self, k=None, with_replacement=False, weights=None):
"""Returns a new table"""
return Table.from_rows(
self._sample(k, with_replacement, weights),
self.column_labels)

#############
# Visualize #
#############
Expand Down Expand Up @@ -611,7 +652,7 @@ def barh(self, column_for_categories, overlay=False, **vargs):
(instead of the default behavior of creating n - 1 charts).
Also adds a legend that matches each bar color to its column.

vargs: Additional arguments that get passed into :func:plt.barh.
vargs: Additional arguments that get passed into `plt.barh`.
See http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.barh
for additional arguments that can be passed into vargs. These
include: `linewidth`, `xerr`, `yerr`, and `log`, to name a few.
Expand Down
6 changes: 6 additions & 0 deletions docs/_autosummary/datascience.tables.Table.sample.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
datascience.tables.Table.sample
===============================

.. currentmodule:: datascience.tables

.. automethod:: Table.sample
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified docs/_build/doctrees/_autosummary/datascience.tables.Table.doctree
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified docs/_build/doctrees/_autosummary/datascience.tables.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/_build/doctrees/formats.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/index.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/maps.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/tables.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/tutorial.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/util.doctree
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@
<script type="text/javascript" src="../_static/jquery.js"></script>
<script type="text/javascript" src="../_static/underscore.js"></script>
<script type="text/javascript" src="../_static/doctools.js"></script>
<link rel="top" title="datascience 0.3.4 documentation" href="../index.html" />
<link rel="up" title="Tables (datascience.tables)" href="../tables.html" />
<link rel="next" title="datascience.tables.Table.__len__" href="datascience.tables.Table.__len__.html" />
<link rel="prev" title="datascience.tables.Table.__len__" href="datascience.tables.Table.__len__.html" />
<link rel="top" title="datascience 0.3.4 documentation" href="../index.html" />
</head>
<body role="document">
<div class="related" role="navigation" aria-label="related navigation">
Expand All @@ -38,14 +35,7 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="right" >
<a href="datascience.tables.Table.__len__.html" title="datascience.tables.Table.__len__"
accesskey="N">next</a> |</li>
<li class="right" >
<a href="datascience.tables.Table.__len__.html" title="datascience.tables.Table.__len__"
accesskey="P">previous</a> |</li>
<li class="nav-item nav-item-0"><a href="../index.html">datascience 0.3.4 documentation</a> &raquo;</li>
<li class="nav-item nav-item-1"><a href="../tables.html" accesskey="U">Tables (<code class="docutils literal"><span class="pre">datascience.tables</span></code>)</a> &raquo;</li>
<li class="nav-item nav-item-0"><a href="../index.html">datascience 0.3.4 documentation</a> &raquo;</li>
</ul>
</div>

Expand Down Expand Up @@ -75,12 +65,6 @@ <h1>datascience.tables.Table.__init__<a class="headerlink" href="#datascience-ta
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<h4>Previous topic</h4>
<p class="topless"><a href="datascience.tables.Table.__len__.html"
title="previous chapter">datascience.tables.Table.__len__</a></p>
<h4>Next topic</h4>
<p class="topless"><a href="datascience.tables.Table.__len__.html"
title="next chapter">datascience.tables.Table.__len__</a></p>
<div role="note" aria-label="source link">
<h3>This Page</h3>
<ul class="this-page-menu">
Expand Down Expand Up @@ -114,14 +98,7 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="right" >
<a href="datascience.tables.Table.__len__.html" title="datascience.tables.Table.__len__"
>next</a> |</li>
<li class="right" >
<a href="datascience.tables.Table.__len__.html" title="datascience.tables.Table.__len__"
>previous</a> |</li>
<li class="nav-item nav-item-0"><a href="../index.html">datascience 0.3.4 documentation</a> &raquo;</li>
<li class="nav-item nav-item-1"><a href="../tables.html" >Tables (<code class="docutils literal"><span class="pre">datascience.tables</span></code>)</a> &raquo;</li>
<li class="nav-item nav-item-0"><a href="../index.html">datascience 0.3.4 documentation</a> &raquo;</li>
</ul>
</div>
<div class="footer" role="contentinfo">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@
<script type="text/javascript" src="../_static/jquery.js"></script>
<script type="text/javascript" src="../_static/underscore.js"></script>
<script type="text/javascript" src="../_static/doctools.js"></script>
<link rel="top" title="datascience 0.3.4 documentation" href="../index.html" />
<link rel="up" title="Tables (datascience.tables)" href="../tables.html" />
<link rel="next" title="Maps (datascience.maps)" href="../maps.html" />
<link rel="prev" title="datascience.tables.Table.__init__" href="datascience.tables.Table.__init__.html" />
<link rel="top" title="datascience 0.3.4 documentation" href="../index.html" />
</head>
<body role="document">
<div class="related" role="navigation" aria-label="related navigation">
Expand All @@ -38,14 +35,7 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="right" >
<a href="../maps.html" title="Maps (datascience.maps)"
accesskey="N">next</a> |</li>
<li class="right" >
<a href="datascience.tables.Table.__init__.html" title="datascience.tables.Table.__init__"
accesskey="P">previous</a> |</li>
<li class="nav-item nav-item-0"><a href="../index.html">datascience 0.3.4 documentation</a> &raquo;</li>
<li class="nav-item nav-item-1"><a href="../tables.html" accesskey="U">Tables (<code class="docutils literal"><span class="pre">datascience.tables</span></code>)</a> &raquo;</li>
<li class="nav-item nav-item-0"><a href="../index.html">datascience 0.3.4 documentation</a> &raquo;</li>
</ul>
</div>

Expand All @@ -69,12 +59,6 @@ <h1>datascience.tables.Table.__len__<a class="headerlink" href="#datascience-tab
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<h4>Previous topic</h4>
<p class="topless"><a href="datascience.tables.Table.__init__.html"
title="previous chapter">datascience.tables.Table.__init__</a></p>
<h4>Next topic</h4>
<p class="topless"><a href="../maps.html"
title="next chapter">Maps (<code class="docutils literal"><span class="pre">datascience.maps</span></code>)</a></p>
<div role="note" aria-label="source link">
<h3>This Page</h3>
<ul class="this-page-menu">
Expand Down Expand Up @@ -108,14 +92,7 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="right" >
<a href="../maps.html" title="Maps (datascience.maps)"
>next</a> |</li>
<li class="right" >
<a href="datascience.tables.Table.__init__.html" title="datascience.tables.Table.__init__"
>previous</a> |</li>
<li class="nav-item nav-item-0"><a href="../index.html">datascience 0.3.4 documentation</a> &raquo;</li>
<li class="nav-item nav-item-1"><a href="../tables.html" >Tables (<code class="docutils literal"><span class="pre">datascience.tables</span></code>)</a> &raquo;</li>
<li class="nav-item nav-item-0"><a href="../index.html">datascience 0.3.4 documentation</a> &raquo;</li>
</ul>
</div>
<div class="footer" role="contentinfo">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
<script type="text/javascript" src="../_static/doctools.js"></script>
<link rel="top" title="datascience 0.3.4 documentation" href="../index.html" />
<link rel="up" title="Tables (datascience.tables)" href="../tables.html" />
<link rel="next" title="Maps (datascience.maps)" href="../maps.html" />
<link rel="next" title="datascience.tables.Table.set_format" href="datascience.tables.Table.set_format.html" />
<link rel="prev" title="datascience.tables.Table.column_index" href="datascience.tables.Table.column_index.html" />
</head>
<body role="document">
Expand All @@ -39,7 +39,7 @@ <h3>Navigation</h3>
<a href="../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="right" >
<a href="../maps.html" title="Maps (datascience.maps)"
<a href="datascience.tables.Table.set_format.html" title="datascience.tables.Table.set_format"
accesskey="N">next</a> |</li>
<li class="right" >
<a href="datascience.tables.Table.column_index.html" title="datascience.tables.Table.column_index"
Expand All @@ -59,7 +59,8 @@ <h1>datascience.tables.Table.apply<a class="headerlink" href="#datascience-table
<dl class="method">
<dt id="datascience.tables.Table.apply">
<code class="descclassname">Table.</code><code class="descname">apply</code><span class="sig-paren">(</span><em>fn</em>, <em>column_label</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/datascience/tables.html#Table.apply"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#datascience.tables.Table.apply" title="Permalink to this definition">¶</a></dt>
<dd><p>Apply a function to each element of a column.</p>
<dd><p>Returns an array where fn is applied to each element
of a specified column.</p>
</dd></dl>

</div>
Expand All @@ -74,8 +75,8 @@ <h4>Previous topic</h4>
<p class="topless"><a href="datascience.tables.Table.column_index.html"
title="previous chapter">datascience.tables.Table.column_index</a></p>
<h4>Next topic</h4>
<p class="topless"><a href="../maps.html"
title="next chapter">Maps (<code class="docutils literal"><span class="pre">datascience.maps</span></code>)</a></p>
<p class="topless"><a href="datascience.tables.Table.set_format.html"
title="next chapter">datascience.tables.Table.set_format</a></p>
<div role="note" aria-label="source link">
<h3>This Page</h3>
<ul class="this-page-menu">
Expand Down Expand Up @@ -110,7 +111,7 @@ <h3>Navigation</h3>
<a href="../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="right" >
<a href="../maps.html" title="Maps (datascience.maps)"
<a href="datascience.tables.Table.set_format.html" title="datascience.tables.Table.set_format"
>next</a> |</li>
<li class="right" >
<a href="datascience.tables.Table.column_index.html" title="datascience.tables.Table.column_index"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ <h1>datascience.tables.Table.barh<a class="headerlink" href="#datascience-tables
<dd>category, one for each column other than <cite>column_for_categories</cite>
(instead of the default behavior of creating n - 1 charts).
Also adds a legend that matches each bar color to its column.</dd>
<dt>vargs: Additional arguments that get passed into :func:plt.barh.</dt>
<dt>vargs: Additional arguments that get passed into <cite>plt.barh</cite>.</dt>
<dd>See <a class="reference external" href="http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.barh">http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.barh</a>
for additional arguments that can be passed into vargs. These
include: <cite>linewidth</cite>, <cite>xerr</cite>, <cite>yerr</cite>, and <cite>log</cite>, to name a few.</dd>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
<script type="text/javascript" src="../_static/doctools.js"></script>
<link rel="top" title="datascience 0.3.4 documentation" href="../index.html" />
<link rel="up" title="Tables (datascience.tables)" href="../tables.html" />
<link rel="next" title="Maps (datascience.maps)" href="../maps.html" />
<link rel="prev" title="datascience.tables.Table.from_rows" href="" />
<link rel="next" title="datascience.tables.Table.from_records" href="datascience.tables.Table.from_records.html" />
<link rel="prev" title="datascience.tables.Table" href="datascience.tables.Table.html" />
</head>
<body role="document">
<div class="related" role="navigation" aria-label="related navigation">
Expand All @@ -39,10 +39,10 @@ <h3>Navigation</h3>
<a href="../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="right" >
<a href="../maps.html" title="Maps (datascience.maps)"
<a href="datascience.tables.Table.from_records.html" title="datascience.tables.Table.from_records"
accesskey="N">next</a> |</li>
<li class="right" >
<a href="#" title="datascience.tables.Table.from_rows"
<a href="datascience.tables.Table.html" title="datascience.tables.Table"
accesskey="P">previous</a> |</li>
<li class="nav-item nav-item-0"><a href="../index.html">datascience 0.3.4 documentation</a> &raquo;</li>
<li class="nav-item nav-item-1"><a href="../tables.html" accesskey="U">Tables (<code class="docutils literal"><span class="pre">datascience.tables</span></code>)</a> &raquo;</li>
Expand Down Expand Up @@ -71,11 +71,11 @@ <h1>datascience.tables.Table.from_rows<a class="headerlink" href="#datascience-t
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<h4>Previous topic</h4>
<p class="topless"><a href=""
title="previous chapter">datascience.tables.Table.from_rows</a></p>
<p class="topless"><a href="datascience.tables.Table.html"
title="previous chapter">datascience.tables.Table</a></p>
<h4>Next topic</h4>
<p class="topless"><a href="../maps.html"
title="next chapter">Maps (<code class="docutils literal"><span class="pre">datascience.maps</span></code>)</a></p>
<p class="topless"><a href="datascience.tables.Table.from_records.html"
title="next chapter">datascience.tables.Table.from_records</a></p>
<div role="note" aria-label="source link">
<h3>This Page</h3>
<ul class="this-page-menu">
Expand Down Expand Up @@ -110,10 +110,10 @@ <h3>Navigation</h3>
<a href="../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="right" >
<a href="../maps.html" title="Maps (datascience.maps)"
<a href="datascience.tables.Table.from_records.html" title="datascience.tables.Table.from_records"
>next</a> |</li>
<li class="right" >
<a href="#" title="datascience.tables.Table.from_rows"
<a href="datascience.tables.Table.html" title="datascience.tables.Table"
>previous</a> |</li>
<li class="nav-item nav-item-0"><a href="../index.html">datascience 0.3.4 documentation</a> &raquo;</li>
<li class="nav-item nav-item-1"><a href="../tables.html" >Tables (<code class="docutils literal"><span class="pre">datascience.tables</span></code>)</a> &raquo;</li>
Expand Down
Loading