In [None]:
#default_exp core

# core


> Monkey patches for pandas.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
import warnings
from functools import partial

from fastcore.all import *
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

## Transformations

In [None]:
#export
@patch
def repetitions(self:pd.DataFrame, col):
    # Group the rows by `col`, then report how many rows fall in each group.
    grouped = self.groupby(col)
    return grouped.size()

In [None]:
#export
add_docs(pd.DataFrame, repetitions='Counts the number of repetitions for each element.')

In [None]:
show_doc(pd.DataFrame.repetitions)

<h4 id="DataFrame.repetitions" class="doc_header"><code>DataFrame.repetitions</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>DataFrame.repetitions</code>(**`col`**)

Counts the number of repetitions for each element.

In [None]:
df = pd.DataFrame({'a': [1, 2, 3, 4, 4, 5, 5, 6, 6, 6], 'b':[1, 1, 1, 1, 2, 2, 2, 3, 3, 4]})
df.repetitions('b')

b
1    4
2    3
3    2
4    1
dtype: int64

In [None]:
test(df.repetitions('b'), pd.Series({1:4, 2:3,3:2, 4:1}), all_equal)

In [None]:
#export
@patch
def repetition_counts(self:pd.DataFrame, col):
    # Step 1: rows per distinct value of `col`; step 2: tally how often each of those counts occurs.
    rows_per_value = self.repetitions(col)
    return rows_per_value.value_counts()

In [None]:
#export
add_docs(pd.DataFrame,repetition_counts='Counts the number of groups with the same number of repetitions.')

In [None]:
show_doc(pd.DataFrame.repetition_counts) 

<h4 id="DataFrame.repetition_counts" class="doc_header"><code>DataFrame.repetition_counts</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>DataFrame.repetition_counts</code>(**`col`**)

Counts the number of groups with the same number of repetitions.

In the following example there are three groups with one element, two groups with two elements, and one group with three elements.

In [None]:
df.repetition_counts('a')

1    3
2    2
3    1
dtype: int64

In [None]:
test(df.repetition_counts('a'), pd.Series({1: 3, 2:2, 3:1}), all_equal)

In [None]:
#export
@patch
def single_events(self:pd.DataFrame, col):
    # Boolean mask keyed by the grouped values of `col`: True where that value occurs exactly once.
    occurs_once = self.repetitions(col) == 1
    # Index the frame by `col` so the mask can select rows by value, then turn `col` back into a column.
    by_col = self.set_index(col)
    return by_col.loc[occurs_once].reset_index()

In [None]:
#export
add_docs(pd.DataFrame, single_events='Returns rows that appear only once.')

In [None]:
show_doc(pd.DataFrame.single_events)

<h4 id="DataFrame.single_events" class="doc_header"><code>DataFrame.single_events</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>DataFrame.single_events</code>(**`col`**)

Returns rows that appear only once.

In [None]:
df.single_events('a')

Unnamed: 0,a,b
0,1,1
1,2,1
2,3,1


In [None]:
test_eq(df.single_events('a'), df.loc[[0, 1, 2]])

## Functions as methods


Pandas functions that are easier to execute as DataFrame/Series methods.


In [None]:
#export
@patch
@delegates(pd.crosstab)
def crosstab(self:pd.DataFrame, index, column, **kwargs):
    # `index` and `column` are used to select from this frame; the selections become
    # the first two arguments of `pd.crosstab`, and all other keywords are forwarded untouched.
    row_factor, col_factor = self[index], self[column]
    return pd.crosstab(row_factor, col_factor, **kwargs)

In [None]:
#export
add_docs(pd.DataFrame, crosstab=pd.crosstab.__doc__)

In [None]:
#export
@patch
def len(self:pd.DataFrame):
    # Delegate to the frame's own `__len__` instead of calling a function named `len`
    # from inside a function named `len`; the result is the same value `len(df)` gives.
    return self.__len__()

In [None]:
#export
add_docs(pd.DataFrame, len=len.__doc__)

## One-liners

> These methods allow fast exploration of the data in one line.

In [None]:
#export
@patch(as_prop=True)
def l(self:pd.Index):
    '''The index wrapped in an `L` (built with `use_list=True`).'''
    wrapped = L(self, use_list=True)
    return wrapped

In [None]:
#export
@patch(as_prop=True)
def minmax(self:pd.Series):
    '''Tuple `(minimum, maximum)` of the Series, as given by `min()` and `max()`.'''
    lowest = self.min()
    highest = self.max()
    return (lowest, highest)

In [None]:
#export
@patch
def page(self:pd.DataFrame, page, page_size=5):
    # Pages are 1-indexed: page 1 is rows [0, page_size), page 2 is [page_size, 2*page_size), ...
    # A positional slice (rather than head(...).tail(...)) keeps a partial last page
    # partial instead of padding it with rows that already appeared on the previous page.
    start = (page-1)*page_size
    return self.iloc[start:start+page_size]

In [None]:
#export
add_docs(pd.DataFrame, page='Shows rows from (page-1)*page_size to page*page_size (pages are 1-indexed).')

In [None]:
df = pd.DataFrame({'a': range(15), 'b': range(15)})
df.page(2)

Unnamed: 0,a,b
5,5,5
6,6,6
7,7,7
8,8,8
9,9,9


In [None]:
#export
@patch
def page(self:pd.Series, page, page_size=5):
    # Pages are 1-indexed: page 1 is elements [0, page_size), page 2 is [page_size, 2*page_size), ...
    # A positional slice (rather than head(...).tail(...)) keeps a partial last page
    # partial instead of padding it with elements that already appeared on the previous page.
    start = (page-1)*page_size
    return self.iloc[start:start+page_size]

In [None]:
#export
add_docs(pd.Series, page='Shows rows from (page-1)*page_size to page*page_size (pages are 1-indexed).')

In [None]:
s = pd.Series(range(15))
s.page(2)

5    5
6    6
7    7
8    8
9    9
dtype: int64

## Method Variations


These methods are slight variations of existing DataFrame and Series methods.


In [None]:
#export
@patch
def renamec(self:pd.DataFrame, d):
    # Forward `d` to `rename` as its `columns` argument only.
    renamed = self.rename(columns=d)
    return renamed

In [None]:
#export
add_docs(pd.DataFrame, renamec='Renames column names.')

In [None]:
#export
@patch
def notin(self:pd.Series, values):
    # Compute membership first, then invert it element-wise.
    is_member = self.isin(values)
    return ~is_member

In [None]:
#export
add_docs(pd.Series, notin='Whether elements in Series are not contained in `values`.')

In [None]:
#export
@patch
def mapk(self:pd.Series, fun, **kwargs):
    # Bind the keyword arguments up front so `map` can call the result with one element at a time.
    bound_fun = partial(fun, **kwargs)
    return self.map(bound_fun)

In [None]:
#export
add_docs(pd.Series, mapk='Like map but passes kwargs to function.')

## Utils


In [None]:
#export
def dummydf():
    '''Small two-column example frame: `col_1` holds 100..104 and `col_2` holds the letters a..e.'''
    columns = {
        'col_1': range(100, 105),
        'col_2': list('abcde'),
    }
    return pd.DataFrame(columns)

## Display

In [None]:
#export
@patch
def title(self:pd.DataFrame, title):
    '''Returns a widget showing `title` in a label above the displayed DataFrame.'''
    # Capture the frame's rich display inside an Output widget so it can be stacked with the label.
    frame_out = widgets.Output()
    with frame_out:
        display(self)
    heading = widgets.Label(title, layout=widgets.Layout(align_items='center'))
    return widgets.VBox([heading, frame_out])

In [None]:
df = pd.DataFrame({'a': range(5), 'b': range(5)})
df.title('I am a table')

VBox(children=(Label(value='I am a table', layout=Layout(align_items='center')), Output()))

In [None]:
#export
class Walker:
    '''Integer counter that moves one step at a time, optionally clamped by `min_val` and `max_val`.'''
    def __init__(self, val=0, min_val=None, max_val=None): store_attr()
    def _next(self, *args, **kwargs):
        '''Increment `val` by one, capping it at `max_val` when a maximum is set.

        Extra positional/keyword arguments are accepted and ignored.
        '''
        self.val += 1
        # Compare against None (not truthiness) so that a bound of 0 is still enforced.
        if self.max_val is not None: self.val = min(self.max_val, self.val)
    def _prev(self, *args, **kwargs):
        '''Decrement `val` by one, flooring it at `min_val` when a minimum is set.

        Extra positional/keyword arguments are accepted and ignored.
        '''
        self.val -= 1
        # Compare against None (not truthiness) so that a bound of 0 is still enforced.
        if self.min_val is not None: self.val = max(self.min_val, self.val)
        

In [None]:
#export
class Less:
    '''Pager widget: shows one page of `df` together with prev/next buttons and a page label.

    The assembled widget is rendered into `self.out`.
    '''
    def __init__(self, df, page_size=5, page=1):
        # Keeps the constructor arguments on the instance; `self.df` and `self.page_size` are read below.
        store_attr()

        self.out = widgets.Output(wait=True)
        self.out_df = widgets.Output(wait=True)
        self.out_df.append_display_data(self.df.page(page, page_size=self.page_size))

        # Ceiling division: an exact multiple of `page_size` must not produce a trailing
        # empty page, and an empty input still counts as one page.
        self.n_pages = max(1, -(-len(df)//self.page_size))
        # From here on `self.page` is the Walker that tracks the current page, clamped to [1, n_pages].
        self.page = Walker(val=page, min_val=1, max_val=self.n_pages)

        # Each button gets two callbacks: first move the Walker, then redraw.
        # NOTE(review): this relies on callbacks firing in registration order.
        self.next = widgets.Button(description='next')
        self.next.on_click(self.page._next)
        self.next.on_click(self.refresh)

        self.prev = widgets.Button(description='prev')
        self.prev.on_click(self.page._prev)
        self.prev.on_click(self.refresh)

        layout = widgets.Layout(width='100%', display='flex', align_items='center')
        self.out_label = widgets.Output(wait=True)
        with self.out_label: display(widgets.Label(f"page {self.page.val} of {self.n_pages}"))
        self.box = widgets.VBox([self.out_df, widgets.HBox([self.prev, self.next, self.out_label])])
        with self.out:
            display(self.box)

    def refresh(self, event):
        '''Redraw the data page and the page label for the Walker's current value.

        `event` is the argument passed by the button click and is not used.
        '''
        self.out_df.clear_output()
        with self.out_df: display(self.df.page(self.page.val, page_size=self.page_size))
        self.out_label.clear_output()
        with self.out_label: display(widgets.Label(f"page {self.page.val} of {self.n_pages}"))

In [None]:
#export
@patch
def less(self:pd.DataFrame, page_size=5, page=1):
    # Build the pager for this frame and hand back only its outer Output widget.
    pager = Less(self, page_size=page_size, page=page)
    return pager.out

In [None]:
#export
add_docs(pd.DataFrame, less='Displays one page of the DataFrame and buttons to move forward and backward.')

In [None]:
df = pd.DataFrame({'a': range(17), 'b': range(17)})
df.less(page_size=7, page=2)

Label(value='page 2 of 3')

Output()

In [None]:
#export
@patch
def less(self:pd.Series, page_size=5, page=1):
    # Same signature as the DataFrame version: `page` selects the page shown first (default 1).
    # Returns the pager's outer Output widget.
    return Less(self, page_size=page_size, page=page).out

In [None]:
#export
add_docs(pd.Series, less='Displays one page of the Series and buttons to move forward and backward.')

In [None]:
s = pd.Series(range(7))
s.less()

Label(value='page 1 of 2')

Output()

## Move columns to the front/back

In [None]:
#export
@patch
def c2back(self:pd.DataFrame, cols2back):
    # Normalise to a real list: `is_listy` also accepts non-list inputs such as tuples,
    # which cannot be concatenated with a list below; a single label is wrapped in a list.
    cols2back = list(cols2back) if is_listy(cols2back) else [cols2back]
    # Remaining columns keep their current order, followed by the requested ones.
    cols = [c for c in self.columns if c not in cols2back]+cols2back
    return self[cols]

In [None]:
#export
@patch
def c2front(self:pd.DataFrame, cols2front):
    # Normalise to a real list: `is_listy` also accepts non-list inputs such as tuples,
    # which cannot be concatenated with a list below; a single label is wrapped in a list.
    cols2front = list(cols2front) if is_listy(cols2front) else [cols2front]
    # Requested columns first, followed by the remaining ones in their current order.
    cols = cols2front + [c for c in self.columns if c not in cols2front]
    return self[cols]

In [None]:
#export
add_docs(pd.DataFrame,
         c2back='''Move columns to back''',
         c2front='''Move columns to front''')

In [None]:
df = dummydf()

In [None]:
df.c2back(['col_1'])

Unnamed: 0,col_2,col_1
0,a,100
1,b,101
2,c,102
3,d,103
4,e,104


In [None]:
df.c2back('col_1')

Unnamed: 0,col_2,col_1
0,a,100
1,b,101
2,c,102
3,d,103
4,e,104


In [None]:
df.c2front('col_2')

Unnamed: 0,col_2,col_1
0,a,100
1,b,101
2,c,102
3,d,103
4,e,104


In [None]:
df.c2front(['col_2'])

Unnamed: 0,col_2,col_1
0,a,100
1,b,101
2,c,102
3,d,103
4,e,104


## Export

In [None]:
from nbdev.export import notebook2script; notebook2script()

Converted 00_core.ipynb.
Converted index.ipynb.
