In [None]:
#| default_exp core

# core


> Monkey patches for pandas.

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from functools import partial
from itertools import chain
import math

from fastcore.all import *
import pandas as pd
# import ipywidgets as widgets
# from IPython.display import display

## Utils

In [None]:
#| export
def dummydf():
    'Build a small example DataFrame with an integer column `col_1` and a letter column `col_2`.'
    numbers = range(100, 105)
    letters = ['a','b','c','d','e']
    return pd.DataFrame({'col_1': numbers, 'col_2': letters})

## Transformations

In [None]:
#| export
@patch
def repetitions(self:pd.DataFrame, col):
    # Group the rows by `col` and report how many rows fall into each group.
    grouped = self.groupby(col)
    return grouped.size()

In [None]:
#| export
# Attach a docstring to the patched `repetitions` method and copy the generic
# `NDFrame` docstrings onto the listed `DataFrame` methods.
# NOTE(review): presumably the copies are needed because fastcore's `add_docs`
# rejects a class that still has undocumented public methods — confirm.
add_docs(pd.DataFrame,
         repetitions='Counts the number of repetitions for each element.',
         ffill=pd.core.generic.NDFrame.ffill.__doc__,
         bfill=pd.core.generic.NDFrame.bfill.__doc__,
         clip=pd.core.generic.NDFrame.clip.__doc__,
         interpolate=pd.core.generic.NDFrame.interpolate.__doc__,
         where=pd.core.generic.NDFrame.where.__doc__,
         mask=pd.core.generic.NDFrame.mask.__doc__)

In [None]:
# Diagnostic: public callables defined directly on `pd.DataFrame` that have no docstring.
nodoc = [
    member
    for name, member in vars(pd.DataFrame).items()
    if callable(member) and not name.startswith('_') and member.__doc__ is None
]

In [None]:
show_doc(pd.DataFrame.repetitions)

In [None]:
df = pd.DataFrame({'a': [1, 2, 3, 4, 4, 5, 5, 6, 6, 6], 'b':[1, 1, 1, 1, 2, 2, 2, 3, 3, 4]})
df.repetitions('b')

In [None]:
test(df.repetitions('b'), pd.Series({1:4, 2:3,3:2, 4:1}), all_equal)

In [None]:
#| export
@patch
def repetition_counts(self:pd.DataFrame, col):
    # Count the rows per distinct value of `col`, then tally how often each count occurs.
    per_value = self.repetitions(col)
    return per_value.value_counts()

In [None]:
#| export
# Docstring for the patched `DataFrame.repetition_counts` method.
add_docs(pd.DataFrame,repetition_counts='Counts the number of groups with the same number of repetitions.')

In [None]:
show_doc(pd.DataFrame.repetition_counts) 

In the following example there are three groups with one element, two groups with two elements, and one group with three elements.

In [None]:
df.repetition_counts('a')

In [None]:
test(df.repetition_counts('a'), pd.Series({1: 3, 2:2, 3:1}), all_equal)

In [None]:
#| export
@patch
def single_events(self:pd.DataFrame, col):
    # Index the frame by `col` so that rows can be selected by their `col` value.
    by_col = self.set_index(col)
    # Boolean mask over the distinct values of `col`: True where the value occurs exactly once.
    occurs_once = self.repetitions(col) == 1
    # `reset_index` turns `col` back into an ordinary column of the result.
    return by_col.loc[occurs_once].reset_index()

In [None]:
#| export
# `single_events` filters on how often the value in `col` occurs, not on whole-row equality,
# so the docstring names the column explicitly.
add_docs(pd.DataFrame, single_events='Returns the rows whose value in `col` appears only once.')

In [None]:
show_doc(pd.DataFrame.single_events)

In [None]:
df.single_events('a')

In [None]:
test_eq(df.single_events('a'), df.loc[[0, 1, 2]])

## Functions as methods


Pandas functions that are easier to execute as DataFrame/Series methods.


In [None]:
#| export
@patch
@delegates(pd.crosstab)
def crosstab(self:pd.DataFrame, index, column, **kwargs):
    # `index` and `column` are labels of columns in this frame; the selected columns
    # are handed to `pd.crosstab` as its first two arguments, extra keywords pass through.
    row_values = self[index]
    col_values = self[column]
    return pd.crosstab(row_values, col_values, **kwargs)

In [None]:
#| export
# Reuse the docstring of `pd.crosstab` for the method form. Note that the method takes
# column labels (`index`, `column`) of the frame rather than the value arrays themselves.
add_docs(pd.DataFrame, crosstab=pd.crosstab.__doc__)

In [None]:
#| export
@patch
def len(self:pd.DataFrame):
    # Method form of `len(df)`.
    # NOTE(review): the `len` called below must still resolve to the builtin after
    # `@patch` has run — presumably fastcore does not rebind existing names; confirm.
    return len(self)

In [None]:
#| export
# Reuse a `len` docstring for the patched `DataFrame.len` method.
# NOTE(review): this assumes the module-level name `len` is still the builtin here,
# i.e. that the `@patch` definition of `len` did not shadow it — confirm.
add_docs(pd.DataFrame, len=len.__doc__)

In [None]:
#| export
@patch
def len(self:pd.Series):
    # Method form of `len(series)`.
    # NOTE(review): the `len` called below must still resolve to the builtin after
    # `@patch` has run — presumably fastcore does not rebind existing names; confirm.
    return len(self)

## One-liners

> These methods allow fast exploration of the data in one line.

In [None]:
#| export
@patch(as_prop=True)
def l(self:pd.Index):
    # Property: hand the index to `L`, passing `use_list=True`.
    wrapped = L(self, use_list=True)
    return wrapped

In [None]:
#| export
@patch(as_prop=True)
def minmax(self:pd.Series):
    # Property returning the extremes of the Series as a `(min, max)` tuple.
    lowest, highest = self.min(), self.max()
    return (lowest, highest)

In [None]:
#| export
@patch
def page(self:pd.DataFrame, page, page_size=5):
    'Shows page number `page` (1-based): the rows at positions `(page-1)*page_size` up to, but not including, `page*page_size`'
    # Slice by position instead of chaining `head(...).tail(...)`: slicing clamps at the
    # end of the frame, so the last page may be shorter and a page past the end is empty.
    # With `tail`, a page past the end produced a negative count, and `tail(-k)` returns
    # everything except the first k rows rather than nothing.
    start = (page - 1) * page_size
    return self.iloc[start:start + page_size]

In [None]:
df = pd.DataFrame({'a': range(12), 'b': range(12)})
df.page(3)

In [None]:
#| export
@patch
def page(self:pd.Series, page, page_size=5):
    'Shows page number `page` (1-based): the elements at positions `(page-1)*page_size` up to, but not including, `page*page_size`'
    # Slice by position instead of chaining `head(...).tail(...)`: slicing clamps at the
    # end of the Series, so the last page may be shorter and a page past the end is empty.
    # With `tail`, a page past the end produced a negative count, and `tail(-k)` returns
    # everything except the first k elements rather than nothing.
    start = (page - 1) * page_size
    return self.iloc[start:start + page_size]

In [None]:
#| export
# Copy the generic `NDFrame` docstrings onto the listed `Series` methods and give the
# patched `Series.len` a docstring taken from `len`.
# NOTE(review): presumably needed because fastcore's `add_docs` rejects a class that
# still has undocumented public methods — confirm.
add_docs(pd.Series, 
         ffill=pd.core.generic.NDFrame.ffill.__doc__,
         bfill=pd.core.generic.NDFrame.bfill.__doc__,
         clip=pd.core.generic.NDFrame.clip.__doc__,
         interpolate=pd.core.generic.NDFrame.interpolate.__doc__,
         where=pd.core.generic.NDFrame.where.__doc__,
         mask=pd.core.generic.NDFrame.mask.__doc__,
         len=len.__doc__)

In [None]:
s = pd.Series(range(15))
s.page(2)

## Method Variations


These methods are slight variations of existing DataFrame and Series methods.


In [None]:
#| export
@patch
def renamec(self:pd.DataFrame, d, *args, **kwargs):
    # Without extra positional arguments, `d` goes to `rename(columns=...)` untouched.
    # With them, `d` (flattened to old, new, old, new, ... when it is a dict) and `args`
    # are joined into one flat sequence and re-paired into a mapping, e.g.
    # `renamec({'col_1': 'col_a'}, 'col_2', 'bar')`.
    mapping = d
    if args:
        flat = chain.from_iterable(d.items()) if isinstance(d, dict) else d
        pairs = chunked(listify(flat) + listify(args), 2)
        mapping = dict(pairs)
    return self.rename(columns=mapping, **kwargs)

In [None]:
df = dummydf()
df.renamec({'col_1': 'col_a'}, 'col_2', 'bar')

In [None]:
#| export
# Docstring for the patched `DataFrame.renamec` method.
add_docs(pd.DataFrame, renamec='Renames column names.')

In [None]:
#| export
@patch
def notin(self:pd.Series, values):
    # Element-wise negation of `isin`.
    contained = self.isin(values)
    return ~contained

In [None]:
#| export
# Docstring for the patched `Series.notin` method.
add_docs(pd.Series, notin='Whether elements in Series are not contained in `values`.')

In [None]:
#| export
@patch
def mapk(self:pd.Series, fun, **kwargs):
    # Bind the keyword arguments to `fun` first, then map the bound callable over the Series.
    bound = partial(fun, **kwargs)
    return self.map(bound)

In [None]:
#| export
# Docstring for the patched `Series.mapk` method.
add_docs(pd.Series, mapk='Like map but passes kwargs to function.')

In [None]:
#| export
@patch
@delegates(pd.DataFrame.sort_values)
def sort(self:pd.DataFrame, by, **kwargs):
    # Short alias for `sort_values`; keyword arguments are forwarded unchanged.
    return self.sort_values(by=by, **kwargs)

In [None]:
#| export
# `sort` only forwards to `sort_values`, so it reuses that method's docstring.
add_docs(pd.DataFrame, sort=pd.DataFrame.sort_values.__doc__)

In [None]:
# Draw as many rows as the frame has (a reordering under pandas' default sampling
# without replacement), then check that sorting by `col_1` gives back the original frame.
temp = df.sample(df.len())
test_eq(temp.sort('col_1'), df)

## Move columns to the front/back

In [None]:
#| export
@patch
def c2back(self:pd.DataFrame, cols2back):
    # Accept a single column label or any listy collection of labels. Listy input is
    # copied into a plain list: `list + tuple` would raise TypeError in the concatenation
    # below, and a generator would be used up by the membership test.
    cols2back = list(cols2back) if is_listy(cols2back) else [cols2back]
    # Keep the other columns in their current order and append the moved ones.
    remaining = [c for c in self.columns if c not in cols2back]
    return self[remaining + cols2back]

In [None]:
#| export
@patch
def c2front(self:pd.DataFrame, cols2front):
    # Accept a single column label or any listy collection of labels. Listy input is
    # copied into a plain list: `tuple + list` would raise TypeError in the concatenation
    # below, and a generator would be used up by the membership test.
    cols2front = list(cols2front) if is_listy(cols2front) else [cols2front]
    # Put the moved columns first and keep the other columns in their current order.
    remaining = [c for c in self.columns if c not in cols2front]
    return self[cols2front + remaining]

In [None]:
#| export
# Docstrings for the patched `DataFrame.c2back` and `DataFrame.c2front` methods.
add_docs(pd.DataFrame,
         c2back="Move columns to back",
         c2front="Move columns to front")

In [None]:
df = dummydf()

In [None]:
df.c2back(['col_1'])

In [None]:
df.c2back('col_1')

In [None]:
df.c2front('col_2')

In [None]:
df.c2front(['col_2'])

In [None]:
#| export
@patch
def reorderc(self:pd.DataFrame, to_front=[], to_back=[]):
    'Reorder DataFrame columns: move `to_front` to the front, then `to_back` to the back.'
    # The list defaults are only read here and by the helpers (which build new lists),
    # so sharing them between calls is harmless.
    fronted = self.c2front(to_front)
    return fronted.c2back(to_back)

In [None]:
df['col_3'] = df['col_1']
df.reorderc(['col_3'], ['col_1'])

## Export -

In [None]:
#|hide
#|eval: false
from nbdev import nbdev_export; nbdev_export()