# Selection

## DataFrames

In [134]:
import pandas as pd
import numpy as np

In [135]:
# Small demo frame: three integer columns on the default 0..3 integer index.
df = pd.DataFrame({
    'AAA': [4, 5, 6, 7],
    'BBB': [10, 20, 30, 40],
    'CCC': [100, 50, -30, -50],
})
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [136]:
# Keep rows whose AAA is at most 6 AND whose index label is one of 0, 2, 4
# (label 4 is not present in this frame, so it never matches).
df.loc[df['AAA'].le(6) & df.index.isin([0, 2, 4])]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
2,6,30,-30


In [137]:
# Column data reused by the frames built in the following cells.
data = dict(
    AAA=[4, 5, 6, 7],
    BBB=[10, 20, 30, 40],
    CCC=[100, 50, -30, -50],
)

In [138]:
# Same columns, but with string row labels so label-based and position-based
# selection can be told apart in the cells below.
df = pd.DataFrame(data, index=['foo', 'bar', 'boo', 'kar'])
df

Unnamed: 0,AAA,BBB,CCC
foo,4,10,100
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [139]:
# Positional slice: the first three rows (stop position 3 is excluded).
df.iloc[:3]

Unnamed: 0,AAA,BBB,CCC
foo,4,10,100
bar,5,20,50
boo,6,30,-30


In [140]:
# Label slice: unlike positional slicing, both endpoints ('bar' and 'kar') are included.
df.loc['bar':'kar', :]

Unnamed: 0,AAA,BBB,CCC
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [141]:
# Integer labels that start at 1, so labels and positions deliberately disagree.
df2 = pd.DataFrame(data, index=[1, 2, 3, 4])
df2

Unnamed: 0,AAA,BBB,CCC
1,4,10,100
2,5,20,50
3,6,30,-30
4,7,40,-50


In [142]:
# Position-based: rows at positions 1 and 2 (stop excluded), i.e. labels 2 and 3.
df2.iloc[1:3, :]

Unnamed: 0,AAA,BBB,CCC
2,5,20,50
3,6,30,-30


In [143]:
# Label-based: rows labelled 1 through 3, with the end label included.
df2.loc[1:3, :]

Unnamed: 0,AAA,BBB,CCC
1,4,10,100
2,5,20,50
3,6,30,-30


In [144]:
# Inverse selection: drop every row that has AAA <= 6 AND is labelled 'foo' or 'boo'.
df.loc[~(df['AAA'].le(6) & df.index.isin(['foo', 'boo']))]

Unnamed: 0,AAA,BBB,CCC
bar,5,20,50
kar,7,40,-50


## Panels

In [145]:
# Inputs for the Panel demo: 100 daily timestamps, a 100x4 block of uniform
# random values, and the four column labels.
# Seed the global NumPy RNG first so the random values are identical on every
# Restart & Run All.
np.random.seed(0)
rng = pd.date_range('1/1/2013', periods=100, freq='D')
data = np.random.rand(100, 4)
cols = ['A', 'B', 'C', 'D']

In [146]:
# Three separate frames built from the same array, date index and column labels.
# Note that this rebinds `df2` from the earlier selection examples.
df1, df2, df3 = (
    pd.DataFrame(data, index=rng, columns=cols) for _ in range(3)
)

In [147]:
# Bundle the three frames into a 3-D Panel keyed by name and show its summary.
# NOTE(review): pd.Panel was deprecated in pandas 0.20 and removed in 0.25, so
# this cell only runs on an older pandas - confirm the target version.
pf = pd.Panel({
    'df1': df1,
    'df2': df2,
    'df3': df3,
})
pf

<class 'pandas.core.panel.Panel'>
Dimensions: 3 (items) x 100 (major_axis) x 4 (minor_axis)
Items axis: df1 to df3
Major_axis axis: 2013-01-01 00:00:00 to 2013-04-10 00:00:00
Minor_axis axis: A to D

## New Columns

In [148]:
# Frame of small integer codes (1..3) that get translated to names further down.
df = pd.DataFrame({
    'AAA': [1, 2, 1, 3],
    'BBB': [1, 1, 2, 2],
    'CCC': [2, 1, 3, 1],
})
df

Unnamed: 0,AAA,BBB,CCC
0,1,1,2
1,2,1,1
2,1,2,3
3,3,2,1


In [149]:
# Remember the original column labels before any derived columns are added.
source_cols = df.columns
source_cols

Index(['AAA', 'BBB', 'CCC'], dtype='object')

In [150]:
# One derived column name per source column: '<name>_cat'.
new_cols = ['{}_cat'.format(col) for col in source_cols]
new_cols

['AAA_cat', 'BBB_cat', 'CCC_cat']

In [151]:
# Lookup table from integer code (1, 2, 3) to category name.
categories = dict(enumerate(('Alpha', 'Beta', 'Charlie'), start=1))

In [167]:
# Look up code 2 in the mapping; dict.get returns None instead of raising when
# a key is missing.
categories.get(2)

'Beta'

In [153]:
# All rows, restricted to the original (pre-derivation) columns.
df.loc[:, source_cols]

Unnamed: 0,AAA,BBB,CCC
0,1,1,2
1,2,1,1
2,1,2,3
3,3,2,1


In [154]:
# Translate every code in the original columns to its category name and store
# the result under the matching '<name>_cat' columns.
# Only source_cols is mapped (not the whole frame), so the right-hand side
# always has exactly len(new_cols) columns. Re-running the cell after the
# '*_cat' columns exist therefore still works, instead of failing on a
# column-count mismatch.
df[new_cols] = df[source_cols].applymap(categories.get)
df

Unnamed: 0,AAA,BBB,CCC,AAA_cat,BBB_cat,CCC_cat
0,1,1,2,Alpha,Alpha,Beta
1,2,1,1,Beta,Alpha,Alpha
2,1,2,3,Alpha,Beta,Charlie
3,3,2,1,Charlie,Beta,Alpha


In [155]:
# Group key AAA with a value column BBB, used for the per-group minimum examples.
df = pd.DataFrame({
    'AAA': [1, 1, 1, 2, 2, 2, 3, 3],
    'BBB': [2, 1, 3, 4, 5, 1, 2, 3],
})
df

Unnamed: 0,AAA,BBB
0,1,2
1,1,1
2,1,3
3,2,4
4,2,5
5,2,1
6,3,2
7,3,3


In [156]:
# For each AAA group, find the index label of its smallest BBB, then pull those
# full rows (original index labels are preserved).
df.loc[df.groupby('AAA')['BBB'].idxmin(), :]

Unnamed: 0,AAA,BBB
1,1,1
5,2,1
6,3,2


In [157]:
# Alternative route to the per-group minimum: sort by BBB, then take the first
# row of each AAA group (AAA stays a regular column because as_index=False).
(
    df
    .sort_values(by='BBB')
    .groupby('AAA', as_index=False)
    .first()
)

Unnamed: 0,AAA,BBB
0,1,1
1,2,1
2,3,2


---

# .isin

In [158]:
# Print the docstring of the index's isin method for reference.
help(df.index.isin)

Help on method isin in module pandas.core.indexes.base:

isin(values, level=None) method of pandas.core.indexes.range.RangeIndex instance
    Compute boolean array of whether each index value is found in the
    passed set of values.
    
    Parameters
    ----------
    values : set or list-like
        Sought values.
    
        .. versionadded:: 0.18.1
    
        Support for values as a set
    
    level : str or int, optional
        Name or position of the index level to use (if the index is a
        MultiIndex).
    
    Notes
    -----
    If `level` is specified:
    
    - if it is the name of one *and only one* index level, use that level;
    - otherwise it should be a number indicating level position.
    
    Returns
    -------
    is_contained : ndarray (boolean dtype)



In [159]:
# Frame with string row labels used for the isin examples below.
df = pd.DataFrame(
    {
        'R1': [1, 1, 3, 5],
        'R2': [0, 1, 2, 5],
        'R3': [1, 5, 4, 1],
    },
    index=['one', 'two', 'three', 'four'],
)
df

Unnamed: 0,R1,R2,R3
one,1,0,1
two,1,1,5
three,3,2,4
four,5,5,1


In [160]:
# Element-wise membership test on the whole frame: True wherever a cell equals 1.
df.isin([1])

Unnamed: 0,R1,R2,R3
one,True,False,True
two,True,True,False
three,False,False,False
four,False,False,True


In [161]:
# Membership test on a single row: select row 'one' as a Series, then check
# which of its values equal 1.
df.loc['one', :].isin([1])

R1     True
R2    False
R3     True
Name: one, dtype: bool

In [162]:
# Membership test on the row labels themselves; the result is a plain boolean
# array with one entry per row.
df.index.isin(['one'])

array([ True, False, False, False], dtype=bool)

In [163]:
# Membership test on one column: no R1 value equals 2, so every entry is False.
df['R1'].isin([2])

one      False
two      False
three    False
four     False
Name: R1, dtype: bool

---

# pd.date_range

In [164]:
# Print the docstring of pd.date_range for reference.
help(pd.date_range)

Help on function date_range in module pandas.core.indexes.datetimes:

date_range(start=None, end=None, periods=None, freq='D', tz=None, normalize=False, name=None, closed=None, **kwargs)
    Return a fixed frequency datetime index, with day (calendar) as the default
    frequency
    
    Parameters
    ----------
    start : string or datetime-like, default None
        Left bound for generating dates
    end : string or datetime-like, default None
        Right bound for generating dates
    periods : integer or None, default None
        If None, must specify start and end
    freq : string or DateOffset, default 'D' (calendar daily)
        Frequency strings can have multiples, e.g. '5H'
    tz : string or None
        Time zone name for returning localized DatetimeIndex, for example
        Asia/Hong_Kong
    normalize : bool, default False
        Normalize start/end dates to midnight before generating date range
    name : str, default None
        Name of the resulting index
  

In [165]:
# Three consecutive calendar days starting at 2017-11-12.
pd.date_range(start='2017-11-12', periods=3, freq='1D')

DatetimeIndex(['2017-11-12', '2017-11-13', '2017-11-14'], dtype='datetime64[ns]', freq='D')

---

# applymap()

In [166]:
# NOTE(review): this raises KeyError because `df` was rebound in In [159] to the
# R1/R2/R3 frame, while `source_cols` still holds the AAA/BBB/CCC labels captured
# in In [149]. Rebuild the AAA/BBB/CCC frame (as in In [148]) before running
# this cell to see the mapped categories.
df[source_cols].applymap(categories.get)

KeyError: "Index(['AAA', 'BBB', 'CCC'], dtype='object') not in index"

---

# .get

---

# df.sort_values

---

# first

---

# df.groupby

---