## Pandas Select Data

In [1]:
import pandas as pd
import numpy  as np

### Prepare data frame df

In [2]:
df = pd.DataFrame(
    np.arange(24).reshape((6,4)), 
    index   = pd.date_range("20170101", periods=6),
    columns = ['A', 'B', 'C', 'D'])
print(df)

             A   B   C   D
2017-01-01   0   1   2   3
2017-01-02   4   5   6   7
2017-01-03   8   9  10  11
2017-01-04  12  13  14  15
2017-01-05  16  17  18  19
2017-01-06  20  21  22  23


### Select column
* df [ col_name ]
* df . col_name
* df[ *list_of_col_name* ]

In [3]:
df['A']

2017-01-01     0
2017-01-02     4
2017-01-03     8
2017-01-04    12
2017-01-05    16
2017-01-06    20
Freq: D, Name: A, dtype: int64

In [4]:
df.A

2017-01-01     0
2017-01-02     4
2017-01-03     8
2017-01-04    12
2017-01-05    16
2017-01-06    20
Freq: D, Name: A, dtype: int64

In [5]:
df[ ['A', 'B'] ]

Unnamed: 0,A,B
2017-01-01,0,1
2017-01-02,4,5
2017-01-03,8,9
2017-01-04,12,13
2017-01-05,16,17
2017-01-06,20,21


### Select row

* df [ *left_index : right_index* ]
* df [ *left_value : right_value* ]

In [6]:
df[0:3]

Unnamed: 0,A,B,C,D
2017-01-01,0,1,2,3
2017-01-02,4,5,6,7
2017-01-03,8,9,10,11


In [7]:
df['20170101':'20170103']

Unnamed: 0,A,B,C,D
2017-01-01,0,1,2,3
2017-01-02,4,5,6,7
2017-01-03,8,9,10,11


### Select by loc function

* df . loc [ *row_value* , *col_value* ]

In [8]:
df.loc['20170101']

A    0
B    1
C    2
D    3
Name: 2017-01-01 00:00:00, dtype: int64

In [9]:
df.loc['20170101', ['A', 'B']]

A    0
B    1
Name: 2017-01-01 00:00:00, dtype: int64

### Select by iloc function ---- index

* df . iloc [ *row_index* , *col_index* ]

In [10]:
df.iloc[1:3]

Unnamed: 0,A,B,C,D
2017-01-02,4,5,6,7
2017-01-03,8,9,10,11


In [11]:
df.iloc[1:3, 0:3]

Unnamed: 0,A,B,C
2017-01-02,4,5,6
2017-01-03,8,9,10


In [12]:
df.iloc[[0,2], [2,3]]

Unnamed: 0,C,D
2017-01-01,2,3
2017-01-03,10,11


### Mixed labels and index for selction
* df . ix [ *row_index_or_label* , *col_index_or_label* ]

In [13]:
df.ix[:3, ['A', 'B']]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Unnamed: 0,A,B
2017-01-01,0,1
2017-01-02,4,5
2017-01-03,8,9


In [14]:
df.ix[ '20170102':'20170103', 0:3 ]

Unnamed: 0,A,B,C
2017-01-02,4,5,6
2017-01-03,8,9,10


### Booleans slection 

In [15]:
print("Select rows where A > 8")
df[ df.A > 8 ]

Select rows where A > 8


Unnamed: 0,A,B,C,D
2017-01-04,12,13,14,15
2017-01-05,16,17,18,19
2017-01-06,20,21,22,23
