In [1]:
import numpy as np
import pandas as pd

In [2]:
# Eight consecutive daily timestamps beginning on 1 January 2000; reused as
# the row labels of the DataFrame created further down.
index = pd.date_range(start='1/1/2000', periods=8)

In [3]:
# Seed NumPy's global generator before the first random draw so that the
# np.random.randn calls in this notebook produce the same values on every
# Restart Kernel -> Run All (outputs saved from an earlier, unseeded run will
# differ until the notebook is re-executed).
np.random.seed(0)

# Five standard-normal draws labelled 'a' through 'e'.
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

In [4]:
# 8x3 frame of standard-normal draws: rows are labelled by `index`,
# columns by 'A', 'B' and 'C'.
df = pd.DataFrame(
    data=np.random.randn(8, 3),
    index=index,
    columns=['A', 'B', 'C'],
)

In [5]:
# NOTE(review): pd.Panel was deprecated in pandas 0.20 and removed in 0.25, so
# this cell only runs on older pandas releases.
# Wraps a (2, 5, 4) array of random draws: two items, five dates starting
# 1 January 2000 on the major axis, and labels 'A'-'D' on the minor axis.
wp = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
              major_axis=pd.date_range('1/1/2000', periods=5),
              minor_axis=['A', 'B', 'C', 'D'])

## Head and Tail

## Attributes and the raw ndarray(s)

In [6]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(8, 3)

In [7]:
# Row labels of the frame: the eight daily timestamps it was built with.
df.index

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08'],
              dtype='datetime64[ns]', freq='D')

In [8]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C'], dtype='object')

These attributes can be safely assigned to; for example, the column labels can be replaced with new ones:

In [9]:
# Peek at the first two rows while the column labels are still upper-case.
df.head(2)

Unnamed: 0,A,B,C
2000-01-01,-0.97068,-0.144796,0.985886
2000-01-02,-0.025946,-0.751199,0.673717


In [10]:
# Assigning to .columns swaps the labels on the existing frame; each label is
# lower-cased through the index's vectorised string accessor.
df.columns = df.columns.str.lower()

df

Unnamed: 0,a,b,c
2000-01-01,-0.97068,-0.144796,0.985886
2000-01-02,-0.025946,-0.751199,0.673717
2000-01-03,1.620817,-1.084019,0.62205
2000-01-04,-0.430483,0.905568,0.118472
2000-01-05,-0.760793,-0.924584,0.935454
2000-01-06,-1.178009,-0.927469,-0.238406
2000-01-07,1.036556,-0.25224,-0.529278
2000-01-08,-1.719594,0.150398,2.486739


In [11]:
# Underlying data of the Series as a 1-D NumPy array (labels are dropped).
s.values

array([-0.54819736, -2.19082258,  0.80241986, -1.23131441, -1.46518947])

In [12]:
# Underlying data of the frame as a 2-D NumPy array (one row per index entry).
df.values

array([[-0.97068026, -0.1447961 ,  0.98588572],
       [-0.02594588, -0.75119933,  0.67371735],
       [ 1.62081694, -1.08401893,  0.62204987],
       [-0.43048285,  0.90556787,  0.11847177],
       [-0.7607931 , -0.9245838 ,  0.93545357],
       [-1.17800869, -0.9274695 , -0.2384063 ],
       [ 1.03655556, -0.25224015, -0.52927805],
       [-1.7195936 ,  0.15039768,  2.48673901]])

In [13]:
# Underlying data of the panel as a 3-D NumPy array shaped (2, 5, 4).
wp.values

array([[[ 0.85511356,  0.16659908, -0.37635859, -0.38611292],
        [-1.06588997,  0.43418383, -0.70014374, -0.34234043],
        [ 0.2949323 , -0.72249584,  1.3227754 , -0.1309525 ],
        [ 0.72336333,  0.46222066,  0.44520933,  0.40392654],
        [ 0.65480279,  2.00515241, -0.73013632, -0.62810975]],

       [[ 0.00753278, -0.19912234,  1.67731   , -0.69634265],
        [-0.50391577,  2.19405035, -1.27979418, -0.11699061],
        [-0.12920773,  0.01081899, -0.49272121,  0.92782986],
        [-0.10967958,  0.56249413,  0.82806628,  0.41063139],
        [-1.20648804,  0.28973047, -0.64367748,  0.45432091]]])

## Flexible binary operations

With binary operations between pandas data structures, there are two key points of interest:
- Broadcasting behavior between higher- (e.g. DataFrame) and lower-dimensional (e.g. Series) objects.
- Missing data in computations.

#### Matching / broadcasting behavior

In [14]:
# Three Series whose labels only partly overlap. The DataFrame constructor
# aligns them on the union of their labels ('a'-'d') and leaves NaN wherever
# a Series has no value for a label.
df = pd.DataFrame({
    'one': pd.Series(np.random.randn(3), index=['a', 'b', 'c']),
    'two': pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']),
    'three': pd.Series(np.random.randn(3), index=['b', 'c', 'd']),
})
df

Unnamed: 0,one,three,two
a,-1.65419,,0.600652
b,1.383082,-1.039004,0.394966
c,-0.442967,-0.331269,-0.476708
d,,-1.152193,-1.013828


In [17]:
row = df.iloc[1]
# `row` is a Series indexed by the column labels and named after its row label ('b').
row

one      1.383082
three   -1.039004
two      0.394966
Name: b, dtype: float64

In [19]:
# Select the 'two' column as a Series indexed by the frame's row labels.
column = df['two']
column

a    0.600652
b    0.394966
c   -0.476708
d   -1.013828
Name: two, dtype: float64

In [20]:
# Subtract `row` from every row of the frame, matching the Series' labels to
# the column labels; the 'b' row (where `row` came from) becomes all zeros.
df.sub(row, axis='columns')

Unnamed: 0,one,three,two
a,-3.037272,,0.205686
b,0.0,0.0,0.0
c,-1.826049,0.707735,-0.871674
d,,-0.113189,-1.408794


In [21]:
# The frame itself is unchanged: sub() returned a new object.
df

Unnamed: 0,one,three,two
a,-1.65419,,0.600652
b,1.383082,-1.039004,0.394966
c,-0.442967,-0.331269,-0.476708
d,,-1.152193,-1.013828


## Function application

## Sorting