In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

## データアクセス

In [2]:
ser1 = Series(np.arange(3), index=['A', 'B', 'C'])

In [3]:
ser1 = 2*ser1

In [4]:
ser1

A    0
B    2
C    4
dtype: int64

In [5]:
ser1[2]

4

In [6]:
ser1['C']

4

In [7]:
ser1[['B', 'C']]

B    2
C    4
dtype: int64

In [8]:
ser1[ser1 < 2]

A    0
dtype: int64

In [9]:
ser1[ser1 < 3] = 3

In [10]:
ser1

A    3
B    3
C    4
dtype: int64

In [11]:
dframe1 = DataFrame(np.arange(25).reshape((5, 5)), index=['LA', 'NF', 'CD', 'TO', 'BF'], columns=['1', '2', '3', '4', '5'])

In [12]:
dframe1

Unnamed: 0,1,2,3,4,5
LA,0,1,2,3,4
NF,5,6,7,8,9
CD,10,11,12,13,14
TO,15,16,17,18,19
BF,20,21,22,23,24


In [13]:
dframe1['3']

LA     2
NF     7
CD    12
TO    17
BF    22
Name: 3, dtype: int64

In [15]:
dframe1[['1','5']]

Unnamed: 0,1,5
LA,0,4
NF,5,9
CD,10,14
TO,15,19
BF,20,24


In [16]:
dframe1[dframe1['1'] < 11]

Unnamed: 0,1,2,3,4,5
LA,0,1,2,3,4
NF,5,6,7,8,9
CD,10,11,12,13,14


In [18]:
dframe1 > 10

Unnamed: 0,1,2,3,4,5
LA,False,False,False,False,False
NF,False,False,False,False,False
CD,False,True,True,True,True
TO,True,True,True,True,True
BF,True,True,True,True,True


### indexアクセス

In [22]:
dframe1.ix['LA']

1    0
2    1
3    2
4    3
5    4
Name: LA, dtype: int64

## 形の違うデータ計算

In [23]:
ser2 = Series([0,1,2], index=['A','B','C'])

In [24]:
ser3 = Series([3,4,5,6], index=['A','B','C','D'])

#### index外はNaNになる

In [25]:
ser2 + ser3

A    3.0
B    5.0
C    7.0
D    NaN
dtype: float64

In [27]:
dframe2 = DataFrame(np.arange(4).reshape((2,2)), index=['NF','BF'], columns=list('AB'))
# list('AB') => ['A','B']

In [28]:
dframe2

Unnamed: 0,A,B
NF,0,1
BF,2,3


In [31]:
dframe3 = DataFrame(np.arange(9).reshape((3,3)), index=['NF','BF','DC'], columns=list('ACD'))

In [32]:
dframe3

Unnamed: 0,A,C,D
NF,0,1,2
BF,3,4,5
DC,6,7,8


お互いの存在する値のみ足し合わせる(その他はNan)

In [33]:
dframe2 + dframe3

Unnamed: 0,A,B,C,D
BF,5.0,,,
DC,,,,
NF,0.0,,,


In [34]:
dframe2.add(dframe3, fill_value=0)

Unnamed: 0,A,B,C,D
BF,5.0,3.0,4.0,5.0
DC,6.0,,7.0,8.0
NF,0.0,1.0,1.0,2.0


In [35]:
ser4 = dframe2.ix[0]

In [36]:
ser4

A    0
B    1
Name: NF, dtype: int64

同一ラベルの部分は計算される

In [37]:
dframe2 - ser4

Unnamed: 0,A,B
NF,0,0
BF,2,2


## データの並べ替えと順番

In [38]:
ser5 = Series(range(3), index=list('CAB'))

In [39]:
ser5

C    0
A    1
B    2
dtype: int64

indexでソート

In [40]:
ser5.sort_index()

A    1
B    2
C    0
dtype: int64

中身は変わっていない

In [42]:
ser5

C    0
A    1
B    2
dtype: int64

In [44]:
from numpy.random import randn

In [45]:
ser6 = Series(randn(10))

In [46]:
ser6

0   -0.456520
1    1.226464
2    0.960142
3    0.114946
4   -0.169462
5   -1.597839
6   -1.599679
7    1.773371
8   -0.720272
9    1.285541
dtype: float64

#### DEPRECATED *order()*

In [47]:
ser6.order()

  if __name__ == '__main__':


6   -1.599679
5   -1.597839
8   -0.720272
0   -0.456520
4   -0.169462
3    0.114946
2    0.960142
1    1.226464
9    1.285541
7    1.773371
dtype: float64

sort_values() は中身を変化させない

In [48]:
ser6.sort_values()

6   -1.599679
5   -1.597839
8   -0.720272
0   -0.456520
4   -0.169462
3    0.114946
2    0.960142
1    1.226464
9    1.285541
7    1.773371
dtype: float64

In [49]:
ser6.rank()

0     4.0
1     8.0
2     7.0
3     6.0
4     5.0
5     2.0
6     1.0
7    10.0
8     3.0
9     9.0
dtype: float64

In [50]:
ser6

0   -0.456520
1    1.226464
2    0.960142
3    0.114946
4   -0.169462
5   -1.597839
6   -1.599679
7    1.773371
8   -0.720272
9    1.285541
dtype: float64

#### DEPRECATED sort()
sort() は中身を変化させる

In [51]:
ser6.sort()

  if __name__ == '__main__':


In [52]:
ser6

6   -1.599679
5   -1.597839
8   -0.720272
0   -0.456520
4   -0.169462
3    0.114946
2    0.960142
1    1.226464
9    1.285541
7    1.773371
dtype: float64

In [53]:
# 推奨
ser6.sort_values(inplace=True)

In [54]:
ser6

6   -1.599679
5   -1.597839
8   -0.720272
0   -0.456520
4   -0.169462
3    0.114946
2    0.960142
1    1.226464
9    1.285541
7    1.773371
dtype: float64