In [1]:
import numpy as np
import pandas as pd

In [2]:
# Series: a one-dimensional labelled array. np.nan stands in for a missing
# value here.
s = pd.Series(data=[1, 2, np.nan])

In [3]:
# Display the Series (a bare last expression is shown as the cell output).
s

0    1.0
1    2.0
2    NaN
dtype: float64

In [4]:
# Check the value stored at index 0.
s[0]

1.0

In [5]:
# Sum of the values; the result below is 3.0, so the NaN entry is not counted.
s.sum()

3.0

In [6]:
# Largest value in the Series.
s.max()

2.0

In [7]:
# Smallest value in the Series.
s.min()

1.0

In [8]:
# DataFrame: a two-dimensional table. Each dict key becomes a column label and
# each list supplies that column's values.
df = pd.DataFrame(
    {
        "A": [1, 2],
        "B": [3, 4],
    }
)
df

Unnamed: 0,A,B
0,1,3
1,2,4


In [11]:
# Check the dtype of each column.
df.dtypes

A    int64
B    int64
dtype: object

In [13]:
# 6x4 frame of random samples with default integer row and column labels.
# NOTE(review): no seed is set in the visible cells, so the values differ on
# every run.
df = pd.DataFrame(data=np.random.randn(6, 4))
df

Unnamed: 0,0,1,2,3
0,-0.855948,-0.328586,-0.39394,-0.241277
1,1.254112,-0.810096,-1.116956,-0.928566
2,-0.309915,-0.16902,-0.002206,0.233271
3,-0.694134,-0.445039,-1.438148,-1.432451
4,0.201931,-1.138103,0.009724,0.197287
5,0.094642,0.103783,-0.639418,-1.237098


In [15]:
# Use six consecutive dates starting at 2018-01-01 as the row index.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=pd.date_range(start='20180101', periods=6),
)
df

Unnamed: 0,0,1,2,3
2018-01-01,0.549461,-0.828246,-2.655959,0.353077
2018-01-02,1.564023,0.555411,0.333703,-0.212719
2018-01-03,-0.255737,0.777503,-0.671694,-1.092559
2018-01-04,1.028667,-2.140137,-1.912363,-0.800116
2018-01-05,-0.23741,2.165552,-0.132443,0.373534
2018-01-06,-1.782205,0.111849,0.472001,-0.383796


In [16]:
# Same construction as the previous cell, now with explicit column labels A-D.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=pd.date_range(start='20180101', periods=6),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2018-01-01,0.366368,1.305222,0.780265,-0.277749
2018-01-02,-1.059438,0.38676,-1.963926,-2.380305
2018-01-03,2.1332,1.420723,0.11383,-1.234945
2018-01-04,-1.135952,-0.231884,-0.367875,-0.308342
2018-01-05,-1.290442,-0.346175,-0.846198,-0.674866
2018-01-06,0.345688,-1.307276,1.033196,0.229869


In [17]:
# Inspect the first rows of the frame.
# head() returns 5 rows by default; only 1 is requested here.
df.head(1)

Unnamed: 0,A,B,C,D
2018-01-01,0.366368,1.305222,0.780265,-0.277749


In [18]:
# Inspect the last 3 rows of the frame.
df.tail(3)

Unnamed: 0,A,B,C,D
2018-01-04,-1.135952,-0.231884,-0.367875,-0.308342
2018-01-05,-1.290442,-0.346175,-0.846198,-0.674866
2018-01-06,0.345688,-1.307276,1.033196,0.229869


In [19]:
# Inspect the row index.
df.index

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06'],
              dtype='datetime64[ns]', freq='D')

In [20]:
# Inspect the column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [25]:
# Inspect the underlying data; it is shown as a NumPy array without the
# index and column labels.
df.values

array([[ 0.36636786,  1.30522159,  0.78026531, -0.2777489 ],
       [-1.05943791,  0.38675951, -1.96392554, -2.38030489],
       [ 2.13319973,  1.42072279,  0.11383035, -1.23494527],
       [-1.13595223, -0.23188421, -0.36787498, -0.30834225],
       [-1.2904422 , -0.34617451, -0.84619837, -0.67486551],
       [ 0.34568829, -1.30727617,  1.03319554,  0.22986909]])

In [23]:
# Summary statistics for each column (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.106763,0.204562,-0.208451,-0.77439
std,1.327698,1.049109,1.107875,0.924486
min,-1.290442,-1.307276,-1.963926,-2.380305
25%,-1.116824,-0.317602,-0.726618,-1.094925
50%,-0.356875,0.077438,-0.127022,-0.491604
75%,0.361198,1.075606,0.613657,-0.285397
max,2.1332,1.420723,1.033196,0.229869


In [26]:
# Transpose: swap rows and columns.
df.T

Unnamed: 0,2018-01-01 00:00:00,2018-01-02 00:00:00,2018-01-03 00:00:00,2018-01-04 00:00:00,2018-01-05 00:00:00,2018-01-06 00:00:00
A,0.366368,-1.059438,2.1332,-1.135952,-1.290442,0.345688
B,1.305222,0.38676,1.420723,-0.231884,-0.346175,-1.307276
C,0.780265,-1.963926,0.11383,-0.367875,-0.846198,1.033196
D,-0.277749,-2.380305,-1.234945,-0.308342,-0.674866,0.229869


In [28]:
# Sort the rows by the values in column B (ascending, as the output shows).
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2018-01-06,0.345688,-1.307276,1.033196,0.229869
2018-01-05,-1.290442,-0.346175,-0.846198,-0.674866
2018-01-04,-1.135952,-0.231884,-0.367875,-0.308342
2018-01-02,-1.059438,0.38676,-1.963926,-2.380305
2018-01-01,0.366368,1.305222,0.780265,-0.277749
2018-01-03,2.1332,1.420723,0.11383,-1.234945


In [29]:
# Slice rows by position: the first three rows.
df[0:3]

Unnamed: 0,A,B,C,D
2018-01-01,0.366368,1.305222,0.780265,-0.277749
2018-01-02,-1.059438,0.38676,-1.963926,-2.380305
2018-01-03,2.1332,1.420723,0.11383,-1.234945


In [31]:
# Select a range of rows by index label; the output shows that both end
# labels are included.
df['20180102':'20180104']

Unnamed: 0,A,B,C,D
2018-01-02,-1.059438,0.38676,-1.963926,-2.380305
2018-01-03,2.1332,1.420723,0.11383,-1.234945
2018-01-04,-1.135952,-0.231884,-0.367875,-0.308342


In [32]:
# Pull out the row labelled 2018-01-01 (returned as a Series).
df.loc['20180101']

A    0.366368
B    1.305222
C    0.780265
D   -0.277749
Name: 2018-01-01 00:00:00, dtype: float64

In [34]:
# Same row as above, restricted to columns A and B.
df.loc['20180101', ['A','B']]

A    0.366368
B    1.305222
Name: 2018-01-01 00:00:00, dtype: float64

In [35]:
# Select a label range of rows together with an explicit list of columns.
df.loc['20180101':'20180104', ['A', 'C']]

Unnamed: 0,A,C
2018-01-01,0.366368,0.780265
2018-01-02,-1.059438,-1.963926
2018-01-03,2.1332,0.11383
2018-01-04,-1.135952,-0.367875


In [36]:
# All rows, columns A and D only.
df.loc[:, ['A', 'D']]

Unnamed: 0,A,D
2018-01-01,0.366368,-0.277749
2018-01-02,-1.059438,-2.380305
2018-01-03,2.1332,-1.234945
2018-01-04,-1.135952,-0.308342
2018-01-05,-1.290442,-0.674866
2018-01-06,0.345688,0.229869


In [37]:
# Select by integer position: row 0, column 0.
df.iloc[0, 0]

0.36636785877202671

In [38]:
# Positional slice: the first two rows and the first two columns.
df.iloc[0:2, 0:2]

Unnamed: 0,A,B
2018-01-01,0.366368,1.305222
2018-01-02,-1.059438,0.38676


In [39]:
# Filter with a condition: keep only the rows where column B is positive.
df[df.B > 0]

Unnamed: 0,A,B,C,D
2018-01-01,0.366368,1.305222,0.780265,-0.277749
2018-01-02,-1.059438,0.38676,-1.963926,-2.380305
2018-01-03,2.1332,1.420723,0.11383,-1.234945


In [40]:
# Element-wise condition: the shape is kept and every value that is not
# greater than 0 is left empty in the result.
df[df > 0]

Unnamed: 0,A,B,C,D
2018-01-01,0.366368,1.305222,0.780265,
2018-01-02,,0.38676,,
2018-01-03,2.1332,1.420723,0.11383,
2018-01-04,,,,
2018-01-05,,,,
2018-01-06,0.345688,,1.033196,0.229869


In [41]:
# Display df again; the selections above returned new objects and left df as it was.
df

Unnamed: 0,A,B,C,D
2018-01-01,0.366368,1.305222,0.780265,-0.277749
2018-01-02,-1.059438,0.38676,-1.963926,-2.380305
2018-01-03,2.1332,1.420723,0.11383,-1.234945
2018-01-04,-1.135952,-0.231884,-0.367875,-0.308342
2018-01-05,-1.290442,-0.346175,-0.846198,-0.674866
2018-01-06,0.345688,-1.307276,1.033196,0.229869


In [42]:
# Copy df into a separate object, df2.
df2 = df.copy()

In [43]:
# Display the copy.
df2

Unnamed: 0,A,B,C,D
2018-01-01,0.366368,1.305222,0.780265,-0.277749
2018-01-02,-1.059438,0.38676,-1.963926,-2.380305
2018-01-03,2.1332,1.420723,0.11383,-1.234945
2018-01-04,-1.135952,-0.231884,-0.367875,-0.308342
2018-01-05,-1.290442,-0.346175,-0.846198,-0.674866
2018-01-06,0.345688,-1.307276,1.033196,0.229869


In [44]:
# Add a column E to df2 (one string per row), then display the result.
df2['E'] = ['one', 'one', 'two', 'three', 'two', 'three']
df2

Unnamed: 0,A,B,C,D,E
2018-01-01,0.366368,1.305222,0.780265,-0.277749,one
2018-01-02,-1.059438,0.38676,-1.963926,-2.380305,one
2018-01-03,2.1332,1.420723,0.11383,-1.234945,two
2018-01-04,-1.135952,-0.231884,-0.367875,-0.308342,three
2018-01-05,-1.290442,-0.346175,-0.846198,-0.674866,two
2018-01-06,0.345688,-1.307276,1.033196,0.229869,three


In [45]:
# Keep the rows of df2 whose E value is 'one' or 'three'.
df2[df2['E'].isin(['one', 'three'])]

Unnamed: 0,A,B,C,D,E
2018-01-01,0.366368,1.305222,0.780265,-0.277749,one
2018-01-02,-1.059438,0.38676,-1.963926,-2.380305,one
2018-01-04,-1.135952,-0.231884,-0.367875,-0.308342,three
2018-01-06,0.345688,-1.307276,1.033196,0.229869,three


In [48]:
# Prefix the mask with ~ to negate it: keep the rows whose E value is neither
# 'one' nor 'three'.
df2[~df2['E'].isin(['one', 'three'])]

Unnamed: 0,A,B,C,D,E
2018-01-03,2.1332,1.420723,0.11383,-1.234945,two
2018-01-05,-1.290442,-0.346175,-0.846198,-0.674866,two


In [51]:
# Create a Series holding 1..6, labelled with six consecutive dates starting
# at 2018-01-01; it is assigned into df in the next cell.
s = pd.Series(
    list(range(1, 7)),
    index=pd.date_range(start='20180101', periods=6),
)
s

2018-01-01    1
2018-01-02    2
2018-01-03    3
2018-01-04    4
2018-01-05    5
2018-01-06    6
Freq: D, dtype: int64

In [52]:
# Assign the Series as column E of df, then display the result.
# NOTE(review): this modifies df in place, so the earlier displays of df no
# longer reflect its current state.
df['E'] = s
df

Unnamed: 0,A,B,C,D,E
2018-01-01,0.366368,1.305222,0.780265,-0.277749,1
2018-01-02,-1.059438,0.38676,-1.963926,-2.380305,2
2018-01-03,2.1332,1.420723,0.11383,-1.234945,3
2018-01-04,-1.135952,-0.231884,-0.367875,-0.308342,4
2018-01-05,-1.290442,-0.346175,-0.846198,-0.674866,5
2018-01-06,0.345688,-1.307276,1.033196,0.229869,6


In [53]:
# Shift the data down by one row; the first row is left empty in the output.
df.shift(1)

Unnamed: 0,A,B,C,D,E
2018-01-01,,,,,
2018-01-02,0.366368,1.305222,0.780265,-0.277749,1.0
2018-01-03,-1.059438,0.38676,-1.963926,-2.380305,2.0
2018-01-04,2.1332,1.420723,0.11383,-1.234945,3.0
2018-01-05,-1.135952,-0.231884,-0.367875,-0.308342,4.0
2018-01-06,-1.290442,-0.346175,-0.846198,-0.674866,5.0


In [54]:
# Small 2x2 frame of random samples, used for the concatenation example.
df = pd.DataFrame(data=np.random.randn(2, 2))
df

Unnamed: 0,0,1
0,0.631026,-0.705598
1,1.088095,-1.089932


In [57]:
# Stack the frames vertically (the second one is attached below the first);
# the original row labels are kept, so they repeat in the output.
pd.concat([df, df])

Unnamed: 0,0,1
0,0.631026,-0.705598
1,1.088095,-1.089932
0,0.631026,-0.705598
1,1.088095,-1.089932


In [58]:
# 8x4 frame of random samples drawn with np.random.rand, with column labels A-D.
df = pd.DataFrame(
    data=np.random.rand(8, 4),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
0,0.827383,0.628291,0.211198,0.15749
1,0.736622,0.354569,0.083995,0.816505
2,0.338716,0.413456,0.696613,0.909884
3,0.717223,0.396361,0.484068,0.746093
4,0.750767,0.816962,0.911059,0.296144
5,0.37686,0.142136,0.703629,0.526715
6,0.142249,0.935757,0.586112,0.259526
7,0.504599,0.08994,0.85835,0.228079


In [59]:
# Take the first row (position 0) of df as a Series; it is the row that gets
# added to the bottom of df in the following cells.
s = df.iloc[0]
s

A    0.827383
B    0.628291
C    0.211198
D    0.157490
Name: 0, dtype: float64

In [60]:
# Add the Series `s` to the bottom of df as one more row. The new row keeps the
# Series' name as its index label, so a label already present in df shows up twice.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# with the Series turned into a one-row frame gives the same result on old and
# new pandas alike.
pd.concat([df, s.to_frame().T])

Unnamed: 0,A,B,C,D
0,0.827383,0.628291,0.211198,0.15749
1,0.736622,0.354569,0.083995,0.816505
2,0.338716,0.413456,0.696613,0.909884
3,0.717223,0.396361,0.484068,0.746093
4,0.750767,0.816962,0.911059,0.296144
5,0.37686,0.142136,0.703629,0.526715
6,0.142249,0.935757,0.586112,0.259526
7,0.504599,0.08994,0.85835,0.228079
0,0.827383,0.628291,0.211198,0.15749


In [61]:
# Add the Series `s` to the bottom of df as one more row and ignore the
# existing index labels: the result is renumbered 0..n-1.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# with the Series turned into a one-row frame gives the same result on old and
# new pandas alike.
pd.concat([df, s.to_frame().T], ignore_index=True)

Unnamed: 0,A,B,C,D
0,0.827383,0.628291,0.211198,0.15749
1,0.736622,0.354569,0.083995,0.816505
2,0.338716,0.413456,0.696613,0.909884
3,0.717223,0.396361,0.484068,0.746093
4,0.750767,0.816962,0.911059,0.296144
5,0.37686,0.142136,0.703629,0.526715
6,0.142249,0.935757,0.586112,0.259526
7,0.504599,0.08994,0.85835,0.228079
8,0.827383,0.628291,0.211198,0.15749


In [62]:
# Frame for the grouping example: A holds the group key, B holds random values.
df = pd.DataFrame(
    {
        'A': ['foo', 'bar', 'foo', 'bar'],
        'B': np.random.randn(4),
    }
)
df

Unnamed: 0,A,B
0,foo,2.142463
1,bar,0.534689
2,foo,-0.8063
3,bar,0.141175


In [65]:
# Grouping
# Sum of B for each distinct value in column A; the A values become the
# index of the result.
df.groupby('A').sum()

Unnamed: 0_level_0,B
A,Unnamed: 1_level_1
bar,0.675864
foo,1.336163
