In [1]:
import numpy as np
import numpy.random as random
import scipy as sp
import pandas as pd
from pandas import Series, DataFrame

import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
%matplotlib inline

%precision 3

'%.3f'

<h1>階層型インデックス</h1>

In [2]:
# Build a 3x3 frame of the integers 0..8 whose rows and columns each
# carry a two-level (hierarchical) index.
hire_df = DataFrame(
    np.arange(9).reshape(3, 3),
    index=pd.MultiIndex.from_arrays([['a', 'a', 'b'], [1, 2, 2]]),
    columns=pd.MultiIndex.from_arrays(
        [['Osaka', 'Tokyo', 'Osaka'], ['Blue', 'Red', 'Red']]
    ),
)
hire_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Osaka,Tokyo,Osaka
Unnamed: 0_level_1,Unnamed: 1_level_1,Blue,Red,Red
a,1,0,1,2
a,2,3,4,5
b,2,6,7,8


In [3]:
# Name the two levels of the row index
hire_df.index.names = ['key1','key2']
# Name the two levels of the column index
hire_df.columns.names = ['city','color']
hire_df

Unnamed: 0_level_0,city,Osaka,Tokyo,Osaka
Unnamed: 0_level_1,color,Blue,Red,Red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,2,6,7,8


<h3>カラムの絞り込み</h3>

In [4]:
# Select by the outer column level: keeps every column filed under 'Osaka'
hire_df['Osaka']

Unnamed: 0_level_0,color,Blue,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,2
a,2,3,5
b,2,6,8


<h3>インデックスを軸にした集計</h3>

In [5]:
# Sum the rows that share the same value of the 'key2' row-index level
hire_df.groupby(level='key2').sum()

city,Osaka,Tokyo,Osaka
color,Blue,Red,Red
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,0,1,2
2,9,11,13


In [13]:
# Sum the columns that share the same value of the 'color' column level.
# groupby(axis=1) is deprecated (pandas 2.1+), so transpose, group the rows
# by 'color', and transpose back; the resulting table is the same.
hire_df.T.groupby(level='color').sum().T

Unnamed: 0_level_0,color,Blue,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,3
a,2,3,9
b,2,6,15


<h3>インデックス要素の削除</h3>

In [14]:
# Return a copy without the rows whose outer row-index label is 'b'
hire_df.drop(['b'])

Unnamed: 0_level_0,city,Osaka,Tokyo,Osaka
Unnamed: 0_level_1,color,Blue,Red,Red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5


<h1>データの結合</h1>

In [16]:
# Data set 1: personal attributes (id, city, birth year, name); ids are stored as strings
data1 = {
    'id': ['100', '101', '102', '103', '104', '106', '108', '110', '111', '113'],
    'city': ['Tokyo', 'Osaka', 'Kyoto', 'Hokkaido', 'Tokyo', 'Tokyo', 'Osaka', 'Kyoto', 'Hokkaido',
             'Tokyo'],
    'birth_year': [1990, 1989, 1992, 1997, 1982, 1991, 1988, 1990, 1995, 1981],
    'name': ['Hiroshi', 'Akiko', 'Yuki', 'Satoru', 'Steeve', 'Mituru', 'Aoi', 'Tarou', 'Suguru', 'Mitsuo']
}
df1 = DataFrame(data1)
df1

Unnamed: 0,id,city,birth_year,name
0,100,Tokyo,1990,Hiroshi
1,101,Osaka,1989,Akiko
2,102,Kyoto,1992,Yuki
3,103,Hokkaido,1997,Satoru
4,104,Tokyo,1982,Steeve
5,106,Tokyo,1991,Mituru
6,108,Osaka,1988,Aoi
7,110,Kyoto,1990,Tarou
8,111,Hokkaido,1995,Suguru
9,113,Tokyo,1981,Mitsuo


In [17]:
# Data set 2: scores and sex keyed by id; only ids '100'-'102' also appear in data set 1.
# 'index_num' holds 0..4 and is used later as a join key against df1's row index.
data2 = {
    'id': ['100', '101', '102', '105', '107'],
    'math': [50, 43, 33, 76, 98],
    'english': [90, 30, 20, 50, 30],
    'sex': ['M', 'F', 'F', 'M', 'M'],
    'index_num': [0, 1, 2, 3, 4]
}
df2 = DataFrame(data2)
df2

Unnamed: 0,id,math,english,sex,index_num
0,100,50,90,M,0
1,101,43,30,F,1
2,102,33,20,F,2
3,105,76,50,M,3
4,107,98,30,M,4


<h3>内部結合</h3>

In [18]:
# Merge (inner join). The key would be detected automatically from the
# shared column name, but `on` states it explicitly.
print('・結合テーブル')
pd.merge(df1, df2, on = 'id')

・結合テーブル


Unnamed: 0,id,city,birth_year,name,math,english,sex,index_num
0,100,Tokyo,1990,Hiroshi,50,90,M,0
1,101,Osaka,1989,Akiko,43,30,F,1
2,102,Kyoto,1992,Yuki,33,20,F,2


<h3>全結合</h3>

In [19]:
# Merge (outer join): keep the rows of both frames; no `on` is given,
# so the join key is taken from the column name the two frames share.
pd.merge(df1, df2, how = 'outer')

Unnamed: 0,id,city,birth_year,name,math,english,sex,index_num
0,100,Tokyo,1990.0,Hiroshi,50.0,90.0,M,0.0
1,101,Osaka,1989.0,Akiko,43.0,30.0,F,1.0
2,102,Kyoto,1992.0,Yuki,33.0,20.0,F,2.0
3,103,Hokkaido,1997.0,Satoru,,,,
4,104,Tokyo,1982.0,Steeve,,,,
5,106,Tokyo,1991.0,Mituru,,,,
6,108,Osaka,1988.0,Aoi,,,,
7,110,Kyoto,1990.0,Tarou,,,,
8,111,Hokkaido,1995.0,Suguru,,,,
9,113,Tokyo,1981.0,Mitsuo,,,,


In [21]:
# Merge df1's row index against df2's 'index_num' column; the 'id' column
# present in both frames comes back suffixed as id_x / id_y.
pd.merge(df1, df2, left_index = True, right_on = 'index_num')

Unnamed: 0,id_x,city,birth_year,name,id_y,math,english,sex,index_num
0,100,Tokyo,1990,Hiroshi,100,50,90,M,0
1,101,Osaka,1989,Akiko,101,43,30,F,1
2,102,Kyoto,1992,Yuki,102,33,20,F,2
3,103,Hokkaido,1997,Satoru,105,76,50,M,3
4,104,Tokyo,1982,Steeve,107,98,30,M,4


<h3>左外部結合</h3>

In [22]:
# Merge (left join): keep every row of df1; df2's columns are NaN where no id matches
pd.merge(df1, df2, how = 'left')

Unnamed: 0,id,city,birth_year,name,math,english,sex,index_num
0,100,Tokyo,1990,Hiroshi,50.0,90.0,M,0.0
1,101,Osaka,1989,Akiko,43.0,30.0,F,1.0
2,102,Kyoto,1992,Yuki,33.0,20.0,F,2.0
3,103,Hokkaido,1997,Satoru,,,,
4,104,Tokyo,1982,Steeve,,,,
5,106,Tokyo,1991,Mituru,,,,
6,108,Osaka,1988,Aoi,,,,
7,110,Kyoto,1990,Tarou,,,,
8,111,Hokkaido,1995,Suguru,,,,
9,113,Tokyo,1981,Mitsuo,,,,


<h3>縦結合</h3>

In [24]:
# Data set 3: same columns as data set 1, with a different set of ids
data3 = {
    'id': ['117', '118', '119', '120', '125'],
    'city': ['Chiba', 'Kanagawa', 'Tokyo', 'Fukuoka', 'Okinawa'],
    'birth_year': [1990, 1989, 1992, 1997, 1982],
    'name': ['Suguru', 'Kouichi', 'Satoshi', 'Yukie', 'Akari']
}
df3 = DataFrame(data3)
df3

Unnamed: 0,id,city,birth_year,name
0,117,Chiba,1990,Suguru
1,118,Kanagawa,1989,Kouichi
2,119,Tokyo,1992,Satoshi
3,120,Fukuoka,1997,Yukie
4,125,Okinawa,1982,Akari


In [25]:
# Row-wise concatenation: place df3 underneath df1.
# With default arguments each frame keeps its own row labels.
concat_data = pd.concat([df1, df3])
concat_data

Unnamed: 0,id,city,birth_year,name
0,100,Tokyo,1990,Hiroshi
1,101,Osaka,1989,Akiko
2,102,Kyoto,1992,Yuki
3,103,Hokkaido,1997,Satoru
4,104,Tokyo,1982,Steeve
5,106,Tokyo,1991,Mituru
6,108,Osaka,1988,Aoi
7,110,Kyoto,1990,Tarou
8,111,Hokkaido,1995,Suguru
9,113,Tokyo,1981,Mitsuo


<h1>データの操作と変換</h1>

<h3>ピボット操作</h3>

In [26]:
# Rebuild the 3x3 hierarchical frame from scratch; this replaces the earlier
# hire_df, so the index/column levels are unnamed again.
hire_df = DataFrame(
    np.arange(9).reshape((3, 3)),
    index = [
        ['a','a','b'],
        [1, 2, 2]
    ],
    columns = [
        ['Osaka', 'Tokyo', 'Osaka'],
        ['Blue', 'Red', 'Red']
    ]
)
hire_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Osaka,Tokyo,Osaka
Unnamed: 0_level_1,Unnamed: 1_level_1,Blue,Red,Red
a,1,0,1,2
a,2,3,4,5
b,2,6,7,8


In [27]:
# Pivot: move the 'Blue' / 'Red' column level into the row index
hire_df.stack()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Osaka,Tokyo
a,1,Blue,0,
a,1,Red,2,1.0
a,2,Blue,3,
a,2,Red,5,4.0
b,2,Blue,6,
b,2,Red,8,7.0


In [28]:
# unstack() moves the 'Blue' / 'Red' row level back into the columns
hire_df.stack().unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Osaka,Osaka,Tokyo,Tokyo
Unnamed: 0_level_1,Unnamed: 1_level_1,Blue,Red,Blue,Red
a,1,0,2,,1.0
a,2,3,5,,4.0
b,2,6,8,,7.0


<h3>重複データの除去</h3>

In [29]:
# Sample frame in which some rows repeat an earlier row exactly
# (row 5 repeats row 4, row 7 repeats row 6).
dupli_data = DataFrame(
    dict(
        col1=[1, 1, 2, 3, 4, 4, 6, 6],
        col2=list('abbbccbb'),
    )
)
print('・元データ')
dupli_data

・元データ


Unnamed: 0,col1,col2
0,1,a
1,1,b
2,2,b
3,3,b
4,4,c
5,4,c
6,6,b
7,6,b


In [30]:
# Flag each row that repeats an earlier row (True = duplicate)
dupli_data.duplicated()

0    False
1    False
2    False
3    False
4    False
5     True
6    False
7     True
dtype: bool

In [31]:
# Remove duplicated rows, keeping the first occurrence of each
dupli_data.drop_duplicates()

Unnamed: 0,col1,col2
0,1,a
1,1,b
2,2,b
3,3,b
4,4,c
6,6,b


<h3>マッピング処理</h3>

In [32]:
# Lookup table: city name -> region name
city_map = {
    'Tokyo': 'Kanto',
    'Hokkaido': 'Hokkaido',
    'Osaka': 'Kansai',
    'Kyoto': 'Kansai'
}
city_map

{'Tokyo': 'Kanto',
 'Hokkaido': 'Hokkaido',
 'Osaka': 'Kansai',
 'Kyoto': 'Kansai'}

In [33]:
# Add a 'region' column to df1 by looking each city up in city_map
df1['region'] = df1['city'].map(city_map)
df1

Unnamed: 0,id,city,birth_year,name,region
0,100,Tokyo,1990,Hiroshi,Kanto
1,101,Osaka,1989,Akiko,Kansai
2,102,Kyoto,1992,Yuki,Kansai
3,103,Hokkaido,1997,Satoru,Hokkaido
4,104,Tokyo,1982,Steeve,Kanto
5,106,Tokyo,1991,Mituru,Kanto
6,108,Osaka,1988,Aoi,Kansai
7,110,Kyoto,1990,Tarou,Kansai
8,111,Hokkaido,1995,Suguru,Hokkaido
9,113,Tokyo,1981,Mitsuo,Kanto


<h3>無名関数とmapを組み合わせる</h3>

In [34]:
# Keep the first three characters of each birth year as text
# (e.g. 1990 -> '199') in a new 'up_two_num' column.
df1['up_two_num'] = df1['birth_year'].astype(str).str[:3]
df1

Unnamed: 0,id,city,birth_year,name,region,up_two_num
0,100,Tokyo,1990,Hiroshi,Kanto,199
1,101,Osaka,1989,Akiko,Kansai,198
2,102,Kyoto,1992,Yuki,Kansai,199
3,103,Hokkaido,1997,Satoru,Hokkaido,199
4,104,Tokyo,1982,Steeve,Kanto,198
5,106,Tokyo,1991,Mituru,Kanto,199
6,108,Osaka,1988,Aoi,Kansai,198
7,110,Kyoto,1990,Tarou,Kansai,199
8,111,Hokkaido,1995,Suguru,Hokkaido,199
9,113,Tokyo,1981,Mitsuo,Kanto,198


<h3>ビン分割</h3>

In [35]:
# Bin edges every five years: 1980, 1985, 1990, 1995, 2000
birth_year_bins = list(range(1980, 2001, 5))

# Assign every birth year to the interval it falls into
birth_year_cut_data = pd.cut(df1['birth_year'], bins=birth_year_bins)
birth_year_cut_data

0    (1985, 1990]
1    (1985, 1990]
2    (1990, 1995]
3    (1995, 2000]
4    (1980, 1985]
5    (1990, 1995]
6    (1985, 1990]
7    (1985, 1990]
8    (1990, 1995]
9    (1980, 1985]
Name: birth_year, dtype: category
Categories (4, interval[int64, right]): [(1980, 1985] < (1985, 1990] < (1990, 1995] < (1995, 2000]]

In [36]:
# Count how many rows fall into each bin.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
birth_year_cut_data.value_counts()

birth_year
(1985, 1990]    4
(1990, 1995]    3
(1980, 1985]    2
(1995, 2000]    1
Name: count, dtype: int64

In [37]:
# Give each bin a readable label instead of the raw interval
group_names = ['early1980s', 'late1980s', 'early1990s', 'late1990s']
birth_year_cut_data = pd.cut(df1.birth_year, birth_year_bins, labels = group_names)
# Series.value_counts() replaces the deprecated top-level pd.value_counts()
birth_year_cut_data.value_counts()

birth_year
late1980s     4
early1990s    3
early1980s    2
late1990s     1
Name: count, dtype: int64

In [38]:
# The bins can also be given as a count; here the data is split into 2 bins
pd.cut(df1.birth_year, 2)

0      (1989.0, 1997.0]
1    (1980.984, 1989.0]
2      (1989.0, 1997.0]
3      (1989.0, 1997.0]
4    (1980.984, 1989.0]
5      (1989.0, 1997.0]
6    (1980.984, 1989.0]
7      (1989.0, 1997.0]
8      (1989.0, 1997.0]
9    (1980.984, 1989.0]
Name: birth_year, dtype: category
Categories (2, interval[float64, right]): [(1980.984, 1989.0] < (1989.0, 1997.0]]

In [39]:
# Split birth_year into 2 quantile-based bins and count the rows in each.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
pd.qcut(df1.birth_year, 2).value_counts()

birth_year
(1980.999, 1990.0]    6
(1990.0, 1997.0]      4
Name: count, dtype: int64

<h1>データの集約とグループ演算</h1>

In [40]:
# Check the working data; df1 now also carries the 'region' column added earlier
df1

Unnamed: 0,id,city,birth_year,name,region,up_two_num
0,100,Tokyo,1990,Hiroshi,Kanto,199
1,101,Osaka,1989,Akiko,Kansai,198
2,102,Kyoto,1992,Yuki,Kansai,199
3,103,Hokkaido,1997,Satoru,Hokkaido,199
4,104,Tokyo,1982,Steeve,Kanto,198
5,106,Tokyo,1991,Mituru,Kanto,199
6,108,Osaka,1988,Aoi,Kansai,198
7,110,Kyoto,1990,Tarou,Kansai,199
8,111,Hokkaido,1995,Suguru,Hokkaido,199
9,113,Tokyo,1981,Mitsuo,Kanto,198


In [41]:
# Number of rows per city
df1.groupby('city').size()

city
Hokkaido    2
Kyoto       2
Osaka       2
Tokyo       4
dtype: int64

In [42]:
# Mean birth_year per city
df1.groupby('city')['birth_year'].mean()

city
Hokkaido    1996.0
Kyoto       1991.0
Osaka       1988.5
Tokyo       1986.0
Name: birth_year, dtype: float64

In [43]:
# Mean birth_year per (region, city) pair; the group keys become a two-level index
df1.groupby(['region', 'city'])['birth_year'].mean()

region    city    
Hokkaido  Hokkaido    1996.0
Kansai    Kyoto       1991.0
          Osaka       1988.5
Kanto     Tokyo       1986.0
Name: birth_year, dtype: float64

In [44]:
# Same aggregation, but as_index=False keeps 'region' and 'city' as ordinary columns
df1.groupby(['region', 'city'], as_index = False)['birth_year'].mean()

Unnamed: 0,region,city,birth_year
0,Hokkaido,Hokkaido,1996.0
1,Kansai,Kyoto,1991.0
2,Kansai,Osaka,1988.5
3,Kanto,Tokyo,1986.0


In [45]:
# Walk through the groups one region at a time, printing a separator line,
# the region label, and the rows that belong to that region.
for region_name, members in df1.groupby('region'):
    print(
        '===========================================================',
        'Region Name:{0}'.format(region_name),
        members,
        sep='\n',
    )

Region Name:Hokkaido
    id      city  birth_year    name    region up_two_num
3  103  Hokkaido        1997  Satoru  Hokkaido        199
8  111  Hokkaido        1995  Suguru  Hokkaido        199
Region Name:Kansai
    id   city  birth_year   name  region up_two_num
1  101  Osaka        1989  Akiko  Kansai        198
2  102  Kyoto        1992   Yuki  Kansai        199
6  108  Osaka        1988    Aoi  Kansai        198
7  110  Kyoto        1990  Tarou  Kansai        199
Region Name:Kanto
    id   city  birth_year     name region up_two_num
0  100  Tokyo        1990  Hiroshi  Kanto        199
4  104  Tokyo        1982   Steeve  Kanto        198
5  106  Tokyo        1991   Mituru  Kanto        199
9  113  Tokyo        1981   Mitsuo  Kanto        198


In [46]:
# Create the working directory. exist_ok=True makes the cell safe to re-run
# (a bare `mkdir chap06` fails once the directory already exists).
import os
os.makedirs('chap06', exist_ok=True)

In [47]:
# Move into the working directory (a bare `cd` relies on IPython automagic);
# the path is relative, so the result depends on the current directory.
cd ./chap06

/Users/kuramotoyuuta/Documents/DataScience/Tokyo.unv/chapter_06/chap06


In [48]:
import requests, zipfile
from io import StringIO
import io

# Zip archive to download (hosted at archive.ics.uci.edu)
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00356/student.zip'

# Fetch the archive from the URL. A timeout prevents the cell from hanging
# forever on a stalled connection, and raise_for_status() turns an HTTP error
# response into an exception instead of handing an error page to ZipFile.
# (stream=True was dropped: r.content reads the whole body anyway.)
r = requests.get(url, timeout=30)
r.raise_for_status()

# Open the downloaded bytes as an in-memory zip file and extract every member
# into the current working directory
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall()

In [50]:
# Load the semicolon-separated CSV from the current directory
student_data_math = pd.read_csv('student-mat.csv', sep=';')

# Apply several aggregation functions to each selected column
functions = ['count', 'mean', 'max', 'min']
# Group the rows by the ('sex', 'address') pair
grouped_student_math_data1 = student_data_math.groupby(['sex', 'address'])
# Aggregate 'age' and 'G1' per group with every function listed above
grouped_student_math_data1[['age', 'G1']].agg(functions)

Unnamed: 0_level_0,Unnamed: 1_level_0,age,age,age,age,G1,G1,G1,G1
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,max,min,count,mean,max,min
sex,address,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
F,R,44,16.977273,19,15,44,10.295455,19,6
F,U,164,16.664634,20,15,164,10.707317,18,4
M,R,44,17.113636,21,15,44,10.659091,18,3
M,U,143,16.517483,22,15,143,11.405594,19,5


<h1>欠損データの扱い方</h1>

In [51]:
# Prepare the data: a 10x4 frame of uniform random numbers with holes punched in it
import numpy as np
from numpy import nan as NA
import pandas as pd

# Seed the global NumPy RNG so the frame holds the same values on every run;
# without a seed the outputs of the following cells change with each execution.
np.random.seed(0)
df = pd.DataFrame(np.random.rand(10, 4))

# Inject missing values
df.iloc[1,0] = NA      # row 1, column 0
df.iloc[2:3,2] = NA    # the slice 2:3 covers row 2 only, column 2
df.iloc[5:,3] = NA     # rows 5 through 9, column 3

In [52]:
# Display the frame with the injected missing values
df

Unnamed: 0,0,1,2,3
0,0.419833,0.977422,0.035429,0.669039
1,,0.091851,0.107041,0.39507
2,0.465085,0.466959,,0.019438
3,0.255224,0.741896,0.84235,0.992131
4,0.648789,0.883975,0.949259,0.873564
5,0.289462,0.319237,0.206461,
6,0.110558,0.95127,0.824851,
7,0.938977,0.007641,0.640988,
8,0.421649,0.419353,0.265055,
9,0.016679,0.080083,0.5924,


<h3>リストワイズ削除</h3>

In [54]:
# Listwise deletion: drop every row that has at least one missing value
df.dropna()

Unnamed: 0,0,1,2,3
0,0.419833,0.977422,0.035429,0.669039
3,0.255224,0.741896,0.84235,0.992131
4,0.648789,0.883975,0.949259,0.873564


<h3>ペアワイズ削除</h3>

In [55]:
# Pairwise deletion: restrict to columns 0 and 1 first, then drop rows missing either one
df[[0,1]].dropna()

Unnamed: 0,0,1
0,0.419833,0.977422
2,0.465085,0.466959
3,0.255224,0.741896
4,0.648789,0.883975
5,0.289462,0.319237
6,0.110558,0.95127
7,0.938977,0.007641
8,0.421649,0.419353
9,0.016679,0.080083


<h3>fillnaで埋める</h3>

In [56]:
# Replace every missing value with 0
df.fillna(0)

Unnamed: 0,0,1,2,3
0,0.419833,0.977422,0.035429,0.669039
1,0.0,0.091851,0.107041,0.39507
2,0.465085,0.466959,0.0,0.019438
3,0.255224,0.741896,0.84235,0.992131
4,0.648789,0.883975,0.949259,0.873564
5,0.289462,0.319237,0.206461,0.0
6,0.110558,0.95127,0.824851,0.0
7,0.938977,0.007641,0.640988,0.0
8,0.421649,0.419353,0.265055,0.0
9,0.016679,0.080083,0.5924,0.0


<h3>前の値で埋める</h3>

In [57]:
# Forward fill: replace each missing value with the last valid value above it
# in the same column. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling.
df.ffill()

Unnamed: 0,0,1,2,3
0,0.419833,0.977422,0.035429,0.669039
1,0.419833,0.091851,0.107041,0.39507
2,0.465085,0.466959,0.107041,0.019438
3,0.255224,0.741896,0.84235,0.992131
4,0.648789,0.883975,0.949259,0.873564
5,0.289462,0.319237,0.206461,0.873564
6,0.110558,0.95127,0.824851,0.873564
7,0.938977,0.007641,0.640988,0.873564
8,0.421649,0.419353,0.265055,0.873564
9,0.016679,0.080083,0.5924,0.873564


<h3>平均値で埋める</h3>

In [58]:
# 各カラムの平均値(確認用)
df.mean()

0    0.396251
1    0.493969
2    0.495981
3    0.589848
dtype: float64

In [59]:
# 平均値で埋める
df.fillna(df.mean())

Unnamed: 0,0,1,2,3
0,0.419833,0.977422,0.035429,0.669039
1,0.396251,0.091851,0.107041,0.39507
2,0.465085,0.466959,0.495981,0.019438
3,0.255224,0.741896,0.84235,0.992131
4,0.648789,0.883975,0.949259,0.873564
5,0.289462,0.319237,0.206461,0.589848
6,0.110558,0.95127,0.824851,0.589848
7,0.938977,0.007641,0.640988,0.589848
8,0.421649,0.419353,0.265055,0.589848
9,0.016679,0.080083,0.5924,0.589848


<h1>時系列データの処理と変換</h1>

In [61]:
import pandas_datareader.data as pdr

In [62]:
# Date range to request
start_date = '2001/1/2'
end_date = '2016/12/30'

# Fetch the 'DEXJPUS' series from the 'fred' data source via pandas-datareader
# (presumably the JPY/USD exchange rate, going by the variable name — TODO confirm)
fx_jpusdata = pdr.DataReader('DEXJPUS', 'fred', start_date, end_date)

In [63]:
# Peek at the first rows of the downloaded series
fx_jpusdata.head()

Unnamed: 0_level_0,DEXJPUS
DATE,Unnamed: 1_level_1
2001-01-02,114.73
2001-01-03,114.26
2001-01-04,115.47
2001-01-05,116.19
2001-01-08,115.97


<h3>特定の年月のデータを参照する</h3>

In [68]:
# Partial-string indexing on the date index: select the rows dated April 2016
fx_jpusdata.loc['2016-04']

Unnamed: 0_level_0,DEXJPUS
DATE,Unnamed: 1_level_1
2016-04-01,112.06
2016-04-04,111.18
2016-04-05,110.26
2016-04-06,109.63
2016-04-07,107.98
2016-04-08,108.36
2016-04-11,107.96
2016-04-12,108.54
2016-04-13,109.21
2016-04-14,109.2


In [69]:
# Resample to month-end frequency, keeping the last observation of each month.
# NOTE(review): the 'M' alias is deprecated in pandas >= 2.2 in favour of 'ME';
# switch once older pandas versions no longer need to be supported.
fx_jpusdata.resample('M').last().head()

Unnamed: 0_level_0,DEXJPUS
DATE,Unnamed: 1_level_1
2001-01-31,116.39
2001-02-28,117.28
2001-03-31,125.54
2001-04-30,123.57
2001-05-31,118.88


<h3>欠損がある場合の操作</h3>

In [70]:
# Resample to calendar-day frequency; days without an observation show up as NaN
fx_jpusdata.resample('D').last().head()

Unnamed: 0_level_0,DEXJPUS
DATE,Unnamed: 1_level_1
2001-01-02,114.73
2001-01-03,114.26
2001-01-04,115.47
2001-01-05,116.19
2001-01-06,


In [71]:
# Same daily resampling, but days without data take the previous observation's value
fx_jpusdata.resample('D').ffill().head()

Unnamed: 0_level_0,DEXJPUS
DATE,Unnamed: 1_level_1
2001-01-02,114.73
2001-01-03,114.26
2001-01-04,115.47
2001-01-05,116.19
2001-01-06,116.19


<h3>データをズラして比率を計算する</h3>

In [72]:
# Shift the values down by one row, so each date carries the previous row's value
fx_jpusdata.shift(1).head()

Unnamed: 0_level_0,DEXJPUS
DATE,Unnamed: 1_level_1
2001-01-02,
2001-01-03,114.73
2001-01-04,114.26
2001-01-05,115.47
2001-01-08,116.19


In [74]:
# Ratio of each value to the previous row's value (the first row has no predecessor, hence NaN)
fx_jpusdata_ratio = fx_jpusdata / fx_jpusdata.shift(1)
fx_jpusdata_ratio.head()

Unnamed: 0_level_0,DEXJPUS
DATE,Unnamed: 1_level_1
2001-01-02,
2001-01-03,0.995903
2001-01-04,1.01059
2001-01-05,1.006235
2001-01-08,0.998107


<h3>移動平均</h3>

In [75]:
# First rows of the raw series, shown as a reference for the rolling statistics
fx_jpusdata.head()

Unnamed: 0_level_0,DEXJPUS
DATE,Unnamed: 1_level_1
2001-01-02,114.73
2001-01-03,114.26
2001-01-04,115.47
2001-01-05,116.19
2001-01-08,115.97


In [76]:
# Rolling mean over a window of 3 rows; the first two rows are NaN because the window is incomplete
fx_jpusdata.rolling(3).mean().head()

Unnamed: 0_level_0,DEXJPUS
DATE,Unnamed: 1_level_1
2001-01-02,
2001-01-03,
2001-01-04,114.82
2001-01-05,115.306667
2001-01-08,115.876667


In [77]:
# Rolling standard deviation over a window of 3 observations
fx_jpusdata.rolling(3).std().head()

Unnamed: 0_level_0,DEXJPUS
DATE,Unnamed: 1_level_1
2001-01-02,
2001-01-03,
2001-01-04,0.61
2001-01-05,0.975312
2001-01-08,0.368963
