# Google 股價為範例，操作時間序列的技巧

In [29]:
import pandas as pd
import datetime as dt
import pandas_datareader.data as data

# 舊程式碼
# start = dt.datetime(2020, 3, 1)
# end = dt.datetime(2020, 4, 30)
# df = data.get_data_yahoo('GOOG', start, end) ('股票名稱','起始時間','結束時間')
# df

# 更新後的程式碼
df = data.DataReader('AAPL', data_source = 'stooq', start='2022-11-01',end='2022-12-31')
df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-11-01,155.080002,155.449997,149.130005,150.649994,150.400497,80379300
2022-11-02,148.949997,152.169998,145.0,145.029999,144.78981,93604600
2022-11-03,142.059998,142.800003,138.75,138.880005,138.650009,97918500
2022-11-04,142.089996,142.669998,134.380005,138.380005,138.380005,140814800
2022-11-07,137.110001,139.149994,135.669998,138.919998,138.919998,83374600
2022-11-08,140.410004,141.429993,137.490005,139.5,139.5,89908500
2022-11-09,138.5,138.550003,134.589996,134.869995,134.869995,74917800
2022-11-10,141.240005,146.869995,139.5,146.869995,146.869995,118854000
2022-11-11,145.820007,150.009995,144.369995,149.699997,149.699997,93979700
2022-11-14,148.970001,150.279999,147.429993,148.279999,148.279999,73374100


## DatetimeIndex
用時間作為索引的型態<br>
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DatetimeIndex.html

In [30]:
#不是使用'Date'欄位名稱，使用index，是一個 DatetimeIndex物件，week為屬性
#返回每個日期所属的週數
df.index.week

  df.index.week


Int64Index([44, 44, 44, 44, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 47, 47, 47,
            47, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, 50, 50, 50, 50, 50, 51,
            51, 51, 51, 51, 52, 52, 52, 52],
           dtype='int64', name='Date')

## 計算每週交易量總和

In [31]:
df['week'] = df.index.week
df

  df['week'] = df.index.week


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,week
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-11-01,155.080002,155.449997,149.130005,150.649994,150.400497,80379300,44
2022-11-02,148.949997,152.169998,145.0,145.029999,144.78981,93604600,44
2022-11-03,142.059998,142.800003,138.75,138.880005,138.650009,97918500,44
2022-11-04,142.089996,142.669998,134.380005,138.380005,138.380005,140814800,44
2022-11-07,137.110001,139.149994,135.669998,138.919998,138.919998,83374600,45
2022-11-08,140.410004,141.429993,137.490005,139.5,139.5,89908500,45
2022-11-09,138.5,138.550003,134.589996,134.869995,134.869995,74917800,45
2022-11-10,141.240005,146.869995,139.5,146.869995,146.869995,118854000,45
2022-11-11,145.820007,150.009995,144.369995,149.699997,149.699997,93979700,45
2022-11-14,148.970001,150.279999,147.429993,148.279999,148.279999,73374100,46


In [32]:
df.groupby('week').Volume.sum()

week
44    412717200
45    461034600
46    382679700
47    204025500
48    401088500
49    341500000
50    505728900
51    384620400
52    307110500
Name: Volume, dtype: int64

## resample
調整週期重新採樣的方法<br>
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html#pandas.DataFrame.resample

In [33]:
ten = df.resample('10D').mean()
ten

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,week
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-11-01,143.18,144.886248,139.313751,141.637499,141.547539,97471510.0,44.5
2022-11-11,149.146667,151.321663,147.294996,149.803329,149.803329,79443230.0,45.833333
2022-11-21,146.697141,148.81,145.055714,147.255713,147.255713,66916600.0,47.428571
2022-12-01,145.128575,146.830002,143.135714,144.487143,144.487143,68313970.0,48.714286
2022-12-11,140.264287,141.29,136.775715,138.407144,138.407144,94679190.0,50.285714
2022-12-21,130.814286,132.379998,128.920001,130.735712,130.735712,76386500.0,51.571429
