# 2. 시계열 데이터 조작

In [2]:
import pandas as pd

## 1) pandas에서 DatetimeIndex 다루기
- Pandas는 DatetimeIndex를 사용하여 시간 데이터를 쉽게 인덱스화할 수 있습니다. 

In [3]:
# Convert a list of ISO-formatted date strings into a DatetimeIndex
dates = [
    '2024-01-01',
    '2024-01-02',
    '2024-01-03',
    '2024-01-04',
    '2024-01-05',
]
datetime_index = pd.to_datetime(dates)

In [4]:
# Inspect the type of a single element of the index
type(datetime_index[0])

pandas._libs.tslibs.timestamps.Timestamp

In [5]:
# Build a one-column DataFrame that uses the DatetimeIndex as its row index
data = {"Value": [100, 200, 300, 400, 500]}
df = pd.DataFrame(
    data,
    columns=["Value"],
    index=datetime_index,
)
df

Unnamed: 0,Value
2024-01-01,100
2024-01-02,200
2024-01-03,300
2024-01-04,400
2024-01-05,500


In [6]:
# Look at the first index label of the frame
df.index[0]

Timestamp('2024-01-01 00:00:00')

In [7]:
# Access a specific row through the DatetimeIndex
# A date string can be used directly as the label
df.loc["2024-01-03"]

Value    300
Name: 2024-01-03 00:00:00, dtype: int64

In [8]:
# Slice the rows by a date range (string labels on both ends)
df.loc["2024-01-01":"2024-01-02"]

Unnamed: 0,Value
2024-01-01,100
2024-01-02,200


In [10]:
# Generate a fixed-frequency (daily) range of dates from 2024-01-01 to 2024-01-10
date_range = pd.date_range("2024-01-01", "2024-01-10", freq="D")
print(date_range)

DatetimeIndex(['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04',
               '2024-01-05', '2024-01-06', '2024-01-07', '2024-01-08',
               '2024-01-09', '2024-01-10'],
              dtype='datetime64[ns]', freq='D')


In [11]:
# Last element of the generated range
date_range[-1]

Timestamp('2024-01-10 00:00:00')

In [12]:
# Generate a range of timestamps spaced two hours apart over the same period
date_range = pd.date_range("2024-01-01", "2024-01-10", freq="2h")
date_range

DatetimeIndex(['2024-01-01 00:00:00', '2024-01-01 02:00:00',
               '2024-01-01 04:00:00', '2024-01-01 06:00:00',
               '2024-01-01 08:00:00', '2024-01-01 10:00:00',
               '2024-01-01 12:00:00', '2024-01-01 14:00:00',
               '2024-01-01 16:00:00', '2024-01-01 18:00:00',
               ...
               '2024-01-09 06:00:00', '2024-01-09 08:00:00',
               '2024-01-09 10:00:00', '2024-01-09 12:00:00',
               '2024-01-09 14:00:00', '2024-01-09 16:00:00',
               '2024-01-09 18:00:00', '2024-01-09 20:00:00',
               '2024-01-09 22:00:00', '2024-01-10 00:00:00'],
              dtype='datetime64[ns]', length=109, freq='2h')

- 애플(aapl)의 일봉 데이터를 불러오기

In [13]:
# Load the AAPL daily bars; the first CSV column is used as the row index
df = pd.read_csv(
    "../dataset/yfinance_aapl.csv",
    index_col=0,
)
df.head(3)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
1,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200
2,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800


In [15]:
# Peek at the first raw 'Date' value before any datetime conversion
df["Date"].values[0]

'2023-07-19'

In [16]:
# Parse the 'Date' column into datetimes, store them as 'timestamp',
# and promote that column to the index (the original 'Date' column is kept)
df = (
    df
    .assign(timestamp=pd.to_datetime(df["Date"]))
    .set_index("timestamp")
)

In [17]:
# Show the first three rows to check the new 'timestamp' index
df.head(3)

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-07-19,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
2023-07-20,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200
2023-07-21,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800


In [18]:
# Slice only the rows from 2024-03-11 through 2024-03-15
df.loc["2024-03-11":"2024-03-15"]

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-03-11,2024-03-11,172.940002,174.380005,172.050003,172.75,172.516006,60139500
2024-03-12,2024-03-12,173.149994,174.029999,171.009995,173.229996,172.995346,59825400
2024-03-13,2024-03-13,172.770004,173.190002,170.759995,171.130005,170.898209,52488700
2024-03-14,2024-03-14,172.910004,174.309998,172.050003,173.0,172.765671,72913500
2024-03-15,2024-03-15,171.169998,172.619995,170.289993,172.619995,172.386185,121664700


- python 리스트의 슬라이싱은 exclusive(끝 인덱스의 데이터를 포함하지 않음) 합니다.
- pandas에서 `.loc`을 사용한 라벨(label) 기반 슬라이싱은 inclusive(끝 라벨의 데이터를 포함) 하기 때문에 위 실행 결과에서 `2024-03-15` 데이터가 포함됩니다. (위치 기반인 `.iloc` 슬라이싱은 python과 동일하게 exclusive 입니다.)

In [19]:
# Plain Python list slicing: take the elements at positions 0, 1 and 2
l = list("abcde")
l[:3]

['a', 'b', 'c']

## 2) Shift 메소드
- `shift()` 함수는 시계열 데이터를 시간적으로 앞이나 뒤로 이동시키는 데 사용됩니다.


In [20]:
# Reload the AAPL daily bars and index them by the parsed 'Date' column
df = (
    pd.read_csv("../dataset/yfinance_aapl.csv", index_col=0)
    .assign(timestamp=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('timestamp')
)
df.head(5)

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-07-19,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
2023-07-20,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200
2023-07-21,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800
2023-07-24,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800
2023-07-25,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200


In [22]:
# Shift Close down by one row so each row carries the previous row's close;
# the first row has no predecessor and becomes NaN
previous_close = df["Close"].shift(periods=1)
df["Previous_Close"] = previous_close
df.head(5)

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume,Previous_Close
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-07-19,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300,
2023-07-20,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200,195.100006
2023-07-21,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800,193.130005
2023-07-24,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800,191.940002
2023-07-25,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200,192.75


In [23]:
# Return relative to the previous close: (Close - Previous_Close) / Previous_Close
prev_close = df["Previous_Close"]
close_change = df["Close"] - prev_close
df["Daily_Return"] = close_change / prev_close
df.head(5)

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume,Previous_Close,Daily_Return
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-07-19,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300,,
2023-07-20,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200,195.100006,-0.010097
2023-07-21,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800,193.130005,-0.006162
2023-07-24,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800,191.940002,0.00422
2023-07-25,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200,192.75,0.004514


## 3) Rolling 메소드
- `rolling()` 함수는 시계열 데이터에서 특정 기간(window)의 통계치를 계산할 때 매우 유용합니다.

In [25]:
# Start again from a fresh copy of the AAPL daily bars, indexed by parsed date
aapl_raw = pd.read_csv("../dataset/yfinance_aapl.csv", index_col=0)
df = aapl_raw.assign(timestamp=pd.to_datetime(aapl_raw['Date'])).set_index('timestamp')
df.head(8)

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-07-19,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
2023-07-20,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200
2023-07-21,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800
2023-07-24,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800
2023-07-25,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200
2023-07-26,2023-07-26,193.669998,195.639999,193.320007,194.5,193.472519,47471900
2023-07-27,2023-07-27,196.020004,197.199997,192.550003,193.220001,192.19928,47460200
2023-07-28,2023-07-28,194.669998,196.630005,194.139999,195.830002,194.795486,48291400


In [41]:
# 5-row rolling mean of Close; rows before the window is full come out as NaN
close_mean_5 = df["Close"].rolling(window=5).mean()
close_mean_5.head(10)

timestamp
2023-07-19           NaN
2023-07-20           NaN
2023-07-21           NaN
2023-07-24           NaN
2023-07-25    193.308002
2023-07-26    193.188000
2023-07-27    193.206000
2023-07-28    193.984000
2023-07-31    194.723999
2023-08-01    195.122000
Name: Close, dtype: float64

In [30]:
# 3-day moving average of Close
close_window_3 = df["Close"].rolling(window=3)
df["3_day_MA"] = close_window_3.mean()
df.head()

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume,3_day_MA,5_day_std,7_day_max_high,7_day_min_low
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-07-19,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300,,,,
2023-07-20,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200,,,,
2023-07-21,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800,193.390004,,,
2023-07-24,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800,192.606669,,,
2023-07-25,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200,192.769999,1.175063,,


In [29]:
# 5-day moving standard deviation of Close
close_window_5 = df["Close"].rolling(window=5)
df["5_day_std"] = close_window_5.std()
df.head()

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume,3_day_MA,5_day_std,7_day_max_high,7_day_min_low
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-07-19,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300,,,,
2023-07-20,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200,,,,
2023-07-21,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800,193.390004,,,
2023-07-24,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800,192.606669,,,
2023-07-25,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200,192.769999,1.175063,,


In [28]:
# Rolling maximum of High and rolling minimum of Low over a 7-row window
week_window = 7
rolling_high = df["High"].rolling(window=week_window)
rolling_low = df["Low"].rolling(window=week_window)
df["7_day_max_high"] = rolling_high.max()
df["7_day_min_low"] = rolling_low.min()
df.head(10)

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume,3_day_MA,5_day_std,7_day_max_high,7_day_min_low
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-07-19,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300,,,,
2023-07-20,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200,,,,
2023-07-21,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800,193.390004,,,
2023-07-24,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800,192.606669,,,
2023-07-25,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200,192.769999,1.175063,,
2023-07-26,2023-07-26,193.669998,195.639999,193.320007,194.5,193.472519,47471900,193.623332,0.956644,,
2023-07-27,2023-07-27,196.020004,197.199997,192.550003,193.220001,192.19928,47460200,193.779999,0.956126,198.229996,191.229996
2023-07-28,2023-07-28,194.669998,196.630005,194.139999,195.830002,194.795486,48291400,194.516668,1.215826,197.199997,191.229996
2023-07-31,2023-07-31,196.059998,196.490005,195.259995,196.449997,195.412201,38824100,195.166667,1.390442,197.199997,191.229996
2023-08-01,2023-08-01,196.240005,196.729996,195.279999,195.610001,194.576645,35175100,195.963333,1.275487,197.199997,192.25


- 실습) Close 기준 10일 볼린저 밴드(Bollinger Bands) 구하기

In [75]:
# Fresh copy of the AAPL daily bars for the Bollinger Band exercise
df = pd.read_csv("../dataset/yfinance_aapl.csv", index_col=0)
df = df.assign(timestamp=pd.to_datetime(df['Date']))
df = df.set_index('timestamp')
df.head()

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-07-19,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
2023-07-20,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200
2023-07-21,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800
2023-07-24,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800
2023-07-25,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200


In [34]:

# 10-day Bollinger Bands on Close: moving average +/- 2 * moving standard deviation.
# The rolling mean and std are each computed once and reused for both bands
# instead of being recomputed per band.
bollinger_window = 10
bollinger_width = 2
close_rolling = df["Close"].rolling(window=bollinger_window)
close_mean = close_rolling.mean()
close_std = close_rolling.std()
df["Upper Bollinger Band"] = close_mean + close_std * bollinger_width
df["Lower Bollinger Band"] = close_mean - close_std * bollinger_width
df.head(15)

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume,3_day_MA,5_day_std,7_day_max_high,7_day_min_low,Upper Bollinger Band,Lower Bollinger Band
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-07-19,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300,,,,,,
2023-07-20,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200,,,,,,
2023-07-21,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800,193.390004,,,,,
2023-07-24,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800,192.606669,,,,,
2023-07-25,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200,192.769999,1.175063,,,,
2023-07-26,2023-07-26,193.669998,195.639999,193.320007,194.5,193.472519,47471900,193.623332,0.956644,,,,
2023-07-27,2023-07-27,196.020004,197.199997,192.550003,193.220001,192.19928,47460200,193.779999,0.956126,198.229996,191.229996,,
2023-07-28,2023-07-28,194.669998,196.630005,194.139999,195.830002,194.795486,48291400,194.516668,1.215826,197.199997,191.229996,,
2023-07-31,2023-07-31,196.059998,196.490005,195.259995,196.449997,195.412201,38824100,195.166667,1.390442,197.199997,191.229996,,
2023-08-01,2023-08-01,196.240005,196.729996,195.279999,195.610001,194.576645,35175100,195.963333,1.275487,197.199997,192.25,197.215522,191.21448


In [35]:
# Moving average and moving standard deviation of Close over `window_length` rows
window_length = 10
close_window = df["Close"].rolling(window=window_length)
df["Moving_Average"] = close_window.mean()
df["Moving_STD"] = close_window.std()

In [36]:
# Upper band = moving average + 2 * moving std
# Lower band = moving average - 2 * moving std
band_offset = df["Moving_STD"] * 2
df["Upper_Bollinger"] = df["Moving_Average"] + band_offset
df["Lower_Bollinger"] = df["Moving_Average"] - band_offset

In [53]:
# Show the first 15 rows of the current frame
# NOTE(review): the saved output below shows open/high/low/close/volume columns of a
# different frame, not the Bollinger columns computed above — the cells appear to have
# been run out of order; re-run the notebook top to bottom to refresh it.
df.head(15)

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-03-18 09:00:00,72600,72700,72500,72700,525448
2024-03-18 09:01:00,72700,72900,72600,72800,243017
2024-03-18 09:02:00,72800,72800,72700,72800,45015
2024-03-18 09:03:00,72800,72800,72700,72800,48635
2024-03-18 09:04:00,72700,72800,72600,72600,265091
2024-03-18 09:05:00,72700,72700,72500,72600,58264
2024-03-18 09:06:00,72600,72700,72500,72700,91938
2024-03-18 09:07:00,72700,72700,72600,72600,50769
2024-03-18 09:08:00,72700,72800,72600,72800,178946
2024-03-18 09:09:00,72800,72800,72700,72800,34245


## 4) Resample
- `resample()` 함수는 시계열 데이터에서 시간 간격을 재조정할 때 사용합니다. 예를 들어, 1분봉 데이터를 1시간봉 데이터로 집계할 수 있습니다.

In [52]:
# Samsung Electronics 1-minute bars for 2024-03-18
# NOTE(review): the file name says 005039, while Samsung Electronics is usually
# listed under code 005930 — confirm the file name; the path is left unchanged here.
ohlcv_columns = ['open', 'high', 'low', 'close', 'volume']
df = pd.read_csv("../dataset/005039_20240318_1minute.csv", index_col=0)
# Parse the whole column in one vectorized call rather than element by element
df["timestamp"] = pd.to_datetime(df["timestamp"])
df[ohlcv_columns] = df[ohlcv_columns].astype(int)
# Rebind instead of mutating in place so the cell reads as a plain pipeline
df = df.set_index('timestamp')

df.head()

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-03-18 09:00:00,72600,72700,72500,72700,525448
2024-03-18 09:01:00,72700,72900,72600,72800,243017
2024-03-18 09:02:00,72800,72800,72700,72800,45015
2024-03-18 09:03:00,72800,72800,72700,72800,48635
2024-03-18 09:04:00,72700,72800,72600,72600,265091


In [56]:
# Sum the 1-minute volume into 1-hour buckets
hourly_volume = df["volume"].resample("1h").sum()
hourly_volume.reset_index()

Unnamed: 0,timestamp,volume
0,2024-03-18 09:00:00,3268524
1,2024-03-18 10:00:00,1165637
2,2024-03-18 11:00:00,1259625
3,2024-03-18 12:00:00,858394
4,2024-03-18 13:00:00,1476135
5,2024-03-18 14:00:00,1859321
6,2024-03-18 15:00:00,2489636


In [59]:
# Mean, sum, max and min of the 1-minute volume per 30-minute bucket
volume_stats = ["mean", "sum", "max", "min"]
df["volume"].resample("30min").agg(volume_stats)

Unnamed: 0_level_0,mean,sum,max,min
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-03-18 09:00:00,75841.83,2275255,525448,8507
2024-03-18 09:30:00,33108.97,993269,126522,2998
2024-03-18 10:00:00,20074.87,602246,174837,5182
2024-03-18 10:30:00,18779.7,563391,115167,5888
2024-03-18 11:00:00,18826.27,564788,173217,7229
2024-03-18 11:30:00,23161.23,694837,200569,3800
2024-03-18 12:00:00,17583.9,527517,94444,4361
2024-03-18 12:30:00,11029.23,330877,31191,7303
2024-03-18 13:00:00,30209.47,906284,208784,3017
2024-03-18 13:30:00,18995.03,569851,239885,4114


In [60]:
# Re-display the first rows of the 1-minute frame before resampling all columns
df.head()

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-03-18 09:00:00,72600,72700,72500,72700,525448
2024-03-18 09:01:00,72700,72900,72600,72800,243017
2024-03-18 09:02:00,72800,72800,72700,72800,45015
2024-03-18 09:03:00,72800,72800,72700,72800,48635
2024-03-18 09:04:00,72700,72800,72600,72600,265091


In [61]:
# Convert 1-minute bars into 5-minute bars:
# first open, highest high, lowest low, last close, summed volume
five_minute_rules = {
    "open": "first",
    "high": "max",
    "low": "min",
    "close": "last",
    "volume": "sum",
}
five_minute_bars = df.resample("5min").agg(five_minute_rules)
five_minute_bars.reset_index()

Unnamed: 0,timestamp,open,high,low,close,volume
0,2024-03-18 09:00:00,72600,72900,72500,72600,1127206
1,2024-03-18 09:05:00,72700,72800,72500,72800,414162
2,2024-03-18 09:10:00,72800,72900,72700,72800,275966
3,2024-03-18 09:15:00,72800,72900,72700,72800,130963
4,2024-03-18 09:20:00,72700,72800,72600,72700,213739
...,...,...,...,...,...,...
74,2024-03-18 15:10:00,72900,72900,72800,72900,90424
75,2024-03-18 15:15:00,72900,72900,72800,72800,344190
76,2024-03-18 15:20:00,72800,72800,72800,72800,0
77,2024-03-18 15:25:00,72800,72800,72800,72800,0


In [63]:
# Convert 1-minute bars into 1-hour bars:
# first open, highest high, lowest low, last close, summed volume
hourly_rules = {
    "open": "first",
    "high": "max",
    "low": "min",
    "close": "last",
    "volume": "sum",
}
hourly_bars = df.resample("1h").agg(hourly_rules)
hourly_bars.reset_index()

Unnamed: 0,timestamp,open,high,low,close,volume
0,2024-03-18 09:00:00,72600,72900,72500,72800,3268524
1,2024-03-18 10:00:00,72800,72800,72600,72600,1165637
2,2024-03-18 11:00:00,72700,72700,72500,72700,1259625
3,2024-03-18 12:00:00,72600,72800,72600,72700,858394
4,2024-03-18 13:00:00,72700,72900,72600,72700,1476135
5,2024-03-18 14:00:00,72700,73000,72700,72800,1859321
6,2024-03-18 15:00:00,72900,72900,72800,72800,2489636
