In [1]:
import pandas as pd

file_paths = [
    'data/trend/bitcoin_trends_daily_2017_utc0.csv',
    'data/trend/bitcoin_trends_daily_2018_utc0.csv',
    'data/trend/bitcoin_trends_daily_2019_utc0.csv',
    'data/trend/bitcoin_trends_daily_2020_utc0.csv',
    'data/trend/bitcoin_trends_daily_2021_utc0.csv',
    'data/trend/bitcoin_trends_daily_2022_utc0.csv',
    'data/trend/bitcoin_trends_daily_2023_utc0.csv',
    'data/trend/bitcoin_trends_daily_2024_utc0.csv',
]

data_list = []

for file_path in file_paths:
    data = pd.read_csv(file_path)
    data_list.append(data)

trend_data = pd.concat(data_list)

file_kor_path = 'data/BTC_KRW 빗썸 과거 데이터.csv'
file_usd_path = 'data/BTC_USD 비트파이넥스 과거 데이터.csv'
file_exc_path = 'data/exchange_rate.csv'

btc_krw = pd.read_csv(file_kor_path)
btc_usd = pd.read_csv(file_usd_path)
exc_rate = pd.read_csv(file_exc_path)

print(trend_data.info())

<class 'pandas.core.frame.DataFrame'>
Index: 2844 entries, 0 to 336
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   timestamp  2844 non-null   int64 
 1   date       2844 non-null   object
 2   비트코인       2844 non-null   int64 
 3   업비트        2844 non-null   int64 
 4   bitcoin    2844 non-null   int64 
 5   coinbase   2844 non-null   int64 
dtypes: int64(5), object(1)
memory usage: 155.5+ KB
None


In [2]:
# 기존 'timestamp' 열 제거
if 'timestamp' in trend_data.columns:
    trend_data.drop(columns=['timestamp'], inplace=True)

# 환율 데이터 결측치 처리
exc_rate['Exchange Rate'] = exc_rate['Exchange Rate'].fillna(method='ffill')

# 결측치가 여전히 남아 있는지 확인
print("환율 데이터 결측치 개수:", exc_rate['Exchange Rate'].isna().sum())


환율 데이터 결측치 개수: 0


  exc_rate['Exchange Rate'] = exc_rate['Exchange Rate'].fillna(method='ffill')


In [3]:
# 2. 타임스탬프 통일
trend_data.rename(columns={'date': 'timestamp'}, inplace=True)
btc_krw.rename(columns={'날짜': 'timestamp'}, inplace=True)
btc_usd.rename(columns={'날짜': 'timestamp'}, inplace=True)
exc_rate.rename(columns={'Date': 'timestamp'}, inplace=True)

trend_data['timestamp'] = pd.to_datetime(trend_data['timestamp'])
btc_krw['timestamp'] = pd.to_datetime(btc_krw['timestamp'])
btc_usd['timestamp'] = pd.to_datetime(btc_usd['timestamp'])
exc_rate['timestamp'] = pd.to_datetime(exc_rate['timestamp'])


In [4]:
# 환율 병합 (BTC-KRW와 BTC-USD에 동일 환율 적용)
btc_usd = pd.merge(btc_usd, exc_rate, on='timestamp', how='inner')  # 환율 데이터 병합
btc_krw = pd.merge(btc_krw, exc_rate, on='timestamp', how='inner')  # 환율 데이터 병합

# BTC-KRW 데이터 타입 변환 및 원화 기준 데이터 생성
btc_krw['close'] = btc_krw['종가'].str.replace(',', '').astype(float)
btc_krw['open'] = btc_krw['시가'].str.replace(',', '').astype(float)
btc_krw['high'] = btc_krw['고가'].str.replace(',', '').astype(float)
btc_krw['low'] = btc_krw['저가'].str.replace(',', '').astype(float)
btc_krw['volume'] = btc_krw['거래량'].str.replace('K', '').astype(float) * 1000  # 거래량 K 처리
btc_krw['change'] = btc_krw['변동 %'].str.replace('%', '').astype(float)

# BTC-USD 데이터 타입 변환 후 환율 적용 (원화 기준)
btc_usd['close'] = btc_usd['종가'].str.replace(',', '').astype(float) * btc_usd['Exchange Rate']
btc_usd['open'] = btc_usd['시가'].str.replace(',', '').astype(float) * btc_usd['Exchange Rate']
btc_usd['high'] = btc_usd['고가'].str.replace(',', '').astype(float) * btc_usd['Exchange Rate']
btc_usd['low'] = btc_usd['저가'].str.replace(',', '').astype(float) * btc_usd['Exchange Rate']
btc_usd['volume'] = btc_usd['거래량'].str.replace('K', '').astype(float) * 1000  # 거래량 K 처리
btc_usd['change'] = btc_usd['변동 %'].str.replace('%', '').astype(float)

# 원래 열 삭제
columns_to_drop = ['종가', '시가', '고가', '저가', '거래량', '변동 %']
btc_usd = btc_usd.drop(columns=columns_to_drop)
btc_krw = btc_krw.drop(columns=columns_to_drop)

# 타임스탬프 오름차순 정렬
btc_krw = btc_krw.sort_values(by='timestamp', ascending=True)
btc_usd = btc_usd.sort_values(by='timestamp', ascending=True)

# 데이터 확인
print(btc_krw.head())
print(btc_usd.head())

# 정렬된 데이터 저장
btc_krw.to_csv('btc_krw_sorted.csv', index=False)
btc_usd.to_csv('btc_usd_sorted.csv', index=False)


      timestamp  Exchange Rate      close       open       high        low  \
2737 2017-05-23         1117.1  3206000.0  3104000.0  3281000.0  3081000.0   
2736 2017-05-24         1120.6  4175000.0  3206000.0  4314000.0  3206000.0   
2735 2017-05-25         1126.6  4199000.0  4175000.0  4840000.0  3102000.0   
2734 2017-05-26         1118.0  3227000.0  4199000.0  4200000.0  2900000.0   
2733 2017-05-27         1118.0  3152000.0  3227000.0  3288000.0  2460000.0   

       volume  change  
2737  21580.0    3.29  
2736  34680.0   30.22  
2735  35910.0    0.57  
2734  36650.0  -23.15  
2733  33750.0   -2.32  
      timestamp  Exchange Rate    close     open     high      low  volume  \
4672 2012-02-02         1126.4  6871.04  6871.04  6871.04  6871.04     NaN   
4671 2012-02-03         1118.5  6711.00  6711.00  6711.00  6711.00     NaN   
4670 2012-02-04         1118.5  6599.15  6599.15  6599.15  6599.15     NaN   
4669 2012-02-05         1118.5  6375.45  6375.45  6375.45  6375.45     NaN 