In [None]:
!pip install pyupbit

In [None]:
!pip install ace_tools

In [None]:
# Extra OS-level settings for the Windows + Anaconda + numpy + scikit-learn
# combination: pin every numeric backend to a single thread.
# NOTE(review): presumably this works around the scikit-learn KMeans/MKL
# threading warning on Windows — confirm. The variables are set ahead of the
# numpy / scikit-learn imports that follow.
import os

os.environ.update(
    {
        "OMP_NUM_THREADS": "1",
        "OPENBLAS_NUM_THREADS": "1",
        "MKL_NUM_THREADS": "1",
        "VECLIB_MAXIMUM_THREADS": "1",
        "NUMEXPR_NUM_THREADS": "1",
    }
)

import requests
import numpy as np
import pandas as pd
import pyupbit as pyu
import re
from datetime import datetime
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.metrics import silhouette_score

from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram
from sklearn.preprocessing import StandardScaler
from IPython.display import display

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

In [None]:
# Mount Google Drive when the notebook runs on Colab. The import is guarded so
# that a local (non-Colab) kernel, where the google.colab package is absent,
# can still run the remaining cells instead of stopping here.
try:
    from google.colab import drive
except ImportError:
    drive = None
    print("google.colab is not available; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

### 가상화폐 기본정보 가져오기

In [None]:
# Extract the tickers of the KRW market from Upbit's market listing.
url_market = "https://api.upbit.com/v1/market/all"
url_ticker = "https://api.upbit.com/v1/ticker"

# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors here instead of letting an error
# payload fail later in the list comprehension.
markets_resp = requests.get(url_market, timeout=10)
markets_resp.raise_for_status()
markets_data = markets_resp.json()

krw_markets = [market['market'] for market in markets_data if market['market'].startswith('KRW-')]

# Each ticker looks like "KRW-XXX": the part before the dash is stored as
# "market" and the part after it as "coin_name". Split once per ticker.
ticker_parts = [full_ticker.split("-") for full_ticker in krw_markets]

df_crypto = pd.DataFrame({"market": [parts[0] for parts in ticker_parts],
                          "coin_name": [parts[1] for parts in ticker_parts],
                          "ticker": krw_markets
                         }
                        )

In [None]:
def parse_market_cap(text):
    """Parse a market-cap description into a numeric value, a date and a unit.

    The text must contain an amount made of digits (with optional ``,`` / ``.``)
    followed by one Korean unit character and ``원`` (for example ``1.5조원``),
    and a reference date written as ``(YY.MM.DD. 기준)``.

    Args:
        text: Market-cap description string.

    Returns:
        tuple: ``(market_cap_numeric, date_formatted, unit)`` where
        ``market_cap_numeric`` is the amount multiplied by the unit's factor
        (float), ``date_formatted`` is the reference date as ``YYYY-MM-DD``,
        and ``unit`` is the matched unit character (``만``, ``억`` or ``조``).

    Raises:
        ValueError: If the amount or the reference date cannot be found in
            ``text``, or if the matched number/date cannot be converted.
    """
    # Extract the number and its unit.
    amount_match = re.search(r'([\d,.]+)([억만조])원', text)
    if amount_match is None:
        # Fail with a descriptive error instead of AttributeError on None.
        raise ValueError("market cap amount not found in: {!r}".format(text))
    number = float(amount_match.group(1).replace(',', ''))
    unit = amount_match.group(2)

    unit_multipliers = {
        '만': 10000,            # 10^4
        '억': 100000000,        # 10^8
        '조': 1000000000000,    # 10^12
    }
    market_cap_numeric = number * unit_multipliers[unit]

    # Extract the reference date and convert it to YYYY-MM-DD.
    date_match = re.search(r'\((\d{2}\.\d{2}\.\d{2})\. 기준\)', text)
    if date_match is None:
        raise ValueError("reference date not found in: {!r}".format(text))
    date_str = date_match.group(1)
    date_formatted = datetime.strptime(date_str, '%y.%m.%d').strftime('%Y-%m-%d')

    return market_cap_numeric, date_formatted, unit

In [None]:
# Look up the market-cap text of every coin and store the raw text together
# with its parsed value, unit and reference date on the matching df_crypto row.
# Best effort: a coin whose lookup or parsing fails is reported and skipped.
for i in range(len(df_crypto)):
    coin_name = df_crypto.loc[i, "coin_name"]
    try:
        # The timeout prevents one stalled request from hanging the whole loop.
        coin_info_resp = requests.get(
            "https://api-manager.upbit.com/api/v1/coin_info/pub/{}.json".format(coin_name),
            timeout=10,
        )
        coin_info_resp.raise_for_status()
        total_amount = coin_info_resp.json()['data']['header_key_values']['market_cap']['value']

        market_cap_numeric, date_formatted, unit = parse_market_cap(total_amount)

        df_crypto.loc[i, "total_amount"] = total_amount
        df_crypto.loc[i, "total_amount_value"] = market_cap_numeric
        df_crypto.loc[i, "unit"] = unit
        df_crypto.loc[i, "reference_date"] = date_formatted
    except Exception as e:
        # Report the reason alongside the coin so failures can be diagnosed.
        print("{}: {!r}".format(coin_name, e))

### 시세데이터 가져오기

In [None]:
def _add_technical_indicators(price_df):
    """Append technical-indicator columns, all derived from ``close``, in place.

    Columns added, in this order: ``ma_05`` ... ``ma_200`` (rolling means),
    ``rsi``, ``macd``, ``signal``, ``macd_hist``, ``dma``, ``std20``,
    ``Upperbb`` and ``Lowerbb``.

    Args:
        price_df: DataFrame with a ``close`` column; it is modified in place.

    Returns:
        The same DataFrame object, for convenience.
    """
    close = price_df['close']

    # Moving averages of the close over several window lengths.
    for window in (5, 10, 20, 30, 60, 120, 200):
        price_df['ma_{:02d}'.format(window)] = close.rolling(window=window).mean()

    # RSI (Relative Strength Index) from 14-period simple rolling means of the
    # upward and downward close-to-close moves.
    delta = close.diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=14).mean()
    avg_loss = loss.rolling(window=14).mean()
    rs = avg_gain / avg_loss
    price_df['rsi'] = 100 - (100 / (1 + rs))

    # MACD: 12-span EMA minus 26-span EMA, its 9-span EMA signal line, and the
    # histogram (MACD minus signal).
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    price_df['macd'] = ema12 - ema26
    price_df['signal'] = price_df['macd'].ewm(span=9, adjust=False).mean()
    price_df['macd_hist'] = price_df['macd'] - price_df['signal']

    # DMA (Difference of Moving Averages): 10-period mean minus 50-period mean.
    short_ma = close.rolling(window=10).mean()
    long_ma = close.rolling(window=50).mean()
    price_df['dma'] = short_ma - long_ma

    # Bollinger Bands: 20-period mean +/- 2 rolling standard deviations.
    price_df['std20'] = close.rolling(window=20).std()
    price_df['Upperbb'] = price_df['ma_20'] + (2 * price_df['std20'])
    price_df['Lowerbb'] = price_df['ma_20'] - (2 * price_df['std20'])

    return price_df


now = datetime.now()
formatted_time = now.strftime("%Y%m%d %H:%M:%S")

# Collect one frame per ticker and concatenate once after the loop; growing a
# DataFrame with pd.concat on every iteration re-copies all earlier rows.
ohlcv_frames = []

for crypto in df_crypto.ticker.tolist():
    # count  : number of periods to fetch
    # to     : latest date/time of the requested range
    # period : interval between data requests (seconds)
    try:
        crypto_price = pyu.get_ohlcv(ticker=crypto,
                                     interval='day',
                                     count=365,
                                     to=formatted_time,
                                     period=0.1)
        if crypto_price is None:
            # Nothing came back for this ticker; report it and move on.
            print("{}: no OHLCV data returned".format(crypto))
            continue

        crypto_price.insert(1, "ticker", crypto)
        _add_technical_indicators(crypto_price)
        ohlcv_frames.append(crypto_price)
    except Exception as e:
        # Best effort: report the failing ticker with the reason and continue.
        print("{}: {!r}".format(crypto, e))

# pd.concat raises on an empty list, so fall back to an empty frame.
df_crypto_ohlcv = pd.concat(ohlcv_frames, axis=0) if ohlcv_frames else pd.DataFrame()

In [None]:
# Persist the collected daily OHLCV / indicator table in both Parquet and CSV
# form, written to the current working directory.
df_crypto_ohlcv.to_parquet(path="df_crypto_ohlcv_daily.parquet")
df_crypto_ohlcv.to_csv(path_or_buf="df_crypto_ohlcv_daily.csv")