### Library Import

In [None]:
import os
import sys

from prophet import Prophet
import numpy as np
import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt
import random
import torch

# Add the project code path: append the directory two levels above the current
# working directory to sys.path, then echo the appended entry.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(""))))
print(sys.path[-1])
pd.set_option('display.max_columns', None)  # display every column
pd.set_option('display.max_rows', None)  # display every row

In [None]:
def seed_everything(seed):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch,
    and exports ``PYTHONHASHSEED`` to the process environment.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Stored as a string because environment variables must be text.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # Honour the caller's seed; this used to be hard-coded to 42, which made
    # the `seed` argument ineffective for PyTorch.
    torch.manual_seed(seed)


seed_everything(42)

### Data Load

In [None]:
# Load the input files
data_path: str = "../../data"
## Run the two lines below when raw.csv does not exist yet
# from Code.dataset.merge_all import merge_all
# df = merge_all(data_path)
train_data: pd.DataFrame = pd.read_csv(os.path.join(data_path, "raw.csv"))
# Keep only the rows flagged as training data. The explicit copy makes
# train_data an independent frame, so assigning new columns to it later does
# not raise SettingWithCopyWarning.
train_data = train_data.loc[train_data["_type"] == "train"].copy()
# Preload the test frame (per the original note it only holds the ID and target columns)
sub: pd.DataFrame = pd.read_csv(os.path.join(data_path, "test.csv"))
# Mirror ID into a "Time" column; the forecast frames reuse it as their time axis.
sub["Time"] = sub.ID

### Column Renaming and Selection

In [None]:
cols = train_data.columns
# Bookkeeping columns whose names are kept exactly as they are.
meta_cols = ['ID', 'target', '_type']

# For every other column: remove "hourly_" and keep only the part of the name
# that follows the second remaining underscore.
train_data.columns = [
    name if name in meta_cols
    else f"{name}".replace("hourly_", "").split("_", maxsplit=2)[2]
    for name in cols
]

# Print the columns that look like close-price or volume series.
for name in train_data.columns:
    if "close" in name or "volume" in name:
        print(name)

# Keep the bookkeeping columns plus every column mentioning one of these keywords.
keep_keywords = ("all_exchange", "block", "difficulty", "supply", "fees")
new_cols = [
    name for name in train_data.columns
    if name in meta_cols or any(keyword in name for keyword in keep_keywords)
]

# Give the close price and volume series the names used by the rest of the notebook.
rename_dict = {
    "all_exchange_spot_btc_usd_close": "target_closed",
    "all_exchange_spot_btc_usd_volume": "target_volume"
}
train_data = train_data[new_cols].rename(columns=rename_dict)
train_data.head()

### Closed(%) to Target(Class)

In [None]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Relative change between each row and the next one, written as 1 - next / current.
# NOTE(review): closed_to_target() in this notebook uses 1 - current / next, i.e. the
# opposite direction — confirm which sign convention is intended here.
next_closed = train_data["target_closed"].shift(-1)
next_volume = train_data["target_volume"].shift(-1)
up_down_closed = 1 - (next_closed / train_data["target_closed"])
up_down_volume = 1 - (next_volume / train_data["target_volume"])
train_data.loc[:, "up_down_closed"] = up_down_closed

# One row, two panels: close-price change (scatter) and volume change (box), both against the target class.
fig: go.Figure = make_subplots(
    rows=1,
    cols=2,
    shared_xaxes=True,
    subplot_titles=("closed to target(%)", "volume to target(%)"),
)

closed_trace = go.Scatter(x=train_data["target"], y=up_down_closed, mode="markers")
volume_trace = go.Box(x=train_data["target"], y=up_down_volume)
fig.add_trace(closed_trace, row=1, col=1)
fig.add_trace(volume_trace, row=1, col=2)

fig.update_xaxes(title_text="Target")
fig.update_layout(title_text="Target statistics", showlegend=False)
fig.show()
train_data["up_down_closed"]

In [None]:
def closed_to_target(closed_series: pd.Series, threshold: float = 0.05) -> pd.Series:
    """Convert a close-price series into a 4-level class label per row.

    For each row the relative change towards the next row is computed as
    ``1 - close[t] / close[t + 1]`` and bucketed:

    * ``0.0`` when change <= -threshold
    * ``1.0`` when -threshold < change < 0
    * ``2.0`` when 0 <= change < threshold
    * ``3.0`` otherwise

    Rows whose change is NaN (always the last row, which has no successor, and
    any row next to a missing price) fail every comparison and therefore fall
    into class ``3.0``. This matches the previous row-wise implementation and
    keeps the result castable to int.

    Args:
        closed_series: Close prices ordered in time.
        threshold: Absolute change separating the outer classes from the inner
            ones. Defaults to 0.05, the value that used to be hard-coded.

    Returns:
        A float Series of class labels with the same index and name as
        ``closed_series``.
    """
    change = 1 - (closed_series / closed_series.shift(-1))
    labels = np.select(
        [
            (change <= -threshold).to_numpy(),
            (change < 0).to_numpy(),
            (change < threshold).to_numpy(),
        ],
        [0.0, 1.0, 2.0],
        default=3.0,
    )
    return pd.Series(labels, index=closed_series.index, name=closed_series.name, dtype=float)


# Side-by-side view for eyeballing the derived class against the provided target:
# ID, close, previous close, target, derived class, next ID (columns are renumbered 0..5).
comparison_columns = [
    train_data["ID"],
    train_data["target_closed"],
    train_data["target_closed"].shift(1),
    train_data["target"],
    closed_to_target(train_data["target_closed"]),
    train_data["ID"].shift(-1),
]
pd.concat(comparison_columns, axis=1, ignore_index=True)

In [None]:
# Summarise the dtypes and non-null counts of the selected columns.
train_data.info()

In [None]:
# Remove the open-interest column in place. errors="ignore" keeps the cell
# re-runnable: without it a second execution raises KeyError because the
# column is already gone.
train_data.drop(columns=["all_exchange_open_interest"], inplace=True, errors="ignore")

In [None]:
# Same summary after discarding every row that has a missing value
# (the result is not assigned, so train_data itself is unchanged).
train_data.dropna().info()

In [None]:
from scipy.stats import anderson, shapiro
from sklearn.preprocessing import MinMaxScaler, Normalizer, RobustScaler

# 
# Every column except the two bookkeeping ones takes part in the scaling.
columns = list(train_data.columns)
for excluded in ("ID", "_type"):
    columns.remove(excluded)

# Complete rows only, re-indexed from 0.
train_df = train_data.loc[:, columns].dropna().reset_index(drop=True)

# Step 1: RobustScaler on each column.
scaler_1 = RobustScaler()
robust_scaled = pd.DataFrame(scaler_1.fit_transform(train_df), columns=columns)

# Step 2: Normalizer on the robust-scaled values.
# NOTE(review): Normalizer rescales each row (sample) to unit norm rather than
# each column — confirm that per-row normalisation is what is wanted here.
scaler_2 = Normalizer()
scaled_train_df = pd.DataFrame(scaler_2.fit_transform(robust_scaled), columns=columns)


In [None]:
# train_data["target_closed"].plot()
# scaled_train_test = (np.exp2(scaled_train_df["target_closed"]))
# scaled_train_test.plot(kind="bar")
# Anderson-Darling test on the close price: scaled version first, raw version second.
raw_result = anderson(train_data["target_closed"])
norm_target = anderson(scaled_train_df["target_closed"])
print(norm_target, "\n", raw_result)

In [None]:
# Correlation of each scaled feature with up_down_closed, ranked by absolute value.
df_corr = scaled_train_df.corr().loc[:, "up_down_closed"]
# Discard entries equal to exactly 1 (the column's correlation with itself).
df_corr = df_corr.loc[df_corr.ne(1)]
df_corr.abs().sort_values(ascending=False)

In [None]:
# Same ranking on the unscaled data, with the two bookkeeping columns removed first.
raw_features = train_data.drop(columns=["ID", "_type"])
df_corr = raw_features.corr().loc[:, "up_down_closed"]
# Discard entries equal to exactly 1 (the column's correlation with itself).
df_corr = df_corr.loc[df_corr.ne(1)]
df_corr.abs().sort_values(ascending=False)

In [None]:
pred_len = 2792  # default number of future steps to forecast


class ProphetAVG:
    """Average of two Prophet models that differ only in changepoint prior scale.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, scale1=0.01, scale2=0.1):
        """Create the two underlying models.

        Args:
            scale1: ``changepoint_prior_scale`` of the first model (smaller
                value: fewer changepoints are followed, more robust trend).
            scale2: ``changepoint_prior_scale`` of the second model (larger
                value: more changepoints are followed, trend-sensitive).
        """
        self.models = [
            # Follows few changepoints within the period (robust to trend changes)
            Prophet(seasonality_mode='additive', changepoint_range=1,
                    changepoint_prior_scale=scale1),
            # Follows many changepoints within the period (sensitive to trend changes)
            Prophet(seasonality_mode='additive', changepoint_range=1,
                    changepoint_prior_scale=scale2)
        ]
        # Per-model forecast frames of the most recent predict() call.
        self.forecasts = []
        # Averaged forecast of the most recent predict() call (None until then).
        self.df = None

    def fit(self, data):
        """Fit every underlying model on the same frame.

        Args:
            data: Training frame handed unchanged to each model's ``fit``.
        """
        for model in self.models:
            model.fit(data)

    def predict(self, periods=pred_len, freq='h', time=None):
        """Forecast ``periods`` future steps and average the models' ``yhat``.

        Args:
            periods: Number of future steps to forecast.
            freq: Step frequency passed to ``make_future_dataframe``.
            time: Values for the ``Time`` column of the result. When omitted,
                the notebook-level ``sub['Time']`` is used, as before. A
                Series is aligned to the forecast by index.

        Returns:
            DataFrame with columns ``Time`` and ``Close`` (mean ``yhat`` of
            the last ``periods`` rows); also stored in ``self.df``.
        """
        if time is None:
            # Backward-compatible default: the submission frame loaded by the notebook.
            time = sub['Time']
        self.forecasts = [
            model.predict(model.make_future_dataframe(periods=periods, freq=freq))
            for model in self.models
        ]
        # Average prediction of the two models
        avg_forecast = pd.concat([forecast['yhat'] for forecast in self.forecasts], axis=1).mean(axis=1)
        self.df = pd.DataFrame({
            'Time': time,
            # tail() rather than [-periods:] so that periods == 0 yields no rows
            # instead of the whole history.
            'Close': avg_forecast.tail(periods).reset_index(drop=True)
        })
        return self.df

    def plot(self):
        """Plot the averaged forecast produced by the last ``predict`` call.

        Raises:
            RuntimeError: If ``predict`` has not been called yet.
        """
        if self.df is None:
            raise RuntimeError("ProphetAVG.plot() requires predict() to be called first")
        plt.figure(figsize=(12, 6))
        plt.plot(self.df['Time'], self.df['Close'], label='Prediction', marker='o', linestyle='-')
        plt.xlabel('Time')
        plt.ylabel('Close Price')
        plt.legend()
        plt.grid(True)
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

In [None]:
# Build the Prophet input on a renamed copy. Renaming train_data in place
# through an alias would silently change its column names and break re-running
# the earlier cells that still read "target_closed" and "ID".
rename_dict = {
    "target_closed": "y",
    "ID": "ds"
}
ma_train_df = train_data.rename(columns=rename_dict)
ma_train_df["Time"] = ma_train_df["ds"]
longterm_model = ProphetAVG()
longterm_model.fit(ma_train_df)
longterm_forecast = longterm_model.predict()
# Plot the long-term forecast
longterm_model.plot()

In [None]:
# Compare real timestamps instead of raw strings. The previous string
# comparison used cutoffs containing a double space ('2023-03-29  12:00:00'),
# which sorts before every 'YYYY-MM-DD HH:MM:SS' value of that day, so the
# 12:00 cutoff was not applied as written.
# Assumes ds holds date-time strings or datetimes that pd.to_datetime can parse — confirm.
ds_time = pd.to_datetime(ma_train_df['ds'])

## "6-month" training model
# NOTE(review): the original label says 6 months; whether the 2023-03-29 cutoff
# matches that depends on the end date of the data — confirm.
midterm_train = ma_train_df[ds_time >= pd.Timestamp('2023-03-29 12:00:00')]

midterm_model = ProphetAVG()
midterm_model.fit(midterm_train)
midterm_forecast = midterm_model.predict()

## "3-month" training model
shortterm_train = ma_train_df[ds_time >= pd.Timestamp('2023-09-29 12:00:00')]

shortterm_model = ProphetAVG()
shortterm_model.fit(shortterm_train)
shortterm_forecast = shortterm_model.predict()

In [None]:
def ensemble_df():
    """Blend the short-, mid- and long-term forecasts over the horizon.

    Reads the notebook-level ``pred_len``, ``shortterm_forecast``,
    ``midterm_forecast``, ``longterm_forecast`` and ``sub``.

    * First quarter: the short-term weight falls linearly from 1 towards 0;
      the remainder is split evenly between mid- and long-term.
    * Second and third quarters: the mid-term weight falls linearly from 0.5
      towards 0; the long-term forecast takes the rest.
    * Last quarter: long-term forecast only.

    Returns:
        DataFrame with ``Time`` (from ``sub``) and the blended ``Close``.
    """

    def blend(step):
        # Last quarter of the horizon: long-term only.
        if step >= pred_len * 3 / 4:
            return longterm_forecast['Close'][step]

        # Middle half of the horizon: mid-term fades out in favour of long-term.
        if step >= pred_len * 1 / 4:
            weight = 0.5 - (step - pred_len * 1 / 4) / (pred_len * 1 / 2) / 2
            return (midterm_forecast['Close'][step] * weight +
                    longterm_forecast['Close'][step] * (1 - weight))

        # First quarter of the horizon: short-term fades out.
        weight = 1 - step / pred_len * 4
        return (shortterm_forecast['Close'][step] * weight +
                midterm_forecast['Close'][step] * (1 - weight) / 2 +
                longterm_forecast['Close'][step] * (1 - weight) / 2)

    weighted_pred = [blend(step) for step in range(pred_len)]
    return pd.DataFrame({'Time': sub['Time'], 'Close': weighted_pred})

In [None]:
forecast = ensemble_df()

# Plot the blended close-price forecast.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(forecast['Time'], forecast['Close'], label='Prediction', marker='o', linestyle='-')
ax.set_xlabel('Time')
ax.set_ylabel('Close Price')
ax.legend()
ax.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Relabel the frame as (ID, target) and turn the predicted prices into integer classes.
forecast.columns = ["ID", "target"]
forecast["target"] = closed_to_target(forecast["target"]).astype(int)

In [None]:
# Write the predictions to output.csv in the current working directory.
# NOTE(review): to_csv also writes the DataFrame index as an extra leading
# column by default — confirm this matches the expected submission layout.
forecast.to_csv("output.csv")

In [None]:
# Number of predicted rows per target class.
forecast.groupby(by=["target"]).count()