### Library Import

In [1]:
import os
from typing import List, Dict
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from plotly.subplots import make_subplots
import plotly.graph_objects as go


### Data Load

In [121]:
# Load the hourly BTC/USD price file.
# The data directory is built with os.path.join so the notebook is not tied to
# Windows-style backslashes; the trailing '' keeps the final path separator that
# the `data_path + <filename>` concatenations in this notebook rely on.
data_path = os.path.join(os.getcwd(), 'data', 'data', '')
df = pd.read_csv(data_path + 'HOURLY_MARKET-DATA_PRICE-OHLCV_ALL_EXCHANGE_SPOT_BTC_USD.csv')
df['datetime'] = pd.to_datetime(df['datetime'])
# Flag rows whose day-of-week is 5 or 6 (Saturday/Sunday) with 1, all others with 0.
df['weekend'] = (df['datetime'].dt.dayofweek >= 5).astype(int)
df.head()


Unnamed: 0,datetime,close,volume,weekend
0,2023-01-01 00:00:00,16536.747967,5516.420322,1
1,2023-01-01 01:00:00,16557.136536,4513.341881,1
2,2023-01-01 02:00:00,16548.149805,4310.904314,1
3,2023-01-01 03:00:00,16533.632875,4893.417864,1
4,2023-01-01 04:00:00,16524.712159,5209.002297,1


In [122]:
# Training window: keep only the rows dated on or after the cutoff.
cutoff = "2023-04-01"
train_df = df.loc[df['datetime'] >= cutoff]

In [123]:

# Reshape the filtered rows into the ds / y / weekend layout that is passed to the model.
train = train_df[["datetime", "close", "weekend"]].rename(columns={"datetime": "ds", "close": "y"})

# Treat +/-inf like missing values and drop every incomplete row in a single pass.
train = train.replace([np.inf, -np.inf], np.nan).dropna()

### Model Training

In [124]:
from neuralprophet import NeuralProphet, set_log_level

In [125]:
# Model definition: linear trend with 10 changepoints, multiplicative yearly and
# weekly seasonality (daily seasonality disabled), 24 autoregressive lags and
# 24 forecast steps, trained for 100 epochs with a Huber loss.
# NOTE(review): `train` is cut off at 2023-04-01, so it presumably spans less than
# one year — confirm that yearly seasonality is meaningful on this window.
m2 = NeuralProphet(
    growth="linear",
    n_changepoints=10,
    changepoints_range=0.9,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    seasonality_mode="multiplicative",
    n_lags=24,
    ar_reg=0.1,
    n_forecasts=24,
    learning_rate=0.05,
    epochs=100,
    batch_size=64,
    loss_func="Huber"
)

# Additional custom seasonalities.
# NOTE(review): `period` is presumably expressed in days (90 ~ quarterly,
# 30 ~ monthly) — confirm against the NeuralProphet documentation.
m2 = m2.add_seasonality(name="quarterly", period=90, fourier_order=5)
m2 = m2.add_seasonality(name="monthly", period=30, fourier_order=3)

# Register the 0/1 `weekend` column of `train` as an event, then fit on hourly data.
# The fit call is the cell's last expression, so its return value is displayed.
m2 = m2.add_events(["weekend"])
m2.fit(train, freq="H")


  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
INFO - (NP.df_utils._infer_frequency) - Major frequency h corresponds to 99.985% of the data.
  aux_ts = pd.DataFrame(pd.date_range("1994-01-01", periods=100, freq=freq_str))
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
  df_grouped = df.groupby("ID").apply(lambda x: x.set_index("ds").resample(freq).asfreq()).drop(columns=["ID"])
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.


Training: |          | 0/? [00:00<?, ?it/s]

c:\Users\findu\Desktop\중요하고 급한 폴더\tothemars\.conda\lib\site-packages\pytorch_lightning\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
c:\Users\findu\Desktop\중요하고 급한 폴더\tothemars\.conda\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |          | 0/? [02:09<?, ?it/s, v_num=4, train_loss=0.000784, reg_loss=2.93e-5, MAE=299.0, RMSE=437.0, Loss=0.000782, RegLoss=2.93e-5]


Unnamed: 0,train_loss,reg_loss,MAE,RMSE,Loss,RegLoss,epoch
0,0.148282,0.000000,4735.522461,8172.810059,0.147439,0.000000,0
1,0.002681,0.000000,588.464417,800.017212,0.002672,0.000000,1
2,0.001829,0.000000,478.902496,672.974731,0.001828,0.000000,2
3,0.001750,0.000000,469.352844,659.555237,0.001748,0.000000,3
4,0.001690,0.000000,463.427063,650.180420,0.001694,0.000000,4
...,...,...,...,...,...,...,...
95,0.000785,0.000030,298.663269,436.709686,0.000783,0.000030,95
96,0.000785,0.000030,298.665741,437.402985,0.000785,0.000030,96
97,0.000784,0.000029,299.625946,438.079407,0.000784,0.000029,97
98,0.000785,0.000030,298.459412,437.164001,0.000783,0.000030,98


In [128]:
# 1. Read the test timestamps and give them the ds / y / weekend layout used for training.
future = pd.read_csv(data_path + 'test.csv')
future['ds'] = pd.to_datetime(future['ID'])
future = future.drop(columns='ID')
# Placeholder target value.
# NOTE(review): the model uses lagged `y` values as inputs, so these zeros presumably
# feed into the forecasts of later rows — confirm this is intended.
future['y'] = 0
future['weekend'] = (future['ds'].dt.dayofweek >= 5).astype(int)

# 2. Training rows from 2023-12-31 onward, prepended as history ahead of the test rows.
last_day_train = train[train['ds'] >= '2023-12-31']

# 3. 24 additional hourly rows after the last test timestamp.
#    Lowercase 'h' is the current pandas alias for hourly frequency; the uppercase
#    'H' spelling is deprecated and triggers a FutureWarning.
last_future_date = future['ds'].max()
extra_dates = pd.date_range(start=last_future_date + pd.Timedelta(hours=1), periods=24, freq='h')
extra_future = pd.DataFrame({'ds': extra_dates})
extra_future['y'] = 0
extra_future['weekend'] = (extra_future['ds'].dt.dayofweek >= 5).astype(int)

# 4. Stack history, test rows and the extra rows into one frame with a fresh index.
combined_future = pd.concat([last_day_train, future, extra_future], ignore_index=True)

  extra_dates = pd.date_range(start=last_future_date + pd.Timedelta(hours=1), periods=24, freq='H')


In [134]:
# 5. Run the forecast over history + test rows + extra rows.
forecast = m2.predict(combined_future)

# 6. Keep the timestamp and the `yhat1` forecast column, renamed to `predicted_price`.
predict = forecast[['ds', 'yhat1']].rename(columns={'yhat1': 'predicted_price'})

# 7. Row-over-row relative change of the predicted price.
#    fill_method=None avoids the deprecated implicit forward-fill inside pct_change;
#    rows whose change cannot be computed (NaN) are then set to 0.
predict['price_change'] = predict['predicted_price'].pct_change(fill_method=None).fillna(0)

# 8. Map a relative price change onto the four classes 0-3.
def classify_price_change(change, threshold=0.005):
    """Classify a relative price change into one of four direction/magnitude classes.

    Args:
        change: Relative price change (e.g. 0.01 for +1%).
        threshold: Positive magnitude separating "small" from "large" moves.
            Defaults to 0.005, the value previously hard-coded.

    Returns:
        None if `change` is NaN, otherwise:
        0 if change < -threshold,
        1 if -threshold <= change < 0,
        2 if 0 <= change < threshold,
        3 if change >= threshold.
    """
    if np.isnan(change):
        return None
    # Each check below only runs when all earlier ones failed, so the lower bound
    # of every band is already guaranteed.
    if change < -threshold:
        return 0
    if change < 0:
        return 1
    if change < threshold:
        return 2
    return 3

# 9. Label every row with its price-change class.
predict['price_change_category'] = predict['price_change'].apply(classify_price_change)

# 10. Keep only rows inside the original test range (both ends inclusive).
predict_filtered = predict.loc[predict['ds'].between(future['ds'].min(), future['ds'].max())]

# 11. Inspect the result
#print(predict_filtered[['ds', 'predicted_price', 'price_change', 'price_change_category']])

# 12. Save as CSV
#predict_filtered.to_csv('bitcoin_price_prediction_with_categories.csv', index=False)

# Submission layout: timestamp as `ID`, class label as `target`.
csv = predict_filtered.loc[:, ['ds', 'price_change_category']]
csv.columns = ['ID', 'target']
csv.to_csv('output.csv', index=False)

# 13. Report the last timestamp that received a prediction.
print("Last prediction date:", predict_filtered['ds'].max())

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
INFO - (NP.df_utils._infer_frequency) - Major frequency h corresponds to 99.965% of the data.
  aux_ts = pd.DataFrame(pd.date_range("1994-01-01", periods=100, freq=freq_str))
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
INFO - (NP.df_utils._infer_frequency) - Major frequency h corresponds to 99.965% of the data.
  aux_ts = pd.DataFrame(pd.date_range("1994-01-01", periods=100, freq=freq_str))
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
  df_grouped = df.groupby("ID").apply(lambda

Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 54.85it/s]

INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column



Last prediction date: 2024-04-26 07:00:00


  predict['price_change'] = predict['predicted_price'].pct_change().fillna(0)


In [48]:
# 1. Build the future frame in one chain: parse `ID` into `ds`, drop `ID`,
#    then add the placeholder target and the weekend flag.
future = (
    pd.read_csv(data_path + 'test.csv')
    .assign(ds=lambda frame: pd.to_datetime(frame['ID']))
    .drop(columns='ID')
    .assign(
        y=0,
        weekend=lambda frame: (frame['ds'].dt.dayofweek >= 5).astype(int),
    )
)
future.head()

Unnamed: 0,ds,y,weekend
0,2024-01-01 00:00:00,0.0,0
1,2024-01-01 01:00:00,0.0,0
2,2024-01-01 02:00:00,0.0,0
3,2024-01-01 03:00:00,0.0,0
4,2024-01-01 04:00:00,0.0,0


In [51]:

# 2. Run the forecast directly on the test frame.
forecast = m2.predict(df=future)

# 3. Keep the timestamp and the `yhat1` forecast column, renamed to `predicted_price`.
predict = forecast[['ds', 'yhat1']].rename(columns={'yhat1': 'predicted_price'})

# Surface missing forecasts explicitly: the fillna(0) below turns every NaN into a
# 0.0 change, which would otherwise pass silently as a valid "no change" value.
n_missing = int(predict['predicted_price'].isna().sum())
if n_missing:
    print(f"Warning: {n_missing} of {len(predict)} predicted prices are NaN")

# 4. Row-over-row relative change of the predicted price.
#    fill_method=None avoids the deprecated implicit forward-fill inside pct_change;
#    rows whose change cannot be computed (NaN) are then set to 0.
predict['price_change'] = predict['predicted_price'].pct_change(fill_method=None).fillna(0)

# 5. Map a relative price change onto the four classes 0-3.
def classify_price_change(change, threshold=0.005):
    """Classify a relative price change into one of four direction/magnitude classes.

    Args:
        change: Relative price change (e.g. 0.01 for +1%).
        threshold: Positive magnitude separating "small" from "large" moves.
            Defaults to 0.005, the value previously hard-coded.

    Returns:
        None if `change` is NaN, otherwise:
        0 if change < -threshold,
        1 if -threshold <= change < 0,
        2 if 0 <= change < threshold,
        3 if change >= threshold.
    """
    if np.isnan(change):
        return None
    # Each check below only runs when all earlier ones failed, so the lower bound
    # of every band is already guaranteed.
    if change < -threshold:
        return 0
    if change < 0:
        return 1
    if change < threshold:
        return 2
    return 3

# 6. Label every row with its price-change class.
predict['price_change_category'] = predict['price_change'].apply(classify_price_change)

# 7. Keep only rows inside the test window (both ends inclusive).
#    The bounds are taken from `future` itself rather than hard-coded timestamps,
#    so the filter stays correct if the test file changes.
start_date = future['ds'].min()
end_date = future['ds'].max()
predict_filtered = predict[(predict['ds'] >= start_date) & (predict['ds'] <= end_date)]

# 8. Inspect the result.
print(predict_filtered[['ds', 'predicted_price', 'price_change', 'price_change_category']])

# 9. Save as CSV.
predict_filtered.to_csv('bitcoin_price_prediction_with_categories.csv', index=False)

# 10. Report the last timestamp present in the (unfiltered) prediction frame.
print("Last prediction date:", predict['ds'].max())

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
INFO - (NP.df_utils._infer_frequency) - Major frequency h corresponds to 99.964% of the data.
  aux_ts = pd.DataFrame(pd.date_range("1994-01-01", periods=100, freq=freq_str))
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
INFO - (NP.df_utils._infer_frequency) - Major frequency h corresponds to 99.964% of the data.
  aux_ts = pd.DataFrame(pd.date_range("1994-01-01", periods=100, freq=freq_str))
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
  df_grouped = df.groupby("ID").apply(lambda

Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 48.20it/s]

INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column



                      ds  predicted_price  price_change  price_change_category
0    2024-01-01 00:00:00              NaN           0.0                      2
1    2024-01-01 01:00:00              NaN           0.0                      2
2    2024-01-01 02:00:00              NaN           0.0                      2
3    2024-01-01 03:00:00              NaN           0.0                      2
4    2024-01-01 04:00:00              NaN           0.0                      2
...                  ...              ...           ...                    ...
2787 2024-04-26 03:00:00              NaN           0.0                      2
2788 2024-04-26 04:00:00              NaN           0.0                      2
2789 2024-04-26 05:00:00              NaN           0.0                      2
2790 2024-04-26 06:00:00              NaN           0.0                      2
2791 2024-04-26 07:00:00              NaN           0.0                      2

[2792 rows x 4 columns]
Last prediction date: 2024

  predict['price_change'] = predict['predicted_price'].pct_change().fillna(0)


In [None]:


# 11-12. Optional previews: print the first rows of the trend component and of the
# 2-step-ahead forecast, each only when the column is present in `forecast`.
for column, heading in (
    ('trend', "\nTrend component:"),
    ('yhat2', "\n2-step ahead prediction:"),
):
    if column in forecast.columns:
        print(heading)
        print(forecast[['ds', column]].head())

### Output File Save

In [18]:
# Attach the predicted classes as the `target` column and write the submission file.
# NOTE(review): `submission_df` and `y_test_pred_class` are not assigned anywhere in
# the visible part of this notebook — confirm they exist before running this cell,
# otherwise it fails on a fresh kernel.
submission_df = submission_df.assign(target = y_test_pred_class)
submission_df.to_csv("output.csv", index=False)

In [3]:
import pandas as pd
import numpy as np
from neuralprophet import NeuralProphet
import os

# Portable data directory: os.path.join picks the platform's separator, and the
# trailing '' keeps the final separator that `data_path + <filename>` relies on.
data_path = os.path.join(os.getcwd(), 'data', 'data', '')
# 1. Load and preprocess: parse timestamps, rename to the ds / y layout,
#    drop the unused volume column, and keep rows on or after the cutoff.
train = pd.read_csv(data_path + 'HOURLY_MARKET-DATA_PRICE-OHLCV_ALL_EXCHANGE_SPOT_BTC_USD.csv')
train['datetime'] = pd.to_datetime(train['datetime'])
train = train.rename(columns={'close': 'y', 'datetime' : 'ds'})
train = train.drop(columns='volume')
cutoff = "2023-04-01"
train = train[train['ds']>=cutoff]

In [4]:
# Preview the first rows of the filtered training frame.
train.head()

Unnamed: 0,ds,y
2160,2023-04-01 00:00:00,28441.265996
2161,2023-04-01 01:00:00,28614.022826
2162,2023-04-01 02:00:00,28576.108274
2163,2023-04-01 03:00:00,28534.29525
2164,2023-04-01 04:00:00,28579.253756


In [6]:
# 1. Read the test timestamps; `y` is the value to predict, so it starts as NaN.
#    (NaN divided by the scale factor is still NaN, so `future` needs no scaling.)
future = pd.read_csv(data_path + 'test.csv')
future['ds'] = pd.to_datetime(future['ID'])
future = future.drop(columns='ID')
future['y'] = np.nan

# 2. Scale prices down by `scale_factor`.
#    The factor already applied is remembered in `train.attrs`, so re-running this
#    cell does not divide `train['y']` a second time; a plain in-place division
#    would shrink the prices again on every execution.
scale_factor = 1000
applied_scale = train.attrs.get('y_scale_factor', 1)
if applied_scale != scale_factor:
    train['y'] = train['y'] * applied_scale / scale_factor
    train.attrs['y_scale_factor'] = scale_factor

# 3. Weekend flag: 1 for day-of-week 5 or 6 (Saturday/Sunday), else 0.
train['weekend'] = (train['ds'].dt.dayofweek >= 5).astype(int)
future['weekend'] = (future['ds'].dt.dayofweek >= 5).astype(int)

# 4. Training rows from one day before the last training timestamp onward.
last_day_train = train[train['ds'] >= train['ds'].max() - pd.Timedelta(days=1)]

# 5. 24 additional hourly rows after the last test timestamp.
last_future_date = future['ds'].max()
extra_dates = pd.date_range(start=last_future_date + pd.Timedelta(hours=1), periods=24, freq='h')
extra_future = pd.DataFrame({'ds': extra_dates, 'y': np.nan, 'weekend': (extra_dates.dayofweek >= 5).astype(int)})

# 6. Stack history, test rows and the extra rows into one frame with a fresh index.
combined_future = pd.concat([last_day_train, future, extra_future], ignore_index=True)


In [7]:

# 7. Redefine and train the model. Same configuration as `m2` except for a
#    single forecast step (n_forecasts=1) and a lower learning rate (0.01).
# NOTE(review): `train` is cut off at 2023-04-01, so it presumably spans less than
# one year — confirm that yearly seasonality is meaningful on this window.
m = NeuralProphet(
    growth="linear",
    n_changepoints=10,
    changepoints_range=0.9,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    seasonality_mode="multiplicative",
    n_lags=24,
    n_forecasts=1,
    ar_reg=0.1,
    learning_rate=0.01,
    epochs=100,
    batch_size=64,
    loss_func="Huber"
)

# Additional custom seasonalities.
# NOTE(review): `period` is presumably expressed in days (90 ~ quarterly,
# 30 ~ monthly) — confirm against the NeuralProphet documentation.
m = m.add_seasonality(name="quarterly", period=90, fourier_order=5)
m = m.add_seasonality(name="monthly", period=30, fourier_order=3)

# Register the 0/1 `weekend` column of `train` as an event, then fit on hourly data.
# The fit call is the cell's last expression, so its return value is displayed.
m = m.add_events(["weekend"])
m.fit(train, freq="h")

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

INFO - (NP.df_utils._infer_frequency) - Major frequency h corresponds to 99.985% of the data.
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - h
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.


Epoch 1:   1%|          | 1/100 [00:00<00:00, 991.80it/s]





Training: |          | 0/? [01:25<?, ?it/s, v_num=5, train_loss=0.000347, reg_loss=0.000149, MAE=0.000143, RMSE=0.000227, Loss=0.000347, RegLoss=0.000149]


Unnamed: 0,train_loss,reg_loss,MAE,RMSE,Loss,RegLoss,epoch
0,0.356511,0.000000,0.011657,0.014665,0.355856,0.000000,0
1,0.078668,0.000000,0.004035,0.004926,0.078610,0.000000,1
2,0.033370,0.000000,0.002526,0.003072,0.033340,0.000000,2
3,0.016234,0.000000,0.001737,0.002128,0.016218,0.000000,3
4,0.008097,0.000000,0.001215,0.001500,0.008090,0.000000,4
...,...,...,...,...,...,...,...
95,0.000348,0.000149,0.000144,0.000226,0.000348,0.000149,95
96,0.000348,0.000149,0.000143,0.000226,0.000348,0.000149,96
97,0.000348,0.000149,0.000144,0.000227,0.000348,0.000149,97
98,0.000348,0.000149,0.000144,0.000226,0.000348,0.000149,98


In [11]:
# 1. Read the test timestamps and give them the ds / y / weekend layout used for training.
future = pd.read_csv(data_path + 'test.csv')
future['ds'] = pd.to_datetime(future['ID'])
future = future.drop(columns='ID')
# Placeholder target value.
# NOTE(review): the model uses lagged `y` values as inputs, so these zeros presumably
# feed into the forecasts of later rows — confirm this is intended.
future['y'] = 0
future['weekend'] = (future['ds'].dt.dayofweek >= 5).astype(int)

# 2. Training rows from 2023-12-31 onward, prepended as history ahead of the test rows.
last_day_train = train[train['ds'] >= '2023-12-31']

# 3. 24 additional hourly rows after the last test timestamp.
#    Lowercase 'h' is the current pandas alias for hourly frequency; the uppercase
#    'H' spelling is deprecated and triggers a FutureWarning.
last_future_date = future['ds'].max()
extra_dates = pd.date_range(start=last_future_date + pd.Timedelta(hours=1), periods=24, freq='h')
extra_future = pd.DataFrame({'ds': extra_dates})
extra_future['y'] = 0
extra_future['weekend'] = (extra_future['ds'].dt.dayofweek >= 5).astype(int)

# 4. Stack history, test rows and the extra rows into one frame with a fresh index.
combined_future = pd.concat([last_day_train, future, extra_future], ignore_index=True)

# 5. Run the forecast over history + test rows + extra rows.
forecast = m.predict(combined_future)

# 6. Keep the timestamp and the `yhat1` forecast column, renamed to `predicted_price`.
predict = forecast[['ds', 'yhat1']].rename(columns={'yhat1': 'predicted_price'})

# 7. Row-over-row relative change of the predicted price.
#    fill_method=None avoids the deprecated implicit forward-fill inside pct_change;
#    rows whose change cannot be computed (NaN) are then set to 0.
predict['price_change'] = predict['predicted_price'].pct_change(fill_method=None).fillna(0)

# 8. Map a relative price change onto the four classes 0-3.
def classify_price_change(change, threshold=0.005):
    """Classify a relative price change into one of four direction/magnitude classes.

    Args:
        change: Relative price change (e.g. 0.01 for +1%).
        threshold: Positive magnitude separating "small" from "large" moves.
            Defaults to 0.005, the value previously hard-coded.

    Returns:
        None if `change` is NaN, otherwise:
        0 if change < -threshold,
        1 if -threshold <= change < 0,
        2 if 0 <= change < threshold,
        3 if change >= threshold.
    """
    if np.isnan(change):
        return None
    # Each check below only runs when all earlier ones failed, so the lower bound
    # of every band is already guaranteed.
    if change < -threshold:
        return 0
    if change < 0:
        return 1
    if change < threshold:
        return 2
    return 3

# 9. Label every row with its price-change class.
predict['price_change_category'] = predict['price_change'].apply(classify_price_change)

# 10. Keep only rows inside the original test range (both ends inclusive).
predict_filtered = predict.loc[predict['ds'].between(future['ds'].min(), future['ds'].max())]

# 11. Inspect the result
#print(predict_filtered[['ds', 'predicted_price', 'price_change', 'price_change_category']])

# 12. Save the full prediction table, then the submission file
#     (timestamp as `ID`, class label as `target`).
predict_filtered.to_csv('bitcoin_price_prediction_with_categories.csv', index=False)

csv = predict_filtered.loc[:, ['ds', 'price_change_category']]
csv.columns = ['ID', 'target']
csv.to_csv('output.csv', index=False)

# 13. Report the last timestamp that received a prediction.
print("Last prediction date:", predict_filtered['ds'].max())

  extra_dates = pd.date_range(start=last_future_date + pd.Timedelta(hours=1), periods=24, freq='H')



  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

INFO - (NP.df_utils._infer_frequency) - Major frequency h corresponds to 99.965% of the data.
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - h
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

INFO - (NP.df_utils._infer_frequency) - Major frequency h corresponds to 99.965% of the data.
  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - h


Predicting DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 89.13it/s]

INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column



Last prediction date: 2024-04-26 07:00:00
