### Import Libraries

In [18]:
import os
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.metrics import accuracy_score, roc_auc_score


### Load Preprocessed Data

In [38]:
# Load the preprocessed train/test CSV files from ./data/preprocessed
# (resolved against the current working directory).
data_path: str = os.path.join(os.path.abspath(''), 'data', 'preprocessed')


def _read_split(file_name: str) -> pd.DataFrame:
    """Read one CSV from `data_path`, using its first column as the index."""
    return pd.read_csv(os.path.join(data_path, file_name), index_col=0)


train_df: pd.DataFrame = _read_split("train.csv")
test_df: pd.DataFrame = _read_split("test.csv")
# Row-wise concatenation of both splits, train rows first.
all_df: pd.DataFrame = pd.concat([train_df, test_df], axis=0)

### Load Model

In [39]:
import importlib

# Module (file name without .py) to load from ./models. The class to
# instantiate is the module name with each '_'-separated word title-cased
# and joined, e.g. 'naive_model' -> 'NaiveModel'.
module_name = 'naive_model'
model_name = ''.join(word.title() for word in module_name.split('_'))

model_path: str = os.path.join(os.path.abspath(''), 'models')
# `Model` is used as the type annotation for model instances in later cells.
# NOTE(review): this import presumably relies on the working directory being
# importable (so that the `models` package resolves) - confirm.
from models.model import Model

# Make the files inside ./models importable by their bare module name.
if model_path not in sys.path:
    sys.path.append(model_path)

# Drop any previously imported copy so that re-running this cell executes the
# module source again instead of returning the cached module object.
if module_name in sys.modules:
    del sys.modules[module_name]

# Finders cache directory listings; without this, a module file created after
# the kernel started may not be found by import_module.
importlib.invalidate_caches()
module = importlib.import_module(module_name)
selected_model_cls = getattr(module, model_name)

### Train-Validation Process

In [43]:
# Time-series cross-validation of the selected model on the training data.
# Each fold is scored three ways: exact label accuracy, "direction" accuracy
# (labels 0/1 vs. everything else) and "strength" accuracy (labels 1/2 vs.
# everything else).
accuracy_list = []
direction_accuracy_list = []
strength_accuracy_list = []

train_X = train_df.drop(columns=['target'])
train_y = train_df['target']


def _direction_of(labels):
    """Map each label to 0 when it equals 0 or 1, otherwise to 1."""
    return labels.apply(lambda label: 0 if label in (0, 1) else 1)


def _strength_of(labels):
    """Map each label to 0 when it equals 1 or 2, otherwise to 1."""
    return labels.apply(lambda label: 0 if label in (1, 2) else 1)


tscv = TimeSeriesSplit(n_splits=10)
for cur_split, (train_index, val_index) in enumerate(tscv.split(train_df)):

    # Independent copies of this fold's training and validation rows.
    fold_train_X = train_X.iloc[train_index].copy()
    fold_train_y = train_y.iloc[train_index].copy()
    fold_val_X = train_X.iloc[val_index].copy()
    fold_val_y = train_y.iloc[val_index].copy()

    # A fresh model instance is fitted for every fold.
    model: Model = selected_model_cls()
    model.fit(fold_train_X, fold_train_y)
    fold_val_pred = model.predict(fold_val_X)

    # Binary views of predictions and ground truth.
    pred_direction = _direction_of(fold_val_pred)
    pred_strength = _strength_of(fold_val_pred)
    true_direction = _direction_of(fold_val_y)
    true_strength = _strength_of(fold_val_y)

    # The three scores are printed on a single line per fold.
    accuracy = accuracy_score(fold_val_y, fold_val_pred)
    accuracy_list.append(accuracy)
    print(f'정확도: {accuracy_list[-1]:.4f}', end='  ')

    direction_accuracy = accuracy_score(true_direction, pred_direction)
    direction_accuracy_list.append(direction_accuracy)
    print(f'방향 정확도: {direction_accuracy_list[-1]:.4f}', end='  ')

    strength_accuracy = accuracy_score(true_strength, pred_strength)
    strength_accuracy_list.append(strength_accuracy)
    print(f'강도 정확도: {strength_accuracy_list[-1]:.4f}', end='  ')
    print('')

# Averages over all folds.
print('')
print('평균 정확도: ', np.mean(accuracy_list))
print('평균 방향 정확도: ', np.mean(direction_accuracy_list))
print('평균 강도 정확도: ', np.mean(strength_accuracy_list))

정확도: 0.4271  방향 정확도: 0.4925  강도 정확도: 0.8668  
정확도: 0.3731  방향 정확도: 0.5214  강도 정확도: 0.7274  
정확도: 0.4083  방향 정확도: 0.4962  강도 정확도: 0.8266  
정확도: 0.4410  방향 정확도: 0.5038  강도 정확도: 0.8781  
정확도: 0.2638  방향 정확도: 0.4987  강도 정확도: 0.5465  
정확도: 0.4711  방향 정확도: 0.5126  강도 정확도: 0.9108  
정확도: 0.4736  방향 정확도: 0.5151  강도 정확도: 0.9058  
정확도: 0.4359  방향 정확도: 0.5038  강도 정확도: 0.8769  
정확도: 0.4460  방향 정확도: 0.5276  강도 정확도: 0.8480  
정확도: 0.4183  방향 정확도: 0.5138  강도 정확도: 0.8204  

평균 정확도:  0.4158291457286432
평균 방향 정확도:  0.5085427135678391
평균 강도 정확도:  0.8207286432160805


### Submission

In [17]:
# Fit a fresh model on the full training set and predict the test set.
model: Model = selected_model_cls()
model.fit(train_X, train_y)

# Predict from the same feature columns the model was fitted on: train_X has
# 'target' removed, so remove it from the test frame as well.
# NOTE(review): whether test.csv actually carries a 'target' column is not
# visible here; errors='ignore' makes this a no-op when it does not.
test_X = test_df.drop(columns=['target'], errors='ignore')
test_y_pred = model.predict(test_X)
test_y_pred = test_y_pred.astype(int)

# Hourly timestamps used as the submission row IDs.
submission_index = pd.date_range(
    start=pd.Timestamp('2024-01-01 00:00:00'),
    end=pd.Timestamp('2024-04-26 07:00:00'),
    freq='1h'
)
# Fail with an explicit message if the prediction count and the hard-coded
# ID range disagree.
if len(test_y_pred) != len(submission_index):
    raise ValueError(
        f'Got {len(test_y_pred)} predictions for '
        f'{len(submission_index)} submission timestamps'
    )

submission_df = pd.DataFrame(
    {'target': test_y_pred.values},
    index=submission_index
)
# Show how many rows were predicted for each label.
print(submission_df.value_counts())
submission_df.to_csv('output.csv', index=True, index_label='ID')

target
1         2792
Name: count, dtype: int64
