# Load libraries & data

## Import 

In [1]:
import pandas as pd
import numpy as np
import os
import random
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

## set seed

In [2]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

## Load data 

In [3]:
path = "/mnt/d/data/jeju"

train_df = pd.read_csv(os.path.join(path, './train.csv'))
test_df = pd.read_csv(os.path.join(path, './test.csv'))

# Preprocess

In [4]:
new_train = train_df[['ID','timestamp','supply(kg)', 'price(원/kg)']]
new_test = test_df[['ID','timestamp']]

new_train['item_id'] = new_train.ID.str[0:6]
new_test['item_id'] = new_test.ID.str[0:6]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_train['item_id'] = new_train.ID.str[0:6]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_test['item_id'] = new_test.ID.str[0:6]


## use only not zero data

In [5]:
# new_train = new_train[new_train["price(원/kg)"]!=0]

# Train autogluon

In [6]:
data = TimeSeriesDataFrame(new_train.drop(columns=['ID']))
predictor = TimeSeriesPredictor( 
    prediction_length=28,
    target="price(원/kg)",
    eval_metric="RMSE",
    # ignore_time_index=True,
)
# seed 고정
predictor.fit(data, num_val_windows=1)

TimeSeriesPredictor.fit() called
Fitting with arguments:
{'enable_ensemble': True,
 'evaluation_metric': 'RMSE',
 'excluded_model_types': None,
 'hyperparameter_tune_kwargs': None,
 'hyperparameters': 'default',
 'num_val_windows': 1,
 'prediction_length': 28,
 'random_seed': None,
 'target': 'price(원/kg)',
 'time_limit': None,
 'verbosity': 2}
Provided training data set with 59397 rows, 39 items (item = single time series). Average time series length is 1523.0. Data frequency is 'D'.
AutoGluon will save models to AutogluonModels/ag-20231115_144006
AutoGluon will gauge predictive performance using evaluation metric: 'RMSE'
	This metric's sign has been flipped to adhere to being 'higher is better'. The reported score can be multiplied by -1 to get the metric value.

Provided dataset contains following columns:
	target:           'price(원/kg)'
	past covariates:  ['supply(kg)']

Starting training. Start time is 2023-11-15 23:40:06
Models that will be trained: ['Naive', 'SeasonalNaive', 'T

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x7fef88cfce50>

In [8]:
predictor.refit_full(model="all")

Refitting models via `refit_full` using all of the data (combined train and validation)...
	Models trained in this way will have the suffix '_FULL' and have NaN validation score.
	This process is not bound by time_limit, but should take less time than the original `fit` call.
Fitting model: Naive_FULL | Skipping fit via cloning parent ...
Fitting model: SeasonalNaive_FULL | Skipping fit via cloning parent ...
Fitting model: Theta_FULL | Skipping fit via cloning parent ...
Fitting model: AutoETS_FULL | Skipping fit via cloning parent ...
Fitting model: RecursiveTabular_FULL
	12.38   s     = Training runtime
Fitting model: DeepAR_FULL | Skipping fit via cloning parent ...
Fitting model: Naive_FULL_FULL | Skipping fit via cloning parent ...
Fitting model: SeasonalNaive_FULL_FULL | Skipping fit via cloning parent ...
Fitting model: Theta_FULL_FULL | Skipping fit via cloning parent ...
Fitting model: AutoETS_FULL_FULL | Skipping fit via cloning parent ...
Fitting model: RecursiveTabular_FUL

{'Naive': 'Naive_FULL',
 'SeasonalNaive': 'SeasonalNaive_FULL',
 'Theta': 'Theta_FULL',
 'AutoETS': 'AutoETS_FULL',
 'RecursiveTabular': 'RecursiveTabular_FULL',
 'DeepAR': 'DeepAR_FULL',
 'Naive_FULL': 'Naive_FULL_FULL',
 'SeasonalNaive_FULL': 'SeasonalNaive_FULL_FULL',
 'Theta_FULL': 'Theta_FULL_FULL',
 'AutoETS_FULL': 'AutoETS_FULL_FULL',
 'RecursiveTabular_FULL': 'RecursiveTabular_FULL_FULL',
 'WeightedEnsemble': 'WeightedEnsemble_FULL'}

In [10]:
pred = predictor.predict(data)

Global seed set to 123
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble_FULL


In [11]:
submission = pd.read_csv(os.path.join(path, './sample_submission.csv'))
submission['answer'] = pred.reset_index()['mean']
submission.loc[ submission['answer'] < 0.0, 'answer'] = 0.0
submission.to_csv('./dacon_submission.csv', index=False)
submission

Unnamed: 0,ID,answer
0,TG_A_J_20230304,3305.776155
1,TG_A_J_20230305,560.808155
2,TG_A_J_20230306,2991.557792
3,TG_A_J_20230307,3461.988503
4,TG_A_J_20230308,3301.765249
...,...,...
1087,RD_F_J_20230327,574.323583
1088,RD_F_J_20230328,540.127946
1089,RD_F_J_20230329,545.481007
1090,RD_F_J_20230330,536.382355
