# Load libraries & data

## Import 

In [1]:
import pandas as pd
import numpy as np
import os
import random
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

## set seed

In [2]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

## Load data 

In [3]:
path = "/mnt/d/data/jeju"

train_df = pd.read_csv(os.path.join(path, './train.csv'))
test_df = pd.read_csv(os.path.join(path, './test.csv'))

# Preprocess

In [4]:
new_train = train_df[['ID','timestamp','supply(kg)', 'price(원/kg)']]
new_test = test_df[['ID','timestamp']]

new_train['item_id'] = new_train.ID.str[0:6]
new_test['item_id'] = new_test.ID.str[0:6]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_train['item_id'] = new_train.ID.str[0:6]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_test['item_id'] = new_test.ID.str[0:6]


In [5]:
new_train

Unnamed: 0,ID,timestamp,supply(kg),price(원/kg),item_id
0,TG_A_J_20190101,2019-01-01,0.0,0.0,TG_A_J
1,TG_A_J_20190102,2019-01-02,0.0,0.0,TG_A_J
2,TG_A_J_20190103,2019-01-03,60601.0,1728.0,TG_A_J
3,TG_A_J_20190104,2019-01-04,25000.0,1408.0,TG_A_J
4,TG_A_J_20190105,2019-01-05,32352.0,1250.0,TG_A_J
...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,452440.0,468.0,RD_F_J
59393,RD_F_J_20230228,2023-02-28,421980.0,531.0,RD_F_J
59394,RD_F_J_20230301,2023-03-01,382980.0,574.0,RD_F_J
59395,RD_F_J_20230302,2023-03-02,477220.0,523.0,RD_F_J


# Train autogluon

In [6]:
data = TimeSeriesDataFrame(new_train.drop(columns=['ID']))
predictor = TimeSeriesPredictor( 
    prediction_length=28,
    target="price(원/kg)",
    eval_metric="RMSE",
)
# seed 고정
predictor.fit(data, num_val_windows=1, presets="best_quality")

TimeSeriesPredictor.fit() called
Setting presets to: best_quality
Fitting with arguments:
{'enable_ensemble': True,
 'evaluation_metric': 'RMSE',
 'excluded_model_types': None,
 'hyperparameter_tune_kwargs': {'num_trials': 3,
                                'scheduler': 'local',
                                'searcher': 'auto'},
 'hyperparameters': 'best_quality',
 'num_val_windows': 1,
 'prediction_length': 28,
 'random_seed': None,
 'target': 'price(원/kg)',
 'time_limit': None,
 'verbosity': 2}
Provided training data set with 59397 rows, 39 items (item = single time series). Average time series length is 1523.0. Data frequency is 'D'.
AutoGluon will save models to AutogluonModels/ag-20231117_131432
AutoGluon will gauge predictive performance using evaluation metric: 'RMSE'
	This metric's sign has been flipped to adhere to being 'higher is better'. The reported score can be multiplied by -1 to get the metric value.

Provided dataset contains following columns:
	target:           'pr

  0%|          | 0/3 [00:00<?, ?it/s]

	Trained 3 models while tuning DeepAR.
	-731.9471     = Validation score (-RMSE)
	719.44  s     = Total tuning time
Hyperparameter tuning model: TemporalFusionTransformer. 
	-736.0263     = Validation score (-RMSE)
	260.57  s     = Training runtime
	0.07    s     = Validation (prediction) runtime
Hyperparameter tuning model: PatchTST. 
	-1147.2427    = Validation score (-RMSE)
	49.11   s     = Training runtime
	0.08    s     = Validation (prediction) runtime
Hyperparameter tuning model: DirectTabular. 
	-793.9492     = Validation score (-RMSE)
	1.94    s     = Training runtime
	0.17    s     = Validation (prediction) runtime
Hyperparameter tuning model: AutoARIMA. 
	-850.1019     = Validation score (-RMSE)
	0.03    s     = Training runtime
	38.19   s     = Validation (prediction) runtime
Fitting simple weighted ensemble.
	-652.8199     = Validation score (-RMSE)
	2.09    s     = Training runtime
	5.77    s     = Validation (prediction) runtime
Training complete. Models trained: ['Naive

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x7f72c1027cd0>

In [7]:
predictor.leaderboard()

                        model    score_val  pred_time_val  fit_time_marginal  fit_order
0            WeightedEnsemble  -652.819887       5.770372           2.091178         13
1                   DeepAR/T2  -731.947120       1.013033         214.140791          7
2   TemporalFusionTransformer  -736.026337       0.069723         260.571623          9
3                   DeepAR/T3  -744.052768       1.362737         337.948512          8
4            RecursiveTabular  -792.188106       1.625286          36.504223          5
5                   DeepAR/T1  -792.779818       0.877802         167.072783          6
6               DirectTabular  -793.949226       0.168086           1.939285         11
7                     AutoETS  -797.059173       8.832402           0.031781          4
8                       Theta  -838.834688      19.199668           0.028284          3
9               SeasonalNaive  -843.925825       0.040265           0.033828          2
10                  AutoARIMA  -

Unnamed: 0,model,score_val,pred_time_val,fit_time_marginal,fit_order
0,WeightedEnsemble,-652.819887,5.770372,2.091178,13
1,DeepAR/T2,-731.94712,1.013033,214.140791,7
2,TemporalFusionTransformer,-736.026337,0.069723,260.571623,9
3,DeepAR/T3,-744.052768,1.362737,337.948512,8
4,RecursiveTabular,-792.188106,1.625286,36.504223,5
5,DeepAR/T1,-792.779818,0.877802,167.072783,6
6,DirectTabular,-793.949226,0.168086,1.939285,11
7,AutoETS,-797.059173,8.832402,0.031781,4
8,Theta,-838.834688,19.199668,0.028284,3
9,SeasonalNaive,-843.925825,0.040265,0.033828,2


In [8]:
predictor.refit_full(model="all")

Refitting models via `refit_full` using all of the data (combined train and validation)...
	Models trained in this way will have the suffix '_FULL' and have NaN validation score.
	This process is not bound by time_limit, but should take less time than the original `fit` call.
Fitting model: Naive_FULL | Skipping fit via cloning parent ...
Fitting model: SeasonalNaive_FULL | Skipping fit via cloning parent ...
Fitting model: Theta_FULL | Skipping fit via cloning parent ...
Fitting model: AutoETS_FULL | Skipping fit via cloning parent ...
Fitting model: RecursiveTabular_FULL
	12.38   s     = Training runtime
Fitting model: DeepAR_FULL | Skipping fit via cloning parent ...
Fitting model: Naive_FULL_FULL | Skipping fit via cloning parent ...
Fitting model: SeasonalNaive_FULL_FULL | Skipping fit via cloning parent ...
Fitting model: Theta_FULL_FULL | Skipping fit via cloning parent ...
Fitting model: AutoETS_FULL_FULL | Skipping fit via cloning parent ...
Fitting model: RecursiveTabular_FUL

{'Naive': 'Naive_FULL',
 'SeasonalNaive': 'SeasonalNaive_FULL',
 'Theta': 'Theta_FULL',
 'AutoETS': 'AutoETS_FULL',
 'RecursiveTabular': 'RecursiveTabular_FULL',
 'DeepAR': 'DeepAR_FULL',
 'Naive_FULL': 'Naive_FULL_FULL',
 'SeasonalNaive_FULL': 'SeasonalNaive_FULL_FULL',
 'Theta_FULL': 'Theta_FULL_FULL',
 'AutoETS_FULL': 'AutoETS_FULL_FULL',
 'RecursiveTabular_FULL': 'RecursiveTabular_FULL_FULL',
 'WeightedEnsemble': 'WeightedEnsemble_FULL'}

In [8]:
pred = predictor.predict(data)

Global seed set to 123
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


In [9]:
submission = pd.read_csv(os.path.join(path, './sample_submission.csv'))
submission['answer'] = pred.reset_index()['mean']
submission.loc[ submission['answer'] < 0.0, 'answer'] = 0.0
submission.to_csv('./dacon_submission.csv', index=False)
submission

Unnamed: 0,ID,answer
0,TG_A_J_20230304,3446.297633
1,TG_A_J_20230305,388.578624
2,TG_A_J_20230306,3380.089684
3,TG_A_J_20230307,3051.872186
4,TG_A_J_20230308,3298.558843
...,...,...
1087,RD_F_J_20230327,488.653159
1088,RD_F_J_20230328,493.503425
1089,RD_F_J_20230329,489.707449
1090,RD_F_J_20230330,481.240282
