# Import

In [18]:
import os
import pandas as pd
import random
import numpy as np
from autogluon.tabular import TabularDataset, TabularPredictor
import autogluon.core as ag
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) 

# Data Load

In [19]:
train = pd.read_csv('train.csv').drop(columns=['SAMPLE_ID'])

test = pd.read_csv('test.csv').drop(columns=['SAMPLE_ID'])

In [20]:
train = train[train['DIST'] != 0]
train = train.reset_index(drop = True)
train

Unnamed: 0,ARI_CO,ARI_PO,SHIP_TYPE_CATEGORY,DIST,ATA,ID,BREADTH,BUILT,DEADWEIGHT,DEPTH,...,V_WIND,AIR_TEMPERATURE,BN,ATA_LT,DUBAI,BRENT,WTI,BDI_ADJ,PORT_SIZE,CI_HOUR
0,CN,EKP8,Bulk,30.736578,2020-10-15 4:03,Z517571,30.0,28,73100,20.0,...,3.77,15.9,2.730798,12,42.01,43.16,40.96,1407.668330,0.001660,3.048333
1,CN,EUC8,Container,63.220425,2019-09-17 2:55,U467618,30.0,15,37900,20.0,...,-6.72,24.5,4.289058,10,67.53,64.55,59.34,2089.046774,0.001614,17.138611
2,CN,NGG6,Container,90.427421,2019-02-23 6:43,V378315,50.0,7,115000,20.0,...,0.00,9.4,0.000000,14,65.30,66.39,56.94,603.193047,0.001743,98.827500
3,RU,NNC2,Container,8.813725,2022-08-13 12:57,D215135,30.0,10,27600,10.0,...,2.31,22.8,2.345875,14,90.45,93.65,88.11,1107.944894,0.000197,96.030556
4,CN,NGG6,Container,81.435335,2015-09-08 14:24,Z156413,30.0,22,18100,10.0,...,,,,22,45.75,48.89,45.92,820.288044,0.001743,42.078056
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220050,IN,UJM2,Bulk,30.199074,2022-03-23 8:35,Y242521,30.0,2,63500,20.0,...,3.54,36.5,4.306719,14,111.93,120.65,113.90,2077.159292,0.000217,53.400833
220051,CN,QQW1,Bulk,55.408765,2022-06-16 14:27,D236761,30.0,16,26500,10.0,...,0.96,28.2,2.651752,22,108.43,114.13,109.56,2067.433444,0.000595,83.960833
220052,CN,YRT6,Bulk,59.018184,2017-11-11 22:23,J661243,40.0,13,93200,20.0,...,,,,6,61.25,62.21,55.70,1333.609109,0.000360,65.850000
220053,SG,GIW5,Container,1.768630,2022-07-14 7:58,Q635545,30.0,6,25000,20.0,...,3.36,31.7,2.557156,15,97.73,99.10,95.78,1601.291086,0.002615,0.997500


# Data Pre-processing

In [21]:
# datetime 컬럼 처리
train['ATA'] = pd.to_datetime(train['ATA'])
test['ATA'] = pd.to_datetime(test['ATA'])

# datetime을 여러 파생 변수로 변환
for df in [train, test]:
    df['year'] = df['ATA'].dt.year
    df['month'] = df['ATA'].dt.month
    df['day'] = df['ATA'].dt.day
    df['hour'] = df['ATA'].dt.hour
    df['minute'] = df['ATA'].dt.minute
    df['weekday'] = df['ATA'].dt.weekday

# datetime 컬럼 제거
train.drop(columns='ATA', inplace=True)
test.drop(columns='ATA', inplace=True)

In [22]:
# Categorical 컬럼 인코딩
categorical_features = ['ARI_CO', 'ARI_PO', 'SHIP_TYPE_CATEGORY', 'ID', 'SHIPMANAGER', 'FLAG']


for feature in tqdm(categorical_features, desc="Encoding features"):
    encoder = LabelEncoder()
    train[feature] = encoder.fit_transform(train[feature])
    for label in np.unique(test[feature]):
        if label not in encoder.classes_:
            encoder.classes_ = np.append(encoder.classes_, label)
    test[feature] = encoder.transform(test[feature])

# 결측치 처리
train.fillna(train.mean(), inplace=True)
test.fillna(train.mean(), inplace=True)

Encoding features: 100%|██████████| 6/6 [00:12<00:00,  2.07s/it]


In [23]:
train_importance_features = ['month', 'day', 'BDI_ADJ','year','CI_HOUR']
test_importance_features = ['month', 'day', 'BDI_ADJ','year']

In [24]:
train = train[train_importance_features]
test = test[test_importance_features]

In [25]:
train_data = TabularDataset(train)
test_data = TabularDataset(test)

# Autogluon

In [26]:
label = 'CI_HOUR'
eval_metric = 'mean_absolute_error'

In [27]:
predictor2 = TabularPredictor(
    label=label, problem_type='regression', eval_metric=eval_metric
).fit(train_data, 
      presets='best_quality', 
      #num_stack_levels=3,
      #excluded_model_types = excluded_model_types,
      ag_args_fit={'num_gpus': 0}
      #num_gpus=1
      )

No path specified. Models will be saved in: "AutogluonModels\ag-20231017_073916\"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels\ag-20231017_073916\"
AutoGluon Version:  0.8.2
Python Version:     3.8.16
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
Disk Space Avail:   159.40 GB / 511.09 GB (31.2%)
Train Data Rows:    220055
Train Data Columns: 4
Label Column: CI_HOUR
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    18316.77 MB
	Train Data (Original)  Memory Usage: 4.4 MB (0.0% of avai

In [28]:
predictor2.leaderboard(silent = True)

Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,-18.082189,23.948082,145.954401,0.006502,1.310468,3,True,8
1,RandomForestMSE_BAG_L2,-18.112575,19.133026,129.804259,6.004931,105.820826,2,True,6
2,ExtraTreesMSE_BAG_L2,-18.525953,17.93665,38.823107,4.808554,14.839675,2,True,7
3,WeightedEnsemble_L2,-21.070655,6.594833,18.301848,0.0,1.843698,2,True,5
4,RandomForestMSE_BAG_L1,-21.075547,6.120175,16.240772,6.120175,16.240772,1,True,3
5,ExtraTreesMSE_BAG_L1,-23.887711,5.891456,7.332445,5.891456,7.332445,1,True,4
6,KNeighborsDist_BAG_L1,-35.59511,0.474658,0.217378,0.474658,0.217378,1,True,2
7,KNeighborsUnif_BAG_L1,-41.395235,0.641806,0.192838,0.641806,0.192838,1,True,1


In [10]:
predictor = TabularPredictor(
    label=label, problem_type='regression', eval_metric=eval_metric
).fit(train_data, 
      presets='best_quality', 
      #num_stack_levels=3,
      #excluded_model_types = excluded_model_types,
      num_gpus=1)

No path specified. Models will be saved in: "AutogluonModels\ag-20231016_051252\"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels\ag-20231016_051252\"
AutoGluon Version:  0.8.2
Python Version:     3.8.16
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
Disk Space Avail:   18.98 GB / 511.09 GB (3.7%)
Train Data Rows:    367441
Train Data Columns: 8
Label Column: CI_HOUR
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    19373.79 MB
	Train Data (Original)  Memory Usage: 19.11 MB (0.1% of avai

In [11]:
predictor.leaderboard(silent = True)

Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,-6.812446,4675.638708,5690.760465,0.004978,4.462162,3,True,20
1,RandomForestMSE_BAG_L2,-6.90512,3639.920054,4942.047835,8.908978,334.851027,2,True,14
2,ExtraTreesMSE_BAG_L2,-7.052058,3639.118638,4658.510261,8.107562,51.313452,2,True,16
3,LightGBMLarge_BAG_L2,-7.244969,4665.000014,4872.546138,1033.988938,265.349329,2,True,19
4,CatBoost_BAG_L2,-7.596639,3631.997084,5767.023896,0.986009,1159.827087,2,True,15
5,LightGBM_BAG_L2,-7.723093,3726.550966,4732.655613,95.53989,125.458805,2,True,13
6,NeuralNetTorch_BAG_L2,-8.172598,3632.735814,5086.097947,1.724738,478.901138,2,True,18
7,NeuralNetFastAI_BAG_L2,-8.622067,3634.029923,4886.59607,3.018848,279.399261,2,True,17
8,LightGBMXT_BAG_L2,-8.740319,4406.466877,4869.027779,775.455802,261.83097,2,True,12
9,WeightedEnsemble_L2,-8.922663,1390.256731,319.319569,0.0,5.295137,2,True,11


In [12]:
predictor.feature_importance(train_data) 

Computing feature importance via permutation shuffling for 8 features using 5000 rows with 5 shuffle sets...
	1721.4s	= Expected runtime (344.28s per shuffle set)
	756.12s	= Actual runtime (Completed 5 of 5 shuffle sets)


Unnamed: 0,importance,stddev,p_value,n,p99_high,p99_low
month,940.737553,16.381477,1.10292e-08,5,974.467242,907.007865
DUBAI,850.429598,11.924667,4.637661e-09,5,874.982653,825.876543
WTI,841.952225,8.620641,1.318648e-09,5,859.702243,824.202206
BDI_ADJ,838.793221,6.990974,5.789914e-10,5,853.187733,824.398709
year,837.225778,10.454654,2.917153e-09,5,858.752055,815.699502
BRENT,764.631709,4.940292,2.091021e-10,5,774.803838,754.459579
OIL,693.89149,10.005778,5.186783e-09,5,714.493526,673.289454
day,184.866021,1.399382,3.939714e-10,5,187.747368,181.984674


In [13]:
model_to_use = predictor.get_model_best()
model_pred = predictor.predict(test_data, model=model_to_use)

In [14]:
pred_y = np.where(model_pred < 0, 0, model_pred)
pred_y

array([  94.205536,  378.5741  ,    8.05019 , ...,    7.673829,
          9.269344, 1161.8945  ], dtype=float32)

In [15]:
submit = pd.read_csv('../open/sample_submission.csv')
submit['CI_HOUR'] = pred_y
submit.to_csv('../Sub/autogluon_1.csv', index=False)

# Sep

In [11]:
submission = pd.read_csv('sample_submission.csv')

In [13]:
from autogluon.tabular import TabularDataset, TabularPredictor

# 각 ARI_CO 별로 데이터 분할 후 학습 및 예측
unique_ari_co = train['year'].unique()
predictors = {}

for ari_co in unique_ari_co:
    # ARI_CO 별 데이터 분할
    train_subset = train[train['year'] == ari_co].copy()
    test_subset = test[test['year'] == ari_co].copy()
    train_subset.drop(['year'],axis=1,inplace=True)
    test_subset.drop(['year'],axis=1,inplace=True)    

    # 데이터셋 변환
    train_data = TabularDataset(train_subset)
    test_data = TabularDataset(test_subset)
    predictors[ari_co] = TabularPredictor(label='CI_HOUR', eval_metric='mean_absolute_error').fit(
        train_data, 
        presets='best_quality',
        num_gpus=1
    )
    # 예측 및 결과 저장
    y_pred = predictors[ari_co].predict(test_data)
    submission.loc[test_subset.index, 'CI_HOUR'] = y_pred.values
    print(ari_co)
    print(ari_co)
    print(ari_co)
    print(ari_co)
    print(ari_co)
    print(len(train_subset))
    print(len(train_subset))
    print(len(train_subset))
    print(len(train_subset))
    print(len(train_subset))
    #predictor.leaderboard(train, silent=True)    

# 최종 결과 확인
print(submission)
submission

No path specified. Models will be saved in: "AutogluonModels\ag-20231016_181113\"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels\ag-20231016_181113\"
AutoGluon Version:  0.8.2
Python Version:     3.8.16
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
Disk Space Avail:   153.17 GB / 511.09 GB (30.0%)


Train Data Rows:    25184
Train Data Columns: 7
Label Column: CI_HOUR
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (2152.193889, 0.016388889, 104.15655, 213.23163)
	If 'regression' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    18779.37 MB
	Train Data (Original)  Memory Usage: 1.21 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 G

2020
2020
2020
2020
2020
25184
25184
25184
25184
25184


	-36.8161	 = Validation score   (-mean_absolute_error)
	0.03s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	-32.0095	 = Validation score   (-mean_absolute_error)
	0.02s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-16.0199	 = Validation score   (-mean_absolute_error)
	28.58s	 = Training   runtime
	79.07s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-11.6047	 = Validation score   (-mean_absolute_error)
	24.32s	 = Training   runtime
	68.78s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1

2019
2019
2019
2019
2019
23892
23892
23892
23892
23892


	-37.4332	 = Validation score   (-mean_absolute_error)
	0.05s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	-31.8715	 = Validation score   (-mean_absolute_error)
	0.03s	 = Training   runtime
	0.13s	 = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-18.2987	 = Validation score   (-mean_absolute_error)
	37.37s	 = Training   runtime
	148.39s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-10.8411	 = Validation score   (-mean_absolute_error)
	30.07s	 = Training   runtime
	122.79s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_

2022
2022
2022
2022
2022
43318
43318
43318
43318
43318


	-46.4223	 = Validation score   (-mean_absolute_error)
	0.01s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	-40.6739	 = Validation score   (-mean_absolute_error)
	0.01s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-19.0195	 = Validation score   (-mean_absolute_error)
	22.4s	 = Training   runtime
	43.23s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-13.7381	 = Validation score   (-mean_absolute_error)
	22.71s	 = Training   runtime
	40.75s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 

2015
2015
2015
2015
2015
13452
13452
13452
13452
13452


	-33.4269	 = Validation score   (-mean_absolute_error)
	0.03s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	-29.5734	 = Validation score   (-mean_absolute_error)
	0.03s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-17.1654	 = Validation score   (-mean_absolute_error)
	28.53s	 = Training   runtime
	87.45s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-11.6091	 = Validation score   (-mean_absolute_error)
	25.54s	 = Training   runtime
	72.33s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1

2018
2018
2018
2018
2018
26192
26192
26192
26192
26192


	0.03s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	-34.9839	 = Validation score   (-mean_absolute_error)
	0.04s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-21.6088	 = Validation score   (-mean_absolute_error)
	25.59s	 = Training   runtime
	62.91s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-12.8288	 = Validation score   (-mean_absolute_error)
	23.79s	 = Training   runtime
	58.62s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are availa

2016
2016
2016
2016
2016
20486
20486
20486
20486
20486


	-34.6147	 = Validation score   (-mean_absolute_error)
	0.02s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	-30.2534	 = Validation score   (-mean_absolute_error)
	0.02s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-19.0805	 = Validation score   (-mean_absolute_error)
	26.65s	 = Training   runtime
	78.67s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-12.0091	 = Validation score   (-mean_absolute_error)
	25.1s	 = Training   runtime
	70.45s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 

2017
2017
2017
2017
2017
23785
23785
23785
23785
23785


	-34.506	 = Validation score   (-mean_absolute_error)
	0.05s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	-29.7109	 = Validation score   (-mean_absolute_error)
	0.04s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-17.2039	 = Validation score   (-mean_absolute_error)
	33.0s	 = Training   runtime
	120.26s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-10.2372	 = Validation score   (-mean_absolute_error)
	28.83s	 = Training   runtime
	110.5s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 .

2021
2021
2021
2021
2021
36385
36385
36385
36385
36385


	-44.174	 = Validation score   (-mean_absolute_error)
	11.86s	 = Training   runtime
	2.98s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-18.5138	 = Validation score   (-mean_absolute_error)
	12.76s	 = Training   runtime
	1.58s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	-21.3487	 = Validation score   (-mean_absolute_error)
	0.37s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-12.7344	 = Validation score   (-mean_absolute_error)
	121.76s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: ExtraTreesMSE_BAG_L1 ...

2014
2014
2014
2014
2014
1998
1998
1998
1998
1998


	-19.1165	 = Validation score   (-mean_absolute_error)
	0.0s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-7.8999	 = Validation score   (-mean_absolute_error)
	14.94s	 = Training   runtime
	9.59s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
	-8.1474	 = Validation score   (-mean_absolute_error)
	10.05s	 = Training   runtime
	1.72s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 ...
Specified total num_gpus: 1, but only 0 are available. Will use 0 instead
	-8.6461	 = Validation score   (-mean_absolute_error)
	0.39s	 = Training   runtime
	0.15s	 = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
Specifi

2023
2023
2023
2023
2023
5363
5363
5363
5363
5363
          SAMPLE_ID      CI_HOUR
0       TEST_000000    93.441460
1       TEST_000001   383.563049
2       TEST_000002     9.032112
3       TEST_000003    10.249702
4       TEST_000004     6.499660
...             ...          ...
244984  TEST_244984    45.439926
244985  TEST_244985   485.015625
244986  TEST_244986     7.015471
244987  TEST_244987    10.500275
244988  TEST_244988  1043.593994

[244989 rows x 2 columns]


Unnamed: 0,SAMPLE_ID,CI_HOUR
0,TEST_000000,93.441460
1,TEST_000001,383.563049
2,TEST_000002,9.032112
3,TEST_000003,10.249702
4,TEST_000004,6.499660
...,...,...
244984,TEST_244984,45.439926
244985,TEST_244985,485.015625
244986,TEST_244986,7.015471
244987,TEST_244987,10.500275


In [17]:
submission.to_csv('abc.csv',index=False)

In [14]:
test = pd.read_csv('test.csv')
all = pd.concat([test,submission],axis=1)
#all['CI_HOUR'][all['DIST'] == 0] = 0
all.loc[all['DIST'] == 0, 'CI_HOUR'] = 0
submission['CI_HOUR'] = all['CI_HOUR']
submission['CI_HOUR'][submission['CI_HOUR'] < 0] = 0
submission.to_csv('sep_testing_v22.csv',index=False)
submission

SettingWithCopyError: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy