In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as ms
import xgboost as xgb
import bisect

from scipy import stats
from tqdm import tqdm
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split, KFold
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import DBSCAN

In [2]:
train = pd.read_csv(r'C:\Users\dlwks\OneDrive\바탕 화면\VSCode\HD_CNC\train.csv').drop(['X1'], axis=1)
test = pd.read_csv(r'C:\Users\dlwks\OneDrive\바탕 화면\VSCode\HD_CNC\test.csv').drop(['Id', 'X1'], axis=1)

display(train.head())
display(test.head())

Unnamed: 0,X2,X3,X4,X5,X6,X7,X8,Y1,Y2
0,56.3,67.5,22.5,AH32,4,97,PL973,467,2241
1,115.0,67.0,13.0,AH32,2,27,PL271,1058,1163
2,59.2,44.6,18.0,AH32-TM,1,14,PL141,270,1094
3,52.1,97.4,15.0,A,17,24,PL242,391,2604
4,42.9,58.7,14.5,A,4,23,PL233,232,1564


Unnamed: 0,X2,X3,X4,X5,X6,X7,X8
0,80.6,104.8,11.0,A,29,23,PL234
1,141.8,34.2,17.5,A,2,91,PL911
2,77.2,103.3,17.0,A,10,97,PL973
3,95.5,131.0,18.0,A,2,23,PL234
4,91.7,36.1,19.0,AH32,1,27,PL271


| 항목 | 설명     |
|-----|----------|
| X1  | 작업번호 |
| X2  | 마킹길이 |
| X3  | 절단길이 |
| X4  | 철판두께 |
| X5  | 철판재질 |
| X6  | 절단갯수 |
| X7  | 작업장   |
| X8  | 작업자   |
| Y1  | 마킹시간 |
| Y2  | 절단시간 |

In [3]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121754 entries, 0 to 121753
Data columns (total 9 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   X2      121754 non-null  float64
 1   X3      121754 non-null  float64
 2   X4      121754 non-null  float64
 3   X5      121754 non-null  object 
 4   X6      121754 non-null  int64  
 5   X7      121754 non-null  int64  
 6   X8      121754 non-null  object 
 7   Y1      121754 non-null  int64  
 8   Y2      121754 non-null  int64  
dtypes: float64(3), int64(4), object(2)
memory usage: 8.4+ MB


In [4]:
train.describe()

Unnamed: 0,X2,X3,X4,X6,X7,Y1,Y2
count,121754.0,121754.0,121754.0,121754.0,121754.0,121754.0,121754.0
mean,77.608615,73.875636,17.444897,13.630049,43.921152,897.1671,2193.206638
std,48.006125,53.267359,4.498316,20.751914,32.509414,46047.04,2299.58539
min,3.2,1.8,6.0,1.0,14.0,0.0,0.0
25%,45.2,40.9,14.5,1.0,22.0,238.0,1064.0
50%,68.1,52.8,17.0,4.0,25.0,379.0,1593.0
75%,98.3,93.0,20.0,18.0,91.0,548.0,2778.0
max,524.4,621.3,40.0,416.0,97.0,6157364.0,348262.0


# 특징 만들기


In [5]:
# train['X8'] = train['X8'].str.extract(r'(PL|PS)')
# test['X8'] = test['X8'].str.extract(r'(PL|PS)')

In [6]:
# train['X8_encoded'] = (train['X8'] != 'PL').astype(int)
# test['X8_encoded'] = (test['X8'] != 'PL').astype(int)

# train = pd.get_dummies(train, columns=['X8_encoded'], prefix=['X8'])
# test = pd.get_dummies(test, columns=['X8_encoded'], prefix=['X8'])

# train[['X8_0', 'X8_1']] = train[['X8_0', 'X8_1']].astype(int)
# test[['X8_0', 'X8_1']] = test[['X8_0', 'X8_1']].astype(int)

In [8]:
mmtime = train.groupby('X5')['Y1'].mean()
mctime = train.groupby('X5')['Y2'].mean()

wmtime = train.groupby('X7')['Y1'].mean()
wctime = train.groupby('X7')['Y2'].mean()

manmtime = train.groupby('X8')['Y1'].mean()
manctime = train.groupby('X8')['Y2'].mean()

train['X5_mmtime'] = train['X5'].map(mmtime)
train['X5_mctime'] = train['X5'].map(mctime)

train['X7_wmtime'] = train['X7'].map(wmtime)
train['X7_wctime'] = train['X7'].map(wctime)

train['X8_manmtime'] = train['X8'].map(manmtime)
train['X8_manctime'] = train['X8'].map(manctime)

test['X5_mmtime'] = test['X5'].map(mmtime)
test['X5_mctime'] = test['X5'].map(mctime)

test['X7_wmtime'] = test['X7'].map(wmtime)
test['X7_wctime'] = test['X7'].map(wctime)

test['X8_manmtime'] = test['X8'].map(manmtime)
test['X8_manctime'] = test['X8'].map(manctime)

In [9]:
import re

def convert_x5(value):
    # -TM 이 포함되어 있으면 제거하고 숫자로 변환, 그렇지 않으면 그냥 숫자로 변환
    if '-TM' in value:
        value = value.replace('-TM', '')
    match = re.search(r'\d+', value)  # 정규표현식을 사용하여 숫자 추출
    if match:
        return int(match.group()) * 9.8
    else:
        return 235  # 숫자가 없는 경우 235로 설정

# X5 열에 변환 함수 적용
train['X5_numeric'] = train['X5'].apply(convert_x5)
test['X5_numeric'] = test['X5'].apply(convert_x5)

train.drop(columns = ['X5'], inplace = True)
test.drop(columns = ['X5'], inplace = True)

In [10]:
def estimate_rotation_speed(X2, X3, X4, X5_numeric, X6):
    rotation_speed = X2 + (X3 / X4) * (X5_numeric / 10) * (X6 / 2)
    return rotation_speed

train['Estimated_Rotation_Speed'] = estimate_rotation_speed(train['X2'], train['X3'], train['X4'], train['X5_numeric'], train['X6'])
test['Estimated_Rotation_Speed'] = estimate_rotation_speed(test['X2'], test['X3'], test['X4'], test['X5_numeric'], test['X6'])

In [11]:
cond_indices = train[(train['X2'] <= 500) | (train['X3'] <= 600) | (train['X4'] <= 36) | (train['X6'] <= 265)].index

train.loc[cond_indices, 'X2'] = np.where(train.loc[cond_indices, 'X2'] > 500, train['X2'].mean(), train.loc[cond_indices, 'X2'])
train.loc[cond_indices, 'X3'] = np.where(train.loc[cond_indices, 'X3'] > 600, train['X3'].mean(), train.loc[cond_indices, 'X3'])
train.loc[cond_indices, 'X4'] = np.where(train.loc[cond_indices, 'X4'] > 36, train['X4'].mean(), train.loc[cond_indices, 'X4'])
train.loc[cond_indices, 'X6'] = np.where(train.loc[cond_indices, 'X6'] > 265, train['X6'].mean(), train.loc[cond_indices, 'X6'])

Y1_idx = train[(train['Y1'] == 0) | (train['Y1'] > 70000)].index
Y2_idx = train[(train['Y2'] == 0) | (train['Y2'] > 70000)].index

train.loc[Y1_idx, 'Y1'] = train.loc[(train['Y1'] != 0) & (train['Y1'] <= 70000), 'Y1'].mean()
train.loc[Y2_idx, 'Y2'] = train.loc[(train['Y2'] != 0) & (train['Y2'] <= 70000), 'Y2'].mean()

In [12]:
scaler = MinMaxScaler()

columns_to_scale = ['X2', 'X3', 'X4', 'X6']

train[columns_to_scale] = scaler.fit_transform(train[columns_to_scale])
test[columns_to_scale] = scaler.transform(test[columns_to_scale])

In [13]:
# exclude_columns = ['cluster']

# num_columns = len(filtered_train.columns) - len(exclude_columns)
# num_rows = (num_columns - 1) // 4 + 1 

# fig, axes = plt.subplots(nrows=num_rows, ncols=4, figsize=(16, num_rows * 4))
# axes = axes.flatten()

# index = 0
# for i, column in enumerate(filtered_train.columns):
#     if column not in exclude_columns:
#         sns.histplot(data=filtered_train, x=column, kde=True, ax=axes[index])
#         axes[index].set_title(f'Histogram of {column}')
#         axes[index].set_xlabel('Values')
#         axes[index].set_ylabel('Frequency')
#         index += 1

# for i in range(index, len(axes)):
#     fig.delaxes(axes[i])

# plt.tight_layout()  
# plt.show()

In [14]:
cat_y1 = CatBoostRegressor(loss_function='MAE',
                           cat_features=['X8'],  # Add categorical features index if any
                           verbose=50
                           )

X_train_reduced = train.drop(columns=['Y1', 'Y2']).reset_index(drop=True)
y_train = train['Y1'].reset_index(drop=True)

kf = KFold(n_splits=5, shuffle=True, random_state=42)

ensemble_predictions = []
scores = []

y_train = y_train.values

for train_idx, val_idx in tqdm(kf.split(X_train_reduced), total=5, desc="Processing folds"):
    X_t, X_val = X_train_reduced.iloc[train_idx], X_train_reduced.iloc[val_idx]
    y_t, y_val = y_train[train_idx], y_train[val_idx]

    cat_y1.fit(X_t, y_t, eval_set=(X_val, y_val))

    val_pred = cat_y1.predict(X_val)

    scores.append(mean_absolute_error(y_val, val_pred))

    test = test[X_train_reduced.columns]

    cat_pred = cat_y1.predict(test)
    cat_pred = np.where(cat_pred < 0, 0, cat_pred)

    ensemble_predictions.append(cat_pred)

final_predictions_y1 = np.mean(ensemble_predictions, axis=0)

print("Validation : MAE scores for each fold:", scores)
print("Validation : MAE:", np.mean(scores))

Processing folds:   0%|          | 0/5 [00:00<?, ?it/s]

0:	learn: 203.0525745	test: 198.7824899	best: 198.7824899 (0)	total: 221ms	remaining: 3m 40s
50:	learn: 125.4676384	test: 122.1278035	best: 122.1278035 (50)	total: 3.65s	remaining: 1m 7s
100:	learn: 112.4670206	test: 109.2862269	best: 109.2862269 (100)	total: 6.67s	remaining: 59.4s
150:	learn: 108.7569737	test: 105.6426319	best: 105.6426319 (150)	total: 9.5s	remaining: 53.4s
200:	learn: 106.9428718	test: 103.9117226	best: 103.9117226 (200)	total: 12.4s	remaining: 49.1s
250:	learn: 105.8859682	test: 102.8918158	best: 102.8918158 (250)	total: 15.3s	remaining: 45.5s
300:	learn: 105.0956365	test: 102.1449412	best: 102.1449412 (300)	total: 18.5s	remaining: 43s
350:	learn: 104.5370529	test: 101.6306457	best: 101.6306457 (350)	total: 22.1s	remaining: 40.8s
400:	learn: 104.0002995	test: 101.1293317	best: 101.1293317 (400)	total: 25.5s	remaining: 38.1s
450:	learn: 103.5143898	test: 100.6906393	best: 100.6906393 (450)	total: 29.1s	remaining: 35.5s
500:	learn: 103.1407727	test: 100.3685686	best: 

Processing folds:  20%|██        | 1/5 [01:02<04:10, 62.69s/it]

999:	learn: 100.8019890	test: 98.5233656	best: 98.5233656 (999)	total: 1m 2s	remaining: 0us

bestTest = 98.52336562
bestIteration = 999

0:	learn: 201.8590840	test: 203.6658363	best: 203.6658363 (0)	total: 80.8ms	remaining: 1m 20s
50:	learn: 124.2065541	test: 126.7169603	best: 126.7169603 (50)	total: 3.43s	remaining: 1m 3s
100:	learn: 111.0528485	test: 113.8448370	best: 113.8448370 (100)	total: 6.6s	remaining: 58.7s
150:	learn: 107.5763470	test: 110.5013166	best: 110.5013166 (150)	total: 10s	remaining: 56.3s
200:	learn: 105.7317169	test: 108.7641224	best: 108.7641224 (200)	total: 13s	remaining: 51.8s
250:	learn: 104.6340595	test: 107.7402562	best: 107.7402562 (250)	total: 15.8s	remaining: 47.2s
300:	learn: 103.8755807	test: 107.0629633	best: 107.0629633 (300)	total: 18.9s	remaining: 43.9s
350:	learn: 103.2670272	test: 106.5381240	best: 106.5381240 (350)	total: 22.1s	remaining: 41s
400:	learn: 102.7134288	test: 106.0525617	best: 106.0525617 (400)	total: 25.1s	remaining: 37.5s
450:	learn

Processing folds:  40%|████      | 2/5 [02:04<03:06, 62.26s/it]

999:	learn: 99.5578720	test: 103.4503981	best: 103.4503981 (999)	total: 1m 1s	remaining: 0us

bestTest = 103.4503981
bestIteration = 999

0:	learn: 202.1583661	test: 201.5147136	best: 201.5147136 (0)	total: 80.5ms	remaining: 1m 20s
50:	learn: 124.9455302	test: 124.4617168	best: 124.4617168 (50)	total: 3.02s	remaining: 56.2s
100:	learn: 111.7935661	test: 111.4583323	best: 111.4583323 (100)	total: 5.9s	remaining: 52.5s
150:	learn: 108.1924056	test: 107.9040542	best: 107.9040542 (150)	total: 8.77s	remaining: 49.3s
200:	learn: 106.4775746	test: 106.2219548	best: 106.2219548 (200)	total: 11.9s	remaining: 47.1s
250:	learn: 105.3964948	test: 105.1893402	best: 105.1893402 (250)	total: 14.8s	remaining: 44s
300:	learn: 104.6103814	test: 104.4525612	best: 104.4525612 (300)	total: 17.7s	remaining: 41.2s
350:	learn: 104.0186441	test: 103.8889660	best: 103.8889660 (350)	total: 20.7s	remaining: 38.3s
400:	learn: 103.5022004	test: 103.4266882	best: 103.4266882 (400)	total: 23.7s	remaining: 35.4s
450:	

Processing folds:  60%|██████    | 3/5 [03:04<02:02, 61.17s/it]

999:	learn: 100.1100364	test: 100.6935579	best: 100.6935579 (999)	total: 59.4s	remaining: 0us

bestTest = 100.6935579
bestIteration = 999

0:	learn: 201.0814823	test: 205.8849368	best: 205.8849368 (0)	total: 70.3ms	remaining: 1m 10s
50:	learn: 124.0343796	test: 128.2224063	best: 128.2224063 (50)	total: 3s	remaining: 55.8s
100:	learn: 111.0071181	test: 115.0819392	best: 115.0819392 (100)	total: 5.91s	remaining: 52.6s
150:	learn: 107.3458631	test: 111.3321073	best: 111.3321073 (150)	total: 8.75s	remaining: 49.2s
200:	learn: 105.5947911	test: 109.5654258	best: 109.5654258 (200)	total: 11.6s	remaining: 46.1s
250:	learn: 104.5560265	test: 108.5518160	best: 108.5518160 (250)	total: 14.5s	remaining: 43.2s
300:	learn: 103.7888574	test: 107.8140285	best: 107.8140285 (300)	total: 17.7s	remaining: 41.1s
350:	learn: 103.2087117	test: 107.2512517	best: 107.2512517 (350)	total: 20.8s	remaining: 38.5s
400:	learn: 102.6442484	test: 106.6960145	best: 106.6960145 (400)	total: 24.5s	remaining: 36.5s
450:

Processing folds:  80%|████████  | 4/5 [04:04<01:00, 60.72s/it]

999:	learn: 99.4188948	test: 104.0106573	best: 104.0106573 (999)	total: 59.5s	remaining: 0us

bestTest = 104.0106573
bestIteration = 999

0:	learn: 202.4539681	test: 200.7660774	best: 200.7660774 (0)	total: 64.2ms	remaining: 1m 4s
50:	learn: 125.4006129	test: 122.8119120	best: 122.8119120 (50)	total: 3.09s	remaining: 57.5s
100:	learn: 112.2575429	test: 109.3393688	best: 109.3393688 (100)	total: 6s	remaining: 53.4s
150:	learn: 108.7536684	test: 105.8727705	best: 105.8727705 (150)	total: 9.22s	remaining: 51.8s
200:	learn: 106.9711583	test: 104.1418651	best: 104.1418651 (200)	total: 12.2s	remaining: 48.4s
250:	learn: 105.8265205	test: 103.0628249	best: 103.0628249 (250)	total: 15.1s	remaining: 44.9s
300:	learn: 105.0686318	test: 102.3690806	best: 102.3690806 (300)	total: 18.1s	remaining: 42s
350:	learn: 104.4759209	test: 101.8407857	best: 101.8407857 (350)	total: 20.9s	remaining: 38.7s
400:	learn: 103.9352823	test: 101.3550644	best: 101.3550644 (400)	total: 23.7s	remaining: 35.5s
450:	lea

Processing folds: 100%|██████████| 5/5 [05:07<00:00, 61.41s/it]

999:	learn: 100.8037147	test: 98.8143030	best: 98.8143030 (999)	total: 1m 1s	remaining: 0us

bestTest = 98.81430304
bestIteration = 999

Validation : MAE scores for each fold: [98.52336662474534, 103.46849050983151, 100.6969750836784, 104.01393686610008, 98.82102734965422]
Validation : MAE: 101.10475928680191





In [15]:
cat_y2 = CatBoostRegressor(loss_function='MAE',
                           cat_features=['X8'],  # Add categorical features index if any
                           verbose=50
                           )

X_train_reduced = train.drop(columns=['Y1', 'Y2']).reset_index(drop=True)
y_train = train['Y2'].reset_index(drop=True)

kf = KFold(n_splits=5, shuffle=True, random_state=42)

ensemble_predictions = []
scores = []

y_train = y_train.values

for train_idx, val_idx in tqdm(kf.split(X_train_reduced), total=5, desc="Processing folds"):
    X_t, X_val = X_train_reduced.iloc[train_idx], X_train_reduced.iloc[val_idx]
    y_t, y_val = y_train[train_idx], y_train[val_idx]

    cat_y2.fit(X_t, y_t, eval_set=(X_val, y_val))

    val_pred = cat_y2.predict(X_val)

    scores.append(mean_absolute_error(y_val, val_pred))

    cat_pred = cat_y2.predict(test)
    cat_pred = np.where(cat_pred < 0, 0, cat_pred)

    ensemble_predictions.append(cat_pred)

final_predictions_y2 = np.mean(ensemble_predictions, axis=0)

print("Validation : MAE scores for each fold:", scores)
print("Validation : MAE:", np.mean(scores))

Processing folds:   0%|          | 0/5 [00:00<?, ?it/s]

0:	learn: 1111.5696306	test: 1100.8529657	best: 1100.8529657 (0)	total: 68.2ms	remaining: 1m 8s
50:	learn: 490.9743618	test: 485.9682540	best: 485.9682540 (50)	total: 3.6s	remaining: 1m 6s
100:	learn: 334.8480921	test: 330.8432616	best: 330.8432616 (100)	total: 6.93s	remaining: 1m 1s
150:	learn: 282.8918230	test: 276.1663612	best: 276.1663612 (150)	total: 9.72s	remaining: 54.7s
200:	learn: 262.6076577	test: 254.7367829	best: 254.7367829 (200)	total: 12.7s	remaining: 50.3s
250:	learn: 252.8200573	test: 244.6034302	best: 244.6034302 (250)	total: 15.9s	remaining: 47.3s
300:	learn: 247.7904401	test: 239.5471547	best: 239.5471547 (300)	total: 18.7s	remaining: 43.4s
350:	learn: 243.9362374	test: 235.8117014	best: 235.8117014 (350)	total: 21.6s	remaining: 40s
400:	learn: 241.1127673	test: 233.1622299	best: 233.1622299 (400)	total: 24.6s	remaining: 36.7s
450:	learn: 238.8394544	test: 231.1021458	best: 231.1021458 (450)	total: 27.5s	remaining: 33.5s
500:	learn: 236.7937057	test: 229.2609140	bes

Processing folds:  20%|██        | 1/5 [01:02<04:09, 62.34s/it]

999:	learn: 225.8658144	test: 219.8820116	best: 219.8820116 (999)	total: 1m 1s	remaining: 0us

bestTest = 219.8820116
bestIteration = 999

0:	learn: 1107.9934242	test: 1115.3415698	best: 1115.3415698 (0)	total: 68.5ms	remaining: 1m 8s
50:	learn: 492.2030966	test: 495.9505412	best: 495.9505412 (50)	total: 3.2s	remaining: 59.6s
100:	learn: 331.2456059	test: 330.2443574	best: 330.2443574 (100)	total: 6.61s	remaining: 58.8s
150:	learn: 281.1894942	test: 279.8669133	best: 279.8669133 (150)	total: 10s	remaining: 56.4s
200:	learn: 261.5451397	test: 260.0247890	best: 260.0247890 (200)	total: 13.1s	remaining: 51.9s
250:	learn: 251.8080687	test: 250.7133548	best: 250.7133548 (250)	total: 16s	remaining: 47.9s
300:	learn: 246.5077262	test: 245.4360049	best: 245.4360049 (300)	total: 19s	remaining: 44.2s
350:	learn: 243.0614357	test: 242.1197909	best: 242.1197909 (350)	total: 22s	remaining: 40.6s
400:	learn: 240.1919851	test: 239.3736568	best: 239.3736568 (400)	total: 24.9s	remaining: 37.2s
450:	lea

Processing folds:  40%|████      | 2/5 [02:03<03:04, 61.44s/it]

999:	learn: 224.7051594	test: 225.0603657	best: 225.0603657 (999)	total: 1m	remaining: 0us

bestTest = 225.0603657
bestIteration = 999

0:	learn: 1106.7502043	test: 1116.0458449	best: 1116.0458449 (0)	total: 73.5ms	remaining: 1m 13s
50:	learn: 486.7377346	test: 498.5615485	best: 498.5615485 (50)	total: 3.18s	remaining: 59.1s
100:	learn: 328.5869826	test: 343.1164354	best: 343.1164354 (100)	total: 6.27s	remaining: 55.8s
150:	learn: 277.2044438	test: 292.1473391	best: 292.1473391 (150)	total: 9.38s	remaining: 52.7s
200:	learn: 258.0580043	test: 273.1114422	best: 273.1114422 (200)	total: 12.5s	remaining: 49.6s
250:	learn: 248.6235901	test: 263.1243514	best: 263.1243514 (250)	total: 15.5s	remaining: 46.4s
300:	learn: 243.2342347	test: 257.6338250	best: 257.6338250 (300)	total: 18.5s	remaining: 43.1s
350:	learn: 239.4451777	test: 253.7027384	best: 253.7027384 (350)	total: 21.6s	remaining: 39.9s
400:	learn: 236.6531553	test: 250.9308163	best: 250.9308163 (400)	total: 24.7s	remaining: 37s
450

Processing folds:  60%|██████    | 3/5 [03:07<02:05, 62.89s/it]

999:	learn: 221.7652768	test: 236.9796261	best: 236.9796261 (999)	total: 1m 4s	remaining: 0us

bestTest = 236.9796261
bestIteration = 999

0:	learn: 1108.9856384	test: 1107.0820565	best: 1107.0820565 (0)	total: 75.8ms	remaining: 1m 15s
50:	learn: 490.2477239	test: 490.5140859	best: 490.5140859 (50)	total: 3.43s	remaining: 1m 3s
100:	learn: 330.5604794	test: 332.7247279	best: 332.7247279 (100)	total: 6.6s	remaining: 58.7s
150:	learn: 279.9919244	test: 283.7006479	best: 283.7006479 (150)	total: 9.97s	remaining: 56s
200:	learn: 259.2388349	test: 264.0221159	best: 264.0221159 (200)	total: 13.9s	remaining: 55.2s
250:	learn: 249.9942478	test: 255.1860157	best: 255.1860157 (250)	total: 17.6s	remaining: 52.4s
300:	learn: 244.6948515	test: 250.2138364	best: 250.2138364 (300)	total: 20.8s	remaining: 48.3s
350:	learn: 240.9242175	test: 246.7921667	best: 246.7921667 (350)	total: 24s	remaining: 44.3s
400:	learn: 237.9812478	test: 244.1174637	best: 244.1174637 (400)	total: 26.8s	remaining: 40.1s
450

Processing folds:  80%|████████  | 4/5 [04:12<01:03, 63.55s/it]

999:	learn: 223.1040339	test: 230.9411890	best: 230.9411890 (999)	total: 1m 4s	remaining: 0us

bestTest = 230.941189
bestIteration = 999

0:	learn: 1110.0750932	test: 1106.1530712	best: 1106.1530712 (0)	total: 79.7ms	remaining: 1m 19s
50:	learn: 493.2636160	test: 483.3094081	best: 483.3094081 (50)	total: 3.5s	remaining: 1m 5s
100:	learn: 336.2899374	test: 326.5253740	best: 326.5253740 (100)	total: 6.96s	remaining: 1m 1s
150:	learn: 282.8628695	test: 274.9746479	best: 274.9746479 (150)	total: 10.4s	remaining: 58.6s
200:	learn: 262.8506463	test: 256.2372915	best: 256.2372915 (200)	total: 13.6s	remaining: 54s
250:	learn: 252.4073785	test: 246.6576085	best: 246.6576085 (250)	total: 16.8s	remaining: 50s
300:	learn: 246.5296815	test: 240.9965667	best: 240.9965667 (300)	total: 20.5s	remaining: 47.5s
350:	learn: 243.1700619	test: 237.7686316	best: 237.7686316 (350)	total: 23.6s	remaining: 43.7s
400:	learn: 240.4632134	test: 235.0889829	best: 235.0889829 (400)	total: 27.2s	remaining: 40.7s
450:

Processing folds: 100%|██████████| 5/5 [05:20<00:00, 64.11s/it]

999:	learn: 225.4309003	test: 220.9483511	best: 220.9483511 (999)	total: 1m 7s	remaining: 0us

bestTest = 220.9483511
bestIteration = 999

Validation : MAE scores for each fold: [219.88201255567265, 225.06036669844434, 237.27549424797704, 230.94118994058036, 220.986773835461]
Validation : MAE: 226.82916745562707





In [16]:
submit = pd.read_csv(r'C:\Users\dlwks\OneDrive\바탕 화면\VSCode\HD_CNC\sample_submission.csv')

submit.head()

Unnamed: 0,Id,Predicted
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


In [17]:
final_predictions = final_predictions_y1 + final_predictions_y2


submit.iloc[:, 1] = final_predictions

submit

Unnamed: 0,Id,Predicted
0,0,2831.654426
1,1,1712.022681
2,2,3180.479417
3,3,4079.165842
4,4,1359.357177
...,...,...
40566,40566,833.252297
40567,40567,1325.971164
40568,40568,2137.377570
40569,40569,1142.117214


In [18]:
submit.to_csv('C:\\Users\\dlwks\\OneDrive\\바탕 화면\\VSCode\\HD_CNC\\1122-1.csv', index=False)