In [1]:
import pandas as pd
import random
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split # 학습, 테스트set 구분
from sklearn.tree import export_graphviz # tree 시각화를 위해
import graphviz # tree 시각화
from sklearn.metrics import f1_score# 성능지표를 계산하기 위해 import
from sklearn.model_selection import cross_val_score, cross_validate # 교차검증

In [2]:
train_df = pd.read_csv('/content/drive/MyDrive/LGaimers/train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/LGaimers/test.csv')

In [3]:
print(train_df.shape)
print(test_df.shape)

(598, 2881)
(310, 2879)


## 사용 함수 정의

In [4]:
# 모두 결측치인 변수 제거
def remove_all_nan(dataframe):
  col_list = dataframe.columns
  nan_list = []
  nan_cnt = []
  nan_col = []
  full_list = []

  for col in col_list:
    if dataframe[col].isnull().sum() == 0:
      full_list.append(col)
      continue
    nan_list.append([col,dataframe[col].isnull().sum()])
    nan_cnt.append(dataframe[col].isnull().sum())
    nan_col.append(col)

  del_col = []
  for nan in nan_list:
    if nan[1] == len(dataframe):
      del_col.append(nan[0])
  
  return dataframe.drop(columns=del_col)

In [5]:
'''값이 1개 존재하면 제거'''

def remove_one_value(dataframe):
  del_col = []
  col_list = dataframe.columns
  for col in col_list:
      if dataframe[col].nunique()==1 :
          del_col.append(col)
  
  return dataframe.drop(columns=del_col)

In [6]:
# iterativeimputer 함수 정의
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

def iterativeimputer_subset(input_df,target_df,LINE_NUM):
  imputer = IterativeImputer(random_state=1234)

  cols = input_df.columns[:-1 * LINE_NUM]
  key_cols = input_df.columns[-1 * LINE_NUM:]

  total_subset = target_df[key_cols]
  interval = 30
  
  for i in range(0,len(cols)//interval):
    print(i,'번째 merge 진행중 ...')
    subset = pd.concat([input_df[key_cols], input_df[cols[i*interval : (i*interval)+interval]]], axis=1)
    imputer.fit(subset)

    target_subset = pd.concat([target_df[key_cols], target_df[cols[i*interval : (i*interval)+interval]]], axis=1)
    impute_subset = pd.DataFrame(imputer.transform(target_subset), columns=target_subset.columns)
    impute_subset = impute_subset.drop(key_cols,axis=1)

    print("기존 total : ", total_subset.shape, "기존 impute_subset : ", impute_subset.shape)
    total_subset = pd.concat([total_subset.reset_index(drop=True), impute_subset.reset_index(drop=True)], axis=1)
    print("병합 total : ", total_subset.shape)
    print('------------------------------------------------------')

  subset = pd.concat([input_df[key_cols], input_df[cols[total_subset.shape[1]-len(input_df.columns) : ]]], axis=1)
  imputer.fit(subset)

  target_subset = pd.concat([target_df[key_cols], target_df[cols[total_subset.shape[1]-len(input_df.columns) : ]]], axis=1)
  impute_subset = pd.DataFrame(imputer.transform(target_subset), columns=target_subset.columns)
  impute_subset = impute_subset.drop(key_cols,axis=1)

  print("기존 total : ", total_subset.shape, "기존 impute_subset : ", impute_subset.shape)
  total_subset = pd.concat([total_subset.reset_index(drop=True), impute_subset.reset_index(drop=True)], axis=1)
  print("병합 total : ", total_subset.shape)
  print('------------------------------------------------------')

  return total_subset

# PRODUCT CODE별 dataset 구축

In [7]:
trainA_31 = train_df[train_df['PRODUCT_CODE'] == 'A_31']
trainT_31 = train_df[train_df['PRODUCT_CODE'] == 'T_31']
trainO_31 = train_df[train_df['PRODUCT_CODE'] == 'O_31']

In [8]:
testA_31 = test_df[test_df['PRODUCT_CODE'] == 'A_31']
testT_31 = test_df[test_df['PRODUCT_CODE'] == 'T_31']
testO_31 = test_df[test_df['PRODUCT_CODE'] == 'O_31']

In [9]:
# 모두 NaN인 feature 제거
trainA_31 = remove_all_nan(trainA_31)
trainT_31 = remove_all_nan(trainT_31)
trainO_31 = remove_all_nan(trainO_31)

In [10]:
# 모두 NaN인 feature 제거
trainA_31 = remove_one_value(trainA_31)
trainT_31 = remove_one_value(trainT_31)
trainO_31 = remove_one_value(trainO_31)

In [11]:
print(trainA_31.shape, trainT_31.shape, trainO_31.shape)

(249, 1870) (343, 554) (6, 503)


In [12]:
trainA_31_x = trainA_31.drop(columns=['PRODUCT_ID','TIMESTAMP','Y_Class','Y_Quality'])
trainT_31_x = trainT_31.drop(columns=['PRODUCT_ID','TIMESTAMP','Y_Class','Y_Quality'])
trainO_31_x = trainO_31.drop(columns=['PRODUCT_ID','TIMESTAMP','Y_Class','Y_Quality'])

In [13]:
trainA_31_x = pd.get_dummies(trainA_31_x)
trainT_31_x = pd.get_dummies(trainT_31_x)
trainO_31_x = pd.get_dummies(trainO_31_x)

In [14]:
trainA_31_x = pd.concat([trainA_31_x, trainA_31['Y_Class'], trainA_31['Y_Quality']], axis=1)
trainT_31_x = pd.concat([trainT_31_x, trainT_31['Y_Class'], trainT_31['Y_Quality']], axis=1)
trainO_31_x = pd.concat([trainO_31_x, trainO_31['Y_Class'], trainO_31['Y_Quality']], axis=1)

## PRODUCT_CODE 별 iterativeimputer 적용
- 이때 Y_Class와 Y_Quality는 key_cols에 포함시킴.
- test에 transform시에는 해당 column 제거함.

In [15]:
trainA_31_x = iterativeimputer_subset(trainA_31_x,trainA_31_x,6)
trainT_31_x = iterativeimputer_subset(trainT_31_x,trainT_31_x,4)
trainO_31_x = iterativeimputer_subset(trainO_31_x,trainO_31_x,4)

0 번째 merge 진행중 ...




기존 total :  (249, 6) 기존 impute_subset :  (249, 30)
병합 total :  (249, 36)
------------------------------------------------------
1 번째 merge 진행중 ...
기존 total :  (249, 36) 기존 impute_subset :  (249, 30)
병합 total :  (249, 66)
------------------------------------------------------
2 번째 merge 진행중 ...




기존 total :  (249, 66) 기존 impute_subset :  (249, 30)
병합 total :  (249, 96)
------------------------------------------------------
3 번째 merge 진행중 ...




기존 total :  (249, 96) 기존 impute_subset :  (249, 30)
병합 total :  (249, 126)
------------------------------------------------------
4 번째 merge 진행중 ...
기존 total :  (249, 126) 기존 impute_subset :  (249, 30)
병합 total :  (249, 156)
------------------------------------------------------
5 번째 merge 진행중 ...
기존 total :  (249, 156) 기존 impute_subset :  (249, 30)
병합 total :  (249, 186)
------------------------------------------------------
6 번째 merge 진행중 ...




기존 total :  (249, 186) 기존 impute_subset :  (249, 30)
병합 total :  (249, 216)
------------------------------------------------------
7 번째 merge 진행중 ...




기존 total :  (249, 216) 기존 impute_subset :  (249, 30)
병합 total :  (249, 246)
------------------------------------------------------
8 번째 merge 진행중 ...
기존 total :  (249, 246) 기존 impute_subset :  (249, 30)
병합 total :  (249, 276)
------------------------------------------------------
9 번째 merge 진행중 ...
기존 total :  (249, 276) 기존 impute_subset :  (249, 30)
병합 total :  (249, 306)
------------------------------------------------------
10 번째 merge 진행중 ...
기존 total :  (249, 306) 기존 impute_subset :  (249, 30)
병합 total :  (249, 336)
------------------------------------------------------
11 번째 merge 진행중 ...
기존 total :  (249, 336) 기존 impute_subset :  (249, 30)
병합 total :  (249, 366)
------------------------------------------------------
12 번째 merge 진행중 ...
기존 total :  (249, 366) 기존 impute_subset :  (249, 30)
병합 total :  (249, 396)
------------------------------------------------------
13 번째 merge 진행중 ...
기존 total :  (249, 396) 기존 impute_subset :  (249, 30)
병합 total :  (249, 426)
--------------------



기존 total :  (249, 516) 기존 impute_subset :  (249, 30)
병합 total :  (249, 546)
------------------------------------------------------
18 번째 merge 진행중 ...
기존 total :  (249, 546) 기존 impute_subset :  (249, 30)
병합 total :  (249, 576)
------------------------------------------------------
19 번째 merge 진행중 ...
기존 total :  (249, 576) 기존 impute_subset :  (249, 30)
병합 total :  (249, 606)
------------------------------------------------------
20 번째 merge 진행중 ...




기존 total :  (249, 606) 기존 impute_subset :  (249, 30)
병합 total :  (249, 636)
------------------------------------------------------
21 번째 merge 진행중 ...
기존 total :  (249, 636) 기존 impute_subset :  (249, 30)
병합 total :  (249, 666)
------------------------------------------------------
22 번째 merge 진행중 ...
기존 total :  (249, 666) 기존 impute_subset :  (249, 30)
병합 total :  (249, 696)
------------------------------------------------------
23 번째 merge 진행중 ...
기존 total :  (249, 696) 기존 impute_subset :  (249, 30)
병합 total :  (249, 726)
------------------------------------------------------
24 번째 merge 진행중 ...
기존 total :  (249, 726) 기존 impute_subset :  (249, 30)
병합 total :  (249, 756)
------------------------------------------------------
25 번째 merge 진행중 ...
기존 total :  (249, 756) 기존 impute_subset :  (249, 30)
병합 total :  (249, 786)
------------------------------------------------------
26 번째 merge 진행중 ...
기존 total :  (249, 786) 기존 impute_subset :  (249, 30)
병합 total :  (249, 816)
------------------



기존 total :  (249, 1116) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1146)
------------------------------------------------------
38 번째 merge 진행중 ...
기존 total :  (249, 1146) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1176)
------------------------------------------------------
39 번째 merge 진행중 ...
기존 total :  (249, 1176) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1206)
------------------------------------------------------
40 번째 merge 진행중 ...




기존 total :  (249, 1206) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1236)
------------------------------------------------------
41 번째 merge 진행중 ...




기존 total :  (249, 1236) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1266)
------------------------------------------------------
42 번째 merge 진행중 ...
기존 total :  (249, 1266) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1296)
------------------------------------------------------
43 번째 merge 진행중 ...




기존 total :  (249, 1296) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1326)
------------------------------------------------------
44 번째 merge 진행중 ...




기존 total :  (249, 1326) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1356)
------------------------------------------------------
45 번째 merge 진행중 ...




기존 total :  (249, 1356) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1386)
------------------------------------------------------
46 번째 merge 진행중 ...




기존 total :  (249, 1386) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1416)
------------------------------------------------------
47 번째 merge 진행중 ...




기존 total :  (249, 1416) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1446)
------------------------------------------------------
48 번째 merge 진행중 ...




기존 total :  (249, 1446) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1476)
------------------------------------------------------
49 번째 merge 진행중 ...
기존 total :  (249, 1476) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1506)
------------------------------------------------------
50 번째 merge 진행중 ...
기존 total :  (249, 1506) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1536)
------------------------------------------------------
51 번째 merge 진행중 ...




기존 total :  (249, 1536) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1566)
------------------------------------------------------
52 번째 merge 진행중 ...
기존 total :  (249, 1566) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1596)
------------------------------------------------------
53 번째 merge 진행중 ...
기존 total :  (249, 1596) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1626)
------------------------------------------------------
54 번째 merge 진행중 ...




기존 total :  (249, 1626) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1656)
------------------------------------------------------
55 번째 merge 진행중 ...




기존 total :  (249, 1656) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1686)
------------------------------------------------------
56 번째 merge 진행중 ...
기존 total :  (249, 1686) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1716)
------------------------------------------------------
57 번째 merge 진행중 ...
기존 total :  (249, 1716) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1746)
------------------------------------------------------
58 번째 merge 진행중 ...
기존 total :  (249, 1746) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1776)
------------------------------------------------------
59 번째 merge 진행중 ...
기존 total :  (249, 1776) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1806)
------------------------------------------------------
60 번째 merge 진행중 ...




기존 total :  (249, 1806) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1836)
------------------------------------------------------
61 번째 merge 진행중 ...
기존 total :  (249, 1836) 기존 impute_subset :  (249, 30)
병합 total :  (249, 1866)
------------------------------------------------------




기존 total :  (249, 1866) 기존 impute_subset :  (249, 5)
병합 total :  (249, 1871)
------------------------------------------------------
0 번째 merge 진행중 ...
기존 total :  (343, 4) 기존 impute_subset :  (343, 30)
병합 total :  (343, 34)
------------------------------------------------------
1 번째 merge 진행중 ...
기존 total :  (343, 34) 기존 impute_subset :  (343, 30)
병합 total :  (343, 64)
------------------------------------------------------
2 번째 merge 진행중 ...




기존 total :  (343, 64) 기존 impute_subset :  (343, 30)
병합 total :  (343, 94)
------------------------------------------------------
3 번째 merge 진행중 ...
기존 total :  (343, 94) 기존 impute_subset :  (343, 30)
병합 total :  (343, 124)
------------------------------------------------------
4 번째 merge 진행중 ...
기존 total :  (343, 124) 기존 impute_subset :  (343, 30)
병합 total :  (343, 154)
------------------------------------------------------
5 번째 merge 진행중 ...
기존 total :  (343, 154) 기존 impute_subset :  (343, 30)
병합 total :  (343, 184)
------------------------------------------------------
6 번째 merge 진행중 ...
기존 total :  (343, 184) 기존 impute_subset :  (343, 30)
병합 total :  (343, 214)
------------------------------------------------------
7 번째 merge 진행중 ...
기존 total :  (343, 214) 기존 impute_subset :  (343, 30)
병합 total :  (343, 244)
------------------------------------------------------
8 번째 merge 진행중 ...
기존 total :  (343, 244) 기존 impute_subset :  (343, 30)
병합 total :  (343, 274)
---------------------------



기존 total :  (6, 64) 기존 impute_subset :  (6, 30)
병합 total :  (6, 94)
------------------------------------------------------
3 번째 merge 진행중 ...
기존 total :  (6, 94) 기존 impute_subset :  (6, 30)
병합 total :  (6, 124)
------------------------------------------------------
4 번째 merge 진행중 ...
기존 total :  (6, 124) 기존 impute_subset :  (6, 30)
병합 total :  (6, 154)
------------------------------------------------------
5 번째 merge 진행중 ...
기존 total :  (6, 154) 기존 impute_subset :  (6, 30)
병합 total :  (6, 184)
------------------------------------------------------
6 번째 merge 진행중 ...
기존 total :  (6, 184) 기존 impute_subset :  (6, 30)
병합 total :  (6, 214)
------------------------------------------------------
7 번째 merge 진행중 ...
기존 total :  (6, 214) 기존 impute_subset :  (6, 30)
병합 total :  (6, 244)
------------------------------------------------------
8 번째 merge 진행중 ...
기존 total :  (6, 244) 기존 impute_subset :  (6, 30)
병합 total :  (6, 274)
------------------------------------------------------
9 번째 merge 진행중

In [16]:
#test에 transform을 위해 Y_Class, Y_Quality drop
trainA_31_x = trainA_31_x.drop(columns=['Y_Class','Y_Quality'])
trainT_31_x = trainT_31_x.drop(columns=['Y_Class','Y_Quality'])
trainO_31_x = trainO_31_x.drop(columns=['Y_Class','Y_Quality'])

In [17]:
new_col = trainA_31_x.columns[4:].to_list() + trainA_31_x.columns[:4].to_list()
trainA_31_x = trainA_31_x[new_col]

new_col = trainT_31_x.columns[2:].to_list() + trainT_31_x.columns[:2].to_list()
trainT_31_x = trainT_31_x[new_col]

new_col = trainO_31_x.columns[2:].to_list() + trainO_31_x.columns[:2].to_list()
trainO_31_x = trainO_31_x[new_col]

In [18]:
trainA_31_x

Unnamed: 0,X_128,X_129,X_130,X_131,X_132,X_133,X_134,X_136,X_137,X_138,...,X_2865,X_2866,X_2867,X_2868,X_2869,X_2870,LINE_T010305,LINE_T010306,LINE_T050304,LINE_T050307
0,7.813000e+03,7.813000e+03,3.149602e+15,3.149598e+15,0.190000,0.200000,0.190000,2.280000e+02,2.280000e+02,2.250000e+02,...,353.0,39.340000,40.890000,32.560000,34.090000,77.770000,0,0,1,0
1,-1.141903e+17,-1.064616e+17,1.985400e+04,1.985400e+04,0.200000,0.210000,0.200000,4.130000e+02,4.140000e+02,4.140000e+02,...,353.0,38.890000,42.820000,43.920000,35.340000,72.550000,0,0,0,1
2,7.815000e+03,7.815000e+03,3.149602e+15,3.149598e+15,0.190000,0.200000,0.190000,2.280000e+02,2.280000e+02,2.250000e+02,...,353.0,39.190000,36.650000,42.470000,36.530000,78.350000,0,0,1,0
3,-1.142158e+17,-1.064853e+17,1.985600e+04,1.985600e+04,0.200000,0.210000,0.200000,4.140000e+02,4.140000e+02,4.140000e+02,...,353.0,37.740000,39.170000,52.170000,30.580000,71.780000,0,0,0,1
4,7.817000e+03,7.817000e+03,3.149602e+15,3.149598e+15,0.190000,0.200000,0.180000,2.280000e+02,2.280000e+02,2.250000e+02,...,352.0,38.700000,41.890000,46.930000,33.090000,76.970000,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,1.192000e+04,1.192000e+04,3.149602e+15,3.149598e+15,0.190000,0.200000,0.190000,2.300000e+02,2.300000e+02,2.300000e+02,...,353.0,51.710000,59.640000,54.610000,57.050000,63.180000,0,0,1,0
245,1.409776e+14,1.872498e+14,-2.397916e+15,-2.397913e+15,-9694.119387,549.464754,-12757.683464,1.652283e+08,-2.676225e+08,1.634770e+08,...,432.0,51.332433,54.140724,49.626150,52.619518,66.239351,1,0,0,0
246,-5.682102e+15,-5.110534e+15,-1.560389e+16,-1.560387e+16,259911.553936,-14851.198742,342219.184064,-4.430912e+09,7.183639e+09,-4.384359e+09,...,432.0,51.332433,54.140712,49.626150,52.619523,66.239340,0,1,0,0
247,1.481000e+04,1.481000e+04,3.149602e+15,3.149598e+15,0.190000,0.200000,0.190000,3.040000e+02,3.040000e+02,3.040000e+02,...,353.0,49.470000,53.070000,50.890000,55.100000,66.490000,0,0,1,0


In [19]:
testA_31_x = pd.concat([testA_31[trainA_31_x.columns[:-4]], testA_31['LINE']], axis=1)
testA_31_x = pd.get_dummies(testA_31_x)

testT_31_x = pd.concat([testT_31[trainT_31_x.columns[:-2]], testT_31['LINE']], axis=1)
testT_31_x = pd.get_dummies(testT_31_x)

testO_31_x = pd.concat([testO_31[trainO_31_x.columns[:-2]], testO_31['LINE']], axis=1)
testO_31_x = pd.get_dummies(testO_31_x)

In [20]:
testA_31_x

Unnamed: 0,X_128,X_129,X_130,X_131,X_132,X_133,X_134,X_136,X_137,X_138,...,X_2865,X_2866,X_2867,X_2868,X_2869,X_2870,LINE_T010305,LINE_T010306,LINE_T050304,LINE_T050307
3,,,,,,,,,,,...,423.0,,,,,,1,0,0,0
4,,,,,,,,,,,...,423.0,,,,,,0,1,0,0
5,,,,,,,,,,,...,422.0,,,,,,0,1,0,0
6,,,,,,,,,,,...,423.0,,,,,,1,0,0,0
7,18031.0,18031.0,,,0.19,0.20,0.19,354.0,354.0,354.0,...,354.0,57.74,52.51,54.45,57.99,63.16,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284,,,19591.0,24422.0,0.21,0.22,0.21,806.0,805.0,796.0,...,352.0,52.97,58.06,44.11,56.33,62.00,0,0,0,1
285,,,19621.0,24452.0,0.21,0.22,0.21,806.0,805.0,796.0,...,352.0,54.24,55.55,51.60,48.53,61.95,0,0,0,1
286,,,19646.0,24476.0,0.21,0.22,0.21,807.0,806.0,796.0,...,352.0,58.06,57.92,49.06,48.26,62.77,0,0,0,1
292,,,20153.0,24984.0,0.21,0.22,0.21,821.0,820.0,810.0,...,343.0,53.55,52.68,49.97,56.66,63.52,0,0,0,1


In [21]:
testA_31_x = iterativeimputer_subset(trainA_31_x,testA_31_x,4)
testT_31_x = iterativeimputer_subset(trainT_31_x,testT_31_x,2)
testO_31_x = iterativeimputer_subset(trainO_31_x,testO_31_x,2)

# testA_31_x = testA_31_x.fillna(-1)
# testT_31_x = testT_31_x.fillna(-1)
# testO_31_x = testO_31_x.fillna(-1)

Dataset:  
- trainA_31  
- trainT_31  
- trainO_31  

test 결측치
  - train에 iterativeimputer fit, test에 transform 적용

Dataset:
- trainA_31_x, testA_31_x
- trainT_31_x, testT_31_x 
- trainO_31_x, testO_31_x

In [22]:
# classification
trainA_31_y_c = trainA_31['Y_Class']
trainT_31_y_c = trainT_31['Y_Class']
trainO_31_y_c = trainO_31['Y_Class']

# regression
trainA_31_y_r = trainA_31['Y_Quality']
trainT_31_y_r = trainT_31['Y_Quality']
trainO_31_y_r = trainO_31['Y_Quality']

## Model

In [23]:
!pip install --target=$my_path catboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.1.1-cp38-none-manylinux1_x86_64.whl (76.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.6/76.6 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.1.1


In [24]:
from catboost import *

In [25]:
model = CatBoostRegressor(random_state=1234,verbose=500,iterations=1500,learning_rate=0.033)
model.fit(trainA_31_x, trainA_31_y_r)
pred_a = model.predict(testA_31_x)

0:	learn: 0.0097728	total: 283ms	remaining: 7m 4s
500:	learn: 0.0001970	total: 1m 45s	remaining: 3m 30s
1000:	learn: 0.0000132	total: 3m 20s	remaining: 1m 40s
1499:	learn: 0.0000011	total: 4m 58s	remaining: 0us


In [26]:
#model = CatBoostRegressor(random_state=110,verbose=500,iterations=500)
model.fit(trainT_31_x, trainT_31_y_r)
pred_t = model.predict(testT_31_x)

0:	learn: 0.0047261	total: 59.1ms	remaining: 1m 28s
500:	learn: 0.0005002	total: 33.3s	remaining: 1m 6s
1000:	learn: 0.0000526	total: 56.1s	remaining: 28s
1499:	learn: 0.0000063	total: 1m 18s	remaining: 0us


In [27]:
#model = CatBoostRegressor(random_state=110,verbose=500,iterations=500)
model.fit(trainO_31_x, trainO_31_y_r)
pred_o = model.predict(testO_31_x)

0:	learn: 0.0031679	total: 3.35ms	remaining: 5.02s
500:	learn: 0.0000473	total: 2.12s	remaining: 4.22s
1000:	learn: 0.0000008	total: 4.43s	remaining: 2.21s
1499:	learn: 0.0000000	total: 6.8s	remaining: 0us


In [28]:
testA_31['Y_quanlity'] = pred_a
testT_31['Y_quanlity'] = pred_t
testO_31['Y_quanlity'] = pred_o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testA_31['Y_quanlity'] = pred_a
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testT_31['Y_quanlity'] = pred_t
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testO_31['Y_quanlity'] = pred_o


In [29]:
testA_31['Y_Class'] = 1
testT_31['Y_Class'] = 1
testO_31['Y_Class'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testA_31['Y_Class'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testT_31['Y_Class'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testO_31['Y_Class'] = 1


In [30]:
testA_31.loc[(testA_31['Y_quanlity']<0.52507), 'Y_Class'] = 0
testA_31.loc[(testA_31['Y_quanlity']>0.5349), 'Y_Class'] = 2

testT_31.loc[(testT_31['Y_quanlity']<0.52507), 'Y_Class'] = 0
testT_31.loc[(testT_31['Y_quanlity']>0.5349), 'Y_Class'] = 2

testO_31.loc[(testO_31['Y_quanlity']<0.52507), 'Y_Class'] = 0
testO_31.loc[(testO_31['Y_quanlity']>0.5349), 'Y_Class'] = 2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [31]:
submita = pd.read_csv('/content/drive/MyDrive/LGaimers/sample_submission.csv')
submitt = pd.read_csv('/content/drive/MyDrive/LGaimers/sample_submission.csv')
submito = pd.read_csv('/content/drive/MyDrive/LGaimers/sample_submission.csv')

In [32]:
submita = pd.merge(submita[['PRODUCT_ID']],testA_31[['PRODUCT_ID','Y_Class']],on='PRODUCT_ID')
submitt = pd.merge(submitt[['PRODUCT_ID']],testT_31[['PRODUCT_ID','Y_Class']],on='PRODUCT_ID')
submito = pd.merge(submito[['PRODUCT_ID']],testO_31[['PRODUCT_ID','Y_Class']],on='PRODUCT_ID')

In [33]:
pd.concat([submita,submitt,submito]).sort_values(by='PRODUCT_ID').to_csv('iterativeimputer.csv',index=False)

In [34]:
result = pd.read_csv('/content/iterativeimputer.csv')
cat2 = pd.read_csv('/content/캣2_5.csv')

In [35]:
result['Y_Class'].value_counts()

1    278
2     32
Name: Y_Class, dtype: int64

In [36]:
from collections import Counter
Counter(result['Y_Class'] != cat2['Y_Class'])

Counter({False: 251, True: 59})

In [37]:
cat2['Y_Class'].value_counts()

1    275
0     30
2      5
Name: Y_Class, dtype: int64