In [15]:
import pandas as pd
import numpy as np
from scipy.stats import skew
from scipy.stats import kurtosis
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [16]:
train=pd.read_csv('train_features.csv')
train_labels=pd.read_csv('train_labels.csv')
test=pd.read_csv('test_features.csv')
sub=pd.read_csv('sample_submission.csv')

In [17]:
train

Unnamed: 0,id,time,acc_x,acc_y,acc_z,gy_x,gy_y,gy_z
0,0,0,1.206087,-0.179371,-0.148447,-0.591608,-30.549010,-31.676112
1,0,1,1.287696,-0.198974,-0.182444,0.303100,-39.139103,-24.927216
2,0,2,1.304609,-0.195114,-0.253382,-3.617278,-44.122565,-25.019629
3,0,3,1.293095,-0.230366,-0.215210,2.712986,-53.597843,-27.454013
4,0,4,1.300887,-0.187757,-0.222523,4.286707,-57.906561,-27.961234
...,...,...,...,...,...,...,...,...
1874995,3124,595,-0.712530,-0.658357,0.293707,-29.367857,-104.013664,-76.290437
1874996,3124,596,-0.683037,-0.658466,0.329223,-30.149089,-101.796809,-76.625087
1874997,3124,597,-0.664730,-0.666625,0.364114,-27.873095,-98.776072,-79.365125
1874998,3124,598,-0.630534,-0.682565,0.373696,-23.636550,-99.139495,-80.259478


### 함수 지정

In [18]:
def range_func(x):
    max_val = np.max(x) 
    min_val = np.min(x)
    range_val = max_val - min_val #최대값 - 최소값
    return range_val

def iqr_func2(x):
    q3, q1 = np.percentile(x, [20, 80])
    iqr = q3 - q1
    return iqr # 20% - 80%

def iqr_func3(x):
    q3, q1 = np.percentile(x, [40, 60])
    iqr = q3 - q1
    return iqr # 40% - 60%

def iqr_func4(x):
    q3, q1 = np.percentile(x, [15, 95])
    iqr = q3 - q1
    return iqr # 15% - 95%

def preskew(x):
    return skew(x) #왜도

def prekurt(x):
    return kurtosis(x,fisher=True) #첨도

### 벡터값 추가

In [19]:
train['acc_vector']=np.sqrt((train['acc_x'] ** 2) +(train['acc_y'] ** 2)+(train['acc_z'] ** 2)) 
train['gy_vector']=np.sqrt((train['gy_x'] ** 2) +(train['gy_y'] ** 2)+(train['gy_z'] ** 2))

test['acc_vector']=np.sqrt((test['acc_x'] ** 2) +(test['acc_y'] ** 2)+(test['acc_z'] ** 2))
test['gy_vector']=np.sqrt((test['gy_x'] ** 2) +(test['gy_y'] ** 2)+(test['gy_z'] ** 2))

### 자이로스코프 무게중심

In [20]:
train['gy_Centerofgravity']=(train['gy_x']+train['gy_y']+train['gy_z'])/3
test['gy_Centerofgravity']=(test['gy_x']+test['gy_y']+test['gy_z'])/3

### roll, pithch (회전반경)

In [21]:
train['roll'] = np.arctan(train['acc_y']/np.sqrt(train['acc_x'] ** 2 + train['acc_z'] ** 2))
train['pitch'] = np.arctan(train['acc_x']/np.sqrt(train['acc_y'] ** 2 + train['acc_z'] ** 2))
train['gy_roll'] = np.arctan(train['gy_y']/np.sqrt(train['gy_x'] ** 2 + train['gy_z'] ** 2))
train['gy_pitch'] = np.arctan(train['gy_x']/np.sqrt(train['gy_y'] ** 2 + train['gy_z'] ** 2))

test['pitch'] = np.arctan(test['acc_x']/np.sqrt(test['acc_y'] ** 2 + test['acc_z'] ** 2))
test['roll'] = np.arctan(test['acc_y']/np.sqrt(test['acc_x'] ** 2 + test['acc_z'] ** 2))
test['gy_roll'] = np.arctan(test['gy_y']/np.sqrt(test['gy_x'] ** 2 + test['gy_z'] ** 2))
test['gy_pitch'] = np.arctan(test['gy_x']/np.sqrt(test['gy_y'] ** 2 + test['gy_z'] ** 2))

### group by mean만 하는과정

In [8]:
features = ['id', 'acc_x', 'acc_y', 'acc_z', 'gy_x', 'gy_y', 'gy_z','acc_vector','gy_vector','gy_Centerofgravity']

features2 = ['id', 'roll','pitch','gy_roll','gy_pitch']

train_preprocess=train[features].groupby('id').mean().copy()
test_preprocess=test[features].groupby('id').mean().copy()

#총 변수 : 19개

#Catboost 정확도 0.72
#randomforest 정확도 0.7 시간은 3배차이

#원래는 0.8 0.79

### 함수변수 추가 후 group by

In [22]:
features = ['id', 'acc_x', 'acc_y', 'acc_z', 'gy_x', 'gy_y', 'gy_z','acc_vector','gy_vector','gy_Centerofgravity']

features2 = ['id', 'roll','pitch','gy_roll','gy_pitch']

train_preprocess = train[features].groupby('id').agg(['max', 'min', 'mean','std','median',range_func,iqr_func2,iqr_func3,iqr_func4,preskew,prekurt])
temp_train_preprocess = train[features2].groupby('id').agg([range_func,iqr_func2,iqr_func3,iqr_func4,preskew,prekurt])

test_preprocess = test[features].groupby('id').agg(['max', 'min', 'mean','std','median',range_func,iqr_func2,iqr_func3,iqr_func4,preskew,prekurt])
temp_test_preprocess = test[features2].groupby('id').agg([range_func,iqr_func2,iqr_func3,iqr_func4,preskew,prekurt])

train_preprocess=pd.concat([train_preprocess,temp_train_preprocess],axis=1)
test_preprocess=pd.concat([test_preprocess,temp_test_preprocess],axis=1)

In [23]:
train_preprocess.columns=[i[0]+'_'+i[1] for i in train_preprocess.columns]
test_preprocess.columns=[i[0]+'_'+i[1] for i in test_preprocess.columns]

In [24]:
#중간점검
train_preprocess #123개

Unnamed: 0_level_0,acc_x_max,acc_x_min,acc_x_mean,acc_x_std,acc_x_median,acc_x_range_func,acc_x_iqr_func2,acc_x_iqr_func3,acc_x_iqr_func4,acc_x_preskew,...,gy_roll_iqr_func3,gy_roll_iqr_func4,gy_roll_preskew,gy_roll_prekurt,gy_pitch_range_func,gy_pitch_iqr_func2,gy_pitch_iqr_func3,gy_pitch_iqr_func4,gy_pitch_preskew,gy_pitch_prekurt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.344268,0.591940,0.931329,0.191479,0.956149,0.752327,-0.369662,-0.172673,-0.544985,0.106076,...,-0.490935,-1.791598,0.094874,-1.064726,2.977117,-0.982953,-0.247226,-1.925501,0.128560,-0.143170
1,1.234020,-2.156208,-0.766580,0.495528,-0.805767,3.390228,-0.892320,-0.259957,-1.126772,-0.202959,...,-0.379454,-1.989773,-0.150883,-0.683533,2.866306,-1.475187,-0.395792,-1.982914,-0.100541,-1.006218
2,1.219836,-1.142847,0.039836,0.711972,0.140667,2.362683,-1.561197,-0.761086,-1.807555,-0.180743,...,-1.682675,-2.920526,-0.273810,-1.780833,2.681002,-0.387218,-0.109436,-0.757699,-0.271634,2.876820
3,-0.622250,-1.417751,-0.887702,0.130899,-0.880343,0.795502,-0.227442,-0.070906,-0.332513,-0.606239,...,-0.400124,-1.920756,-0.098449,-0.924364,3.082571,-1.637564,-0.574426,-2.065111,-0.242322,-1.142099
4,0.599720,-2.429109,-0.659018,0.495170,-0.941146,3.028829,-0.724182,-0.337496,-1.314271,0.491559,...,-0.316218,-1.504008,-0.123451,-0.470192,3.099170,-1.563356,-0.461233,-2.327640,0.334137,-0.963076
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3120,0.390798,-1.624711,-0.300454,0.403175,-0.105704,2.015509,-0.734687,-0.024370,-1.151625,-0.922420,...,-0.285880,-1.665726,-0.514503,-0.568216,2.979482,-1.333628,-0.434936,-2.016105,0.221518,-0.759062
3121,-0.446650,-1.575455,-0.974298,0.169963,-0.980053,1.128804,-0.202045,-0.015255,-0.452615,-0.111515,...,-0.203518,-1.241979,-0.612739,0.414541,3.012128,-1.997647,-0.955271,-2.331223,-0.077648,-1.498409
3122,0.744666,-2.578974,-1.114246,0.683789,-1.057063,3.323641,-1.396306,-0.486834,-1.845096,-0.110431,...,-1.345177,-2.339231,-0.060028,-1.725392,2.978832,-0.904839,-0.364287,-1.186526,-0.085146,-0.488642
3123,0.915846,-0.929133,-0.111333,0.432722,-0.178023,1.844979,-0.792673,-0.224964,-1.305562,0.475653,...,-0.378793,-2.320785,0.457813,-0.745520,2.898758,-0.854828,-0.217193,-1.451785,0.234904,0.058670


### 운동시작지점과 가속도계, 자이로스코프의 첫 3초의 평균

In [25]:
# 가속도계 첫번째 데이터 -> 운동시작지점
train_acc_head1=pd.DataFrame()
for i in train['id'].unique():
    train_acc_head1=train_acc_head1.append(train[['id','acc_x','acc_y','acc_z']][train[['id','acc_x','acc_y','acc_z']]['id']==i].head(1))
train_acc_head1.columns=['id','first_acc_x','first_acc_y','first_acc_z']
train_acc_head1.set_index('id',inplace=True)

test_acc_head1=pd.DataFrame()
for i in test['id'].unique():
    test_acc_head1=test_acc_head1.append(test[['id','acc_x','acc_y','acc_z']][test[['id','acc_x','acc_y','acc_z']]['id']==i].head(1))
test_acc_head1.columns=['id','first_acc_x','first_acc_y','first_acc_z']
test_acc_head1.set_index('id',inplace=True)

# 가속도계 첫 3초 -> 초반에 급격하게 움직이는 운동과 정적인 운동을 비교
train_acc_head=pd.DataFrame()
for i in train['id'].unique():
    train_acc_head=train_acc_head.append(train[['id','acc_x','acc_y','acc_z']][train[['id','acc_x','acc_y','acc_z']]['id']==i].head(150))
train_acc_head.columns=['id','head_acc_x','head_acc_y','head_acc_z'] #총 600개니까 150개
train_acc_head=train_acc_head.groupby('id').mean() #첫 3초의 가속도 평균

test_acc_head=pd.DataFrame()
for i in test['id'].unique():
    test_acc_head=test_acc_head.append(test[['id','acc_x','acc_y','acc_z']][test[['id','acc_x','acc_y','acc_z']]['id']==i].head(150))
test_acc_head.columns=['id','head_acc_x','head_acc_y','head_acc_z']
test_acc_head=test_acc_head.groupby('id').mean()

train_preprocess=pd.concat([train_preprocess,train_acc_head,train_acc_head1],axis=1)
test_preprocess=pd.concat([test_preprocess,test_acc_head,test_acc_head1],axis=1)

# 자이로스코프 첫 3초
train_gy_head=pd.DataFrame()
for i in train['id'].unique():
    train_gy_head=train_gy_head.append(train[['id','gy_x','gy_y','gy_z']][train[['id','gy_x','gy_y','gy_z']]['id']==i].head(150))
train_gy_head.columns=['id','head_gy_x','head_gy_y','head_gy_z']
train_gy_head=train_gy_head.groupby('id').mean()

test_gy_head=pd.DataFrame()
for i in test['id'].unique():
    test_gy_head=test_gy_head.append(test[['id','gy_x','gy_y','gy_z']][test[['id','gy_x','gy_y','gy_z']]['id']==i].head(150))
test_gy_head.columns=['id','head_gy_x','head_gy_y','head_gy_z']
test_gy_head=test_gy_head.groupby('id').mean()

train_preprocess=pd.concat([train_preprocess,train_gy_head],axis=1)
test_preprocess=pd.concat([test_preprocess,test_gy_head],axis=1)

### Kmeans

In [26]:
model = KMeans(n_clusters=5,random_state=20)
model.fit(train_preprocess)

train_predict = model.predict(train_preprocess)
train_preprocess['cluster']=train_predict

test_predict = model.predict(test_preprocess)
test_preprocess['cluster']=test_predict

In [27]:
#중간점검
train_preprocess #133개

Unnamed: 0_level_0,acc_x_max,acc_x_min,acc_x_mean,acc_x_std,acc_x_median,acc_x_range_func,acc_x_iqr_func2,acc_x_iqr_func3,acc_x_iqr_func4,acc_x_preskew,...,head_acc_x,head_acc_y,head_acc_z,first_acc_x,first_acc_y,first_acc_z,head_gy_x,head_gy_y,head_gy_z,cluster
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.344268,0.591940,0.931329,0.191479,0.956149,0.752327,-0.369662,-0.172673,-0.544985,0.106076,...,0.930401,-0.204424,-0.357682,1.206087,-0.179371,-0.148447,-2.033206,-7.620945,-1.012112,0
1,1.234020,-2.156208,-0.766580,0.495528,-0.805767,3.390228,-0.892320,-0.259957,-1.126772,-0.202959,...,-0.313634,-0.567014,0.471395,-0.211795,-0.078760,0.854627,-26.182937,8.268198,10.465675,4
2,1.219836,-1.142847,0.039836,0.711972,0.140667,2.362683,-1.561197,-0.761086,-1.807555,-0.180743,...,0.005221,-0.113100,0.600223,0.241388,-0.437876,0.797198,-4.955283,-8.169446,2.694235,2
3,-0.622250,-1.417751,-0.887702,0.130899,-0.880343,0.795502,-0.227442,-0.070906,-0.332513,-0.606239,...,-0.912448,-0.256042,0.108072,-1.066225,-0.280265,0.003342,17.512190,2.476961,-0.595546,2
4,0.599720,-2.429109,-0.659018,0.495170,-0.941146,3.028829,-0.724182,-0.337496,-1.314271,0.491559,...,0.014436,-0.902046,0.173936,0.144852,-1.331613,-1.231836,-20.248809,32.095738,15.112848,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3120,0.390798,-1.624711,-0.300454,0.403175,-0.105704,2.015509,-0.734687,-0.024370,-1.151625,-0.922420,...,-0.055424,-0.760497,0.320807,0.095092,-0.827868,0.542625,-22.442501,-16.061831,13.936728,2
3121,-0.446650,-1.575455,-0.974298,0.169963,-0.980053,1.128804,-0.202045,-0.015255,-0.452615,-0.111515,...,-1.017820,-0.184831,-0.114710,-0.588936,-0.131407,-0.077380,42.917552,12.480624,-6.834904,0
3122,0.744666,-2.578974,-1.114246,0.683789,-1.057063,3.323641,-1.396306,-0.486834,-1.845096,-0.110431,...,-1.046002,-0.490153,0.235311,-1.327998,-0.385354,0.379149,11.134857,29.792051,-1.994560,3
3123,0.915846,-0.929133,-0.111333,0.432722,-0.178023,1.844979,-0.792673,-0.224964,-1.305562,0.475653,...,-0.331223,0.903302,-0.264495,-0.288226,0.643384,-0.554095,-19.214819,13.106182,-8.264080,4


# 전처리 끝

# catboost

# 8:2로 분리 후 정확도 계산

In [74]:
x_train, x_test, y_train, y_test = train_test_split(train_preprocess,train_labels['label'], test_size=0.2, random_state=42)

In [75]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(2500, 19)
(2500,)
(625, 19)
(625,)


In [76]:
cat_train=CatBoostClassifier(random_state=20,iterations=1000,depth=3)
cat_train.fit(x_train,y_train)

Learning rate set to 0.082912
0:	learn: 3.1134790	total: 30.4ms	remaining: 30.4s
1:	learn: 2.9567596	total: 55.7ms	remaining: 27.8s
2:	learn: 2.8198869	total: 77.2ms	remaining: 25.7s
3:	learn: 2.7443568	total: 99.1ms	remaining: 24.7s
4:	learn: 2.6489156	total: 120ms	remaining: 23.9s
5:	learn: 2.5872486	total: 142ms	remaining: 23.5s
6:	learn: 2.5019075	total: 162ms	remaining: 23s
7:	learn: 2.4500002	total: 182ms	remaining: 22.6s
8:	learn: 2.3929699	total: 204ms	remaining: 22.5s
9:	learn: 2.3350522	total: 225ms	remaining: 22.3s
10:	learn: 2.2919402	total: 245ms	remaining: 22s
11:	learn: 2.2581592	total: 265ms	remaining: 21.8s
12:	learn: 2.2231359	total: 285ms	remaining: 21.6s
13:	learn: 2.1951085	total: 305ms	remaining: 21.5s
14:	learn: 2.1696194	total: 326ms	remaining: 21.4s
15:	learn: 2.1411263	total: 347ms	remaining: 21.3s
16:	learn: 2.1145061	total: 367ms	remaining: 21.2s
17:	learn: 2.0976002	total: 388ms	remaining: 21.1s
18:	learn: 2.0780954	total: 408ms	remaining: 21.1s
19:	learn: 

164:	learn: 1.0414892	total: 3.42s	remaining: 17.3s
165:	learn: 1.0381613	total: 3.44s	remaining: 17.3s
166:	learn: 1.0363295	total: 3.46s	remaining: 17.3s
167:	learn: 1.0339028	total: 3.48s	remaining: 17.2s
168:	learn: 1.0306399	total: 3.5s	remaining: 17.2s
169:	learn: 1.0273609	total: 3.52s	remaining: 17.2s
170:	learn: 1.0226294	total: 3.54s	remaining: 17.1s
171:	learn: 1.0195473	total: 3.56s	remaining: 17.1s
172:	learn: 1.0159308	total: 3.58s	remaining: 17.1s
173:	learn: 1.0152026	total: 3.6s	remaining: 17.1s
174:	learn: 1.0120982	total: 3.62s	remaining: 17.1s
175:	learn: 1.0076817	total: 3.64s	remaining: 17s
176:	learn: 1.0042215	total: 3.66s	remaining: 17s
177:	learn: 1.0005145	total: 3.68s	remaining: 17s
178:	learn: 0.9981630	total: 3.69s	remaining: 16.9s
179:	learn: 0.9945196	total: 3.71s	remaining: 16.9s
180:	learn: 0.9926312	total: 3.73s	remaining: 16.9s
181:	learn: 0.9901862	total: 3.75s	remaining: 16.9s
182:	learn: 0.9864360	total: 3.77s	remaining: 16.8s
183:	learn: 0.984148

331:	learn: 0.7424819	total: 6.82s	remaining: 13.7s
332:	learn: 0.7401938	total: 6.84s	remaining: 13.7s
333:	learn: 0.7390679	total: 6.86s	remaining: 13.7s
334:	learn: 0.7381710	total: 6.88s	remaining: 13.7s
335:	learn: 0.7369451	total: 6.9s	remaining: 13.6s
336:	learn: 0.7354401	total: 6.92s	remaining: 13.6s
337:	learn: 0.7335925	total: 6.94s	remaining: 13.6s
338:	learn: 0.7330437	total: 6.96s	remaining: 13.6s
339:	learn: 0.7319096	total: 6.98s	remaining: 13.6s
340:	learn: 0.7309534	total: 7s	remaining: 13.5s
341:	learn: 0.7298374	total: 7.02s	remaining: 13.5s
342:	learn: 0.7294863	total: 7.04s	remaining: 13.5s
343:	learn: 0.7291754	total: 7.06s	remaining: 13.5s
344:	learn: 0.7283777	total: 7.08s	remaining: 13.4s
345:	learn: 0.7281389	total: 7.1s	remaining: 13.4s
346:	learn: 0.7267646	total: 7.12s	remaining: 13.4s
347:	learn: 0.7263948	total: 7.14s	remaining: 13.4s
348:	learn: 0.7250043	total: 7.16s	remaining: 13.4s
349:	learn: 0.7241642	total: 7.18s	remaining: 13.3s
350:	learn: 0.723

493:	learn: 0.5949723	total: 10.2s	remaining: 10.4s
494:	learn: 0.5937971	total: 10.2s	remaining: 10.4s
495:	learn: 0.5928478	total: 10.2s	remaining: 10.4s
496:	learn: 0.5925521	total: 10.2s	remaining: 10.4s
497:	learn: 0.5917455	total: 10.3s	remaining: 10.3s
498:	learn: 0.5912125	total: 10.3s	remaining: 10.3s
499:	learn: 0.5909911	total: 10.3s	remaining: 10.3s
500:	learn: 0.5904829	total: 10.3s	remaining: 10.3s
501:	learn: 0.5892639	total: 10.3s	remaining: 10.3s
502:	learn: 0.5881061	total: 10.4s	remaining: 10.2s
503:	learn: 0.5866392	total: 10.4s	remaining: 10.2s
504:	learn: 0.5853983	total: 10.4s	remaining: 10.2s
505:	learn: 0.5847602	total: 10.4s	remaining: 10.2s
506:	learn: 0.5844270	total: 10.4s	remaining: 10.2s
507:	learn: 0.5833233	total: 10.5s	remaining: 10.1s
508:	learn: 0.5829495	total: 10.5s	remaining: 10.1s
509:	learn: 0.5823941	total: 10.5s	remaining: 10.1s
510:	learn: 0.5809740	total: 10.5s	remaining: 10.1s
511:	learn: 0.5803501	total: 10.5s	remaining: 10s
512:	learn: 0.

652:	learn: 0.4904282	total: 13.4s	remaining: 7.11s
653:	learn: 0.4899500	total: 13.4s	remaining: 7.09s
654:	learn: 0.4894971	total: 13.4s	remaining: 7.07s
655:	learn: 0.4888475	total: 13.4s	remaining: 7.05s
656:	learn: 0.4881832	total: 13.5s	remaining: 7.03s
657:	learn: 0.4876385	total: 13.5s	remaining: 7.01s
658:	learn: 0.4866960	total: 13.5s	remaining: 6.99s
659:	learn: 0.4863159	total: 13.5s	remaining: 6.97s
660:	learn: 0.4859751	total: 13.5s	remaining: 6.95s
661:	learn: 0.4853871	total: 13.6s	remaining: 6.93s
662:	learn: 0.4848387	total: 13.6s	remaining: 6.91s
663:	learn: 0.4845073	total: 13.6s	remaining: 6.89s
664:	learn: 0.4842461	total: 13.6s	remaining: 6.87s
665:	learn: 0.4837240	total: 13.7s	remaining: 6.85s
666:	learn: 0.4834431	total: 13.7s	remaining: 6.83s
667:	learn: 0.4829591	total: 13.7s	remaining: 6.8s
668:	learn: 0.4824977	total: 13.7s	remaining: 6.78s
669:	learn: 0.4820397	total: 13.7s	remaining: 6.76s
670:	learn: 0.4815479	total: 13.8s	remaining: 6.74s
671:	learn: 0

812:	learn: 0.4198828	total: 16.6s	remaining: 3.82s
813:	learn: 0.4195561	total: 16.6s	remaining: 3.8s
814:	learn: 0.4192185	total: 16.6s	remaining: 3.78s
815:	learn: 0.4187810	total: 16.7s	remaining: 3.76s
816:	learn: 0.4185286	total: 16.7s	remaining: 3.74s
817:	learn: 0.4183012	total: 16.7s	remaining: 3.72s
818:	learn: 0.4175065	total: 16.7s	remaining: 3.69s
819:	learn: 0.4173145	total: 16.7s	remaining: 3.67s
820:	learn: 0.4170004	total: 16.8s	remaining: 3.65s
821:	learn: 0.4167616	total: 16.8s	remaining: 3.63s
822:	learn: 0.4161116	total: 16.8s	remaining: 3.61s
823:	learn: 0.4153857	total: 16.8s	remaining: 3.59s
824:	learn: 0.4152293	total: 16.8s	remaining: 3.57s
825:	learn: 0.4143871	total: 16.9s	remaining: 3.55s
826:	learn: 0.4140639	total: 16.9s	remaining: 3.53s
827:	learn: 0.4134430	total: 16.9s	remaining: 3.51s
828:	learn: 0.4128814	total: 16.9s	remaining: 3.49s
829:	learn: 0.4124747	total: 16.9s	remaining: 3.47s
830:	learn: 0.4121655	total: 17s	remaining: 3.45s
831:	learn: 0.4

976:	learn: 0.3629271	total: 20s	remaining: 471ms
977:	learn: 0.3623681	total: 20s	remaining: 451ms
978:	learn: 0.3619838	total: 20.1s	remaining: 430ms
979:	learn: 0.3618878	total: 20.1s	remaining: 410ms
980:	learn: 0.3614262	total: 20.1s	remaining: 389ms
981:	learn: 0.3612155	total: 20.1s	remaining: 369ms
982:	learn: 0.3608689	total: 20.1s	remaining: 348ms
983:	learn: 0.3606020	total: 20.2s	remaining: 328ms
984:	learn: 0.3603689	total: 20.2s	remaining: 307ms
985:	learn: 0.3602621	total: 20.2s	remaining: 287ms
986:	learn: 0.3596203	total: 20.2s	remaining: 266ms
987:	learn: 0.3590268	total: 20.2s	remaining: 246ms
988:	learn: 0.3587970	total: 20.3s	remaining: 225ms
989:	learn: 0.3585362	total: 20.3s	remaining: 205ms
990:	learn: 0.3581404	total: 20.3s	remaining: 184ms
991:	learn: 0.3576939	total: 20.3s	remaining: 164ms
992:	learn: 0.3575526	total: 20.3s	remaining: 143ms
993:	learn: 0.3573895	total: 20.4s	remaining: 123ms
994:	learn: 0.3569874	total: 20.4s	remaining: 102ms
995:	learn: 0.35

<catboost.core.CatBoostClassifier at 0x29b01805580>

In [77]:
mypredictions = cat_train.predict(x_test)
print('정확도 : ',accuracy_score(y_test, mypredictions))

정확도 :  0.7264


In [28]:
cat=CatBoostClassifier(random_state=20,iterations=1000,depth=3)
cat.fit(train_preprocess, train_labels['label'])

predict=cat.predict_proba(test_preprocess) # -> 얼마나 정확한지 확인 (예측확률)

Learning rate set to 0.083861
0:	learn: 2.6699890	total: 118ms	remaining: 1m 57s
1:	learn: 2.5962770	total: 219ms	remaining: 1m 49s
2:	learn: 2.5161895	total: 319ms	remaining: 1m 46s
3:	learn: 2.4520120	total: 420ms	remaining: 1m 44s
4:	learn: 2.3830879	total: 519ms	remaining: 1m 43s
5:	learn: 2.3116390	total: 610ms	remaining: 1m 41s
6:	learn: 2.2644013	total: 715ms	remaining: 1m 41s
7:	learn: 2.2135164	total: 827ms	remaining: 1m 42s
8:	learn: 2.1679731	total: 930ms	remaining: 1m 42s
9:	learn: 2.1436360	total: 1.03s	remaining: 1m 41s
10:	learn: 2.0894649	total: 1.12s	remaining: 1m 41s
11:	learn: 2.0490791	total: 1.23s	remaining: 1m 40s
12:	learn: 2.0032984	total: 1.32s	remaining: 1m 40s
13:	learn: 1.9746964	total: 1.43s	remaining: 1m 40s
14:	learn: 1.9541204	total: 1.53s	remaining: 1m 40s
15:	learn: 1.9319820	total: 1.63s	remaining: 1m 40s
16:	learn: 1.9032721	total: 1.73s	remaining: 1m 40s
17:	learn: 1.8781577	total: 1.83s	remaining: 1m 40s
18:	learn: 1.8544693	total: 1.93s	remaining:

158:	learn: 0.7504869	total: 16.8s	remaining: 1m 28s
159:	learn: 0.7474507	total: 16.9s	remaining: 1m 28s
160:	learn: 0.7441938	total: 17s	remaining: 1m 28s
161:	learn: 0.7404374	total: 17.1s	remaining: 1m 28s
162:	learn: 0.7387858	total: 17.2s	remaining: 1m 28s
163:	learn: 0.7356166	total: 17.3s	remaining: 1m 28s
164:	learn: 0.7346347	total: 17.4s	remaining: 1m 28s
165:	learn: 0.7317730	total: 17.5s	remaining: 1m 28s
166:	learn: 0.7287589	total: 17.6s	remaining: 1m 27s
167:	learn: 0.7254486	total: 17.7s	remaining: 1m 27s
168:	learn: 0.7221518	total: 17.8s	remaining: 1m 27s
169:	learn: 0.7207046	total: 17.9s	remaining: 1m 27s
170:	learn: 0.7182189	total: 18s	remaining: 1m 27s
171:	learn: 0.7157592	total: 18.1s	remaining: 1m 27s
172:	learn: 0.7121327	total: 18.2s	remaining: 1m 27s
173:	learn: 0.7111663	total: 18.3s	remaining: 1m 27s
174:	learn: 0.7097773	total: 18.4s	remaining: 1m 26s
175:	learn: 0.7078193	total: 18.5s	remaining: 1m 26s
176:	learn: 0.7063085	total: 18.6s	remaining: 1m 2

314:	learn: 0.4886334	total: 33.5s	remaining: 1m 12s
315:	learn: 0.4870540	total: 33.6s	remaining: 1m 12s
316:	learn: 0.4865083	total: 33.7s	remaining: 1m 12s
317:	learn: 0.4854397	total: 33.8s	remaining: 1m 12s
318:	learn: 0.4844657	total: 33.9s	remaining: 1m 12s
319:	learn: 0.4840179	total: 34s	remaining: 1m 12s
320:	learn: 0.4834295	total: 34.1s	remaining: 1m 12s
321:	learn: 0.4816546	total: 34.2s	remaining: 1m 12s
322:	learn: 0.4801972	total: 34.3s	remaining: 1m 11s
323:	learn: 0.4788899	total: 34.5s	remaining: 1m 11s
324:	learn: 0.4775054	total: 34.6s	remaining: 1m 11s
325:	learn: 0.4763773	total: 34.7s	remaining: 1m 11s
326:	learn: 0.4756084	total: 34.8s	remaining: 1m 11s
327:	learn: 0.4749262	total: 34.9s	remaining: 1m 11s
328:	learn: 0.4743620	total: 35s	remaining: 1m 11s
329:	learn: 0.4732582	total: 35.1s	remaining: 1m 11s
330:	learn: 0.4716605	total: 35.3s	remaining: 1m 11s
331:	learn: 0.4708628	total: 35.4s	remaining: 1m 11s
332:	learn: 0.4703700	total: 35.5s	remaining: 1m 1

475:	learn: 0.3644534	total: 50.9s	remaining: 56s
476:	learn: 0.3636152	total: 51s	remaining: 55.9s
477:	learn: 0.3633984	total: 51.1s	remaining: 55.8s
478:	learn: 0.3624307	total: 51.2s	remaining: 55.7s
479:	learn: 0.3615979	total: 51.3s	remaining: 55.6s
480:	learn: 0.3610340	total: 51.4s	remaining: 55.5s
481:	learn: 0.3604811	total: 51.5s	remaining: 55.4s
482:	learn: 0.3599943	total: 51.6s	remaining: 55.2s
483:	learn: 0.3594970	total: 51.7s	remaining: 55.1s
484:	learn: 0.3591019	total: 51.8s	remaining: 55s
485:	learn: 0.3587022	total: 51.9s	remaining: 54.9s
486:	learn: 0.3582385	total: 52s	remaining: 54.8s
487:	learn: 0.3575328	total: 52.2s	remaining: 54.7s
488:	learn: 0.3572399	total: 52.3s	remaining: 54.6s
489:	learn: 0.3568666	total: 52.4s	remaining: 54.5s
490:	learn: 0.3561118	total: 52.5s	remaining: 54.4s
491:	learn: 0.3557858	total: 52.6s	remaining: 54.3s
492:	learn: 0.3555712	total: 52.7s	remaining: 54.2s
493:	learn: 0.3552282	total: 52.8s	remaining: 54.1s
494:	learn: 0.354747

637:	learn: 0.2856168	total: 1m 8s	remaining: 38.6s
638:	learn: 0.2853541	total: 1m 8s	remaining: 38.5s
639:	learn: 0.2850279	total: 1m 8s	remaining: 38.4s
640:	learn: 0.2846390	total: 1m 8s	remaining: 38.3s
641:	learn: 0.2843148	total: 1m 8s	remaining: 38.2s
642:	learn: 0.2839383	total: 1m 8s	remaining: 38s
643:	learn: 0.2833145	total: 1m 8s	remaining: 37.9s
644:	learn: 0.2830765	total: 1m 8s	remaining: 37.8s
645:	learn: 0.2828796	total: 1m 8s	remaining: 37.7s
646:	learn: 0.2821457	total: 1m 8s	remaining: 37.6s
647:	learn: 0.2817651	total: 1m 9s	remaining: 37.5s
648:	learn: 0.2812898	total: 1m 9s	remaining: 37.4s
649:	learn: 0.2809987	total: 1m 9s	remaining: 37.3s
650:	learn: 0.2805971	total: 1m 9s	remaining: 37.2s
651:	learn: 0.2801922	total: 1m 9s	remaining: 37.1s
652:	learn: 0.2799922	total: 1m 9s	remaining: 37s
653:	learn: 0.2796780	total: 1m 9s	remaining: 36.9s
654:	learn: 0.2795276	total: 1m 9s	remaining: 36.7s
655:	learn: 0.2792574	total: 1m 9s	remaining: 36.6s
656:	learn: 0.27

794:	learn: 0.2320454	total: 1m 24s	remaining: 21.8s
795:	learn: 0.2314499	total: 1m 24s	remaining: 21.6s
796:	learn: 0.2314212	total: 1m 24s	remaining: 21.5s
797:	learn: 0.2311423	total: 1m 24s	remaining: 21.4s
798:	learn: 0.2307091	total: 1m 24s	remaining: 21.3s
799:	learn: 0.2306788	total: 1m 24s	remaining: 21.2s
800:	learn: 0.2304388	total: 1m 24s	remaining: 21.1s
801:	learn: 0.2302414	total: 1m 25s	remaining: 21s
802:	learn: 0.2301003	total: 1m 25s	remaining: 20.9s
803:	learn: 0.2298460	total: 1m 25s	remaining: 20.8s
804:	learn: 0.2292106	total: 1m 25s	remaining: 20.7s
805:	learn: 0.2288693	total: 1m 25s	remaining: 20.6s
806:	learn: 0.2283423	total: 1m 25s	remaining: 20.5s
807:	learn: 0.2278733	total: 1m 25s	remaining: 20.4s
808:	learn: 0.2274171	total: 1m 25s	remaining: 20.3s
809:	learn: 0.2271120	total: 1m 26s	remaining: 20.2s
810:	learn: 0.2269265	total: 1m 26s	remaining: 20.1s
811:	learn: 0.2266927	total: 1m 26s	remaining: 20s
812:	learn: 0.2264243	total: 1m 26s	remaining: 19.

950:	learn: 0.1980836	total: 1m 42s	remaining: 5.26s
951:	learn: 0.1978271	total: 1m 42s	remaining: 5.15s
952:	learn: 0.1976439	total: 1m 42s	remaining: 5.05s
953:	learn: 0.1973590	total: 1m 42s	remaining: 4.94s
954:	learn: 0.1970514	total: 1m 42s	remaining: 4.83s
955:	learn: 0.1968175	total: 1m 42s	remaining: 4.72s
956:	learn: 0.1967101	total: 1m 42s	remaining: 4.62s
957:	learn: 0.1963681	total: 1m 42s	remaining: 4.51s
958:	learn: 0.1962860	total: 1m 42s	remaining: 4.4s
959:	learn: 0.1960895	total: 1m 43s	remaining: 4.29s
960:	learn: 0.1959394	total: 1m 43s	remaining: 4.19s
961:	learn: 0.1955679	total: 1m 43s	remaining: 4.08s
962:	learn: 0.1949940	total: 1m 43s	remaining: 3.97s
963:	learn: 0.1947302	total: 1m 43s	remaining: 3.87s
964:	learn: 0.1945540	total: 1m 43s	remaining: 3.76s
965:	learn: 0.1943701	total: 1m 43s	remaining: 3.65s
966:	learn: 0.1942253	total: 1m 43s	remaining: 3.54s
967:	learn: 0.1939743	total: 1m 43s	remaining: 3.44s
968:	learn: 0.1935365	total: 1m 44s	remaining: 

In [29]:
sub.iloc[:,1:]=predict
sub.to_csv('[final]Not_StratifiedKfold.csv', index=False)
sub

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,...,51,52,53,54,55,56,57,58,59,60
0,3125,0.000075,0.000217,0.000023,0.000407,0.000262,0.000036,0.000752,0.000026,0.000063,...,0.002414,0.000337,0.000067,0.000040,1.429929e-05,0.000025,0.000394,0.075409,0.000002,0.000740
1,3126,0.001500,0.000164,0.000005,0.000405,0.000410,0.001090,0.000007,0.000678,0.000038,...,0.000136,0.000004,0.000093,0.000130,1.206590e-05,0.000239,0.000618,0.000023,0.000073,0.000196
2,3127,0.009709,0.190421,0.000101,0.007923,0.000831,0.001783,0.078420,0.005845,0.003536,...,0.000443,0.001569,0.000082,0.005364,1.529424e-04,0.000811,0.000169,0.003478,0.002820,0.054309
3,3128,0.004886,0.000109,0.000146,0.000600,0.000025,0.000200,0.000018,0.000235,0.000123,...,0.000139,0.000033,0.000040,0.000300,3.335238e-05,0.000024,0.000735,0.000131,0.000007,0.006686
4,3129,0.001409,0.000048,0.000002,0.000097,0.000067,0.000149,0.000001,0.000172,0.000031,...,0.000015,0.000001,0.000002,0.000004,1.633889e-06,0.000007,0.000591,0.000007,0.000014,0.000041
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777,3902,0.019279,0.000448,0.000006,0.000792,0.143084,0.002006,0.000022,0.000610,0.000424,...,0.000194,0.000047,0.000047,0.000025,5.964438e-06,0.000038,0.000276,0.000239,0.000622,0.001382
778,3903,0.001459,0.000078,0.000003,0.000154,0.000329,0.001477,0.000004,0.000061,0.000095,...,0.000058,0.000004,0.000014,0.000014,7.718810e-06,0.000035,0.000160,0.000011,0.000035,0.000061
779,3904,0.000395,0.000006,0.000009,0.000028,0.000008,0.000040,0.000002,0.000037,0.000091,...,0.000028,0.000002,0.000007,0.000006,7.850584e-06,0.000003,0.000151,0.000016,0.000001,0.001072
780,3905,0.000070,0.003093,0.000063,0.000043,0.000002,0.000030,0.016205,0.000452,0.000007,...,0.000089,0.000012,0.000096,0.001025,1.778669e-04,0.000521,0.000161,0.000160,0.000017,0.000008


# randomforest

# 8:2로 분리 후 정확도 계산

In [78]:
x_train, x_test, y_train, y_test = train_test_split(train_preprocess,train_labels['label'], test_size=0.2, random_state=42)
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(2500, 19)
(2500,)
(625, 19)
(625,)


In [79]:
ran_train=RandomForestClassifier(n_estimators=100, max_depth=20,random_state=0)
ran_train.fit(x_train,y_train)

RandomForestClassifier(max_depth=20, random_state=0)

In [80]:
predict_ran = ran_train.predict(x_test)
print('정확도 : ',accuracy_score(y_test,predict_ran))

정확도 :  0.7088
