In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
# Dataset locations inside the Kaggle input mount. The two directory prefixes
# end with '/' because file names are appended to them by plain concatenation.
train_patch = '/kaggle/input/transformer/data_train/data_train/'
test_patch = '/kaggle/input/transformer/data_test/data_test/'

# Training index table; later cells read its 'id' and 'category' columns.
df_train = pd.read_csv('/kaggle/input/transformer/train.csv')

# Bare file names (no directory part) of every file found anywhere under the
# test directory tree, in the order os.walk reports them.
files_test = [
    name
    for _root, _dirs, names in os.walk('/kaggle/input/transformer/data_test')
    for name in names
]

In [3]:
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, GridSearchCV
import catboost as cb

In [4]:
def _window_features(frame, windows=(100, 200, 300)):
    """Flatten one time-series table into a single feature vector.

    The vector is the last row, followed - for each window size ``w`` in
    ``windows``, in order - by: the row ``w`` positions from the end, the
    difference between the last row and that row, the per-column mean of the
    final ``w`` rows, and the per-column standard deviation of the final
    ``w`` rows.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table read from one data file.
    windows : iterable of int
        Look-back sizes, in rows.

    Returns
    -------
    numpy.ndarray
        1-D array produced by ``np.hstack`` over all pieces.

    Raises
    ------
    IndexError
        If ``frame`` has fewer rows than one of the window sizes, because
        ``frame.iloc[-w]`` is then out of bounds.
    """
    last = frame.iloc[-1].values
    pieces = [last]
    for w in windows:
        lagged = frame.iloc[-w].values
        tail = frame.iloc[-w:]
        pieces.extend([lagged, last - lagged, tail.mean(), tail.std()])
    return np.hstack(pieces)


# One feature vector per training file. Iterating the 'id' column directly
# avoids iterrows(), which builds a throwaway Series for every row.
X = np.array([
    _window_features(pd.read_csv(train_patch + file_id))
    for file_id in df_train['id']
])

# Labels in the same row order as X. tolist() hands np.array plain Python
# scalars, so the resulting dtype is inferred the same way as from a list
# that was appended to element by element.
y = np.array(df_train['category'].tolist())

In [5]:
# Keep 80% of the labelled samples for fitting; the remainder (x_test, y_test)
# is the held-out validation split, not the unlabeled competition test set.
# The fixed random_state makes the shuffle repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=6,
)

In [6]:
# Hyper-parameters forwarded verbatim to the CatBoost classifier.
tree_params = dict(
    iterations=10000,
    depth=8,
    learning_rate=0.1,
    eval_metric='Accuracy',
    loss_function='MultiClass',
    random_state=6,
    l2_leaf_reg=10,
    border_count=100,
    task_type='GPU',
    # NOTE(review): per the CatBoost docs ':' separates device ids, so this
    # presumably requests GPUs 0 and 1 - confirm the runtime exposes both.
    devices='0:1',
)

model = cb.CatBoostClassifier(**tree_params)

In [7]:
# Fit on the training split and score every iteration on the held-out split.
# use_best_model=True asks CatBoost to keep the best-scoring iteration on
# eval_set; verbose_eval=1000 prints a progress line every 1000 iterations.
model.fit(
    x_train,
    y_train,
    eval_set=(x_test, y_test),
    verbose_eval=1000,
    use_best_model=True,
    plot=False,
)

0:	learn: 0.9369048	test: 0.9119048	best: 0.9119048 (0)	total: 24.9ms	remaining: 4m 9s
1000:	learn: 1.0000000	test: 0.9571429	best: 0.9595238 (323)	total: 13.9s	remaining: 2m 5s
2000:	learn: 1.0000000	test: 0.9619048	best: 0.9619048 (1822)	total: 28.2s	remaining: 1m 52s
3000:	learn: 1.0000000	test: 0.9619048	best: 0.9642857 (2348)	total: 41.5s	remaining: 1m 36s
4000:	learn: 1.0000000	test: 0.9619048	best: 0.9642857 (2348)	total: 55.4s	remaining: 1m 23s
5000:	learn: 1.0000000	test: 0.9642857	best: 0.9642857 (2348)	total: 1m 8s	remaining: 1m 8s
6000:	learn: 1.0000000	test: 0.9642857	best: 0.9642857 (2348)	total: 1m 22s	remaining: 55s
7000:	learn: 1.0000000	test: 0.9642857	best: 0.9642857 (2348)	total: 1m 36s	remaining: 41.4s
8000:	learn: 1.0000000	test: 0.9642857	best: 0.9642857 (2348)	total: 1m 49s	remaining: 27.4s
9000:	learn: 1.0000000	test: 0.9642857	best: 0.9642857 (2348)	total: 2m 3s	remaining: 13.7s
9999:	learn: 1.0000000	test: 0.9642857	best: 0.9642857 (2348)	total: 2m 16s	remain

<catboost.core.CatBoostClassifier at 0x7fc753ac4310>

In [8]:
# Build the test feature matrix. The piece order (last row, then for each
# look-back: lagged row, delta to last row, window mean, window std) must
# match the layout used to build X for training.
test_feature_rows = []

for test_name in files_test:
    frame = pd.read_csv(test_patch + test_name)
    newest = frame.iloc[-1].values
    pieces = [newest]
    for lookback in (100, 200, 300):
        anchor = frame.iloc[-lookback].values
        recent = frame.iloc[-lookback:]
        pieces += [anchor, newest - anchor, recent.mean(), recent.std()]
    test_feature_rows.append(np.hstack(pieces))

X_test = np.array(test_feature_rows)

In [9]:
# Run the fitted classifier on the test feature matrix.
# NOTE(review): the next cell indexes pred.T[0], so the result is presumably
# shaped (n_samples, 1) - confirm against the CatBoost predict() docs.
pred = model.predict(X_test)

In [10]:
# Reduce the predictions to one label per file: take the first column when the
# array is 2-D (same values as pred.T[0]) and use it as-is when already 1-D.
pred_labels = np.asarray(pred)
if pred_labels.ndim > 1:
    pred_labels = pred_labels[:, 0]

# Build the submission table column-by-column so each column keeps its own
# dtype; stacking file names and labels into a single np.array would coerce
# both columns to one common string dtype.
df = pd.DataFrame({'id': files_test, 'category': pred_labels})

In [11]:
# Write the submission table to 'task-1.csv'; index=False leaves the
# DataFrame's row index out of the file.
df.to_csv('task-1.csv', index=False)

In [12]:
# Persist the trained model to the file 'model1'. No format argument is
# passed, so CatBoost's default model format applies.
model.save_model('model1')