In [2]:
import cv2
import numpy as np
import pandas as pd
import torch 

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler

# Shared min-max scaler instance; it is fitted on the flattened training
# features in the classifier cell further down.
scaler = MinMaxScaler()

# Load Data

In [3]:
# Read the training features, the training labels and the test features
# from CSV files in the working directory.
X_train = pd.read_csv("X_train.csv")
y_train = pd.read_csv("y_train.csv")
X_test = pd.read_csv("X_test.csv")

# Preview the first rows of the test features.
X_test.head()

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
0,0_0,0,0,-0.025773,-0.98864,-0.14801,0.00335,-0.006524,-0.001071,-0.02739,0.10043,4.2061,-5.5439
1,0_1,0,1,-0.025683,-0.98862,-0.14816,0.003439,-0.11396,0.083987,-0.06059,-0.70889,3.9905,-8.0273
2,0_2,0,2,-0.025617,-0.98861,-0.14826,0.003571,-0.080518,0.11486,-0.037177,1.4571,2.2828,-11.299
3,0_3,0,3,-0.025566,-0.98862,-0.14817,0.003609,0.070067,0.03382,-0.035904,0.71096,1.8582,-12.227
4,0_4,0,4,-0.025548,-0.98866,-0.14792,0.003477,0.15205,-0.029016,-0.015314,3.3996,2.7881,-10.41


# Convert Data

In [4]:
def to_np(csv_file):
    """Flatten a long-format measurement table into one row per series.

    The first three columns of ``csv_file`` are dropped and the remaining
    columns of every block of consecutive rows belonging to one series are
    concatenated into a single feature vector.

    Parameters
    ----------
    csv_file : pandas.DataFrame
        Table with a ``measurement_number`` column. Rows are assumed to be
        ordered so that each series occupies a contiguous run of
        ``measurement_number.max() + 1`` rows -- verify against the data.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_rows // series_len, n_features * series_len)``.

    Raises
    ------
    ValueError
        If the number of rows is not a multiple of the series length.
    """
    values = np.array(csv_file)[:, 3:]
    n_rows, n_features = values.shape
    # Take the series length from the frame being converted, not from the
    # global X_test, so the function works for any input on its own.
    div = csv_file.measurement_number.max() + 1
    if n_rows % div:
        raise ValueError(
            "row count %d is not a multiple of the series length %d" % (n_rows, div)
        )
    return values.reshape((n_rows // div, n_features * div))

# Flatten both feature tables to one row per series.
train_np = to_np(X_train)
test_np = to_np(X_test)

# Keep everything from the third column of y_train onward as the label array.
y_train_np = np.array(y_train)[:, 2:]

# Replace every distinct label with its position in np.unique order (in place)
# and record the (label, code) pairs so predictions can be decoded later.
surfaces = np.unique(y_train_np)
for code, surface in enumerate(surfaces):
    y_train_np[y_train_np == surface] = code
enc_map = np.array([(surface, code) for code, surface in enumerate(surfaces)])
enc_map

array([['carpet', '0'],
       ['concrete', '1'],
       ['fine_concrete', '2'],
       ['hard_tiles', '3'],
       ['hard_tiles_large_space', '4'],
       ['soft_pvc', '5'],
       ['soft_tiles', '6'],
       ['tiled', '7'],
       ['wood', '8']], dtype='<U22')

In [5]:
# Sanity check: shapes of the flattened test and train matrices, and of the
# raw y_train frame (note: not the encoded y_train_np array).
test_np.shape, train_np.shape, y_train.shape

((3816, 1280), (3810, 1280), (3810, 3))

# Classifier

In [6]:
# Gradient-boosted classifier with 20 boosting stages of depth-2 trees.
clf = GradientBoostingClassifier(max_depth=2, n_estimators=20)

# Fit the shared min-max scaler on the flattened training features and
# produce the scaled training matrix.
train_np_n = scaler.fit(train_np).transform(train_np)



In [7]:
# Hold out 20% of the scaled training series for validation (fixed seed),
# then fit the classifier on the remaining 80% with integer class codes.
x_t, x_v, y_t, y_v = train_test_split(
    train_np_n,
    y_train_np,
    test_size=0.20,
    random_state=42,
)
clf.fit(x_t, y_t.ravel().astype('int'))

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=2,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=20,
              n_iter_no_change=None, presort='auto', random_state=None,
              subsample=1.0, tol=0.0001, validation_fraction=0.1,
              verbose=0, warm_start=False)

# Accuracy

In [19]:
# Predict the held-out validation split and report the fraction of series
# whose predicted class code matches the true one.
y_p = clf.predict(x_v)
acc = y_v[:, 0] == y_p
acc.mean()

0.5721784776902887

# Create CSV

In [21]:
# Predict on the actual test set. The original cell wrote out y_p, which holds
# predictions for the validation split, so the submission never covered the
# test series. The test features are scaled with the scaler already fitted on
# the training features so they match what the classifier was trained on.
test_np_n = scaler.transform(test_np)
test_pred = clf.predict(test_np_n)

# Decode integer class codes back to surface names by indexing the name column
# of enc_map. Indexing keeps enc_map's full string width; building a string
# array from the integer predictions and assigning names into it would
# truncate names longer than the integer-derived string width.
labels_out = enc_map[:, 0][test_pred]

# One row per test series, numbered 0..n-1 in the order of test_np's rows.
num_idx = np.arange(labels_out.shape[0])
df = pd.DataFrame(num_idx, columns=["series_id"])
df["surface"] = labels_out
df.to_csv("submission4.csv", index=False)