<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#Load" data-toc-modified-id="Load-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load</a></span></li><li><span><a href="#LGBM-training" data-toc-modified-id="LGBM-training-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>LGBM training</a></span><ul class="toc-item"><li><span><a href="#Prepare" data-toc-modified-id="Prepare-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Prepare</a></span></li><li><span><a href="#Core" data-toc-modified-id="Core-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Core</a></span></li></ul></li></ul></div>

# Load

In [1]:
import taiko as tk
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from tqdm import tqdm
import lightgbm as lgb
from sklearn import metrics

SONG = 4
WHO = 7
TRAIN_ORDER = 3

Using TensorFlow backend.


In [2]:
pfs = []
for drummer in tqdm(range(1, 8 + 1)):
    ppf = []
    for order_id in range(1, 4 + 1):
        if SONG == 4 and drummer == 4 and (order_id in [3, 4]):
            ppf.append(None)
            continue
        ppf.append(tk.get_event_primitive_df(drummer, SONG, order_id, True, True))
    pfs.append(ppf)

100%|██████████| 8/8 [12:34<00:00, 94.30s/it] 


In [3]:
def soso(who_id, song_id):
    sfs = []
    for pf_order in range(1, 4 + 1):
        score = tk.get_score(who_id, song_id, pf_order)
        sfs.append(score)
    return sfs

In [4]:
sfs = []
for drummer in range(1, 8 + 1):
    sfs.append(soso(drummer, SONG))

In [5]:
sfs[0][0]

124010

In [6]:
pfs[0][2]

Unnamed: 0,hit_type,R_AAI,R_AVI,R_ASMA,R_GAI,R_GVI,R_GSMA,R_BAI,R_BVI,R_BSMA,...,L_AXYCORR,L_AYZCORR,L_AZXCORR,L_GXYCORR,L_GYZCORR,L_GZXCORR,L1,L2,R1,R2
0,1,0.728638,1.540865e-04,0.826800,15.285745,2.549990,24.856900,1014.670000,9.987714e+05,1014.670000,...,-0.010798,-0.988322,-0.141700,0.760434,0.677535,0.992859,0,0,0,0
1,0,0.770040,3.353734e-03,0.896583,42.058841,188.893117,60.372583,1014.670000,9.461614e+05,1014.670000,...,0.983908,0.999215,0.976060,-0.418112,0.999387,-0.449670,1,0,0,0
2,0,0.796619,3.857359e-03,0.841833,33.893244,212.299322,48.772028,1014.658333,9.621125e+05,1014.658333,...,0.281740,0.401244,0.969543,-0.496230,0.216726,0.599738,0,1,0,0
3,0,0.771062,4.460888e-04,0.807033,24.560503,3.272111,38.329200,1014.664000,9.803082e+05,1014.664000,...,0.735940,0.834373,0.701485,-0.651854,0.173905,-0.857323,0,0,0,0
4,0,0.771295,1.343601e-04,0.862750,7.531584,6.394742,12.182500,1014.665000,1.014324e+06,1014.665000,...,0.918435,0.713082,0.698527,0.605521,0.567827,0.784297,0,0,0,0
5,0,0.725052,2.993720e-05,0.803200,4.734833,0.253426,7.738500,1014.664000,1.019957e+06,1014.664000,...,0.970513,-0.884112,-0.970679,0.985872,0.424007,0.569715,0,0,0,0
6,0,0.735987,1.371483e-05,0.821833,3.992701,0.513555,6.322250,1014.665000,1.021459e+06,1014.665000,...,-0.464837,0.725089,0.272686,0.812544,0.997365,0.768118,0,0,0,0
7,0,0.743718,1.945279e-05,0.837306,4.287539,0.152374,6.969722,1014.685278,1.020904e+06,1014.685278,...,0.791269,0.972555,0.627279,-0.923328,0.964625,-0.789431,0,0,0,0
8,0,0.752521,1.414127e-05,0.852400,2.829988,0.401478,3.603600,1014.692000,1.023865e+06,1014.692000,...,-0.173702,-0.795560,0.734855,-0.770940,0.997808,-0.811400,0,0,0,0
9,0,0.743947,8.974587e-05,0.828167,7.017746,2.309184,10.340000,1014.665833,1.015357e+06,1014.665833,...,-0.580275,0.892331,-0.159920,-0.361962,0.144923,0.175908,0,0,0,0


# LGBM training

## Prepare

In [7]:
def prepare(test_who):
    train_dfs = []
    for drummer in range(1, 8 + 1):
        if test_who == drummer:
            continue
        train_dfs.append(pfs[drummer - 1][TRAIN_ORDER - 1])
    train_df = pd.DataFrame(pd.concat(train_dfs, ignore_index=True))
    
    x = train_df.drop(['hit_type'], axis=1)
    y = train_df['hit_type']
    
    valid_df = train_df.copy()
    x_valid = valid_df.drop(['hit_type'], axis=1)
    y_valid = valid_df['hit_type']
    
    return x, y, x_valid, y_valid

In [8]:
params = dict({
    'learning_rate': 0.2,
    'application': 'multiclass',
    'num_classes': 3,
#     'min_data_in_leaf': 5,
#     'max_depth': 8,
    'num_leaves': 2 ** 4,
    'verbosity': 0,
#     'metric': 'multi_error'
})

grid_params = {
    'learning_rate': [0.1, 0.2],
    'max_depth': [8, 10],
}

In [9]:
from sklearn.metrics import confusion_matrix
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
#         print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    return cm

In [10]:
def get_multi_error(y, pred_y):
    cnf_matrix = confusion_matrix(y, pred_y)
    
    # Plot normalized confusion matrix
#     plt.figure(figsize=(10, 10))
    cm = plot_confusion_matrix(cnf_matrix, classes=[0, 1, 2], normalize=True,
                          title='Normalized confusion matrix')
    return sum([1 - cm[i][i] for i in range(cm.shape[0])]) / cm.shape[0]

In [11]:
def my_f1_score(y_pred, y):

    y = y.get_label()
    y_pred = y_pred.reshape(len(np.unique(y)), -1).argmax(axis=0)
    return "f1-score", metrics.f1_score(y, y_pred, average="weighted"), True

## Core

In [12]:
result_df = pd.DataFrame(
    columns=['test_who', 'test_porder', 'test_score', 'f1_score'])
id_ = 0

for test_who in range(1, 8 + 1):
    for test_porder in range(1, 3 + 1):
        x, y, x_valid, y_valid = prepare(test_who)

        train_set = lgb.Dataset(x, y)
        valid_set = lgb.Dataset(x_valid, y_valid, free_raw_data=False)

        watchlist = [valid_set]

        model = lgb.train(params, train_set=train_set, valid_sets=watchlist,
                          num_boost_round=200, verbose_eval=50, early_stopping_rounds=100, feval=my_f1_score)

        if pfs[test_who - 1][test_porder - 1] is None:
            continue
        x_test = pfs[test_who - 1][test_porder -
                                   1].drop(['hit_type'], axis=1)
        y_true = pfs[test_who - 1][test_porder - 1]['hit_type']

        y_pred = model.predict(x_test, num_iteration=model.best_iteration)

        predictions = []
        for xx in y_pred:
            predictions.append(np.argmax(xx))
        y_test = pd.Series(data=predictions)

        f1_score = '%.4f' % metrics.f1_score(
            y_test, y_true, average='weighted')

        test_score = sfs[test_who - 1][test_porder - 1]

        result_df.loc[id_] = [test_who, test_porder, test_score, f1_score]

        id_ += 1

Training until validation scores don't improve for 100 rounds.
[50]	valid_0's f1-score: 0.93203
[100]	valid_0's f1-score: 0.988635
[150]	valid_0's f1-score: 0.999527
[200]	valid_0's f1-score: 1
Training until validation scores don't improve for 100 rounds.
[50]	valid_0's f1-score: 0.93203
[100]	valid_0's f1-score: 0.988635
[150]	valid_0's f1-score: 0.999527
[200]	valid_0's f1-score: 1
Training until validation scores don't improve for 100 rounds.
[50]	valid_0's f1-score: 0.93203
[100]	valid_0's f1-score: 0.988635
[150]	valid_0's f1-score: 0.999527
[200]	valid_0's f1-score: 1
Training until validation scores don't improve for 100 rounds.
[50]	valid_0's f1-score: 0.934886
[100]	valid_0's f1-score: 0.986269
[150]	valid_0's f1-score: 0.998343
[200]	valid_0's f1-score: 1
Training until validation scores don't improve for 100 rounds.
[50]	valid_0's f1-score: 0.934886
[100]	valid_0's f1-score: 0.986269
[150]	valid_0's f1-score: 0.998343
[200]	valid_0's f1-score: 1
Training until validation sc

In [13]:
result_df.to_csv('CSV/song4_result-sc-yy.csv', index=False)