# Discrete Fourier Transform

In [42]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from src.utils import preprocess

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D, BatchNormalization, Dropout, Flatten, Dense, LSTM
from keras import losses

from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

## Loading data

In [2]:
# Open files
Data_X_train = pd.read_csv('data/challenge_fichier_dentrees_dentrainement_challenge_nba/train.csv')
Data_Y_train = pd.read_csv('data/challenge_fichier_de_sortie_dentrainement_challenge_nba.csv', sep=';')
X_test = pd.read_csv('data/challenge_fichiers_dentrees_de_test_challenge_nba/test.csv')

In [3]:
kept_features = ('ID','miss', 'score', 'assist', 'offensive rebound')
filter_col = [col for col in Data_X_train if col.startswith(kept_features)]
Data_X_train_reduced = Data_X_train[filter_col]
Data_X_train_reduced.head()

Unnamed: 0,ID,score_1,offensive rebound_1,assist_1,miss_1,score_2,offensive rebound_2,assist_2,miss_2,score_3,...,assist_1438,miss_1438,score_1439,offensive rebound_1439,assist_1439,miss_1439,score_1440,offensive rebound_1440,assist_1440,miss_1440
0,14186,-2,0,0,0,-2,0,0,0,-2,...,-4,9,-16,8,-4,9,-13,8,-3,9
1,13013,0,0,0,1,0,0,0,1,0,...,2,0,-5,1,1,0,-5,1,1,-1
2,7102,0,0,0,0,0,0,0,0,0,...,0,-5,5,-1,0,-5,5,-1,0,-5
3,7637,-2,0,0,0,-2,0,0,0,-2,...,-1,-1,-1,2,-1,-1,-1,2,-1,-1
4,12350,0,0,0,1,0,0,0,1,0,...,4,4,1,4,4,4,1,4,4,3


In [24]:
X, Y, _, _ = preprocess(Data_X_train_reduced, Data_Y_train, 1)
nb_games = len(X)

## First frequencies

In [21]:
nb_freq = 200

In [22]:
X_FF = []
features = ['miss', 'score', 'assist', 'offensive rebound', 'defensive rebound', 'offensive foul', 'defensive foul',
           'lost ball', 'steals', 'bad pass', 'block']
for feat in main_features :
    filter_col = [col for col in Data_X_train if col.startswith(feat)]
    X_feat = Data_X_train[filter_col].as_matrix()
    X_feat_fft = np.fft.fft(X_feat)[:,:nb_freq]
    X_FF.append(abs(X_feat_fft))
X_fft = np.hstack(X_FF)

## Mix features in frequence and time

### Aggregate by subsequence of 10 seconds

In [25]:
X_seq = X.reshape((nb_games, 4, 10, -1), order = 'F')
X_seq = X_seq.mean(axis = 2)
X_seq = X_seq.reshape(nb_games, 576)
X_tot = np.concatenate((X_seq, X_fft), axis = 1)

In [26]:
X_tot = np.concatenate((X_seq, X_fft), axis = 1)
X_tot.shape

(12576, 1376)

## K folds

In [40]:
skf = StratifiedKFold(n_splits = 2, shuffle = True, random_state = 7)

In [49]:
def evaluate_model(model, X, Y, skf) :
    accs_train = []
    accs_val = []
    for train, val in skf.split(X, Y) :
        model.fit(X[train], Y[train])
        acc_train = model.score(X[train], Y[train])
        acc_val = model.score(X[val], Y[val])
        accs_train.append(acc_train)
        accs_val.append(acc_val)
        print('(Training, Validation) accuracies: ({0:.2f},{1:.2f})'.format(100*acc_train, 100*acc_val))

    print('Mean Training Accuracy: {0:.2f} +/- {1:.2f}'.format(100*np.mean(accs_train), 100*np.std(accs_train)))
    print('Mean Validation Accuracy: {0:.2f} +/- {1:.2f}'.format(100*np.mean(accs_val), 100*np.std(accs_val)))   

### MLP CLassifier

In [90]:
model = MLPClassifier()
evaluate_model(model, X, Y, skf)

(Training, Validation) accuracies: (67.53,63.84)
(Training, Validation) accuracies: (76.75,66.27)
(Training, Validation) accuracies: (70.52,66.82)
Mean Training Accuracy: 71.60 +/- 3.84
Mean Validation Accuracy: 65.64 +/- 1.30


### Logistic Regression

In [92]:
c = 1e-5
LR = LogisticRegression(C=c)
evaluate_model(LR, X, Y, skf)

(Training, Validation) accuracies: (74.52,71.54)
(Training, Validation) accuracies: (74.86,70.99)
(Training, Validation) accuracies: (74.36,72.05)
(Training, Validation) accuracies: (74.68,70.02)
(Training, Validation) accuracies: (74.85,70.56)
Mean Training Accuracy: 74.65 +/- 0.19
Mean Validation Accuracy: 71.03 +/- 0.71


### CNN models

In [39]:
X_cnn = X_tot.reshape((len(X_tot), 4, 344, 1), order = 'F')

In [60]:
conv_model = Sequential()

conv_model.add(BatchNormalization(axis=1, 
                                  input_shape = (4, 344, 1)))
conv_model.add(Conv2D(filters = 16, 
                 kernel_size = (4, 10), 
                 activation = 'relu'))
conv_model.add(Dropout(0.75))
conv_model.add(Flatten())
conv_model.add(Dense(units = 50, activation = 'relu'))
conv_model.add(Dropout(0.5))
conv_model.add(Dense(units = 2, 
                activation='softmax'))

conv_model.compile(loss = losses.categorical_crossentropy,
                     optimizer = 'adam',
                     metrics = ['accuracy'])

accs_train = []
accs_val = []
for train, val in skf.split(X_cnn, Y) :
    Y_cnn = np_utils.to_categorical(Y, 2)
    training = conv_model.fit(X_cnn[train], Y_cnn[train], epochs = 50, batch_size = 32, verbose = True)
    acc_train = training.history['acc'][-1]
    accs_train.append(acc_train)
    acc_val = conv_model.evaluate(X_cnn[val], Y_cnn[val])[1]
    accs_val.append(acc_val)
    print('(Training, Validation) accuracies: ({0:.2f},{1:.2f})'.format(100*acc_train, 100*acc_val))
    
print('Mean Training Accuracy: {0:.2f} +/- {1:.2f}'.format(100*np.mean(accs_train), 100*np.std(accs_train)))
print('Mean Validation Accuracy: {0:.2f} +/- {1:.2f}'.format(100*np.mean(accs_val), 100*np.std(accs_val)))  

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
(Training, Validation) accuracies: (71.58,72.17)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/