## Imports and stuff

In [None]:
import tensorflow as tf
import keras
from keras.layers import Dense,Dropout, Activation
from keras.models import Sequential
from keras.optimizers import *

import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import librosa
from sklearn.linear_model import LogisticRegression, Lasso, ElasticNet
from sklearn.neural_network import MLPClassifier
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the feature matrices and label vectors saved as .npy files in the
# working directory. There are no labels for the test split.
X_train, y_train, X_dev, y_dev, X_test = (
    np.array(np.load(path))
    for path in ("X_train.npy", "y_train.npy", "X_dev.npy", "y_dev.npy", "X_test.npy")
)

# One-hot encode the labels for the Keras model.
# NOTE(review): each split is encoded independently, so the two matrices only
# share the same columns if every class occurs in both splits -- confirm.
y_trainonehot, y_devonehot = (
    np.array(pd.get_dummies(labels)) for labels in (y_train, y_dev)
)

In [None]:
def score_func(y_test, y_true):
    """Return the percentage (0-100) of positions at which two label sequences agree.

    Parameters:
        y_test: sequence of predicted labels.
        y_true: sequence of reference labels, same length as ``y_test``.

    Returns:
        ``100 * matches / len(y_test)``.

    Raises:
        ValueError: if the two sequences have different lengths.
        ZeroDivisionError: if both sequences are empty.
    """
    total = len(y_test)
    if total != len(y_true):
        raise ValueError("The two arrays aren't of the same length, aborting")
    matches = sum(1 for guess, truth in zip(y_test, y_true) if guess == truth)
    return 100 * matches / total

## Neural Network

In [None]:
# Standardise the features of all three splits in one pass, then cut the
# stacked matrix back into train / dev / test at the original row counts.
# The scaling statistics therefore also include the dev and test rows.
# (A PCA reduction to 50 components was tried at this point and is disabled.)
print(X_train.shape, X_dev.shape, X_test.shape)
n_train, n_dev = len(X_train), len(X_dev)
X_tot = preprocessing.scale(np.concatenate([X_train, X_dev, X_test]))
X_train = X_tot[:n_train]
X_dev = X_tot[n_train:n_train + n_dev]
X_test = X_tot[n_train + n_dev:]
# Shapes must be unchanged by the round trip.
print(X_train.shape, X_dev.shape, X_test.shape)

In [None]:
def unison_shuffled_copies(a, b):
    """Shuffle two equally long arrays with one shared random permutation.

    Row ``i`` of ``a`` and row ``i`` of ``b`` end up at the same new position,
    so sample/label pairs stay aligned. The inputs are not modified; the
    results come from integer-array indexing.

    Parameters:
        a, b: arrays that support indexing by an integer index array.

    Returns:
        Tuple ``(a_shuffled, b_shuffled)``.

    Raises:
        AssertionError: if the lengths differ.
    """
    n_rows = len(a)
    assert n_rows == len(b)
    order = np.random.permutation(n_rows)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [None]:
# Train a small fully connected softmax classifier `cv` times from scratch and
# average its accuracy. `model` and `history` from the last run are reused by
# the cells below.
scores = []
cv = 1
for run in range(cv):
    # 193 input features -> 40 -> 20 -> 15 classes, with 10% dropout applied
    # ahead of each hidden ReLU.
    model = Sequential()
    for layer in (
        Dense(40, input_dim = 193),
        Dropout(0.1),
        Activation('relu'),
        Dense(20),
        Dropout(0.1),
        Activation('relu'),
        Dense(15),
        Activation('softmax'),
    ):
        model.add(layer)
    model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

    # Fit on train+dev, shuffled together so features and one-hot labels stay aligned.
    X, y = unison_shuffled_copies(
        np.concatenate([X_train, X_dev]),
        np.concatenate([y_trainonehot, y_devonehot]),
    )
    history = model.fit(X, y, batch_size = 10, nb_epoch = 40,
                        verbose = 0, validation_split = 0.1)

    # evaluate() returns [loss, accuracy]; keep only the accuracy.
    # Both sets were drawn from the data passed to fit(), so these numbers are
    # not held-out estimates.
    train_acc = model.evaluate(X_train, y_trainonehot, verbose=0)[1]
    dev_acc = model.evaluate(X_dev, y_devonehot, verbose=0)[1]
    print(train_acc, dev_acc)
    scores.append((train_acc + dev_acc) / 2)
print("Mean score:",np.mean(scores))

In [None]:
from sklearn.metrics import confusion_matrix

# Predicted class for each dev sample = index of the largest softmax output.
# NOTE(review): the index only equals the label if the labels are the integers
# 0..14 in the same order as the one-hot columns -- confirm.
L = [np.argmax(x) for x in model.predict(X_dev)]

# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the
# true labels and columns the predictions. The truth therefore goes first,
# matching the call used for the K-means evaluation.
print(confusion_matrix(y_dev, L))

In [None]:
%matplotlib inline
# Draw one figure per metric recorded by the last fit(): the training curve
# and the validation-split curve (labelled 'test' in the legend).
# Each entry: (train key, validation key, title, y label, y limits or None).
for train_key, val_key, title, y_label, y_limits in (
    ('acc', 'val_acc', 'model accuracy', 'accuracy', (0,1)),
    ('loss', 'val_loss', 'model loss', 'loss', None),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    if y_limits is not None:
        # Only the accuracy figure is pinned to the [0, 1] range.
        plt.ylim(y_limits)
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
# Predicted class per test sample = index of the largest network output.
nn_pred = [np.argmax(outputs) for outputs in model.predict(X_test)]
# Write one integer label per line; the later model cells overwrite this file.
np.savetxt('y_pred.txt', nn_pred, fmt="%d")
# Show how many test samples were assigned to each class.
print(np.unique(nn_pred, return_counts=True))

## Logistic Regression

In [None]:
# Fit a default-parameter logistic regression on train+dev (integer labels,
# not the one-hot version) and write its test predictions to the submission file.
reg = LogisticRegression().fit(
    np.concatenate([X_train, X_dev]),
    np.concatenate([y_train, y_dev]),
)
reg_pred = reg.predict(X_test)
np.savetxt('y_pred.txt', reg_pred, fmt="%d")

## K-means

In [None]:
from collections import Counter
import operator

from sklearn.cluster import KMeans

# Cluster every sample (train, dev and test) into as many groups as there are
# classes, then name each cluster after the most frequent label among the
# labelled samples that fall into it.
n_clusters = 15
kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(X_tot)
X = np.concatenate([X_train,X_dev])
y = np.concatenate([y_train,y_dev])
pred = kmeans.predict(X)

# dict_list[c] counts, per label, the labelled samples assigned to cluster c.
# A Counter starts missing keys at 0, so no exception handling is needed.
dict_list = [Counter() for _ in range(n_clusters)]
for cluster, label in zip(pred, y):
    dict_list[cluster][label] += 1

# corr_dict maps cluster id -> majority label. A cluster that received no
# labelled sample maps to None. Ties go to the label seen first.
corr_dict = {
    cluster: max(counts.items(), key=operator.itemgetter(1))[0] if counts else None
    for cluster, counts in enumerate(dict_list)
}

# Replace each cluster id by its majority label, in place. Every id present in
# `pred` has at least one count, so None is never written here.
for i, cluster in enumerate(pred):
    pred[i] = corr_dict[cluster]

In [None]:
"""y_new = []
pred_new = []
new_class = {3:1,13:1,14:1,11:4,12:4,9:4,6:4,7:4,10:4}
for i in range(len(y)):
    try:
        y_new.append(new_class[y[i]])        
    except:
        y_new.append(y[i])
    try:
        pred_new.append(new_class[pred[i]])
    except:
        pred_new.append(pred[i])"""
from sklearn.metrics import accuracy_score, confusion_matrix

# Compare the cluster-derived labels with the true labels of train+dev:
# first the confusion matrix (true labels first, predictions second), then the
# fraction of exact matches.
cluster_confusion = confusion_matrix(y, pred)
cluster_accuracy = accuracy_score(y, pred)
print(cluster_confusion)
print(cluster_accuracy)

## XGBoost 

In [None]:
from xgboost import XGBClassifier

# Train a default-parameter XGBoost classifier on train+dev, with the rows
# shuffled so that features and labels stay aligned.
X, y = unison_shuffled_copies(
    np.concatenate([X_train, X_dev]),
    np.concatenate([y_train, y_dev]),
)
print(X.shape, y.shape)

gbm = XGBClassifier()
gbm.fit(X, y)

In [None]:
# Predict the test labels with the boosted model, overwrite the submission
# file, and print how many samples were assigned to each label.
xgboost_pred = gbm.predict(X_test)
np.savetxt('y_pred.txt', xgboost_pred, fmt="%d")
predicted_labels, label_counts = np.unique(xgboost_pred, return_counts=True)
print((predicted_labels, label_counts))

## Mix results (majority vote of the three models)

In [None]:
from collections import Counter

# Majority vote over the three models for every test sample.
# preds[i] holds the votes for sample i in the order
# (neural network, logistic regression, XGBoost).
preds = {
    i: [nn_pred[i], reg_pred[i], xgboost_pred[i]]
    for i in range(len(nn_pred))
}

# The most frequent vote wins. When all three disagree, the first vote (the
# neural network's) is kept, because ties keep insertion order.
new_pred = [Counter(votes).most_common(1)[0][0] for votes in preds.values()]

np.savetxt('y_pred.txt', new_pred, fmt="%d")
print(np.unique(new_pred, return_counts=True))