In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score
import numpy as np
import warnings
from sklearn.model_selection import cross_val_score
warnings.filterwarnings('ignore')

path = "/home/daniel/Data/arrays_256/X.txt"
X = np.loadtxt(path)
print(X.shape)

(35264, 27)


In [2]:
# separando datos en datos de entrenamiento y datos de testeo (0.25)
X_train, X_test, y_train, y_test = train_test_split(X[:,:-1],X[:,-1])

In [7]:
def train_ml_model(X, y, model):
    """Entrena el modelo de las estrategias de ML."""

    model.fit(X,y)

    return model

def test_ml_model(X, y, model):
    """Predice utilizando el modelo model sobre los datos de testeo test.
    :param model: Modelo de machine learning previamente entrenado
    :type model: Modelo de machine learning de sklearn o xgboost
    :param y: Datos de testeo
    :type y: 1D numpy array
    """
    # prediciendo con el modelo
    pred = model.predict(X)
    
    print("\nprecision: %s"%precision_score(y, pred)), "cuantas que dijo P realmente lo eran"
    print("recall: %s"%recall_score(y, pred)), "Qué porcentaje de todas las P acertó"
    #print("accuracy: %s"%accuracy_score(y, pred))
    print("f1_score: %s"%f1_score(y, pred))

    return pred

In [4]:
clf1 = RandomForestClassifier(n_estimators=80, max_features="sqrt", max_depth=29)
clf2 = DecisionTreeClassifier(max_depth=5)
clf3 = XGBClassifier(n_estimators=350, max_depth=3) #
clf5 = LogisticRegression(C=1)
clf6 = AdaBoostClassifier(n_estimators=99, learning_rate=0.3)
clf7 = KNeighborsClassifier(n_neighbors=17)
clf8 = MLPClassifier(hidden_layer_sizes=(26,), random_state=0)

classifiers = [clf1, clf2, clf3, clf5, clf6, clf7, clf8]

In [5]:
for clf in classifiers:
    print("\n",str(clf).split("(")[0])
    model = train_ml_model(X_train, y_train, clf)
    y_pred = test_ml_model(X_test, y_test, model)


 RandomForestClassifier

precision: 0.8264712723889277
recall: 0.810631987223363
accuracy: 0.8212341197822142
f1_score: 0.8184750057590418

 DecisionTreeClassifier

precision: 0.7290224913494809
recall: 0.7691079169518594
accuracy: 0.7430807622504537
f1_score: 0.7485289219495946

 XGBClassifier

precision: 0.8632233381157342
recall: 0.8236367784622405
accuracy: 0.8474364791288567
f1_score: 0.8429655575014594

 LogisticRegression

precision: 0.8573878951237441
recall: 0.7983116586812685
accuracy: 0.8337114337568058
f1_score: 0.8267958412098297

 AdaBoostClassifier

precision: 0.7655933348344968
recall: 0.7757243896874287
accuracy: 0.7704174228675136
f1_score: 0.7706255666364461

 KNeighborsClassifier

precision: 0.8496750494489969
recall: 0.6860597764088524
accuracy: 0.7835753176043557
f1_score: 0.7591517293612723

 MLPClassifier

precision: 0.8802693938638064
recall: 0.8051562856490988
accuracy: 0.8486842105263158
f1_score: 0.8410390848427074


In [8]:
clf_ = MLPClassifier(hidden_layer_sizes=(26,), random_state=0)
model = train_ml_model(X_train, y_train, clf_)
y_pred = test_ml_model(X_test, y_test, model)


precision: 0.8802693938638064
recall: 0.8051562856490988
f1_score: 0.8410390848427074


## Cross validation for feature extraction

# Geometry of filterbank --> 512cf

In [34]:
path1 = "/home/daniel/Data/arrays_256/X.txt"
path2 = "/home/daniel/Data/arrays_256cf/X.txt"
path3 = "/home/daniel/Data/arrays_512/X.txt"
path4 = "/home/daniel/Data/arrays_512cf/X.txt"
paths = [path1, path2, path3, path4]
for path in paths:
    X = np.loadtxt(path)
    print(path.split("/")[4],X.shape)
    X_train, X_test, y_train, y_test = train_test_split(X[:,:-1],X[:,-1])
    scores = cross_val_score(clf3, X_train, y_train, cv=5, scoring='f1', n_jobs=-1)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

arrays_256 (35264, 66)
Accuracy: 0.87 (+/- 0.00)
arrays_256cf (35264, 66)
Accuracy: 0.89 (+/- 0.01)
arrays_512 (35264, 66)
Accuracy: 0.91 (+/- 0.01)
arrays_512cf (35264, 66)
Accuracy: 0.92 (+/- 0.01)


## Noise ratio --> 512cf 50-50

In [37]:
path1 = "/home/daniel/Data/arrays_512cf/X.txt"
path2 = "/home/daniel/Data/arrays_512_0.5/X.txt"
paths = [path1, path2]
for path in paths:
    X = np.loadtxt(path)
    print(path.split("/")[4],X.shape)
    X_train, X_test, y_train, y_test = train_test_split(X[:,:-1],X[:,-1])
    scores = cross_val_score(clf3, X_train, y_train, cv=5, scoring='f1', n_jobs=-1)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

arrays_512cf (35264, 66)
Accuracy: 0.92 (+/- 0.01)
arrays_512_0.5 (35264, 66)
Accuracy: 0.89 (+/- 0.01)


## Preemphasis --> True, 512cf

In [39]:
path1 = "/home/daniel/Data/arrays_512cf/X.txt"
path2 = "/home/daniel/Data/arrays_512_noPre/X.txt"
paths = [path1, path2]
for path in paths:
    X = np.loadtxt(path)
    print(path.split("/")[4],X.shape)
    X_train, X_test, y_train, y_test = train_test_split(X[:,:-1],X[:,-1])
    scores = cross_val_score(clf3, X_train, y_train, cv=5, scoring='f1', n_jobs=-1)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

arrays_512cf (35264, 66)
Accuracy: 0.92 (+/- 0.00)
arrays_512_noPre (35264, 66)
Accuracy: 0.90 (+/- 0.00)


## CMN --> True

In [42]:
path1 = "/home/daniel/Data/arrays_512/X_CMN.txt"
path2 = "/home/daniel/Data/arrays_512cf/X_CMN.txt"
paths = [path1, path2]
for path in paths:
    X = np.loadtxt(path)
    print(path.split("/")[5], X.shape)
    X_train, X_test, y_train, y_test = train_test_split(X[:,:-1],X[:,-1])
    scores = cross_val_score(clf3, X_train, y_train, cv=5, scoring='f1', n_jobs=-1)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

X_CMN.txt (35264, 66)
Accuracy: 0.95 (+/- 0.01)
X_CMN.txt (35264, 66)
Accuracy: 0.95 (+/- 0.01)


## cepfilter True and energy True

In [43]:
path1 = "/home/daniel/Data/arrays_512cf_noEn/X_CMN.txt"
path2 = "/home/daniel/Data/arrays_512cf_noCepf/X_CMN.txt"
path3 = "/home/daniel/Data/arrays_512cf_noEn/X.txt"
path4 = "/home/daniel/Data/arrays_512cf_noCepf/X.txt"
paths = [path1, path2, path3, path4]
for path in paths:
    X = np.loadtxt(path)
    print(path.split("/")[4:],X.shape)
    X_train, X_test, y_train, y_test = train_test_split(X[:,:-1],X[:,-1])
    scores = cross_val_score(clf3, X_train, y_train, cv=5, scoring='f1', n_jobs=-1)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

['arrays_512cf_noEn', 'X_CMN.txt'] (35264, 66)
Accuracy: 0.93 (+/- 0.01)
['arrays_512cf_noCepf', 'X_CMN.txt'] (35264, 66)
Accuracy: 0.92 (+/- 0.01)
['arrays_512cf_noEn', 'X.txt'] (35264, 66)
Accuracy: 0.91 (+/- 0.01)
['arrays_512cf_noCepf', 'X.txt'] (35264, 66)
Accuracy: 0.90 (+/- 0.01)
