In [5]:
import numpy as np
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn import tree

In [6]:
# load data
tr = np.loadtxt('./dataset/treino_baseIAM_301_Filter_1x1_new2.txt')
ts = np.loadtxt('dataset/teste_baseIAM_301_Filter_1x1_new2.txt')
y_test  = ts[:, -1]
y_train = tr[:, -1]
X_train = tr[:, 0 : -1]
X_test  = ts[:, 0 : -1]

# Normaliza os dados...
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [4]:
from sklearn import decomposition

n_components = [16, 32, 64, 128, 256, 512]

nameTr = './dataset/treino_baseIAM_301_Filter_1x1_PCA_'
nameTs = './dataset/teste_baseIAM_301_Filter_1x1_PCA_'

for n in n_components:
    file_tr = nameTr + str(n)
    file_ts = nameTs + str(n)
    
    a_fileTr = open(file_tr, "w")
    a_fileTs = open(file_ts, "w")
    
    pca = decomposition.PCA(n)
    pca.fit(X_train)
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    np.savetxt(a_fileTr, X_train_pca)
    np.savetxt(a_fileTs, X_test_pca)

In [7]:
 # k-NN classifier
#from sklearn.metrics import classification_report
neigh = KNeighborsClassifier(n_neighbors=1, metric='euclidean')
neigh.fit(X_train, y_train)
#neigh.score(X_test, y_test)
print(classification_report(y_test, neigh.predict(X_test), zero_division=0))

              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91         5
         1.0       1.00      0.40      0.57         5
         3.0       0.00      0.00      0.00         5
         5.0       1.00      1.00      1.00         5
         7.0       0.83      1.00      0.91         5
         8.0       1.00      0.80      0.89         5
         9.0       0.25      0.20      0.22         5
        10.0       1.00      1.00      1.00         5
        11.0       0.75      0.60      0.67         5
        12.0       0.31      0.80      0.44         5
        13.0       1.00      1.00      1.00         5
        14.0       1.00      0.40      0.57         5
        16.0       0.33      0.20      0.25         5
        17.0       0.83      1.00      0.91         5
        18.0       0.71      1.00      0.83         5
        19.0       0.83      1.00      0.91         5
        25.0       1.00      1.00      1.00         5
        26.0       0.62    

In [8]:
### Decision Tree
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)
print(clf.predict(X_test))
print(classification_report(y_test, clf.predict(X_test), zero_division=0))
#tree.plot_tree(clf)

[  0. 357. 450. ... 202.  62. 351.]
              precision    recall  f1-score   support

         0.0       0.67      0.40      0.50         5
         1.0       0.75      0.60      0.67         5
         3.0       0.00      0.00      0.00         5
         5.0       0.00      0.00      0.00         5
         7.0       0.57      0.80      0.67         5
         8.0       0.00      0.00      0.00         5
         9.0       0.33      0.40      0.36         5
        10.0       1.00      0.40      0.57         5
        11.0       0.20      0.20      0.20         5
        12.0       0.25      0.40      0.31         5
        13.0       1.00      0.80      0.89         5
        14.0       0.00      0.00      0.00         5
        16.0       0.00      0.00      0.00         5
        17.0       0.20      0.20      0.20         5
        18.0       0.60      0.60      0.60         5
        19.0       0.33      0.60      0.43         5
        25.0       0.20      0.20      0.20  

In [9]:
###SVM com Grid search
C_range = 2. ** np.arange(-5,15,2)
gamma_range = 2. ** np.arange(3,-15,-2)
#k = [ 'rbf']
# instancia o classificador, gerando probabilidades
srv = svm.SVC(probability=True, kernel='rbf')
#ss = StandardScaler()
pipeline = Pipeline([ ('scaler', scaler), ('svm', srv) ])

param_grid = {
    'svm__C' : C_range,
    'svm__gamma' : gamma_range
}
#        
#Faz a busca por melhores parâmetros...
grid = GridSearchCV(pipeline, param_grid, n_jobs=-1, verbose=True)
grid.fit(X_train, y_train)

# recupera o melhor modelo
model = grid.best_estimator_
print(classification_report(y_test, model.predict(X_test), zero_division=0))

Fitting 5 folds for each of 90 candidates, totalling 450 fits
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00         5
         1.0       1.00      0.60      0.75         5
         3.0       0.00      0.00      0.00         5
         5.0       1.00      1.00      1.00         5
         7.0       0.83      1.00      0.91         5
         8.0       1.00      1.00      1.00         5
         9.0       1.00      0.40      0.57         5
        10.0       1.00      1.00      1.00         5
        11.0       0.67      0.40      0.50         5
        12.0       0.38      0.60      0.46         5
        13.0       1.00      1.00      1.00         5
        14.0       1.00      1.00      1.00         5
        16.0       0.75      0.60      0.67         5
        17.0       1.00      1.00      1.00         5
        18.0       0.83      1.00      0.91         5
        19.0       0.83      1.00      0.91         5
        25.0       

In [10]:
### MLP  - Rede Neural Artificial

clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(20), random_state=1)
clf.fit(X_train, y_train)
print(clf.predict(X_test))
print(classification_report(y_test, clf.predict(X_test), zero_division=0))

[  0.   0. 450. ... 212. 671. 671.]
              precision    recall  f1-score   support

         0.0       1.00      0.60      0.75         5
         1.0       0.33      0.20      0.25         5
         3.0       0.00      0.00      0.00         5
         5.0       0.83      1.00      0.91         5
         7.0       0.62      1.00      0.77         5
         8.0       1.00      1.00      1.00         5
         9.0       0.67      0.40      0.50         5
        10.0       1.00      0.40      0.57         5
        11.0       0.67      0.40      0.50         5
        12.0       0.60      0.60      0.60         5
        13.0       1.00      0.80      0.89         5
        14.0       0.67      0.80      0.73         5
        16.0       0.75      0.60      0.67         5
        17.0       0.71      1.00      0.83         5
        18.0       0.60      0.60      0.60         5
        19.0       0.62      1.00      0.77         5
        25.0       1.00      0.80      0.89  

In [None]:
## Random Forest Classifier

# X, y = make_classification(n_samples=1000, n_features=4, n_informative=2, n_redundant=0, random_state=0, shuffle=False)
# clf = RandomForestClassifier(n_estimators=10000, max_depth=30, random_state=1)
# clf.fit(X_train, y_train)  
# #print(clf.feature_importances_)
# print(clf.predict(X_test))
# print(classification_report(y_test, clf.predict(X_test), zero_division=0))