In [1]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
from glob import glob
from skimage.feature import hog
from skimage import exposure
from skimage import segmentation
from sklearn.cluster import KMeans
from sklearn.model_selection import ShuffleSplit
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn import naive_bayes
import os
import csv
from sklearn.model_selection import train_test_split
from funcionesProyecto import funcionLeerClase, funcionLeerTodasClases, ecualizacionAdaptativa, imagen_media_color, quitarFondo, generarHogClase, generarCsv

<br/><br/>**<u>PREPARAMOS LOS DATOS</u>:**<br/><br/><br/><br/>

In [2]:
# Load the training examples of every class found under 'train'.
# The helper prints one "En train/<class>, hay N ejemplos" line per class folder
# (see the cell output). Later cells iterate the result class by class and index it
# as datosLista[class_index][example_index].
datosLista = funcionLeerTodasClases('train')

En train/10c, hay 254 ejemplos
En train/1c, hay 77 ejemplos
En train/1e, hay 248 ejemplos
En train/20c, hay 227 ejemplos
En train/2c, hay 25 ejemplos
En train/2e, hay 45 ejemplos
En train/50c, hay 131 ejemplos
En train/5c, hay 284 ejemplos


In [3]:
# Build the labelled feature matrix `datos`: one row per example, HOG features
# first and the class index (position of the class in datosLista) as the last column.

# Probe a single example to learn the HOG descriptor length for these parameters.
fd = hog(
    datosLista[0][0],
    orientations=8,
    pixels_per_cell=(8, 8),
    cells_per_block=(7, 7),
)
numeroColumnas = fd.size

# Start from an empty (0, n_features + 1) matrix and append one block per class.
datos = np.empty((0, numeroColumnas + 1))
for indiceClase, ejemplosClase in enumerate(datosLista):
    hogClase = generarHogClase(ejemplosClase)
    etiquetas = np.full((hogClase.shape[0], 1), indiceClase)
    bloqueClase = np.hstack((hogClase, etiquetas))
    print(bloqueClase.shape)
    datos = np.vstack((datos, bloqueClase))
print(datos.shape)

(254, 3529)
(77, 3529)
(248, 3529)
(227, 3529)
(25, 3529)
(45, 3529)
(131, 3529)
(284, 3529)
(1291, 3529)


In [4]:
# Separate the features (every column but the last) from the class index (last
# column, kept as an (n, 1) column vector).
X, y = datos[:, :-1], datos[:, -1].reshape(-1, 1)
print(X.shape, y.shape)

# 60 % train / 20 % validation / 20 % test: hold out 40 % first, then split that
# hold-out in half. Fixed seeds keep the partition reproducible.
Xtrain, Xval_test, ytrain, yval_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)
Xval, Xtest, yval, ytest = train_test_split(
    Xval_test, yval_test, test_size=0.5, random_state=42
)
for particionX, particionY in ((Xtrain, ytrain), (Xval, yval), (Xtest, ytest)):
    print(particionX.shape, particionY.shape)

(1291, 3528) (1291, 1)
(774, 3528) (774, 1)
(258, 3528) (258, 1)
(259, 3528) (259, 1)


In [5]:
# Read the examples stored under 'public_test' (no labels are attached to them in
# this notebook) and report how many were loaded.
datosTest = funcionLeerClase('public_test')
print(len(datosTest))

964


In [6]:
# HOG descriptor of the first test example, using the same parameters as the
# probe done on the training data.
fd = hog(
    datosTest[0],
    orientations=8,
    pixels_per_cell=(8, 8),
    cells_per_block=(7, 7),
)
numeroColumnas = fd.size  # descriptor length; not read again inside this cell

# Feature matrix for the whole test set (one row per loaded example).
test = generarHogClase(datosTest)
print(test.shape)

(964, 3528)


<br/><br/>**<u>PROBAMOS SVM</u>:**<br/><br/><br/><br/>

In [None]:
# Get a rough idea of performance: the training data is split into three sets.
# Hyper-parameters are tuned on Xtrain only; Xval and Xtest are used purely for scoring.
C_values = [10**-4, 10**-3, 10**-2, 0.1, 1, 2, 3, 4, 10, 100]
gamma_values = [10**-4, 10**-3, 10**-2, 0.1, 1, 10, 100]
params = {'C': C_values, 'gamma': gamma_values}

# A single 80/20 shuffle split: every (C, gamma) pair is fitted and scored once.
rs = ShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
clf = GridSearchCV(svm.SVC(), params, cv=rs, n_jobs=-1)
clf = clf.fit(Xtrain, ytrain.ravel())
C_best = clf.best_params_['C']
gamma_best = clf.best_params_['gamma']
print("Mejores parámetros C: {}, gamma: {}".format(C_best, gamma_best))

# GridSearchCV refits the best configuration on all of Xtrain by default
# (refit=True), so reuse that fitted model instead of training an identical SVC again.
svc = clf.best_estimator_

# Labels are flattened to 1-D for scoring, matching what was passed to fit().
print('PRECISION:')
accTrain = svc.score(Xtrain, ytrain.ravel())
print('\tTRAIN:', accTrain)
accVal = svc.score(Xval, yval.ravel())
print('\tVAL:', accVal)
accTest = svc.score(Xtest, ytest.ravel())
print('\tTEST:', accTest)

In [7]:
# Train with all the labelled training examples (X, y), not only the Xtrain partition.
C_values = [0.1, 1, 2, 3, 4, 5, 6, 7]
gamma_values = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
params = {'C': C_values, 'gamma': gamma_values}

# A single 80/20 shuffle split is used to rank the (C, gamma) candidates.
rs = ShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
clf = GridSearchCV(svm.SVC(), params, cv=rs, n_jobs=-1)
clf = clf.fit(X, y.ravel())
C_best = clf.best_params_['C']
gamma_best = clf.best_params_['gamma']
print("Mejores parámetros C: {}, gamma: {}".format(C_best, gamma_best))
# NOTE(review): the recorded run selected C=7 and gamma=0.06, the largest value in
# each grid, so the optimum may lie outside the searched range — consider widening it.

# GridSearchCV refits the best configuration on all of X by default (refit=True),
# so reuse that fitted model instead of training an identical SVC a second time.
svc = clf.best_estimator_

# Accuracy on the same data the model was fitted on: an optimistic figure, not an
# estimate of generalisation.
accTrain = svc.score(X, y.ravel())
print(accTrain)

Mejores parámetros C: 7, gamma: 0.06
0.9512006196746708


In [8]:
# Predict a class index for every row of the test feature matrix with the SVM
# held in `svc` by the most recently executed SVM cell.
test_labels = svc.predict(test)

<br/><br/>**<u>PROBAMOS REDES</u>:**<br/><br/><br/><br/>

In [None]:
from sklearn.neural_network import MLPClassifier
# NEURAL NETWORK
# A single 80/20 shuffle split is used to rank the candidate configurations.
rs = ShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
hidden_sizes = list(range(1, 25, 2))  # one hidden layer with 1, 3, ..., 23 units
metodoPesos = ['lbfgs', 'sgd', 'adam']
funcionActivacion = ['identity', 'logistic', 'tanh', 'relu']

# Fixed seed so weight initialisation (and therefore the search) is reproducible.
clf = MLPClassifier(max_iter=250, random_state=0)
parameters = {'hidden_layer_sizes': hidden_sizes, 'solver': metodoPesos, 'activation': funcionActivacion}

# Class labels are arbitrary folder indices, so a squared-error score between them
# has no meaning; candidates are ranked by classification accuracy instead.
gridSearch = GridSearchCV(clf, scoring='accuracy', param_grid=parameters, cv=rs, n_jobs=-1)
# Labels are flattened to 1-D, as in the SVM cells, to avoid the column-vector warning.
gridSearch = gridSearch.fit(X, y.ravel())
hidden_layer_sizes_best = gridSearch.best_params_['hidden_layer_sizes']
funcionActivacion_best = gridSearch.best_params_['activation']
metodoPesos_best = gridSearch.best_params_['solver']

# GridSearchCV refits the best configuration on all of X by default (refit=True),
# so reuse that fitted network instead of training it a second time.
clf = gridSearch.best_estimator_
salidasTrain = clf.predict(X)

# Training error as the fraction of misclassified examples (0 = perfect). This
# replaces the call to the never-imported `metrics` module, which raised NameError.
Etrain = np.mean(salidasTrain != y.ravel())
print(Etrain)

In [None]:
# Predict a class index for every row of the test feature matrix with the model
# currently bound to `clf`.
# NOTE(review): this assigns the same `test_labels` name as the SVM prediction cell,
# so whichever of the two cells ran last decides what the export cell writes.
test_labels = clf.predict(test)

<br/><br/>**<u>GENERAMOS LOS RESULTADOS</u>:**<br/><br/><br/><br/>

In [9]:
# Hand the current predictions to the project helper that produces the results CSV.
# NOTE(review): presumably pairs each file in 'public_test' with its predicted
# label — confirm the expected label format and ordering in funcionesProyecto.
generarCsv('public_test', test_labels)