## Creación de datos sin preprocesamiento

Leemos las carpetas

In [1]:
import cv2
import numpy as np
import os
import glob
import pathlib
import csv
from skimage.feature import hog
from sklearn.model_selection import train_test_split

#### Creamos los csvs

In [2]:
# Build one CSV per class folder: each row is the HOG descriptor of one image
# (resized to tam x tam, no other preprocessing) followed by the class index.
ruta = 'Dataset_traffic_sign/'
# Sorted so the folder -> class-index mapping is identical on every run and machine
# (os.listdir returns entries in arbitrary order). Hidden entries such as
# .ipynb_checkpoints and plain files are ignored; they are not class folders.
vectorClases = sorted(c for c in os.listdir(ruta)
                      if not c.startswith('.') and os.path.isdir(ruta+c))
nClases = len(vectorClases)
tam = 100
# The output folder only needs to be created once, before the loop.
pathlib.Path('csvs/').mkdir(parents=True, exist_ok=True)
for clase,carpeta in enumerate(vectorClases):
    l = []
    rutaImagenes = ruta+carpeta
    # Sorted so the row order inside each CSV (and therefore the later split) is reproducible.
    vectorImagenes = sorted(os.listdir(rutaImagenes))
    for j in vectorImagenes:
        # Read one image of the folder
        rutaImg = rutaImagenes+'/'+j
        img = cv2.imread(rutaImg)
        if img is None:
            # cv2.imread signals an unreadable/non-image file by returning None instead of
            # raising; skip it rather than failing later inside cv2.resize.
            print('Skipping unreadable image {}'.format(rutaImg))
            continue
        img = cv2.resize(img,(tam,tam))
        # Extract the features with HOG
        # NOTE(review): recent scikit-image releases appear to require channel_axis=-1 for
        # colour input -- confirm against the installed version.
        hog_img = hog(img)
        l.append(hog_img)

    # Count the rows actually extracted so the label column always matches the features.
    nImagenes = len(l)
    if nImagenes == 0:
        # Nothing to write; stacking an empty feature list with the label column would fail.
        print('No readable images in {}, skipping class {}'.format(rutaImagenes, clase))
        continue

    # Append the class label as the last column
    datos = np.array(l)
    y = np.full((nImagenes,1),clase)
    datos = np.hstack((datos,y))
    # Write the CSV of this class. Mode 'w' truncates, so re-running the cell replaces the
    # file instead of appending duplicate rows to it.
    rutaCsv = 'csvs/datos'+str(clase)+'.csv'
    with open(rutaCsv, 'w', newline='') as myfile:
        wr = csv.writer(myfile)
        wr.writerows(datos)

# Number of columns per row: HOG features plus the label column.
n_features = datos.shape[1]

print('El numero de caracteristicas es {}\n'.format(n_features))

El numero de caracteristicas es 8101



In [3]:
# Sanity check: load the CSV written for class 0 and display its (rows, columns) shape.
data = np.loadtxt('csvs/datos0.csv',delimiter=',')
data.shape

(6, 8101)

### Una vez creados los csvs creamos los conjuntos de train y test

In [4]:
# Split every per-class CSV 70/30 and stack the pieces into the global train/test matrices.
ruta = 'csvs/'
prefijo = 'datos'
extension = '.csv'

# Keep only files named datos<N>.csv. Anything else in the folder (e.g. .ipynb_checkpoints)
# would make the numeric sort key fail with int('').
csvs_clase = [f for f in os.listdir(ruta)
              if f.startswith(prefijo) and f.endswith(extension)
              and f[len(prefijo):-len(extension)].isdecimal()]
# Numeric order (datos2 before datos10), not lexicographic.
csvs_clase.sort(key=lambda f: int(f[len(prefijo):-len(extension)]))

# Collect the per-class pieces and stack them once at the end; growing an array with
# np.append inside the loop recopies everything accumulated so far on each iteration.
bloques_train = []
bloques_test = []

for csv_string in csvs_clase:
    print("Creating train and test sets for {}".format(csv_string))
    rutaCsv = ruta+csv_string
    # Read the rows of this class; ndmin=2 keeps a single-row file two-dimensional.
    data = np.loadtxt(rutaCsv,delimiter=',',ndmin=2)
    X= data[:,:-1]
    y= data[:,-1].reshape(-1,1)

    # Per-class split with a fixed seed, so every class contributes ~30% to test.
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=2020)

    # Re-attach the label as the last column of each split.
    train = np.hstack((Xtrain,ytrain))
    test = np.hstack((Xtest,ytest))
    print("Total samples {}\ntrain samples {}\ntest samples {}\n".format(data.shape[0],train.shape[0],test.shape[0]))
    bloques_train.append(train)
    bloques_test.append(test)

# With no CSVs, fall back to the empty (0, n_features) matrices the cell always started from.
l_train = np.vstack(bloques_train) if bloques_train else np.empty((0, n_features))
l_test = np.vstack(bloques_test) if bloques_test else np.empty((0, n_features))

Creating train test for datos0.csv
Total samples 6
train samples 4
test samples 2

Creating train test for datos1.csv
Total samples 73
train samples 51
test samples 22

Creating train test for datos2.csv
Total samples 74
train samples 51
test samples 23

Creating train test for datos3.csv
Total samples 46
train samples 32
test samples 14

Creating train test for datos4.csv
Total samples 65
train samples 45
test samples 20

Creating train test for datos5.csv
Total samples 61
train samples 42
test samples 19

Creating train test for datos6.csv
Total samples 13
train samples 9
test samples 4

Creating train test for datos7.csv
Total samples 47
train samples 32
test samples 15

Creating train test for datos8.csv
Total samples 46
train samples 32
test samples 14

Creating train test for datos9.csv
Total samples 48
train samples 33
test samples 15

Creating train test for datos10.csv
Total samples 66
train samples 46
test samples 20

Creating train test for datos11.csv
Total samples 43
train

Guardamos los datos de train y test en archivos .npy

In [5]:
# Report the size of both splits first, then persist each one as a .npy file.
for conjunto in (l_train, l_test):
    print(conjunto.shape)

for nombre, conjunto in (('train_img_features_non_processed', l_train),
                         ('test_img_features_non_processed', l_test)):
    np.save(nombre, conjunto)

(866, 8101)
(398, 8101)


## Creación de datos con preprocesamiento

Para mejorar el clasificador vamos a procesar las imágenes para reducir el ruido y darles un formato uniforme, es decir, un tamaño concreto, un rango de colores, etc.
- Aumento del contraste
- Aumento del rango dinámico
- Ecualización del histograma
- Ajuste de la relación de aspecto

In [6]:
# Disabled preview (kept for reference): for the first nImagenes images of class folder
# 00008, shows the original next to its contrast-adjusted version in an OpenCV window.
# nImagenes = 7
# for i in os.listdir('Dataset_traffic_sign/00008')[:nImagenes]:
#     img_i = cv2.imread('Dataset_traffic_sign/00008/'+i)
#     adjusted_contrast = cv2.convertScaleAbs(img_i,alpha=2,beta=5)
#     result = np.hstack((img_i,adjusted_contrast))
#     cv2.imshow(i,result)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

CSVs procesados

In [7]:
# Build one CSV per class folder: each row is the HOG descriptor of one image
# (resized to tam x tam and contrast-adjusted) followed by the class index.
ruta = 'Dataset_traffic_sign/'
# Sorted so the folder -> class-index mapping is identical on every run and machine
# (os.listdir returns entries in arbitrary order). Hidden entries such as
# .ipynb_checkpoints and plain files are ignored; they are not class folders.
vectorClases = sorted(c for c in os.listdir(ruta)
                      if not c.startswith('.') and os.path.isdir(ruta+c))
nClases = len(vectorClases)
tam = 100
# The output folder only needs to be created once, before the loop.
pathlib.Path('csvs_processed/').mkdir(parents=True, exist_ok=True)
for clase,carpeta in enumerate(vectorClases):
    l = []
    rutaImagenes = ruta+carpeta
    # Sorted so the row order inside each CSV (and therefore the later split) is reproducible.
    vectorImagenes = sorted(os.listdir(rutaImagenes))
    for j in vectorImagenes:
        # Read one image of the folder
        rutaImg = rutaImagenes+'/'+j
        img = cv2.imread(rutaImg)
        if img is None:
            # cv2.imread signals an unreadable/non-image file by returning None instead of
            # raising; skip it rather than failing later inside cv2.resize.
            print('Skipping unreadable image {}'.format(rutaImg))
            continue
        img = cv2.resize(img,(tam,tam))
        # Contrast adjustment: scales each pixel by alpha, adds beta and saturates to 8 bits.
        img = cv2.convertScaleAbs(img,alpha=2,beta=5)

        # Extract the features with HOG
        # NOTE(review): recent scikit-image releases appear to require channel_axis=-1 for
        # colour input -- confirm against the installed version.
        hog_img = hog(img)
        l.append(hog_img)

    # Count the rows actually extracted so the label column always matches the features.
    nImagenes = len(l)
    if nImagenes == 0:
        # Nothing to write; stacking an empty feature list with the label column would fail.
        print('No readable images in {}, skipping class {}'.format(rutaImagenes, clase))
        continue

    # Append the class label as the last column
    datos = np.array(l)
    y = np.full((nImagenes,1),clase)
    datos = np.hstack((datos,y))
    # Write the CSV of this class; mode 'w' truncates so a re-run replaces the file.
    rutaCsv = 'csvs_processed/datos'+str(clase)+'.csv'
    with open(rutaCsv, 'w', newline='') as myfile:
        wr = csv.writer(myfile)
        wr.writerows(datos)

# Number of columns per row: HOG features plus the label column.
n_features = datos.shape[1]

print('El numero de caracteristicas es {}\n'.format(n_features))

El numero de caracteristicas es 8101



In [8]:
# Split every processed per-class CSV 70/30, stack the pieces and save both matrices.
ruta = 'csvs_processed/'
prefijo = 'datos'
extension = '.csv'

# Keep only files named datos<N>.csv. Anything else in the folder (e.g. .ipynb_checkpoints)
# would make the numeric sort key fail with int('').
csvs_clase = [f for f in os.listdir(ruta)
              if f.startswith(prefijo) and f.endswith(extension)
              and f[len(prefijo):-len(extension)].isdecimal()]
# Numeric order (datos2 before datos10), not lexicographic.
csvs_clase.sort(key=lambda f: int(f[len(prefijo):-len(extension)]))

# Collect the per-class pieces and stack them once at the end; growing an array with
# np.append inside the loop recopies everything accumulated so far on each iteration.
bloques_train = []
bloques_test = []

for csv_string in csvs_clase:
    print("Creating train and test sets for {}".format(csv_string))
    rutaCsv = ruta+csv_string
    # Read the rows of this class; ndmin=2 keeps a single-row file two-dimensional.
    data = np.loadtxt(rutaCsv,delimiter=',',ndmin=2)
    X= data[:,:-1]
    y= data[:,-1].reshape(-1,1)

    # Per-class split with a fixed seed, so every class contributes ~30% to test.
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=2020)

    # Re-attach the label as the last column of each split.
    train = np.hstack((Xtrain,ytrain))
    test = np.hstack((Xtest,ytest))
    print("Total samples {}\ntrain samples {}\ntest samples {}\n".format(data.shape[0],train.shape[0],test.shape[0]))
    bloques_train.append(train)
    bloques_test.append(test)

# With no CSVs, fall back to the empty (0, n_features) matrices the cell always started from.
l_train = np.vstack(bloques_train) if bloques_train else np.empty((0, n_features))
l_test = np.vstack(bloques_test) if bloques_test else np.empty((0, n_features))

print(l_train.shape)
print(l_test.shape)
np.save('train_img_features_processed',l_train)
np.save('test_img_features_processed',l_test)

Creating train test for datos0.csv
Total samples 6
train samples 4
test samples 2

Creating train test for datos1.csv
Total samples 73
train samples 51
test samples 22

Creating train test for datos2.csv
Total samples 74
train samples 51
test samples 23

Creating train test for datos3.csv
Total samples 46
train samples 32
test samples 14

Creating train test for datos4.csv
Total samples 65
train samples 45
test samples 20

Creating train test for datos5.csv
Total samples 61
train samples 42
test samples 19

Creating train test for datos6.csv
Total samples 13
train samples 9
test samples 4

Creating train test for datos7.csv
Total samples 47
train samples 32
test samples 15

Creating train test for datos8.csv
Total samples 46
train samples 32
test samples 14

Creating train test for datos9.csv
Total samples 48
train samples 33
test samples 15

Creating train test for datos10.csv
Total samples 66
train samples 46
test samples 20

Creating train test for datos11.csv
Total samples 43
train