# Clasificadores

En este notebook se entrenarán los distintos clasificadores a probar, utilizando las características previamente serializadas en el *Jupyter notebook: 1_HoG*.

A su vez, los clasificadores entrenados en este notebook serán serializados por motivos de rendimiento y facilidad de uso.

## 1. Deserializamos las características

In [1]:
import os
import pickle

# Directory holding the serialized artifacts. The empty last component makes
# os.path.join emit a trailing separator, so the other cells can keep building
# file names with `path + '<name>.sav'`.
path = os.path.join('..', '..', 'rsc', 'obj', '')

X_train_path = path + 'X_train.sav'
y_train_path = path + 'y_train.sav'

# NOTE: pickle.load can execute arbitrary code; only load files you trust.
# Context managers guarantee the file handles are closed after reading.
with open(X_train_path, 'rb') as X_train_file:
    X_train = pickle.load(X_train_file)
with open(y_train_path, 'rb') as y_train_file:
    y_train = pickle.load(y_train_file)

In [3]:
# Sanity check: show the dimensions of the deserialized training matrix.
train_shape = X_train.shape
print(train_shape)

(53688, 1215)


# 2. Entrenamos múltiples clasificadores

## 2.1. SVM

In [8]:
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

svm_path = path + 'svm_clf.sav'

# Cross-validated search over the regularization strength C.
grid = GridSearchCV(LinearSVC(), {'C': [1.0, 2.0, 4.0, 8.0]})

grid.fit(X_train, y_train)

# GridSearchCV defaults to refit=True, so best_estimator_ has already been
# retrained on the full training set with the best C. Fitting it again would
# only repeat that training.
svm_clf = grid.best_estimator_

# Serialize the classifier; the context manager flushes and closes the file.
with open(svm_path, 'wb') as svm_file:
    pickle.dump(svm_clf, svm_file)

## 2.2. Gradient tree boosting 

In [9]:
from sklearn.ensemble import GradientBoostingClassifier

gtb_path = path + 'gtb_clf.sav'

# Boosted ensemble of 100 depth-1 trees (decision stumps); random_state is
# fixed so the fit is reproducible.
gtb_clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                     max_depth=1, random_state=0).fit(X_train, y_train)

# Serialize the classifier; the context manager flushes and closes the file.
with open(gtb_path, 'wb') as gtb_file:
    pickle.dump(gtb_clf, gtb_file)

## 2.3. Random forests

In [5]:
from sklearn.ensemble import RandomForestClassifier

rf_path = path + 'rf_clf.sav'

# Forest of 10 fully grown trees (max_depth=None); random_state is fixed so
# the fit is reproducible.
rf_clf = RandomForestClassifier(n_estimators=10, max_depth=None,
                                min_samples_split=2, random_state=0).fit(X_train, y_train)

# Serialize the classifier; the context manager flushes and closes the file.
with open(rf_path, 'wb') as rf_file:
    pickle.dump(rf_clf, rf_file)

## 2.4. SVM con probabilidades

In [None]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV


svm2_path = path + 'svm2_clf.sav'

# This section is titled "SVM with probabilities", but SVC only exposes
# predict_proba when it is built with probability=True. The flag adds an
# internal cross-validated calibration step, so training is slower.
svm2_clf = svm.SVC(gamma=2, C=1, probability=True)
svm2_clf.fit(X_train, y_train)


# Serialize the classifier; the context manager flushes and closes the file.
with open(svm2_path, 'wb') as svm2_file:
    pickle.dump(svm2_clf, svm2_file)

In [None]:
svm3_path = path + 'svm3_clf.sav'

# Cross-validated search over C for the kernel SVC.
# NOTE(review): svm.SVC() is built without probability=True here; confirm
# whether this model is also expected to provide predict_proba.
grid2 = GridSearchCV(svm.SVC(), {'C': [1.0, 2.0, 4.0, 8.0]})

grid2.fit(X_train, y_train)

# Take the winner of *this* search (grid2). The original read `grid`, i.e.
# the LinearSVC search from section 2.1, so the wrong model was saved.
# GridSearchCV defaults to refit=True, so best_estimator_ is already trained
# on the full training set and needs no further fit.
svm3_clf = grid2.best_estimator_

# Serialize the classifier; the context manager flushes and closes the file.
with open(svm3_path, 'wb') as svm3_file:
    pickle.dump(svm3_clf, svm3_file)

## 2.5. Linear regression

In [4]:
from sklearn import linear_model

regr_path = path + 'rgr_clf.sav'
# Create linear regression object.
# NOTE(review): LinearRegression is a regressor, so predict() returns
# continuous values rather than class labels; confirm the consumer
# thresholds them before comparing against the other classifiers.
regr_clf = linear_model.LinearRegression()

# Train the model using the training sets
regr_clf.fit(X_train, y_train)

# Serialize the model; the context manager flushes and closes the file.
with open(regr_path, 'wb') as regr_file:
    pickle.dump(regr_clf, regr_file)

## 2.6. Bayes

In [5]:
from sklearn.naive_bayes import GaussianNB

gnb_path = path + 'gnb_clf.sav'

# Gaussian naive Bayes with default settings.
gnb_clf = GaussianNB()

gnb_clf.fit(X_train, y_train)

# Serialize the classifier; the context manager flushes and closes the file.
with open(gnb_path, 'wb') as gnb_file:
    pickle.dump(gnb_clf, gnb_file)