In [113]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import GridSearchCV
from time import time


In [114]:
# Carga datos

X = pd.read_pickle('../pkls/dfAtributosNormalizado_0_dias_2_celdas.pkl')
Y = pd.read_pickle('../pkls/dfAvistamientos.pkl')
Y = np.ravel(Y)

In [115]:
# Validacion cruzada

from sklearn.model_selection import KFold

kf = KFold(n_splits=3)
kf.get_n_splits(X)

3

## Random forest

In [32]:
from sklearn.ensemble import RandomForestRegressor


In [88]:
def forest(atributos,resultado,k_n):
    params = {'bootstrap' : ['True', 'False'],
            'n_estimators': [50,100,200,500,1000],
            'max_depth': ['None',5,10,50,100],
            'max_features': [2,5,10,20,50,100,'auto','sqrt', 'log2']}
    inicio = time()
    model_random = GridSearchCV(estimator=RandomForestRegressor(), 
                           cv=k_n,
                           param_grid =params,
                           n_jobs = -1)

    model_random.fit(atributos, resultado)
    
    fin = time()
    tiempo = (fin - inicio)/60
    print('Tiempo empleado para Random Forest: {} minutos'.format(tiempo), flush=True)
    print('Best_params: {}\nBest_score: {}'.format(model_random.best_params_,model_random.best_score_), flush=True)
    
    return tiempo,model_random

In [89]:
tiempo , modelo = forest(X,Y,3)

Tiempo empleado para Random Forest: 8.085378682613372 minutos
Best_params: {'bootstrap': 'False', 'max_depth': 5, 'max_features': 2, 'n_estimators': 100}
Best_score: -0.48527287230369853


# Nearest Neighbor

In [68]:
from sklearn.neighbors import KNeighborsRegressor

In [86]:
def vecino(atributos,resultado,k_n):

    # params = {'n_neighbors' : list(range(1,atributos.shape[1]))}
    params = {'n_neighbors' : random.sample(range(3, atributos.shape[1]), 10) ,
             'weights':['uniform', 'distance']}

    inicio = time()
    model_vecinos = GridSearchCV(estimator=KNeighborsRegressor(), 
                               cv=k_n,
                               param_grid=params,
                                n_jobs = -1)


    model_vecinos.fit(atributos, resultado)
    
    fin = time()
    tiempo = (fin - inicio)/60
    print('Tiempo empleado para Random Forest: {} minutos'.format(tiempo), flush=True)
    print('Best_params: {}\nBest_score: {}'.format(model_vecinos.best_params_,model_vecinos.best_score_), flush=True)
    
    return tiempo,model_vecinos

In [87]:
tiempo , modelo = vecino(X,Y,3)

Tiempo empleado para Random Forest: 0.04386770327885946 minutos
Best_params: {'n_neighbors': 118, 'weights': 'uniform'}
Best_score: 0.018924132366616053


# SVM

In [62]:
from sklearn import svm

In [82]:
def SVR(atributos,resultado,k_n):

    params = {'kernel':('linear', 'rbf', 'poly', 'sigmoid'),
              'C' : [0.5,1.0,10,100],
              'gamma':['scale', 'auto'],
              'epsilon':[0.2]}

    inicio = time()

    model_SVR= GridSearchCV(estimator=svm.SVR(), 
                           cv=k_n,
                           param_grid=params,
                                n_jobs = -1)

    model_SVR.fit(atributos, resultado)
    
    fin = time()
    tiempo = (fin - inicio)/60
    print('Tiempo empleado para Random Forest: {} minutos'.format(tiempo), flush=True)
    print('Best_params: {}\nBest_score: {}'.format(model_SVR.best_params_,model_SVR.best_score_), flush=True)
    
    return tiempo,model_SVR

In [83]:
tiempo , modelo = SVR(X,Y,3)

Tiempo empleado para Random Forest: 0.05447314182917277 minutos
Best_params: {'C': 100, 'epsilon': 0.2, 'gamma': 'scale', 'kernel': 'rbf'}
Best_score: -0.031257033117513044


# Arboles de decision

In [90]:
from sklearn.tree import DecisionTreeRegressor

In [92]:
def arbol_decision_grid(atributos,resultadon,k_n):
    
    params = {'max_depth':list(range(1,50)),
              'max_features' : ['auto', 'sqrt', 'log2',None]}

    inicio = time()
    
    model_tree= GridSearchCV(estimator=DecisionTreeRegressor(), 
                           cv=k_n,
                           param_grid=params,
                            n_jobs = 4)


    model_tree.fit(atributos, resultado)
    
    fin = time()
    tiempo = (fin - inicio)/60
    print('Tiempo empleado para Random Forest: {} minutos'.format(tiempo), flush=True)
    print('Best_params: {}\nBest_score: {}'.format(model_tree.best_params_,model_tree.best_score_), flush=True)
    
    return tiempo,model_tree

In [None]:
tiempo , modelo = arbol_decision_grid(X,Y,3)

# Boosting

In [93]:
from sklearn.ensemble import GradientBoostingRegressor

In [95]:
def boosting(atributos,resultadon,k_n):
    
    params = {'n_estimators': randint(1, 200),
              'max_depth': randint(1, 100)}

    inicio = time()
    
    model_boos= RandomizedSearchCV(estimator=GradientBoostingRegressor(), 
                           cv=tscv,
                           n_iter=n,
                           param_distributions=params,
                           n_jobs = 4)


    model_boos.fit(atributos, resultado)
    
    fin = time()
    tiempo = (fin - inicio)/60
    print('Tiempo empleado para Random Forest: {} minutos'.format(tiempo), flush=True)
    print('Best_params: {}\nBest_score: {}'.format(model_boos.best_params_,model_boos.best_score_), flush=True)
    
    return tiempo,model_boos

In [None]:
tiempo , modelo = boosting(X,Y,3)

# MLP (red neuronal)

In [116]:
from sklearn.neural_network import MLPRegressor

In [117]:
def MLP(atributos,resultado,k_n):

    params = {'alpha' : [0.00001,0.0001,0.001],
            'max_iter' : [1000,2000,5000],
            'random_state': [0,1,10]}

    inicio = time()

    model_MLP= GridSearchCV(estimator=MLPRegressor(), 
                           cv=k_n,
                           param_grid=params)


    model_MLP.fit(atributos, resultado)
    
    fin = time()
    tiempo = (fin - inicio)/60
    print('Tiempo empleado para Random Forest: {} minutos'.format(tiempo), flush=True)
    print('Best_params: {}\nBest_score: {}'.format(model_MLP.best_params_,model_MLP.best_score_), flush=True)
    
    return tiempo,model_MLP

In [None]:
tiempo , modelo = MLP(X,Y,3)





# Ensembles