# Experimento 1
## Regresión

La prueba de asentamiento del concreto mide la consistencia del concreto fresco antes de que fragüe. 

Se realiza para comprobar la trabajabilidad del hormigón recién hecho y, por tanto, la facilidad con la que fluye el hormigón. También se puede utilizar como indicador de un lote mal mezclado.


Variables de entrada (7) (componentes en kg por m³ de concreto):
* Cement
* Slag
* Fly ash
* Water
* SP
* Coarse Aggr.
* Fine Aggr.

Variables de salida (3):
* SLUMP (cm)
* FLOW (cm)
* **28-day Compressive Strength (Mpa)**

Data Source: https://archive.ics.uci.edu/ml/datasets/Concrete+Slump+Test

*Credit: Yeh, I-Cheng, "Modeling slump flow of concrete using second-order regressions and artificial neural networks," Cement and Concrete Composites, Vol.29, No. 6, 474-480, 2007.*

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from time import strftime, gmtime


In [47]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [4]:
from sagemaker import experiments

In [5]:
df = pd.read_csv('cement_slump.csv')

In [6]:
df.head()

Unnamed: 0,Cement,Slag,Fly ash,Water,SP,Coarse Aggr.,Fine Aggr.,SLUMP(cm),FLOW(cm),Compressive Strength (28-day)(Mpa)
0,273.0,82.0,105.0,210.0,9.0,904.0,680.0,23.0,62.0,34.99
1,163.0,149.0,191.0,180.0,12.0,843.0,746.0,0.0,20.0,41.14
2,162.0,148.0,191.0,179.0,16.0,840.0,743.0,1.0,20.0,41.81
3,162.0,148.0,190.0,179.0,19.0,838.0,741.0,3.0,21.5,42.08
4,154.0,112.0,144.0,220.0,10.0,923.0,658.0,20.0,64.0,26.82


In [7]:
df.corr()['Compressive Strength (28-day)(Mpa)']

Cement                                0.445656
Slag                                 -0.331522
Fly ash                               0.444380
Water                                -0.254320
SP                                   -0.037909
Coarse Aggr.                         -0.160610
Fine Aggr.                           -0.154532
SLUMP(cm)                            -0.223499
FLOW(cm)                             -0.124189
Compressive Strength (28-day)(Mpa)    1.000000
Name: Compressive Strength (28-day)(Mpa), dtype: float64

In [8]:
sns.heatmap(df.corr(),cmap='viridis')

<matplotlib.axes._subplots.AxesSubplot at 0x7fc59defa950>

In [9]:
df.columns

Index(['Cement', 'Slag', 'Fly ash', 'Water', 'SP', 'Coarse Aggr.',
       'Fine Aggr.', 'SLUMP(cm)', 'FLOW(cm)',
       'Compressive Strength (28-day)(Mpa)'],
      dtype='object')

## Train | Test Split

Alternatively, you could also set this up as a pipeline, something like:

    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> from sklearn.svm import SVR

    >>> clf = make_pipeline(StandardScaler(), SVR())

In [10]:
df.columns

Index(['Cement', 'Slag', 'Fly ash', 'Water', 'SP', 'Coarse Aggr.',
       'Fine Aggr.', 'SLUMP(cm)', 'FLOW(cm)',
       'Compressive Strength (28-day)(Mpa)'],
      dtype='object')

In [11]:
# Separate the regression target (28-day compressive strength) from the
# remaining columns, which are all used as model inputs.
target_column = 'Compressive Strength (28-day)(Mpa)'
y = df[target_column]
X = df.drop(columns=[target_column])

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [14]:
from sklearn.preprocessing import StandardScaler

In [15]:
scaler = StandardScaler()

In [16]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits.
scaled_X_train = scaler.fit(X_train).transform(X_train)
scaled_X_test = scaler.transform(X_test)

In [17]:
from sagemaker.utils import unique_name_from_base

experiment_name = unique_name_from_base("Experimento01")

In [18]:
experiment_name

'Experimento01-1683102702-44ae'

In [19]:
run_name = "Experimento01-run"

## Linear Regression

In [20]:
from sklearn import linear_model

### Entrenamiento

In [49]:
# Linear regression baseline: fit on the scaled training split and record
# MAE, RMSE and R^2 for the train and test splits in one experiment run.
run_name = "linear-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Training
    linear_regr = linear_model.LinearRegression()
    linear_regr.fit(scaled_X_train, y_train)
    print(linear_regr.coef_.tolist())

    # Evaluation on the training data
    llr_y_predict = linear_regr.predict(scaled_X_train)
    llr_mae = mean_absolute_error(y_train, llr_y_predict)
    # The metric is named "*:mse" like in the other runs of this notebook,
    # but the logged value is the square root of the MSE (RMSE).
    llr_mse = np.sqrt(mean_squared_error(y_train, llr_y_predict))
    llr_r2 = r2_score(y_train, llr_y_predict)
    run.log_metric(name="train:mae", value=llr_mae)
    run.log_metric(name="train:mse", value=llr_mse)
    run.log_metric(name="train:r2_score", value=llr_r2)

    # Evaluation on the held-out test data
    llr_y_predict = linear_regr.predict(scaled_X_test)
    llr_mae = mean_absolute_error(y_test, llr_y_predict)
    llr_mse = np.sqrt(mean_squared_error(y_test, llr_y_predict))
    llr_r2 = r2_score(y_test, llr_y_predict)
    run.log_metric(name="test:mae", value=llr_mae)
    run.log_metric(name="test:mse", value=llr_mse)
    # Fix: the test R^2 was previously logged under "train:r2_score",
    # mixing it into the train metric and leaving no test R^2 at all.
    run.log_metric(name="test:r2_score", value=llr_r2)
    
     

[4.746863826765784, -1.5960356782690068, 4.272894632642386, -3.5100334188025766, -1.0783702630104201e-05, -3.979558080525755, -2.207670665641814, -1.774459611346505, 1.1652870265066777]


## Bayesian linear regression

In [22]:
from sklearn import linear_model

### Entrenamiento


In [23]:
# Bayesian ridge regression with its default hyperparameters: log those
# hyperparameters, fit on the scaled training split, and record MAE / RMSE
# for the train and test splits.
run_name = "bayesian-linear-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Create the model and log the hyperparameters it is configured with
    bayesian_reg = linear_model.BayesianRidge(verbose=True)
    bayesian_params = bayesian_reg.get_params()
    for hyperparameter in ('alpha_1', 'alpha_2', 'lambda_1', 'lambda_2', 'n_iter'):
        run.log_parameter(hyperparameter, bayesian_params[hyperparameter])

    # Train the model
    bayesian_reg.fit(scaled_X_train, y_train)

    # Evaluate on the training split first, then on the test split
    for mae_metric, mse_metric, features, targets in (
        ("train:mae", "train:mse", scaled_X_train, y_train),
        ("test:mae", "test:mse", scaled_X_test, y_test),
    ):
        bayesian_reg_y_predict = bayesian_reg.predict(features)
        bayeasian_mae = mean_absolute_error(targets, bayesian_reg_y_predict)
        # The value logged under the "*:mse" name is the square root of the MSE
        bayeasian_mse = np.sqrt(mean_squared_error(targets, bayesian_reg_y_predict))
        run.log_metric(name=mae_metric, value=bayeasian_mae)
        run.log_metric(name=mse_metric, value=bayeasian_mse)

Convergence after  4  iterations


### Grid Search

In [24]:
from sklearn.model_selection import GridSearchCV

In [25]:
# Grid search over BayesianRidge hyperparameters: log the best combination
# and record MAE / RMSE of the refitted best estimator on both splits.
run_name = "grid-bayesian-linear-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    bayesian_reg_param_grid = {
        "n_iter": [1, 10, 50, 150, 300, 600],
        "alpha_1": [1e-3, 1e-6, 1e-9],
        "alpha_2": [1e-3, 1e-6, 1e-9],
        "lambda_1": [1e-3, 1e-6, 1e-9],
        "lambda_2": [1e-3, 1e-6, 1e-9],
        "normalize": [False, True],
    }
    bayesian_reg = linear_model.BayesianRidge()
    bayesian_grid = GridSearchCV(bayesian_reg, param_grid=bayesian_reg_param_grid)
    bayesian_grid.fit(scaled_X_train, y_train)
    print(bayesian_grid.best_params_)

    # "normalize" is part of the search but is not logged as a run parameter
    for hyperparameter in ("alpha_1", "alpha_2", "lambda_1", "lambda_2", "n_iter"):
        run.log_parameter(
            name=hyperparameter,
            value=bayesian_grid.best_params_[hyperparameter],
        )

    # Evaluate on the training split first, then on the test split
    for mae_metric, mse_metric, features, targets in (
        ("train:mae", "train:mse", scaled_X_train, y_train),
        ("test:mae", "test:mse", scaled_X_test, y_test),
    ):
        bayesian_grid_preds = bayesian_grid.predict(features)
        bayeasian_mae = mean_absolute_error(targets, bayesian_grid_preds)
        # The value logged under the "*:mse" name is the square root of the MSE
        bayeasian_mse = np.sqrt(mean_squared_error(targets, bayesian_grid_preds))
        run.log_metric(name=mae_metric, value=bayeasian_mae)
        run.log_metric(name=mse_metric, value=bayeasian_mse)
    

{'alpha_1': 0.001, 'alpha_2': 1e-09, 'lambda_1': 1e-09, 'lambda_2': 0.001, 'n_iter': 1, 'normalize': False}


## K-Nearest Neighbors


In [26]:
from sklearn import neighbors

### Entrenamiento

In [27]:
# K-nearest-neighbours regression with default settings: fit on the scaled
# training split and record MAE / RMSE for the train and test splits.
run_name = "knn-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Training
    knn_regression = neighbors.KNeighborsRegressor()
    knn_regression.fit(scaled_X_train, y_train)
    print(knn_regression.get_params())

    # Evaluation on the training data
    knn_y_predict = knn_regression.predict(scaled_X_train)
    knn_mae = mean_absolute_error(y_train, knn_y_predict)
    # The metric is named "*:mse" like in the other runs of this notebook,
    # but the logged value is the square root of the MSE (RMSE).
    knn_mse = np.sqrt(mean_squared_error(y_train, knn_y_predict))
    # Fix: log the KNN metrics; the cell previously logged llr_mae / llr_mse,
    # i.e. the values left over from the linear regression cell.
    run.log_metric(name="train:mae", value=knn_mae)
    run.log_metric(name="train:mse", value=knn_mse)

    # Evaluation on the held-out test data
    knn_y_predict = knn_regression.predict(scaled_X_test)
    knn_mae = mean_absolute_error(y_test, knn_y_predict)
    knn_mse = np.sqrt(mean_squared_error(y_test, knn_y_predict))
    run.log_metric(name="test:mae", value=knn_mae)
    run.log_metric(name="test:mse", value=knn_mse)

{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}


### Grid Search

In [28]:
# Grid search over KNeighborsRegressor settings: log the best combination
# and record MAE / RMSE of the refitted best estimator on both splits.
run_name = "grid-knn-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    knn_reg_param_grid = {
        "n_neighbors": [1, 2, 3, 5, 7, 10, 25, 50],
        "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
        "metric": ['euclidean', 'manhattan', 'l1', 'l2'],
    }

    knn_grid = GridSearchCV(
        estimator=neighbors.KNeighborsRegressor(),
        param_grid=knn_reg_param_grid,
    )
    knn_grid.fit(scaled_X_train, y_train)
    print(knn_grid.best_params_)
    run.log_parameter(name="n_neighbors", value=knn_grid.best_params_['n_neighbors'])
    run.log_parameter(name="metric", value=knn_grid.best_params_['metric'])
    run.log_parameter(name="algorithm", value=knn_grid.best_params_['algorithm'])

    # Evaluation on the training data
    knn_grid_preds = knn_grid.predict(scaled_X_train)
    knn_mae = mean_absolute_error(y_train, knn_grid_preds)
    # The metric is named "*:mse" like in the other runs of this notebook,
    # but the logged value is the square root of the MSE (RMSE).
    knn_mse = np.sqrt(mean_squared_error(y_train, knn_grid_preds))
    run.log_metric(name="train:mae", value=knn_mae)
    run.log_metric(name="train:mse", value=knn_mse)

    # Evaluation on the held-out test data
    knn_grid_preds = knn_grid.predict(scaled_X_test)
    # Fix: the test MAE used to be stored in knn_mse and the test RMSE in an
    # unrelated variable, so "test:mae" received the train MAE and "test:mse"
    # received the test MAE.
    knn_mae = mean_absolute_error(y_test, knn_grid_preds)
    knn_mse = np.sqrt(mean_squared_error(y_test, knn_grid_preds))
    run.log_metric(name="test:mae", value=knn_mae)
    run.log_metric(name="test:mse", value=knn_mse)

{'algorithm': 'auto', 'metric': 'euclidean', 'n_neighbors': 2}


## Decision Tree

In [29]:
from sklearn import tree

### Entrenamiento

In [30]:
# Decision tree regression with default settings: fit on the scaled training
# split, print the training errors, and record MAE / RMSE for both splits.
run_name = "tree-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Training
    decision_tree_reg = tree.DecisionTreeRegressor()
    decision_tree_reg.fit(scaled_X_train, y_train)
    print(decision_tree_reg.get_params())

    # Evaluation on the training data
    decision_tree_reg_y_pred = decision_tree_reg.predict(scaled_X_train)
    tree_mae = mean_absolute_error(y_train, decision_tree_reg_y_pred)
    # The value logged under the "*:mse" name is the square root of the MSE
    tree_mse = np.sqrt(mean_squared_error(y_train, decision_tree_reg_y_pred))
    print(tree_mae, tree_mse, sep="\n")
    run.log_metric(name="train:mae", value=tree_mae)
    run.log_metric(name="train:mse", value=tree_mse)

    # Evaluation on the held-out test data
    decision_tree_reg_y_pred = decision_tree_reg.predict(scaled_X_test)
    tree_mae = mean_absolute_error(y_test, decision_tree_reg_y_pred)
    tree_mse = np.sqrt(mean_squared_error(y_test, decision_tree_reg_y_pred))
    run.log_metric(name="test:mae", value=tree_mae)
    run.log_metric(name="test:mse", value=tree_mse)

{'ccp_alpha': 0.0, 'criterion': 'mse', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'presort': 'deprecated', 'random_state': None, 'splitter': 'best'}
0.0
0.0


### Grid Search

In [31]:
# Grid search over DecisionTreeRegressor settings: log the best combination
# and record MAE / RMSE of the refitted best estimator on both splits.
run_name = "grid-tree-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    decision_tree_reg_param_grid = {
        "max_features": ["auto", "sqrt", "log2"],
        "criterion": ["mse", "friedman_mse"],
    }

    decision_tree_grid = GridSearchCV(
        estimator=tree.DecisionTreeRegressor(),
        param_grid=decision_tree_reg_param_grid,
    )
    decision_tree_grid.fit(scaled_X_train, y_train)
    print(decision_tree_grid.best_params_)
    for hyperparameter in ("max_features", "criterion"):
        run.log_parameter(
            name=hyperparameter,
            value=decision_tree_grid.best_params_[hyperparameter],
        )

    # Evaluate on the training split first, then on the test split
    for mae_metric, mse_metric, features, targets in (
        ("train:mae", "train:mse", scaled_X_train, y_train),
        ("test:mae", "test:mse", scaled_X_test, y_test),
    ):
        tree_grid_preds = decision_tree_grid.predict(features)
        tree_mae = mean_absolute_error(targets, tree_grid_preds)
        # The value logged under the "*:mse" name is the square root of the MSE
        tree_mse = np.sqrt(mean_squared_error(targets, tree_grid_preds))
        run.log_metric(name=mae_metric, value=tree_mae)
        run.log_metric(name=mse_metric, value=tree_mse)

{'criterion': 'mse', 'max_features': 'auto'}


## Random Forest


In [32]:
from sklearn.ensemble import RandomForestRegressor

### Entrenamiento

In [33]:
# Random forest regression with default settings: fit on the scaled training
# split and record MAE / RMSE for the train and test splits.
run_name = "randomforest-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Training
    rforest_regression = RandomForestRegressor()
    rforest_regression.fit(scaled_X_train, y_train)
    print(rforest_regression.get_params())

    # Evaluate on the training split first, then on the test split
    for mae_metric, mse_metric, features, targets in (
        ("train:mae", "train:mse", scaled_X_train, y_train),
        ("test:mae", "test:mse", scaled_X_test, y_test),
    ):
        rforest_regression_y_pred = rforest_regression.predict(features)
        rforest_mae = mean_absolute_error(targets, rforest_regression_y_pred)
        # The value logged under the "*:mse" name is the square root of the MSE
        rforest_mse = np.sqrt(mean_squared_error(targets, rforest_regression_y_pred))
        run.log_metric(name=mae_metric, value=rforest_mae)
        run.log_metric(name=mse_metric, value=rforest_mse)

{'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'mse', 'max_depth': None, 'max_features': 'auto', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}


## XGBoost

In [34]:
!pip install xgboost

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [35]:
import xgboost as xgb

### Entrenamiento

In [36]:
# XGBoost regression with default settings: fit on the scaled training split
# and record MAE / RMSE for the train and test splits.
run_name = "xgboost-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Training
    xgb_regression = xgb.XGBRegressor()
    xgb_regression.fit(scaled_X_train, y_train)
    print(xgb_regression.get_params())

    # Evaluation on the training data
    xgb_regression_y_pred = xgb_regression.predict(scaled_X_train)
    xgb_mae = mean_absolute_error(y_train, xgb_regression_y_pred)
    # The metric is named "*:mse" like in the other runs of this notebook,
    # but the logged value is the square root of the MSE (RMSE).
    xgb_mse = np.sqrt(mean_squared_error(y_train, xgb_regression_y_pred))
    run.log_metric(name="train:mae", value=xgb_mae)
    run.log_metric(name="train:mse", value=xgb_mse)

    # Evaluation on the held-out test data
    xgb_regression_y_pred = xgb_regression.predict(scaled_X_test)
    xgb_mae = mean_absolute_error(y_test, xgb_regression_y_pred)
    # Fix: score the XGBoost predictions; the cell previously computed this
    # value from rforest_regression_y_pred (the random forest predictions).
    xgb_mse = np.sqrt(mean_squared_error(y_test, xgb_regression_y_pred))
    run.log_metric(name="test:mae", value=xgb_mae)
    run.log_metric(name="test:mse", value=xgb_mse)

{'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'callbacks': None, 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 1, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'gamma': 0, 'gpu_id': -1, 'grow_policy': 'depthwise', 'importance_type': None, 'interaction_constraints': '', 'learning_rate': 0.300000012, 'max_bin': 256, 'max_cat_to_onehot': 4, 'max_delta_step': 0, 'max_depth': 6, 'max_leaves': 0, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 100, 'n_jobs': 0, 'num_parallel_tree': 1, 'predictor': 'auto', 'random_state': 0, 'reg_alpha': 0, 'reg_lambda': 1, 'sampling_method': 'uniform', 'scale_pos_weight': 1, 'subsample': 1, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None}


## Linear Support Vector Machine


In [37]:
from sklearn.svm import LinearSVR

### Entrenamiento

In [38]:
# Linear support vector regression with default settings: fit on the scaled
# training split and record MAE / RMSE for the train and test splits.
run_name = "linearsvm-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Training
    svm_regression = LinearSVR()
    svm_regression.fit(scaled_X_train, y_train)
    print(svm_regression.get_params())

    # Evaluate on the training split first, then on the test split
    for mae_metric, mse_metric, features, targets in (
        ("train:mae", "train:mse", scaled_X_train, y_train),
        ("test:mae", "test:mse", scaled_X_test, y_test),
    ):
        svm_regression_y_pred = svm_regression.predict(features)
        svm_mae = mean_absolute_error(targets, svm_regression_y_pred)
        # The value logged under the "*:mse" name is the square root of the MSE
        svm_mse = np.sqrt(mean_squared_error(targets, svm_regression_y_pred))
        run.log_metric(name=mae_metric, value=svm_mae)
        run.log_metric(name=mse_metric, value=svm_mse)

{'C': 1.0, 'dual': True, 'epsilon': 0.0, 'fit_intercept': True, 'intercept_scaling': 1.0, 'loss': 'epsilon_insensitive', 'max_iter': 1000, 'random_state': None, 'tol': 0.0001, 'verbose': 0}


In [39]:
### Grid Search

In [40]:
# Grid search over LinearSVR's C and epsilon: log the best combination and
# record MAE / RMSE of the refitted best estimator on both splits.
run_name = "grid-linearsvm-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    svm_param_grid = {
        'C': [0.001, 0.01, 0.1, 0.5, 1],
        'epsilon': [0, 0.01, 0.1, 0.5, 1, 2],
    }

    svm_grid = GridSearchCV(estimator=LinearSVR(), param_grid=svm_param_grid)
    svm_grid.fit(scaled_X_train, y_train)
    print(svm_grid.best_params_)
    for hyperparameter in ("C", "epsilon"):
        run.log_parameter(name=hyperparameter, value=svm_grid.best_params_[hyperparameter])

    # Evaluate on the training split first, then on the test split
    for mae_metric, mse_metric, features, targets in (
        ("train:mae", "train:mse", scaled_X_train, y_train),
        ("test:mae", "test:mse", scaled_X_test, y_test),
    ):
        svm_grid_preds = svm_grid.predict(features)
        svm_mae = mean_absolute_error(targets, svm_grid_preds)
        # The value logged under the "*:mse" name is the square root of the MSE
        svm_mse = np.sqrt(mean_squared_error(targets, svm_grid_preds))
        run.log_metric(name=mae_metric, value=svm_mae)
        run.log_metric(name=mse_metric, value=svm_mse)

{'C': 1, 'epsilon': 0.01}


## Support Vector Machine


In [41]:
from sklearn.svm import SVR

### Entrenamiento

In [42]:
# Support vector regression (SVR) with default settings: fit on the scaled
# training split and record MAE / RMSE for the train and test splits.
run_name = "svm-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Training
    svm_regression = SVR()
    svm_regression.fit(scaled_X_train, y_train)
    print(svm_regression.get_params())

    # Evaluate on the training split first, then on the test split
    for mae_metric, mse_metric, features, targets in (
        ("train:mae", "train:mse", scaled_X_train, y_train),
        ("test:mae", "test:mse", scaled_X_test, y_test),
    ):
        svm_regression_y_pred = svm_regression.predict(features)
        svm_mae = mean_absolute_error(targets, svm_regression_y_pred)
        # The value logged under the "*:mse" name is the square root of the MSE
        svm_mse = np.sqrt(mean_squared_error(targets, svm_regression_y_pred))
        run.log_metric(name=mae_metric, value=svm_mae)
        run.log_metric(name=mse_metric, value=svm_mse)

{'C': 1.0, 'cache_size': 200, 'coef0': 0.0, 'degree': 3, 'epsilon': 0.1, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'shrinking': True, 'tol': 0.001, 'verbose': False}


### Grid Search

In [43]:
# Grid search over SVR settings (C, kernel, gamma, degree, epsilon): log the
# best combination and record MAE / RMSE of the refitted best estimator on
# both splits.
run_name = "grid-svm-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    svm_param_grid = {
        'C': [0.001, 0.01, 0.1, 0.5, 1],
        'kernel': ['linear', 'rbf', 'poly'],
        'gamma': ['scale', 'auto'],
        'degree': [2, 3, 4],
        'epsilon': [0, 0.01, 0.1, 0.5, 1, 2],
    }

    svm_grid = GridSearchCV(estimator=SVR(), param_grid=svm_param_grid)
    svm_grid.fit(scaled_X_train, y_train)
    print(svm_grid.best_params_)
    for hyperparameter in ("C", "kernel", "gamma", "degree", "epsilon"):
        run.log_parameter(name=hyperparameter, value=svm_grid.best_params_[hyperparameter])

    # Evaluate on the training split first, then on the test split
    for mae_metric, mse_metric, features, targets in (
        ("train:mae", "train:mse", scaled_X_train, y_train),
        ("test:mae", "test:mse", scaled_X_test, y_test),
    ):
        svm_grid_preds = svm_grid.predict(features)
        svm_mae = mean_absolute_error(targets, svm_grid_preds)
        # The value logged under the "*:mse" name is the square root of the MSE
        svm_mse = np.sqrt(mean_squared_error(targets, svm_grid_preds))
        run.log_metric(name=mae_metric, value=svm_mae)
        run.log_metric(name=mse_metric, value=svm_mse)

{'C': 1, 'degree': 2, 'epsilon': 2, 'gamma': 'scale', 'kernel': 'linear'}


## Artificial Neural Network

In [44]:
from sklearn import neural_network

### Entrenamiento

In [45]:
# Multi-layer perceptron regression (max_iter raised to 5000, otherwise
# default settings): fit on the scaled training split and record MAE / RMSE
# for the train and test splits.
run_name = "ann-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Training
    ann_regression = neural_network.MLPRegressor(max_iter=5000, verbose=False)
    ann_regression.fit(scaled_X_train, y_train)
    print(ann_regression.get_params())

    # Evaluate on the training split first, then on the test split
    for mae_metric, mse_metric, features, targets in (
        ("train:mae", "train:mse", scaled_X_train, y_train),
        ("test:mae", "test:mse", scaled_X_test, y_test),
    ):
        ann_regression_y_pred = ann_regression.predict(features)
        ann_mae = mean_absolute_error(targets, ann_regression_y_pred)
        # The value logged under the "*:mse" name is the square root of the MSE
        ann_mse = np.sqrt(mean_squared_error(targets, ann_regression_y_pred))
        run.log_metric(name=mae_metric, value=ann_mae)
        run.log_metric(name=mse_metric, value=ann_mse)

{'activation': 'relu', 'alpha': 0.0001, 'batch_size': 'auto', 'beta_1': 0.9, 'beta_2': 0.999, 'early_stopping': False, 'epsilon': 1e-08, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant', 'learning_rate_init': 0.001, 'max_fun': 15000, 'max_iter': 5000, 'momentum': 0.9, 'n_iter_no_change': 10, 'nesterovs_momentum': True, 'power_t': 0.5, 'random_state': None, 'shuffle': True, 'solver': 'adam', 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': False, 'warm_start': False}
