# In [None]:
import matplotlib
import numpy as np
import seaborn as sns
import pandas as pd
import sklearn as sk
import sys
import matplotlib.pyplot as plt
from sklearn import svm
from jedi.api.refactoring import inline

from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RANSACRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.pipeline import make_pipeline
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, AdaBoostRegressor

from sklearn.preprocessing import OneHotEncoder, StandardScaler

from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.base import clone
from sklearn.model_selection import cross_val_score, cross_val_predict, train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, precision_recall_curve, roc_curve, roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.datasets import load_boston
from sklearn.svm import SVR

# Load the Boston housing data set through scikit-learn and wrap the feature
# matrix in a DataFrame whose columns carry the feature names.
# NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm the
# pinned scikit-learn version or switch to another data source.
boston_data = load_boston()
df = pd.DataFrame(
    data=boston_data.data,
    columns=boston_data.feature_names,
)
df.head()  # first rows of the frame; the result is not stored

# Target values, and a single-column feature matrix built from LSTAT.
y = boston_data.target
X = df[['LSTAT']].to_numpy()

# Support Vector Regression (SVR) model fitted on the full data set.
svr = SVR(gamma='auto').fit(X, y)

# Indices that order the samples by increasing LSTAT, so the predicted curve
# is drawn from left to right instead of zig-zagging between points.
sort_idx = np.argsort(X.flatten())
X_sorted = X[sort_idx]
y_sorted = y[sort_idx]

plt.figure(figsize=(12, 8))
plt.scatter(X_sorted, y_sorted)                      # observed values
plt.plot(X_sorted, svr.predict(X_sorted), color='k')  # SVR prediction

plt.xlabel('LSTAT')
plt.ylabel('MEDV')
plt.show()

# Split the data into training and test subsets.
# NOTE(review): train_size=0.3 keeps only 30% of the samples for training --
# confirm this was not meant to be test_size=0.3.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.3, random_state=42
)

# Linear kernel
svr = SVR(kernel='linear').fit(X_train, y_train)

# Predictions on both subsets.
y_train_pred, y_test_pred = svr.predict(X_train), svr.predict(X_test)

# Mean squared error and R^2 for the training and test subsets.
mse_train, mse_test = (
    mean_squared_error(y_train, y_train_pred),
    mean_squared_error(y_test, y_test_pred),
)
R2Score_train, R2Score_test = (
    r2_score(y_train, y_train_pred),
    r2_score(y_test, y_test_pred),
)

print('MSE train: {0:.4f}, test: {1:.4f}'.format(mse_train, mse_test))
print("R2 train: {0: .4f}, test; {1:.4f}".format(R2Score_train, R2Score_test))


# Polynomial kernel (degree 2), fitted on the training subset.
svr = SVR(kernel='poly', C=1e3, degree=2, gamma='auto')
svr = svr.fit(X_train, y_train)  # fit() returns the estimator itself

# Training-subset prediction first, then the test-subset prediction.
y_train_pred = svr.predict(X_train)
y_test_pred = svr.predict(X_test)

# Error metrics, grouped per metric rather than per statement.
mse_train, mse_test = [
    mean_squared_error(actual, predicted)
    for actual, predicted in ((y_train, y_train_pred), (y_test, y_test_pred))
]
R2Score_train, R2Score_test = [
    r2_score(actual, predicted)
    for actual, predicted in ((y_train, y_train_pred), (y_test, y_test_pred))
]

print('MSE train: {0:.4f}, test: {1:.4f}'.format(mse_train, mse_test))
print("R2 train: {0: .4f}, test; {1:.4f}".format(R2Score_train, R2Score_test))

# RBF kernel, fitted on the training subset.
# gamma must be a number (or the strings 'scale' / 'auto'); the original
# string '0.1' is rejected by SVR, so the numeric value 0.1 is passed instead.
svr = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr.fit(X_train, y_train)

# Predictions on the training and test subsets.
y_train_pred = svr.predict(X_train)
y_test_pred = svr.predict(X_test)

# Mean squared error on each subset.
mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)

# Coefficient of determination (R^2) on each subset.
R2Score_train = r2_score(y_train, y_train_pred)
R2Score_test = r2_score(y_test, y_test_pred)

print('MSE train: {0:.4f}, test: {1:.4f}'.format(mse_train, mse_test))
print("R2 train: {0: .4f}, test; {1:.4f}".format(R2Score_train, R2Score_test))


#------------------------------------------------ Código Resumido -----------------------------------------------------------
def SVRTypes(p_X_train, p_y_train, p_X_test, p_y_test, type):
    """Fit an SVR with the requested kernel and print its train/test metrics.

    The model is fitted on the training data, used to predict both the
    training and the test data, and the mean squared error and R^2 score of
    each subset are printed.

    Parameters
    ----------
    p_X_train, p_y_train
        Training features and targets, passed to ``SVR.fit``.
    p_X_test, p_y_test
        Test features and targets, used only for evaluation.
    type
        Kernel label: ``'Lineal'`` (linear kernel), ``'poly'`` (degree-2
        polynomial kernel) or ``'rbf'`` (RBF kernel). Any other label is
        ignored and nothing is fitted or printed.

    Returns
    -------
    None
    """
    # Constructor arguments for each supported kernel label.
    kernel_params = {
        'Lineal': {'kernel': 'linear'},
        'poly': {'kernel': 'poly', 'C': 1e3, 'degree': 2, 'gamma': 'auto'},
        # gamma must be numeric (or 'scale' / 'auto'); the string '0.1' that
        # was passed before is rejected by SVR.
        'rbf': {'kernel': 'rbf', 'C': 1e3, 'gamma': 0.1},
    }

    params = kernel_params.get(type)
    if params is None:
        # Unknown labels are silently ignored, as before.
        return

    svr = SVR(**params)
    svr.fit(p_X_train, p_y_train)

    y_train_pred = svr.predict(p_X_train)
    y_test_pred = svr.predict(p_X_test)

    mse_train = mean_squared_error(p_y_train, y_train_pred)
    # Evaluate against the p_y_test argument; the linear branch previously
    # read the module-level y_test by mistake.
    mse_test = mean_squared_error(p_y_test, y_test_pred)

    R2Score_train = r2_score(p_y_train, y_train_pred)
    R2Score_test = r2_score(p_y_test, y_test_pred)

    print('MSE train: {0:.4f}, test: {1:.4f}'.format(mse_train, mse_test))
    print("R2 train: {0: .4f}, test; {1:.4f}".format(R2Score_train, R2Score_test))

    return


# Advantages and disadvantages of SVMs
# Advantages
# 1. Effective in high-dimensional spaces.
# 2. Uses only a subset of the training points (the support vectors) in the decision function.
# 3. Many different kernel functions can be specified for the decision function,
#    e.g. linear, polynomial, radial basis function (RBF), sigmoid, or custom.

# Disadvantages
# 1. Beware of overfitting when the number of features is greater than the number of samples.
# 2. The choice of kernel and regularization can have a large impact on performance.
# 3. Does not estimate probabilities.

