In [None]:
import numpy as np
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import pandas as pd
import math

In [None]:
# #############################################################################
# Generate sample data
# Use a dedicated, seeded generator so the synthetic data (and therefore the
# figure below) is identical on every Restart & Run All, without touching
# NumPy's global random state.
rng = np.random.RandomState(0)
X = np.sort(5 * rng.rand(40, 1), axis=0)
y = np.sin(X).ravel()

# #############################################################################
# Add noise to targets
# Perturb every 5th target. The noise length is derived from the slice itself
# so it stays correct if the number of samples above is changed.
y[::5] += 3 * (0.5 - rng.rand(y[::5].shape[0]))

# #############################################################################
# Fit regression model
# One estimator per kernel; they are only configured here and fitted in the
# plotting loop below.
svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
svr_lin = SVR(kernel='linear', C=100, gamma='auto')
svr_poly = SVR(kernel='poly', C=100, gamma='auto', degree=3, epsilon=.1,
               coef0=1)

# #############################################################################
# Look at the results
lw = 2

svrs = [svr_rbf, svr_lin, svr_poly]
kernel_label = ['RBF', 'Linear', 'Polynomial']
model_color = ['m', 'c', 'g']

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 10), sharey=True)
all_indices = np.arange(len(X))
for ix, svr in enumerate(svrs):
    # Fit on the synthetic data and predict on the same points for plotting.
    y_fit = svr.fit(X, y).predict(X)
    # Split the training points into support vectors and the rest once,
    # instead of recomputing the set difference for X and y separately.
    support_idx = svr.support_
    other_idx = np.setdiff1d(all_indices, support_idx)

    axes[ix].plot(X, y_fit, color=model_color[ix], lw=lw,
                  label='{} model'.format(kernel_label[ix]))
    axes[ix].scatter(X[support_idx], y[support_idx], facecolor="none",
                     edgecolor=model_color[ix], s=50,
                     label='{} support vectors'.format(kernel_label[ix]))
    axes[ix].scatter(X[other_idx], y[other_idx],
                     facecolor="none", edgecolor="k", s=50,
                     label='other training data')
    axes[ix].legend(loc='upper center', bbox_to_anchor=(0.5, 1.1),
                    ncol=1, fancybox=True, shadow=True)

# Shared axis captions and the overall figure title.
fig.text(0.5, 0.04, 'data', ha='center', va='center')
fig.text(0.06, 0.5, 'target', ha='center', va='center', rotation='vertical')
fig.suptitle("Support Vector Regression", fontsize=14)
plt.show()

In [None]:
# Load the dataset CSV. The first row of the file is skipped and the column
# labels below are assigned explicitly (presumably replacing the file's own
# header row -- TODO confirm against the CSV).
df = pd.read_csv(
    '../datasets/dados_normalizados_modelo_agua_tratada.csv',
    delimiter=',',
    skiprows=1,
    names=[
        'CIDADE', 'COD_IBGE', 'CASOS_100k_H',
        'POP_ALFAB', 'POP_NAO_ALFAB',
        'PER_SEXO_F', 'PER_SEXO_M',
        'IDADE_MEDIA',
        'PER_RACA_BRANCA', 'PERC_RACA_PRETA', 'PERC_RACA_AMARELA',
        'PERC_RACA_PARDA', 'PERC_RACA_INDIGENA', 'PERC_RACA_IGNORADA',
        'PERC_SEM_ESCOLARIDADE', 'PERC_SEM_ESCOL_FUND_1',
        'PERC_SEM_ESCOL_FUND_2', 'PERC_SEM_ESCOL_MEDIO',
        'PERC_SEM_ESCOL_SUPERIOR', 'PERC_SEM_ESCOL_NAO_APLIC',
        'PERC_SEM_ESCOL_IGNORADO',
        'PERC_ZONA_URBANA', 'PERC_ZONA_RURAL', 'PERC_ZONA_PERIURBANA',
        'PERC_CONTATO_AVE_SUINO_SIM', 'PERC_CONTATO_AVE_SUINO_NAO',
        'PERC_TOSSE_SIM', 'PERC_TOSSE_NAO',
        'PERC_DOWN_SIM', 'PERC_DOWN_NAO',
        'PERC_VACINADO_GRIPE_SIM', 'PERC_VACINADO_GRIPE_NAO',
        'PERC_HIST_VIAGEM_SIM', 'PERC_HIST_VIAGEM_NAO',
        'PERC_ZONA_IGNORADA',
        'PERC_MORADORES_AGUA_TRATADA',
    ],
)

In [None]:
# Regression target: the CASOS_100k_H column.
y = df['CASOS_100k_H']

# Feature matrix: the four predictor columns, all rows.
X = df[
    [
        'IDADE_MEDIA',
        'PERC_TOSSE_SIM',
        'PERC_MORADORES_AGUA_TRATADA',
        'PERC_VACINADO_GRIPE_SIM',
    ]
].iloc[:]

In [None]:
from sklearn.model_selection import train_test_split

# Partition features and target into train and test sets: 30% of the rows are
# held out for testing, and random_state=0 pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Configure one SVR estimator per kernel (RBF, linear, polynomial).
# Nothing is fitted in this cell.
svr_rbf = SVR(
    kernel='rbf',
    C=100,
    gamma=0.1,
    epsilon=0.1,
)
svr_lin = SVR(
    kernel='linear',
    C=100,
    gamma='auto',
)
svr_poly = SVR(
    kernel='poly',
    C=100,
    gamma='auto',
    degree=3,
    epsilon=0.1,
    coef0=1,
)

In [None]:
# Train the RBF-kernel SVR on the training partition and keep the value
# returned by fit() for the prediction cells.
model_rbf = svr_rbf.fit(
    X_train,
    y_train,
)

In [None]:
# Train the linear-kernel SVR on the training partition and keep the value
# returned by fit() for the coefficient and prediction cells.
model_lin = svr_lin.fit(
    X_train,
    y_train,
)

In [None]:
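# NOTE: the polynomial-kernel fit is disabled (commented out), so this cell
# does not define `model_poly`.
# model_poly = svr_poly.fit(X_train, y_train)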

In [None]:
def getCoeffDf(model, X):
    """Return a fitted model's coefficients as a one-column DataFrame.

    Parameters
    ----------
    model : object
        Fitted estimator exposing a ``coef_`` attribute (for example a
        linear-kernel SVR).
    X : pandas.DataFrame
        Feature frame; its column labels become the index of the result.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``X`` with a single column ``'Coefficient'``.
    """
    # Flatten coef_ before building the frame: a 2-D (1, n_features) array
    # cannot be combined with an n_features-long index and a single column,
    # which makes pandas raise a shape ValueError. A 1-D coef_ is unchanged.
    coefficients = np.ravel(model.coef_)
    return pd.DataFrame(coefficients, index=X.columns, columns=['Coefficient'])

In [None]:
# Tabulate the linear model's coefficients against the feature names and
# display the table as the cell output.
coeff_lin = getCoeffDf(model=model_lin, X=X)
coeff_lin

In [None]:
def predict(model, X_test, y_test):
    """Run ``model.predict`` on ``X_test`` and pair the output with ``y_test``.

    Returns a 2-tuple ``(y_pred, comparison)``: ``y_pred`` is whatever
    ``model.predict(X_test)`` returns, and ``comparison`` is a DataFrame with
    the columns ``'Actual'`` (from ``y_test``) and ``'Predicted'`` (from
    ``y_pred``) showing predictions next to the real values.
    """
    predictions = model.predict(X_test)
    # Side-by-side table of real values and predictions.
    comparison = pd.DataFrame(dict(Actual=y_test, Predicted=predictions))
    return predictions, comparison

In [None]:
# Predict on the held-out set with the RBF model; keep the raw predictions
# for the metrics cell and display the actual-vs-predicted table.
y_pred_rbf, df_rbf = predict(
    model=model_rbf,
    X_test=X_test,
    y_test=y_test,
)
df_rbf

In [None]:
# Predict on the held-out set with the linear model; keep the raw predictions
# for the metrics cell and display the actual-vs-predicted table.
y_pred_lin, df_lin = predict(
    model=model_lin,
    X_test=X_test,
    y_test=y_test,
)
df_lin

In [None]:
# `model_poly` is not created by any earlier active cell (its fit is commented
# out), so fit the polynomial SVR here to keep this cell runnable on a fresh
# kernel.
# NOTE(review): the polynomial kernel on these features may be slow to fit --
# confirm the runtime is acceptable.
model_poly = svr_poly.fit(X_train, y_train)

# predict() returns a (predictions, comparison DataFrame) tuple; unpack it the
# same way as the RBF and linear cells so `df_poly` is the DataFrame itself.
y_pred_poly, df_poly = predict(model_poly, X_test, y_test)
df_poly

In [None]:
# Check several evaluation metrics
from sklearn import metrics

def showMetrics(y_test, y_pred):
    """Print MAE, MSE, RMSE and R^2 for predictions ``y_pred`` against ``y_test``.

    Nothing is returned; each metric is written to stdout on its own line.
    """
    mae = metrics.mean_absolute_error(y_test, y_pred)
    print('Mean Absolute Error:', mae)

    # The MSE is computed once and reused for the root-mean-squared error.
    mse = metrics.mean_squared_error(y_test, y_pred)
    print('Mean Squared Error:', mse)
    print('Root Mean Squared Error:', np.sqrt(mse))

    r_squared = metrics.r2_score(y_test, y_pred)
    print('Determination Coefficient (R^2)', r_squared)

In [None]:
# Report the error metrics of the RBF model on the held-out set.
showMetrics(y_test=y_test, y_pred=y_pred_rbf)

In [None]:
# Report the error metrics of the linear model on the held-out set.
showMetrics(y_test=y_test, y_pred=y_pred_lin)

In [None]:
from sklearn.model_selection import cross_val_score

def crossValidation(model, X, y):
    """Print the 4-fold cross-validated R^2 scores of ``model`` and their mean.

    ``model``, ``X`` and ``y`` are forwarded to ``cross_val_score`` with
    ``cv=4`` and ``scoring="r2"``. Nothing is returned.
    """
    fold_scores = cross_val_score(
        model,
        X,
        y,
        cv=4,
        scoring="r2",
    )
    # Per-fold scores first, then their average.
    print(fold_scores)
    print(fold_scores.mean())

In [None]:
# Cross-validate the RBF model. crossValidation requires the feature matrix
# and target in addition to the model; calling it with the model alone raises
# a TypeError.
crossValidation(model_rbf, X, y)

In [None]:
# Cross-validate the linear model. crossValidation requires the feature matrix
# and target in addition to the model; calling it with the model alone raises
# a TypeError.
crossValidation(model_lin, X, y)