In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

# 1 Carregando os dados

In [None]:
def read_pickle(name):
    """Load the last object pickled into ``name`` and return it as a NumPy array.

    The file may hold several consecutive pickle records. They are read in
    order until end-of-file and only the final record is kept.

    Parameters
    ----------
    name : str or path-like
        Path of the pickle file to read.

    Returns
    -------
    numpy.ndarray
        ``np.asanyarray`` applied to the last object stored in the file.

    Raises
    ------
    ValueError
        If the file does not contain any pickle record (e.g. it is empty).
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # on files that come from a trusted source.
    nothing_read = object()  # sentinel, so a pickled ``None`` is still a valid record
    last_instance = nothing_read
    with open(name, 'rb') as openfile:
        while True:
            try:
                last_instance = pickle.load(openfile)
            except EOFError:
                # End of file reached: every record has been consumed.
                break
    if last_instance is nothing_read:
        raise ValueError(f"no pickled object found in {name!r}")
    return np.asanyarray(last_instance)

In [None]:
# Base directory that holds the pickled train/test splits; file names are
# appended to it with plain string concatenation, so the trailing '/' matters.
# NOTE(review): hard-coded absolute path (looks like a Colab-mounted Google
# Drive) — adjust it when running in another environment.
path = '/content/drive/MyDrive/Sistemas_de_Informação/Sistemas inteligentes/RESOLUÇÕES/atv_03/'

In [None]:
# Load the pre-split feature matrices (X_*) and target vectors (y_*) from
# their pickle files; read_pickle returns each one as a NumPy array.
X_train = read_pickle(path+'X_train.pickle')
X_test = read_pickle(path+'X_test.pickle')
y_train = read_pickle(path+'y_train.pickle')
y_test = read_pickle(path+'y_test.pickle')

In [None]:
# Quick sanity check: print the shape of every split, train first, then test.
for split in (X_train, y_train, X_test, y_test):
    print(split.shape)

(179, 7)
(179,)
(45, 7)
(45,)


In [None]:
# Stores the RMSE results: one entry per model, keyed by model name.
results = {}

# 2 LinearRegression

In [None]:
# Baseline model: LinearRegression with default settings, fitted on the training split.
lin_reg = LinearRegression() 
lin_reg.fit(X_train, y_train)

LinearRegression()

In [None]:
# Predict the targets of the held-out test split with the linear model.
predictions = lin_reg.predict(X_test)

In [None]:
# Test-set RMSE of the linear model: square root of the mean squared error.
lin_mse = mean_squared_error(y_test, predictions)
lin_rmse = np.sqrt(lin_mse)
print(lin_rmse)

9.281097799962511


In [None]:
# Record the linear model's RMSE, rounded to 4 decimals.
results['linear_regressor'] = round(lin_rmse,4)

# 3 DecisionTreeRegressor

In [None]:
# Decision tree with default hyper-parameters. random_state is fixed because
# the tree's split search is randomised; without a seed the reported RMSE
# changes from one run of the notebook to the next.
dt_reg = DecisionTreeRegressor(random_state=42)
dt_reg.fit(X_train,y_train)

DecisionTreeRegressor()

In [None]:
# Predict the test split with the decision tree (reuses the `predictions` name).
predictions = dt_reg.predict(X_test)

In [None]:
# Test-set RMSE of the decision tree.
dt_mse = mean_squared_error(y_test, predictions)
dt_rmse = np.sqrt(dt_mse)
print(dt_rmse)

11.312726363603858


In [None]:
# Record the decision tree's RMSE, rounded to 4 decimals.
results['decision_tree'] = round(dt_rmse,4)

# 4 RandomForestRegressor

In [None]:
# Random forest with default hyper-parameters. random_state is fixed so the
# bootstrap samples and feature draws — and therefore the reported RMSE —
# are reproducible across notebook runs.
rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(X_train,y_train)

RandomForestRegressor()

In [None]:
# Predict the test split with the default random forest.
rf_predictions = rf_reg.predict(X_test)

In [None]:
# Test-set RMSE of the default random forest; the bare last expression
# makes the notebook display the value.
rf_mse = mean_squared_error(y_test,rf_predictions)
rf_rmse = np.sqrt(rf_mse)
rf_rmse

12.425317076213565

In [None]:
# Record the default random forest's RMSE, rounded to 4 decimals.
results['random_forest'] = round(rf_rmse,4)

## 4.1 Grid Search

In [None]:
def runGridSeach(reg, params,x_train,y_train):
  """Grid-search ``params`` for ``reg`` and return the best combination.

  Every candidate is evaluated with 5-fold cross-validation on the given
  training data, scored with 'neg_root_mean_squared_error', using all
  available cores (``n_jobs=-1``). The winning parameter dict is printed
  and then returned.
  """
  grid = GridSearchCV(
      estimator=reg,
      param_grid=params,
      scoring='neg_root_mean_squared_error',
      cv=5,
      n_jobs=-1,
  )
  grid.fit(x_train, y_train)
  best_params = grid.best_params_
  print(best_params)
  return best_params

In [None]:
# Fresh forest used as the base estimator of the grid search. The seed is
# fixed so every candidate is compared under the same randomness and the
# selected parameters do not change between runs.
rf_reg = RandomForestRegressor(random_state=42)

In [None]:
# Random-forest search grid: 4 tree counts x 3 max_features values, always with bootstrap disabled.
prs = {'n_estimators': [80, 90, 100, 120], 'max_features': [2, 4, 6], 'bootstrap': [False]}

In [None]:
# Run the grid search on the training split; prs_rf holds the best parameter dict.
prs_rf = runGridSeach(rf_reg, prs,X_train,y_train)

{'bootstrap': False, 'max_features': 6, 'n_estimators': 90}


In [None]:
# Refit the forest with ALL parameters chosen by the grid search. Passing
# only n_estimators would drop the selected max_features and bootstrap
# values and evaluate a different model than the one the search picked.
# random_state is fixed so the reported RMSE is reproducible.
rf_reg = RandomForestRegressor(**prs_rf, random_state=42)
rf_reg.fit(X_train,y_train)

RandomForestRegressor(n_estimators=90)

In [None]:
# Predict the test split with the grid-searched random forest.
rf_predictions = rf_reg.predict(X_test)

In [None]:
# Test-set RMSE of the grid-searched forest (overwrites rf_mse / rf_rmse
# from the default forest); the bare last expression displays the value.
rf_mse = mean_squared_error(y_test,rf_predictions)
rf_rmse = np.sqrt(rf_mse)
rf_rmse

12.563030878572539

In [None]:
# Record the grid-searched forest's RMSE, rounded to 4 decimals.
results['random_forest_gs'] = round(rf_rmse,4)

# 5 SVM

In [None]:
# Support vector regressor with default hyper-parameters.
reg_svm = SVR()

In [None]:
# Fit the default SVR on the training split.
reg_svm.fit(X_train, y_train)

SVR()

In [None]:
# Predict the test split with the default SVR and print its RMSE.
predictions = reg_svm.predict(X_test)
svm_mse = mean_squared_error(y_test, predictions)
svm_rmse = np.sqrt(svm_mse)
print(svm_rmse)

33.807113325864826


In [None]:
# Record the default SVR's RMSE, rounded to 4 decimals.
results['svm'] =round(svm_rmse,4)

## 5.1 Grid Search

In [None]:
# Fresh SVR used as the base estimator of the grid search.
reg_svm = SVR()

In [None]:
# SVR search grid: 3 kernels x 5 values of the regularisation parameter C
# (reuses the `prs` name from the random-forest grid).
prs = {'kernel':['linear', 'rbf', 'sigmoid'],'C':[0.1,0.5,0.7,1.0,1.2]}

In [None]:
# Run the grid search on the training split; prs_svm holds the best parameter dict.
prs_svm = runGridSeach(reg_svm, prs,X_train,y_train)

{'C': 1.2, 'kernel': 'linear'}


In [None]:
# Build the SVR from the grid-search winner; the search grid only contains
# 'C' and 'kernel', so unpacking the dict sets exactly those two parameters.
reg_svm = SVR(**prs_svm)

In [None]:
# Fit the tuned SVR on the training split.
reg_svm.fit(X_train, y_train)

SVR(C=1.2, kernel='linear')

In [None]:
# Predict the test split with the tuned SVR and print its RMSE
# (overwrites svm_mse / svm_rmse from the default SVR).
predictions = reg_svm.predict(X_test)
svm_mse = mean_squared_error(y_test, predictions)
svm_rmse = np.sqrt(svm_mse)
print(svm_rmse)

19.788068166458768


In [None]:
# Record the tuned SVR's RMSE, rounded to 4 decimals.
results['svm_gs'] =round(svm_rmse,4)

# 6 Seleção de características

## 6.1 Avaliando o melhor modelo

In [None]:
# Display the RMSE collected for every model so they can be compared (lower is better).
results

{'decision_tree': 11.3127,
 'linear_regressor': 9.2811,
 'random_forest': 12.4253,
 'random_forest_gs': 12.4691,
 'svm': 33.8071,
 'svm_gs': 19.7881}

## 6.2 Criando a função para o Linear Regressor

In [None]:
def featSelection(reg,x_train,x_test,y_train, y_test):
  """Drop the feature whose fitted coefficient has the smallest magnitude.

  The coefficient with the smallest absolute value is treated as the least
  influential one. Picking the plain minimum would instead select the most
  negative coefficient, which can belong to a very influential feature.
  If several coefficients tie, the first one is dropped.

  Parameters
  ----------
  reg : fitted linear model exposing a 1-D ``coef_`` attribute whose entries
      correspond, in order, to the columns of ``x_train`` / ``x_test``.
  x_train, x_test : 2-D arrays with one column per feature.
  y_train, y_test : unused; kept so existing callers keep working.

  Returns
  -------
  (x_train_selected, x_test_selected) : copies of the inputs with the
      selected column removed.
  """
  coef_magnitudes = np.abs(np.asarray(reg.coef_))
  drop_idx = int(np.argmin(coef_magnitudes))
  x_train_selected = np.delete(x_train, drop_idx, axis=1)
  x_test_selected = np.delete(x_test, drop_idx, axis=1)
  return x_train_selected, x_test_selected

In [None]:
def contRmse(reg,x_train_s,x_test_s,y_train, y_test):
  """Fit ``reg`` on the given training data and return its RMSE on the test data.

  ``reg`` is fitted in place, so after the call it holds the model trained
  on ``x_train_s``.
  """
  reg.fit(x_train_s, y_train)
  test_mse = mean_squared_error(y_test, reg.predict(x_test_s))
  return np.sqrt(test_mse)

In [None]:
# Baseline for feature selection: a fresh LinearRegression fitted on the
# current X_train by contRmse, whose returned test RMSE is displayed.
lin_reg = LinearRegression() 
contRmse(lin_reg,X_train,X_test,y_train, y_test)

9.281097799962511

In [None]:
# Inspect the coefficients of the model fitted in the previous cell.
lin_reg.coef_

array([ 7.93927383e+01, -1.91297135e+00, -5.05566700e+03,  5.18571698e+03,
       -4.69794368e-01,  9.50234405e-01,  4.19571537e+00])

In [None]:
# Backward elimination: repeatedly drop the feature chosen by featSelection,
# refit the linear model on what is left and report the test RMSE.
#
# The loop works on its own variables (x_train_s / x_test_s) so the global
# X_train / X_test are never overwritten and earlier cells stay re-runnable.
# It stops explicitly when a single feature is left; a counter compared
# against len(lin_reg.coef_) is unreliable because that length shrinks on
# every refit.
x_train_s, x_test_s = X_train, X_test
# Make sure the coefficients match the full feature set, even on a re-run.
lin_reg.fit(x_train_s, y_train)
while x_train_s.shape[1] > 1:
  x_train_s, x_test_s = featSelection(lin_reg, x_train_s, x_test_s, y_train, y_test)
  print('='*20)
  print(x_train_s.shape, x_test_s.shape)
  # contRmse refits lin_reg, so the next iteration sees fresh coefficients.
  rmse = contRmse(lin_reg, x_train_s, x_test_s, y_train, y_test)
  print('Rmse = ', round(rmse,4))
  print('='*20)
  

(179, 6) (45, 6)
Rmse =  9.1033
(179, 5) (45, 5)
Rmse =  9.2631
(179, 4) (45, 4)
Rmse =  9.0648
(179, 3) (45, 3)
Rmse =  9.0763
