# Determinando a resistência à compressão do concreto com modelos de regressão

Dataset do Kaggle: https://www.kaggle.com/pavanraj159/concrete-compressive-strength-data-set 

Importando bibliotecas para manipulação e visualização dos dados

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Lendo o arquivo CSV

In [None]:
# Load the dataset from a CSV file; the relative path is resolved against the
# current working directory.
dados = pd.read_csv('compresive_strength_concrete.csv')

Exibindo as cinco primeiras linhas

In [None]:
# Show the first rows (pandas' default of five) as a quick check of the load.
dados.head()

In [None]:
# Replace the header with short names, assigned by position. The assignment
# requires the frame to have exactly nine columns; the last one is the target.
dados.columns = [
    'Cement',
    'Blast Furnace Slag',
    'Fly Ash',
    'Water',
    'Superplasticizer',
    'Coarse Aggregate',
    'Fine Aggregate',
    'Age',
    'Compressive strength',
]

In [None]:
# Preview again to confirm the new column names were applied.
dados.head()

Exibindo informações dos dados

In [None]:
# Summarise column dtypes, non-null counts and memory usage.
dados.info()

Verificando se existem NaNs

In [None]:
# Count missing values per column.
print(dados.isna().sum())

Visualização dos dados

In [None]:
# Draw one histogram per column on a 3x3 grid. Looping over the columns
# replaces nine copy-pasted stanzas that differed only in the index.
plt.figure(figsize=(10, 10))
# The grid has nine slots, so at most the first nine columns are plotted.
for posicao, coluna in enumerate(dados.columns[:9], start=1):
    plt.subplot(3, 3, posicao)
    dados[coluna].plot(kind='hist')
    plt.xlabel(coluna)
# Keep axis labels from overlapping neighbouring subplots.
plt.tight_layout()

Verificando correlação entre as variáveis

In [None]:
# Pairwise correlation matrix of all columns (Pearson, the pandas default).
corr = dados.corr()

In [None]:
# Draw the correlation matrix as a colour-coded heatmap.
sns.heatmap(corr)

Normalizando valores no intervalo entre 0 e 1

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Keep a handle on the column index; it is reused for scaling and for
# selecting the feature columns.
colunas = dados.columns

In [None]:
# Rescale every column to [0, 1], each with its own freshly fitted scaler.
# NOTE(review): this runs on the full dataset, target column included, before
# the train/test split, so test rows influence the scaling. The smallest
# target also becomes exactly 0, which matters for percentage-based errors.
for col in colunas:
    scaler = MinMaxScaler(feature_range=(0, 1))
    # Double brackets select a one-column frame, giving the (n_rows, 1) array
    # the scaler expects.
    dados[col] = scaler.fit_transform(dados[[col]].to_numpy())

In [None]:
# Inspect the first rows again to confirm the values were rescaled.
dados.head()

Determinando variáveis X e Y

In [None]:
# Features are every column except the last; the target is the last column.
# Both are taken as plain NumPy arrays.
X = dados.iloc[:, :-1].values
Y = dados.iloc[:, -1].values

Criando amostras de treino e teste

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_treino, X_teste, Y_treino, Y_teste = train_test_split(X, Y, test_size=0.25, random_state=42)

Importando bibliotecas para cálculo dos erros dos modelos

In [None]:
from sklearn.metrics import mean_absolute_error,mean_squared_error

Criando função para cálculo do erro percentual absoluto médio

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Each error is taken relative to the matching true value, so observations
    whose true value is exactly zero have no defined percentage error. They
    are left out of the mean instead of turning the whole score into
    ``inf``/``nan``.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predictions, broadcastable against ``y_true``.

    Returns:
        The MAPE as a float percentage, or ``nan`` when no observation has a
        non-zero true value (including empty input).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Broadcast first so the zero mask below lines up with both arrays.
    y_true, y_pred = np.broadcast_arrays(y_true, y_pred)

    # A min-max scaled target always contains an exact 0 (its minimum), so
    # zero denominators are expected here rather than exceptional.
    nonzero = y_true != 0
    if not nonzero.any():
        return float('nan')

    relative_errors = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
    return np.mean(np.abs(relative_errors)) * 100

Modelo 1: Regressão Linear

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least-squares linear regression with scikit-learn's default settings.
linreg = LinearRegression()

In [None]:
# Fit on the training split only.
linreg.fit(X_treino,Y_treino)

In [None]:
# Predict the (scaled) target for the held-out test rows.
Y_pred_linreg = linreg.predict(X_teste)

In [None]:
# Score the linear regression on the held-out split.
MAE_linreg = mean_absolute_error(Y_teste, Y_pred_linreg)
MAPE_linreg = mean_absolute_percentage_error(Y_teste, Y_pred_linreg)
MSE_linreg = mean_squared_error(Y_teste, Y_pred_linreg)
# RMSE is derived from the MSE computed just above.
RMSE_linreg = np.sqrt(MSE_linreg)

In [None]:
# Report the linear regression's test-set errors with two decimals.
print(f"MAE = {MAE_linreg:0.2f}")
print(f"MAPE = {MAPE_linreg:0.2f}%")
print(f"MSE = {MSE_linreg:0.2f}")
print(f"RMSE = {RMSE_linreg:0.2f}")

Modelo 2: Support Vector Regressor

In [None]:
from sklearn.svm import SVR

In [None]:
# Support vector regressor with scikit-learn's default hyperparameters.
svr = SVR()

In [None]:
# Fit on the training split only.
svr.fit(X_treino,Y_treino)

In [None]:
# Predict the (scaled) target for the held-out test rows.
Y_pred_svr = svr.predict(X_teste)

In [None]:
# Score the support vector regressor on the held-out split.
MAE_svr = mean_absolute_error(Y_teste, Y_pred_svr)
MAPE_svr = mean_absolute_percentage_error(Y_teste, Y_pred_svr)
MSE_svr = mean_squared_error(Y_teste, Y_pred_svr)
# RMSE is derived from the MSE computed just above.
RMSE_svr = np.sqrt(MSE_svr)

In [None]:
# Report the support vector regressor's test-set errors with two decimals.
print(f"MAE = {MAE_svr:0.2f}")
print(f"MAPE = {MAPE_svr:0.2f}%")
print(f"MSE = {MSE_svr:0.2f}")
print(f"RMSE = {RMSE_svr:0.2f}")

Modelo 3: Decision Tree Regressor

In [None]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Seed the tree so repeated runs give the same model and metrics; 42 matches
# the seed already used for the train/test split.
dte = DecisionTreeRegressor(random_state=42)

In [None]:
# Fit on the training split only.
dte.fit(X_treino,Y_treino)

In [None]:
# Predict the (scaled) target for the held-out test rows.
Y_pred_dte = dte.predict(X_teste)

In [None]:
# Score the decision tree on the held-out split.
MAE_dte = mean_absolute_error(Y_teste, Y_pred_dte)
MAPE_dte = mean_absolute_percentage_error(Y_teste, Y_pred_dte)
MSE_dte = mean_squared_error(Y_teste, Y_pred_dte)
# RMSE is derived from the MSE computed just above.
RMSE_dte = np.sqrt(MSE_dte)

In [None]:
# Report the decision tree's test-set errors with two decimals.
print(f"MAE = {MAE_dte:0.2f}")
print(f"MAPE = {MAPE_dte:0.2f}%")
print(f"MSE = {MSE_dte:0.2f}")
print(f"RMSE = {RMSE_dte:0.2f}")

Modelo 4: Random Forest Regressor

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Seed the forest so repeated runs give the same model and metrics; 42 matches
# the seed already used for the train/test split.
rfr = RandomForestRegressor(random_state=42)

In [None]:
# Fit on the training split only.
rfr.fit(X_treino,Y_treino)

In [None]:
# Predict the (scaled) target for the held-out test rows.
Y_pred_rfr = rfr.predict(X_teste)

In [None]:
# Score the random forest on the held-out split.
MAE_rfr = mean_absolute_error(Y_teste, Y_pred_rfr)
MAPE_rfr = mean_absolute_percentage_error(Y_teste, Y_pred_rfr)
MSE_rfr = mean_squared_error(Y_teste, Y_pred_rfr)
# RMSE is derived from the MSE computed just above.
RMSE_rfr = np.sqrt(MSE_rfr)

In [None]:
# Report the random forest's test-set errors with two decimals.
print(f"MAE = {MAE_rfr:0.2f}")
print(f"MAPE = {MAPE_rfr:0.2f}%")
print(f"MSE = {MSE_rfr:0.2f}")
print(f"RMSE = {RMSE_rfr:0.2f}")

Modelo 5: AdaBoost Regressor

In [None]:
from sklearn.ensemble import AdaBoostRegressor

In [None]:
# Seed the ensemble so repeated runs give the same model and metrics; 42
# matches the seed already used for the train/test split.
ada = AdaBoostRegressor(random_state=42)

In [None]:
# Fit on the training split only.
ada.fit(X_treino,Y_treino)

In [None]:
# Predict the (scaled) target for the held-out test rows.
Y_pred_ada = ada.predict(X_teste)

In [None]:
# Score the AdaBoost regressor on the held-out split.
MAE_ada = mean_absolute_error(Y_teste, Y_pred_ada)
MAPE_ada = mean_absolute_percentage_error(Y_teste, Y_pred_ada)
MSE_ada = mean_squared_error(Y_teste, Y_pred_ada)
# RMSE is derived from the MSE computed just above.
RMSE_ada = np.sqrt(MSE_ada)

In [None]:
# Report the AdaBoost regressor's test-set errors with two decimals.
print(f"MAE = {MAE_ada:0.2f}")
print(f"MAPE = {MAPE_ada:0.2f}%")
print(f"MSE = {MSE_ada:0.2f}")
print(f"RMSE = {RMSE_ada:0.2f}")

In [None]:
# Collect each metric across the five models. Every list follows the same
# model order as `modelo`, so position i always refers to the same model.
# The third label previously read "Decion Tree"; the spelling is corrected so
# the results table shows the proper model name.
modelo = ["Regressão linear", "SVR", "Decision Tree", "Random Forest", "Ada Boost"]
MAE = [MAE_linreg, MAE_svr, MAE_dte, MAE_rfr, MAE_ada]
MAPE = [MAPE_linreg, MAPE_svr, MAPE_dte, MAPE_rfr, MAPE_ada]
MSE = [MSE_linreg, MSE_svr, MSE_dte, MSE_rfr, MSE_ada]
RMSE = [RMSE_linreg, RMSE_svr, RMSE_dte, RMSE_rfr, RMSE_ada]

In [None]:
# Map each table column name to its list of per-model values.
dici = dict(Modelo=modelo, MAE=MAE, MAPE=MAPE, MSE=MSE, RMSE=RMSE)

In [None]:
# Build the comparison table, ordered from lowest to highest MAPE (ascending
# is the pandas default).
pd_dici = pd.DataFrame(dici).sort_values(by="MAPE")

In [None]:
# Display the comparison table as the cell output.
pd_dici

O modelo Random Forest apresentou melhor precisão em comparação aos demais modelos.