In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning category for the rest of the session; note that this
# also hides deprecation notices raised by the libraries used below.
warnings.simplefilter('ignore')

# Render all DataFrame columns instead of eliding the middle ones.
pd.options.display.max_columns = None

In [17]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, KFold, cross_val_score, RandomizedSearchCV
from sklearn.decomposition import PCA
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.ensemble import BaggingRegressor, GradientBoostingRegressor

In [3]:
# Location of the concrete dataset workbook on Google Drive.
# NOTE(review): no Drive mount step is visible in this notebook — presumably
# the drive is mounted through the Colab UI before running; confirm.
DATA_PATH = '/content/drive/MyDrive/Udemy/ML com Python/Datasets/Concrete_Data.xls'
df = pd.read_excel(DATA_PATH)

In [4]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.986111
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.887366
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.269535
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05278
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.296075


In [5]:
# (rows, columns) of the loaded frame.
df.shape

(1030, 9)

In [7]:
# Count missing values per column.
df.isna().sum()

Unnamed: 0,0
Cement (component 1)(kg in a m^3 mixture),0
Blast Furnace Slag (component 2)(kg in a m^3 mixture),0
Fly Ash (component 3)(kg in a m^3 mixture),0
Water (component 4)(kg in a m^3 mixture),0
Superplasticizer (component 5)(kg in a m^3 mixture),0
Coarse Aggregate (component 6)(kg in a m^3 mixture),0
Fine Aggregate (component 7)(kg in a m^3 mixture),0
Age (day),0
"Concrete compressive strength(MPa, megapascals)",0


In [9]:
# Column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column                                                 Non-Null Count  Dtype  
---  ------                                                 --------------  -----  
 0   Cement (component 1)(kg in a m^3 mixture)              1030 non-null   float64
 1   Blast Furnace Slag (component 2)(kg in a m^3 mixture)  1030 non-null   float64
 2   Fly Ash (component 3)(kg in a m^3 mixture)             1030 non-null   float64
 3   Water  (component 4)(kg in a m^3 mixture)              1030 non-null   float64
 4   Superplasticizer (component 5)(kg in a m^3 mixture)    1030 non-null   float64
 5   Coarse Aggregate  (component 6)(kg in a m^3 mixture)   1030 non-null   float64
 6   Fine Aggregate (component 7)(kg in a m^3 mixture)      1030 non-null   float64
 7   Age (day)                                              1030 non-null   int64  
 8   Concrete compressive strength(MPa, megapascals)  

In [14]:
# Predictor matrix and target series.
# The target label ends with a trailing space, so it is spelled out once here
# and reused for both the selection and the drop.
TARGET_COL = 'Concrete compressive strength(MPa, megapascals) '
X = df.drop(columns=TARGET_COL)
y = df[TARGET_COL]

In [16]:
# First model: bagging with 100 members of the default base estimator, scored
# with shuffled 5-fold cross-validation using the estimator's default scorer.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
modelo = BaggingRegressor(
    n_estimators=100,
    n_jobs=-1,
    random_state=42,
)
resultado = cross_val_score(modelo, X, y, n_jobs=-1, cv=kfold)

# Mean score across the five folds.
print(resultado.mean())

0.9059173094748326


In [19]:
# Second model: bagging again, but each of the 100 bagged members is a
# 100-stage gradient-boosting regressor.
gbr_base = GradientBoostingRegressor(n_estimators=100)
modelo2 = BaggingRegressor(
    estimator=gbr_base,
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
)

# Scored with the same kfold splitter as the first model; resultado is overwritten.
resultado = cross_val_score(modelo2, X, y, n_jobs=-1, cv=kfold)
print(resultado.mean())

0.9036432525864676


## Outra forma de aplicação: busca aleatória de hiperparâmetros

In [22]:
# Hyperparameter search for a single gradient-boosting regressor.
# Candidate values for the tree-shape parameters (6 x 6 x 6 = 216 combinations);
# RandomizedSearchCV evaluates only its default number of sampled combinations.
param_grid = {
    'min_samples_split': np.array([2, 3, 4, 5, 6, 7]),
    'min_samples_leaf': np.array([2, 3, 4, 5, 6, 7]),
    'max_depth': np.array([3, 5, 7, 9, 11, 12])
}

# Note: this rebinds modelo2 to a plain GradientBoostingRegressor.
modelo2 = GradientBoostingRegressor(n_estimators=100, random_state=42)

# random_state pins which combinations are sampled, so best_params_ and
# best_score_ come out the same on every Restart & Run All. Without it the
# search draws different candidates on each run.
random_search = RandomizedSearchCV(modelo2, param_grid, cv=kfold, n_jobs=-1,
                                   random_state=42)
random_search.fit(X, y)

In [23]:
# Best hyperparameter combination found by the random search.
random_search.best_params_

{'min_samples_split': 6, 'min_samples_leaf': 5, 'max_depth': 7}

In [24]:
# Mean cross-validated score of the best combination.
random_search.best_score_

0.9174729084046428

## Outra verificação: avaliação em conjunto de teste separado

In [25]:
# Hold out 20% of the rows for a final check of the tuned ensemble.
X_tr, X_ts, y_tr, y_ts = train_test_split(X, y, random_state=42, test_size=0.2)

# Settings for each gradient-boosting base learner.
# NOTE(review): the three tree-shape values are hard-coded, presumably copied
# by hand from an earlier random-search run — keep them in sync if the search
# is re-run. That search was fitted on all of X and y, so the held-out rows
# took part in the tuning; confirm this is acceptable.
gbr_params = {
    'n_estimators': 100,
    'min_samples_split': 6,
    'min_samples_leaf': 5,
    'max_depth': 7,
}
modelo3 = BaggingRegressor(
    estimator=GradientBoostingRegressor(**gbr_params),
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
)

modelo3.fit(X_tr, y_tr)

In [26]:
# Predict on the held-out split.
y_pred = modelo3.predict(X_ts)

# Error metrics; RMSE is the square root of the mean squared error.
mse = mean_squared_error(y_ts, y_pred)
r_2 = r2_score(y_ts, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_ts, y_pred)

# Report the metrics, one per line.
print(f"R²: {r_2}", f"RMSE: {rmse}", f"MAE: {mae}", sep="\n")

R²: 0.9087560904480998
RMSE: 4.848899864429712
MAE: 3.4134330480884
