In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import VotingRegressor

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")


In [2]:
df = pd.read_csv(r'..\Datasets\Cases\Concrete Strength\Concrete_Data.csv')
df.head()

Unnamed: 0,Cement,Blast,Fly,Water,Superplasticizer,Coarse,Fine,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
X = df.drop(columns=['Strength'], axis=1)
y = df['Strength']

In [4]:
df.head()

Unnamed: 0,Cement,Blast,Fly,Water,Superplasticizer,Coarse,Fine,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Cement            1030 non-null   float64
 1   Blast             1030 non-null   float64
 2   Fly               1030 non-null   float64
 3   Water             1030 non-null   float64
 4   Superplasticizer  1030 non-null   float64
 5   Coarse            1030 non-null   float64
 6   Fine              1030 non-null   float64
 7   Age               1030 non-null   int64  
 8   Strength          1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.6 KB


In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24)

In [7]:
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
r2_lr = r2_score(y_test,y_pred)
r2_lr

0.5771752777048793

In [8]:
dtr = DecisionTreeRegressor(random_state=24)
dtr.fit(X_train, y_train)
y_pred = dtr.predict(X_test)
r2_dtr = r2_score(y_test,y_pred)
r2_dtr

0.83089474226832

In [9]:
en = ElasticNet()
en.fit(X_train, y_train)
y_pred = en.predict(X_test)
r2_en = r2_score(y_test,y_pred)
r2_en

0.5766806310401152

In [10]:
voting = VotingRegressor([('DTR',dtr),('EN',en),('LR',lr)])
voting.fit(X_train,y_train)
y_pred = voting.predict(X_test)
print(r2_score(y_test,y_pred))

0.7641542071921111


In [11]:
#Voting with weights
voting = VotingRegressor([('DTR',dtr),('EN',en),('LR',lr)],weights=[ 0.7, 0.15, 0.15])
voting.fit(X_train,y_train)
y_pred = voting.predict(X_test)
print(r2_score(y_test,y_pred))

0.8516273862659319


In [12]:
#Voting with the scores as weights 
voting = VotingRegressor([('DTR',dtr),('EN',en),('LR',lr)],weights=[r2_dtr, r2_en,  r2_lr])
voting.fit(X_train,y_train)
y_pred = voting.predict(X_test)
print(r2_score(y_test,y_pred))


0.7955821089551823


Voting using estimators of same algorithm(here DT)

In [13]:
dtrl = DecisionTreeRegressor(random_state=24, max_depth=None)
dtr2 = DecisionTreeRegressor(random_state=24, max_depth=3)
dtr3 = DecisionTreeRegressor(random_state=24, max_depth=4)
voting = VotingRegressor([('M1',dtrl), ('M2', dtr2), ('M3',dtr3)], weights=[0.7, 0.15, 0.15])

voting.fit(X_train, y_train)


In [14]:
y_pred = voting.predict(X_test) 
r2_score(y_test, y_pred)

0.8436797107172187