In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
data = pd.read_csv("../Data/cement_slump.csv")

In [3]:
data.head(2)

Unnamed: 0,Cement,Slag,Fly ash,Water,SP,Coarse Aggr.,Fine Aggr.,SLUMP(cm),FLOW(cm),Compressive Strength (28-day)(Mpa)
0,273.0,82.0,105.0,210.0,9.0,904.0,680.0,23.0,62.0,34.99
1,163.0,149.0,191.0,180.0,12.0,843.0,746.0,0.0,20.0,41.14


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 10 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Cement                              103 non-null    float64
 1   Slag                                103 non-null    float64
 2   Fly ash                             103 non-null    float64
 3   Water                               103 non-null    float64
 4   SP                                  103 non-null    float64
 5   Coarse Aggr.                        103 non-null    float64
 6   Fine Aggr.                          103 non-null    float64
 7   SLUMP(cm)                           103 non-null    float64
 8   FLOW(cm)                            103 non-null    float64
 9   Compressive Strength (28-day)(Mpa)  103 non-null    float64
dtypes: float64(10)
memory usage: 8.2 KB


In [5]:
data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Cement,103.0,229.894175,78.87723,137.0,152.0,248.0,303.9,374.0
Slag,103.0,77.973786,60.461363,0.0,0.05,100.0,125.0,193.0
Fly ash,103.0,149.014563,85.41808,0.0,115.5,164.0,235.95,260.0
Water,103.0,197.167961,20.208158,160.0,180.0,196.0,209.5,240.0
SP,103.0,8.539806,2.80753,4.4,6.0,8.0,10.0,19.0
Coarse Aggr.,103.0,883.978641,88.391393,708.0,819.5,879.0,952.8,1049.9
Fine Aggr.,103.0,739.604854,63.342117,640.6,684.5,742.7,788.0,902.0
SLUMP(cm),103.0,18.048544,8.750844,0.0,14.5,21.5,24.0,29.0
FLOW(cm),103.0,49.61068,17.56861,20.0,38.5,54.0,63.75,78.0
Compressive Strength (28-day)(Mpa),103.0,36.038738,7.83712,17.19,30.9,35.52,41.205,58.53


In [6]:
data.columns

Index(['Cement', 'Slag', 'Fly ash', 'Water', 'SP', 'Coarse Aggr.',
       'Fine Aggr.', 'SLUMP(cm)', 'FLOW(cm)',
       'Compressive Strength (28-day)(Mpa)'],
      dtype='object')

In [7]:
X = data.drop(columns=['Compressive Strength (28-day)(Mpa)'], axis=1)
y = data["Compressive Strength (28-day)(Mpa)"]

In [8]:
X

Unnamed: 0,Cement,Slag,Fly ash,Water,SP,Coarse Aggr.,Fine Aggr.,SLUMP(cm),FLOW(cm)
0,273.0,82.0,105.0,210.0,9.0,904.0,680.0,23.0,62.0
1,163.0,149.0,191.0,180.0,12.0,843.0,746.0,0.0,20.0
2,162.0,148.0,191.0,179.0,16.0,840.0,743.0,1.0,20.0
3,162.0,148.0,190.0,179.0,19.0,838.0,741.0,3.0,21.5
4,154.0,112.0,144.0,220.0,10.0,923.0,658.0,20.0,64.0
...,...,...,...,...,...,...,...,...,...
98,248.3,101.0,239.1,168.9,7.7,954.2,640.6,0.0,20.0
99,248.0,101.0,239.9,169.1,7.7,949.9,644.1,2.0,20.0
100,258.8,88.0,239.6,175.3,7.6,938.9,646.0,0.0,20.0
101,297.1,40.9,239.9,194.0,7.5,908.9,651.8,27.5,67.0


In [9]:
y

0      34.99
1      41.14
2      41.81
3      42.08
4      26.82
       ...  
98     49.97
99     50.23
100    50.50
101    49.17
102    48.70
Name: Compressive Strength (28-day)(Mpa), Length: 103, dtype: float64

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=101)

In [12]:
from sklearn.preprocessing import StandardScaler

In [13]:
scaler = StandardScaler()

In [14]:
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [15]:
from sklearn.svm import SVR, LinearSVR

In [16]:
# help(SVR)

In [17]:
base_model = SVR()

In [18]:
base_model.fit(X_train_scaled, y_train)

SVR()

In [19]:
base_model_predict = base_model.predict(X_test)

In [20]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [21]:
mean_absolute_error(y_test, base_model_predict)

6.762776104941544

In [22]:
np.sqrt(mean_squared_error(y_test, base_model_predict))

8.275746418041566

In [23]:
y_test.mean()

36.26870967741935

In [32]:
param_grid = {'C':[0.001, 0.01, 0.1, 0.5, 1],
             "kernel":["linear","rbf","poly"],
             "gamma":["scale","auto"],
             "degree":[2,3,4,5],
             "epsilon":[0.0, 0.01, 0.1, 0.5, 1, 2]}

In [40]:
from sklearn.model_selection import GridSearchCV

In [41]:
svr = SVR()
grid_search = GridSearchCV(svr, param_grid)

In [42]:
grid_search.fit(X_train_scaled, y_train)

GridSearchCV(estimator=SVR(),
             param_grid={'C': [0.001, 0.01, 0.1, 0.5, 1],
                         'degree': [2, 3, 4, 5],
                         'epsilon': [0.0, 0.01, 0.1, 0.5, 1, 2],
                         'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'poly']})

In [43]:
grid_search.best_params_

{'C': 1, 'degree': 2, 'epsilon': 2, 'gamma': 'scale', 'kernel': 'linear'}

In [44]:
grid_pred = grid_search.predict(X_test_scaled)

In [45]:
mean_absolute_error(y_test, grid_pred)

2.5128012210762365

In [46]:
np.sqrt(mean_squared_error(y_test, grid_pred))

3.178210305119858