In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the concrete mix / slump measurements from a CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='cement_slump.csv')

In [3]:
# Preview the first five rows to sanity-check the loaded columns and values.
df.head(n=5)

Unnamed: 0,Cement,Slag,Fly ash,Water,SP,Coarse Aggr.,Fine Aggr.,SLUMP(cm),FLOW(cm),Compressive Strength (28-day)(Mpa)
0,273.0,82.0,105.0,210.0,9.0,904.0,680.0,23.0,62.0,34.99
1,163.0,149.0,191.0,180.0,12.0,843.0,746.0,0.0,20.0,41.14
2,162.0,148.0,191.0,179.0,16.0,840.0,743.0,1.0,20.0,41.81
3,162.0,148.0,190.0,179.0,19.0,838.0,741.0,3.0,21.5,42.08
4,154.0,112.0,144.0,220.0,10.0,923.0,658.0,20.0,64.0,26.82


In [4]:
# Show the exact column labels; the target column is selected by its full name in the cells below.
df.columns

Index(['Cement', 'Slag', 'Fly ash', 'Water', 'SP', 'Coarse Aggr.',
       'Fine Aggr.', 'SLUMP(cm)', 'FLOW(cm)',
       'Compressive Strength (28-day)(Mpa)'],
      dtype='object')

In [5]:
# Feature matrix: every column except the 28-day compressive strength target.
X = df.drop(columns='Compressive Strength (28-day)(Mpa)')

In [9]:
# Regression target: the 28-day compressive strength column.
y = df.loc[:, 'Compressive Strength (28-day)(Mpa)']

In [10]:
# Dataset size as (rows, columns).
df.shape

(103, 10)

In [15]:
from sklearn.preprocessing import StandardScaler

In [16]:
# Unfitted standardizer; it becomes the 'scaler' step of the pipeline and is only
# fitted when the pipeline itself is fitted.
scaler = StandardScaler()

In [8]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out 10% of the rows for the final evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=101,
)

In [12]:
from sklearn.svm import SVR,LinearSVR

In [18]:
# Support vector regressor created with library defaults; kernel, gamma, C and epsilon
# are overridden by the grid search through the 'svr__' parameter names.
svr = SVR()

In [19]:
# NOTE(review): interactive documentation lookup left over from exploration. It prints
# the SVR docstring and has no effect on the analysis; consider dropping it from the
# final notebook.
help(SVR)

Help on class SVR in module sklearn.svm._classes:

class SVR(sklearn.base.RegressorMixin, sklearn.svm._base.BaseLibSVM)
 |  SVR(*, kernel='rbf', degree=3, gamma='scale', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1, shrinking=True, cache_size=200, verbose=False, max_iter=-1)
 |  
 |  Epsilon-Support Vector Regression.
 |  
 |  The free parameters in the model are C and epsilon.
 |  
 |  The implementation is based on libsvm. The fit time complexity
 |  is more than quadratic with the number of samples which makes it hard
 |  to scale to datasets with more than a couple of 10000 samples. For large
 |  datasets consider using :class:`~sklearn.svm.LinearSVR` or
 |  :class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a
 |  :class:`~sklearn.kernel_approximation.Nystroem` transformer.
 |  
 |  Read more in the :ref:`User Guide <svm_regression>`.
 |  
 |  Parameters
 |  ----------
 |  kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,          default='rbf'


In [27]:
# Pipeline steps in execution order: standardize the features, then fit the SVR.
operations = [
    ('scaler', scaler),
    ('svr', svr),
]

In [28]:
# Candidate SVR hyperparameter values for the grid search
# (4 kernels x 2 gamma settings x 5 C values x 5 epsilon values = 200 combinations).

# Kernel functions to compare.
kernel = [
    'rbf',
    'linear',
    'poly',
    'sigmoid',
]

# Kernel coefficient strategies.
gamma = [
    'scale',
    'auto',
]

# Regularization strengths.
C = [
    0.001,
    0.01,
    0.1,
    0.5,
    1.0,
]

# Widths of the epsilon-insensitive tube.
epsilon = [
    0.0,
    0.01,
    0.1,
    1.0,
    2.0,
]

In [21]:
from sklearn.pipeline import Pipeline

In [29]:
# Chain the scaler and the regressor so scaling is fitted together with the model.
pipe = Pipeline(steps=operations)

In [30]:
# Keys follow the '<step name>__<parameter>' form so the search can reach the
# parameters of the 'svr' step inside the pipeline.
param_grid = {
    'svr__kernel': kernel,
    'svr__gamma': gamma,
    'svr__C': C,
    'svr__epsilon': epsilon,
}

In [23]:
from sklearn.model_selection import GridSearchCV

In [31]:
# Exhaustive 5-fold cross-validated search over the pipeline's SVR parameters.
# `scoring` is not passed, so candidates are ranked by the estimator's default score
# rather than by the MAE / RMSE reported at the end of the notebook.
grid_model = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5)

In [32]:
# Run the cross-validated search on the training split only; the test split stays
# untouched until the final evaluation.
grid_model.fit(X_train,y_train)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('scaler', StandardScaler()),
                                       ('svr', SVR())]),
             param_grid={'svr__C': [0.001, 0.01, 0.1, 0.5, 1.0],
                         'svr__epsilon': [0.0, 0.01, 0.1, 1.0, 2.0],
                         'svr__gamma': ['scale', 'auto'],
                         'svr__kernel': ['rbf', 'linear', 'poly', 'sigmoid']})

In [33]:
# Parameter combination with the best cross-validated score.
# NOTE(review): in the recorded run both C and epsilon land on the largest value in
# their grids, so the optimum may lie outside the searched range; consider widening
# those two lists.
grid_model.best_params_

{'svr__C': 1.0,
 'svr__epsilon': 2.0,
 'svr__gamma': 'scale',
 'svr__kernel': 'linear'}

In [34]:
# Predict the held-out test rows (with GridSearchCV's default refit=True, predictions
# come from the best pipeline refitted on the whole training split).
y_pred = grid_model.predict(X_test)

In [35]:
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [36]:
# Mean absolute error on the test split, in the same units as the target column.
mae = mean_absolute_error(y_test, y_pred)
print(mae)

1.474768562578069


In [37]:
# Root mean squared error on the test split: square root of the mean squared error.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

In [38]:
# Report the test-split root mean squared error computed in the previous cell.
print(rmse)

1.7793484042378453
