In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
from sklearn import svm

In [4]:
# Load the packing data; the relative path is resolved against the kernel's working directory.
packing_df = pd.read_csv('packing_sim.csv')

In [5]:
# Drop columns that contain no values at all (rebinding instead of mutating in place).
packing_df = packing_df.dropna(axis='columns', how='all')

In [5]:
# Display the cleaned frame (the recorded output shows only the first and last rows).
packing_df

Unnamed: 0,RCA,Pumice,Cement,Packing
0,0.00,0.00,1.00,0.600
1,0.00,0.05,0.95,0.612
2,0.00,0.10,0.90,0.624
3,0.00,0.15,0.85,0.637
4,0.00,0.20,0.80,0.650
...,...,...,...,...
225,0.90,0.05,0.05,0.650
226,0.90,0.10,0.00,0.637
227,0.95,0.00,0.05,0.630
228,0.95,0.05,0.00,0.618


In [6]:
# Column-wise split: every column but the last is a feature, the last one is the target.
X, y = np.hsplit(packing_df.to_numpy(), [-1])
y = y.flatten()  # (n, 1) column -> 1-D vector

In [9]:
# Baseline SVR: default kernel is rbf and gamma is 'scale',
# i.e. 1/(n_feat * variance), about 5 for this data.
rbf = svm.SVR().fit(X, y)
rbf.score(X, y)  # score on the training data itself, around .42

0.4227663122527535

In [10]:
# Same baseline with a polynomial kernel (default degree is 3); score is on the training data.
poly = svm.SVR(kernel='poly').fit(X, y)
poly.score(X, y)

0.2849737911833873

In [7]:
from sklearn import preprocessing

In [13]:
# Expand the features with degree-2 polynomial terms before fitting
# (interaction_only=False keeps the squared terms; no bias column is added).
transformer = preprocessing.PolynomialFeatures(
    degree=2,
    interaction_only=False,
    include_bias=False,
)
poly_X = transformer.fit_transform(X)

In [16]:
# Refit the existing `rbf` estimator on the expanded features (this replaces
# its earlier fit on X) and report the score on that same training data.
rbf.fit(poly_X, y).score(poly_X, y)

0.42304437401727923

In [17]:
# Linear-kernel SVR on the polynomial-expanded features; score is on the training data.
lin = svm.SVR(kernel='linear').fit(poly_X, y)
lin.score(poly_X, y)

0.30230789845098904

### Testing different gammas with rbf

In [18]:
# gamma='auto' is 1/n_features, so .33 here; score is on the training data.
rbf_auto_svr = svm.SVR(gamma='auto').fit(X, y)
rbf_auto_svr.score(X, y)

0.3261486467798126

In [25]:
def test_gammas(gammas, kernel='rbf'):
    """Fit one SVR per gamma value and print the score of each.

    Parameters
    ----------
    gammas : iterable
        Values forwarded to ``svm.SVR(gamma=...)``, one model per value.
    kernel : str, default 'rbf'
        Kernel name forwarded to ``svm.SVR``.

    Notes
    -----
    Reads the notebook-level ``X`` and ``y``. Each model is scored on the
    same data it was fitted on, so the printed value is a training score.
    """
    for gamma in gammas:
        model = svm.SVR(kernel=kernel, gamma=gamma).fit(X, y)
        print('Gamma:', gamma, 'Score:', model.score(X, y))

In [21]:
# Gamma grid 0.1, 0.2, ..., 1.0
low_gammas = [tenths / 10 for tenths in range(1, 11)]

In [27]:
# Gamma grid 1, 2, ..., 10
high_gammas = list(range(1, 11))

In [26]:
# Sub-unit gammas with the default (rbf) kernel.
test_gammas(gammas=low_gammas)

Gamma: 0.1 Score: 0.21747244150516798
Gamma: 0.2 Score: 0.3090247715006814
Gamma: 0.3 Score: 0.3231189302135853
Gamma: 0.4 Score: 0.32873482826702216
Gamma: 0.5 Score: 0.3366798315153423
Gamma: 0.6 Score: 0.3437486921392351
Gamma: 0.7 Score: 0.3481689806542073
Gamma: 0.8 Score: 0.3558116418258319
Gamma: 0.9 Score: 0.36070709505471577
Gamma: 1.0 Score: 0.368943151353576


In [28]:
# Integer gammas 1..10 with the default (rbf) kernel.
test_gammas(gammas=high_gammas)

Gamma: 1 Score: 0.368943151353576
Gamma: 2 Score: 0.41341224256907017
Gamma: 3 Score: 0.42854388239491104
Gamma: 4 Score: 0.4295383605326619
Gamma: 5 Score: 0.425384594696116
Gamma: 6 Score: 0.42231246704591885
Gamma: 7 Score: 0.4186524668884527
Gamma: 8 Score: 0.41050108144207487
Gamma: 9 Score: 0.4067328465594511
Gamma: 10 Score: 0.40049689755846063


In [29]:
# Sub-unit gammas with the polynomial kernel.
test_gammas(low_gammas, kernel='poly')

Gamma: 0.1 Score: 0.019009771435413714
Gamma: 0.2 Score: 0.13471448601364144
Gamma: 0.3 Score: 0.21165986380483737
Gamma: 0.4 Score: 0.2731483980879371
Gamma: 0.5 Score: 0.28861290376408
Gamma: 0.6 Score: 0.2890259128420771
Gamma: 0.7 Score: 0.2869685058878876
Gamma: 0.8 Score: 0.2858758487515496
Gamma: 0.9 Score: 0.28497379074129414
Gamma: 1.0 Score: 0.28497376704034694


In [30]:
# Integer gammas 1..10 with the polynomial kernel.
test_gammas(high_gammas, kernel='poly')

Gamma: 1 Score: 0.28497376704034694
Gamma: 2 Score: 0.28497376704034694
Gamma: 3 Score: 0.2858758730839849
Gamma: 4 Score: 0.28497376704034694
Gamma: 5 Score: 0.28497377169304583
Gamma: 6 Score: 0.2858758730839849
Gamma: 7 Score: 0.28497377752484976
Gamma: 8 Score: 0.28497376704034694
Gamma: 9 Score: 0.28587586875230164
Gamma: 10 Score: 0.28497377169304583


### Testing different C values

In [33]:
def test_Cs(Cs, kernel='rbf'):
    """Fit one SVR per C value and print the score of each.

    Parameters
    ----------
    Cs : iterable
        Values forwarded to ``svm.SVR(C=...)``, one model per value.
    kernel : str, default 'rbf'
        Kernel name forwarded to ``svm.SVR``.

    Notes
    -----
    Reads the notebook-level ``X`` and ``y``. Each model is scored on the
    same data it was fitted on, so the printed value is a training score.
    """
    for penalty in Cs:
        model = svm.SVR(kernel=kernel, C=penalty).fit(X, y)
        print('C:', penalty, 'Score:', model.score(X, y))

In [40]:
# Powers of ten from 10 down to 1e-05.
# default is 1, low C is used for noisy data (more regularization)
Cs = [10**exponent for exponent in range(1, -6, -1)]

In [41]:
# C sweep with the default (rbf) kernel.
test_Cs(Cs=Cs)

C: 10 Score: 0.4227663122527535
C: 1 Score: 0.4227663122527535
C: 0.1 Score: 0.4227663122527535
C: 0.01 Score: 0.30598865055900737
C: 0.001 Score: 0.1074054051328458
C: 0.0001 Score: 0.009041280461337564
C: 1e-05 Score: -0.004123866701380319


In [36]:
# C sweep with the polynomial kernel.
test_Cs(Cs, kernel='poly')

C: 10 Score: 0.2849737911833873
C: 1 Score: 0.2849737911833873
C: 0.1 Score: 0.2849737911833873
C: 0.01 Score: 0.2849737911833873
C: 0.001 Score: 0.28972563134081797
C: 0.0001 Score: 0.17268303829044096
C: 1e-05 Score: 0.02924010476617611


In [37]:
# C sweep with the linear kernel.
test_Cs(Cs, kernel='linear')

C: 10 Score: 0.048800845738672605
C: 1 Score: 0.0487673831110913
C: 0.1 Score: 0.04808598882961834
C: 0.01 Score: 0.04090727451142562
C: 0.001 Score: 0.006721555332966478
C: 0.0001 Score: -0.004276604546944451
C: 1e-05 Score: -0.0054772446682243014


### Testing different polynomial kernel degrees


In [13]:
def test_poly_degs(degs, gamma='scale', epsilon=0.1): # same defaults
    """Fit one polynomial-kernel SVR per degree and print the score of each.

    Parameters
    ----------
    degs : iterable
        Degrees forwarded to ``svm.SVR(degree=...)``, one model per value.
    gamma : default 'scale'
        Forwarded unchanged to ``svm.SVR(gamma=...)``.
    epsilon : float, default 0.1
        Forwarded unchanged to ``svm.SVR(epsilon=...)``.

    Notes
    -----
    Reads the notebook-level ``X`` and ``y``. Each model is scored on the
    same data it was fitted on, so the printed value is a training score.
    """
    for degree in degs:
        model = svm.SVR(kernel='poly', degree=degree, gamma=gamma, epsilon=epsilon)
        model.fit(X, y)
        print('degree=', degree, 'Score:', model.score(X, y))

In [8]:
# Polynomial degrees 2 through 5
degs = list(range(2, 6))

In [14]:
# Degree sweep with the function's default gamma and epsilon.
test_poly_degs(degs=degs)

degree= 2 Score: 0.30457160644905346
degree= 3 Score: 0.2849737911833873
degree= 4 Score: 0.2623835474020295
degree= 5 Score: 0.2444893164261298


### Testing different epsilon values

In [42]:
def test_eps(eps, kernel='rbf'):
    """Fit one SVR per epsilon value on X and print the score of each.

    Parameters
    ----------
    eps : iterable
        Values forwarded to ``svm.SVR(epsilon=...)``, one model per value.
    kernel : str, default 'rbf'
        Kernel name forwarded to ``svm.SVR``.

    Notes
    -----
    Reads the notebook-level ``X`` and ``y``. Each model is scored on the
    same data it was fitted on, so the printed value is a training score.
    """
    for tube_width in eps:
        model = svm.SVR(kernel=kernel, epsilon=tube_width).fit(X, y)
        print('epsilon:', tube_width, 'Score:', model.score(X, y))

In [47]:
def test_eps_poly(eps, kernel='rbf'): # use poly transformed features
    """Fit one SVR per epsilon value on poly_X and print the score of each.

    Same sweep as ``test_eps`` but on the polynomial-expanded features.

    Parameters
    ----------
    eps : iterable
        Values forwarded to ``svm.SVR(epsilon=...)``, one model per value.
    kernel : str, default 'rbf'
        Kernel name forwarded to ``svm.SVR``.

    Notes
    -----
    Reads the notebook-level ``poly_X`` and ``y``. Each model is scored on
    the same data it was fitted on, so the printed value is a training score.
    """
    for tube_width in eps:
        model = svm.SVR(kernel=kernel, epsilon=tube_width).fit(poly_X, y)
        print('epsilon:', tube_width, 'Score:', model.score(poly_X, y))

In [43]:
# Epsilon grid: powers of ten from 10 down to 1e-05
eps = [10**exponent for exponent in range(1, -6, -1)]

In [48]:
# Epsilon sweep with the default (rbf) kernel.
# NOTE(review): the recorded epsilon=0.1 score below (0.42304...) equals the earlier
# rbf score on poly_X, not the rbf score on X (0.42277...), so this saved output was
# probably produced by a version that fitted on poly_X -- re-run to confirm.
test_eps(eps, kernel='rbf')

epsilon: 10 Score: -0.0027681054458523757
epsilon: 1 Score: -0.0027681054458525978
epsilon: 0.1 Score: 0.42304437401727923
epsilon: 0.01 Score: 0.9843000696590908
epsilon: 0.001 Score: 0.9864728584306907
epsilon: 0.0001 Score: 0.985789385705972
epsilon: 1e-05 Score: 0.985846116999631


In [52]:
# Epsilon sweep with the linear kernel on the raw features.
test_eps(eps, kernel='linear')

epsilon: 10 Score: -0.0027681054458523757
epsilon: 1 Score: -0.0027681054458525978
epsilon: 0.1 Score: 0.0487673831110913
epsilon: 0.01 Score: 0.034053078962909655
epsilon: 0.001 Score: 0.02997911084973448
epsilon: 0.0001 Score: 0.03820979586949069
epsilon: 1e-05 Score: 0.033025369097192336


In [50]:
# Epsilon sweep with the linear kernel on the polynomial-expanded features.
test_eps_poly(eps, kernel='linear')

epsilon: 10 Score: -0.0027681054458523757
epsilon: 1 Score: -0.0027681054458525978
epsilon: 0.1 Score: 0.30230789845098904
epsilon: 0.01 Score: 0.7383676075809289
epsilon: 0.001 Score: 0.7423225836955063
epsilon: 0.0001 Score: 0.7419842403590312
epsilon: 1e-05 Score: 0.7415632921907256


### Testing different gammas and C values with the best epsilon (epsilon = 0.001)

In [55]:
# Gamma grid 5, 6, ..., 14
gammas = list(range(5, 15))

In [58]:
# Gamma sweep on the raw features with epsilon fixed at 0.001 (default rbf kernel).
# Scores are computed on the training data itself.
for g in gammas:
    svr = svm.SVR(gamma=g, epsilon=0.001).fit(X, y)
    print('Gamma:', g, 'Score:', svr.score(X, y))

Gamma: 5 Score: 0.9873494349785643
Gamma: 6 Score: 0.9896468142110767
Gamma: 7 Score: 0.9915437532541258
Gamma: 8 Score: 0.9928600775723027
Gamma: 9 Score: 0.9938194370697923
Gamma: 10 Score: 0.9943176477092133
Gamma: 11 Score: 0.9950473476383049
Gamma: 12 Score: 0.9957055971894013
Gamma: 13 Score: 0.9960348536598931
Gamma: 14 Score: 0.9964294458939229


In [56]:
# Same gamma sweep (epsilon = 0.001) on the polynomial-expanded features.
# Scores are computed on the training data itself.
for g in gammas:
    svr = svm.SVR(gamma=g, epsilon=0.001).fit(poly_X, y)
    print('Gamma:', g, 'Score:', svr.score(poly_X, y))

Gamma: 5 Score: 0.9942041544651568
Gamma: 6 Score: 0.9949590286625649
Gamma: 7 Score: 0.9959887665057138
Gamma: 8 Score: 0.9967212131481874
Gamma: 9 Score: 0.9970704612083636
Gamma: 10 Score: 0.997369122110379
Gamma: 11 Score: 0.9976073392791844
Gamma: 12 Score: 0.9978102241291773
Gamma: 13 Score: 0.9980119480604562
Gamma: 14 Score: 0.9982058134266828


In [57]:
# Gamma sweep on the polynomial-expanded features with epsilon = 0.001 and C raised to 10.
# Scores are computed on the training data itself.
for g in gammas:
    svr = svm.SVR(C=10, gamma=g, epsilon=0.001).fit(poly_X, y)
    print('Gamma:', g, 'Score:', svr.score(poly_X, y))

Gamma: 5 Score: 0.9959339722179822
Gamma: 6 Score: 0.9969121599524313
Gamma: 7 Score: 0.997220984328346
Gamma: 8 Score: 0.9976241952596338
Gamma: 9 Score: 0.9979210665509624
Gamma: 10 Score: 0.9982026367167417
Gamma: 11 Score: 0.9983697159015883
Gamma: 12 Score: 0.9984822015498964
Gamma: 13 Score: 0.9986570068398148
Gamma: 14 Score: 0.9987834767110767


### High Gamma, Low epsilon, different polynomial degrees

In [18]:
# more calculation needed for high degree and gamma
test_poly_degs(degs, gamma=4, epsilon=0.001)

degree= 2 Score: 0.7340013709531641
degree= 3 Score: 0.9219207554626947
degree= 4 Score: 0.9276225761973721
degree= 5 Score: 0.9622635824286132


### Try high gamma, high or default C, and low epsilon with a train/test split

In [89]:
from sklearn import model_selection

In [94]:
# Hold out 20% of the rows for testing. random_state pins the shuffle so that
# Restart & Run All reproduces the same split and therefore the same scores.
# NOTE: the scores recorded below came from an unseeded split, so they may
# differ slightly after this cell is re-run.
X_tr, X_test, y_tr, y_test = model_selection.train_test_split(
    X, y, test_size=0.20, random_state=42
)

In [95]:
# Fit the chosen configuration on the training split only, then compare the
# score on the training rows with the score on the held-out rows.
model = svm.SVR(epsilon=0.001, gamma=8).fit(X_tr, y_tr)
print('Training score:', model.score(X_tr, y_tr))
print('Testing score:', model.score(X_test, y_test))

Training score: 0.9928627879067804
Testing score: 0.9907686012196372
