In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [15]:
from sklearn import svm

In [3]:
# Load the packing data from a CSV located relative to the notebook's working directory.
packing_df = pd.read_csv('packing_sim.csv')

In [4]:
# Drop columns that are entirely NaN (empty columns in the CSV).
# Reassign rather than mutate with inplace=True: the pandas-recommended form,
# with no in-place mutation of the frame object loaded above.
packing_df = packing_df.dropna(axis='columns', how='all')

In [6]:
# Show the cleaned frame; the last column is the one used as the regression target below.
packing_df

Unnamed: 0,RCA,Pumice,Cement,Packing
0,0.00,0.00,1.00,0.600
1,0.00,0.05,0.95,0.612
2,0.00,0.10,0.90,0.624
3,0.00,0.15,0.85,0.637
4,0.00,0.20,0.80,0.650
...,...,...,...,...
225,0.90,0.05,0.05,0.650
226,0.90,0.10,0.00,0.637
227,0.95,0.00,0.05,0.630
228,0.95,0.05,0.00,0.618


In [10]:
# Separate the feature matrix (every column but the last) from the target
# (the last column), then collapse the target to one dimension.
X, y = np.hsplit(packing_df.to_numpy(), [-1])
y = np.ravel(y)

In [16]:
# Baseline: SVR with every hyperparameter at its default (per the original
# note, an RBF kernel with gamma='scale').
model = svm.SVR().fit(X, y)
# R^2 measured on the same rows the model was fit on (about 0.42 in the saved run).
model.score(X, y)

0.4227663122527535

In [27]:
# try poly kernel
poly_svr = svm.SVR(kernel='poly', degree=3) # default degree is 3
poly_svr.fit(X, y)
poly_svr.score(X, y)

0.2849737911833873

In [20]:
from sklearn import preprocessing

In [55]:
# Expand the features with all degree-2 terms (squares and pairwise products),
# omitting the constant bias column.
transformer = preprocessing.PolynomialFeatures(
    degree=2,
    include_bias=False,
    interaction_only=False,
)
poly_X = transformer.fit(X).transform(X)

In [36]:
# Refit the existing polynomial-kernel SVR on the expanded features and
# report its training-set R^2.
poly_svr.fit(poly_X, y).score(poly_X, y)

0.31539031746682

In [37]:
# Refit the existing `model` estimator on the expanded features and report
# its training-set R^2.
model.fit(poly_X, y).score(poly_X, y)

0.3770950089010403

In [38]:
# Linear kernel applied to the polynomial feature expansion.
lin_svr = svm.SVR(kernel='linear').fit(poly_X, y)
# Training-set R^2.
lin_svr.score(poly_X, y)

0.2961476097939195

### Testing different gammas with rbf

In [39]:
# RBF kernel with gamma='auto', i.e. 1 / n_features.
# The original note puts that at about 0.33 (three features) - confirm with X.shape[1].
rbf_auto_svr = svm.SVR(gamma='auto').fit(X, y)
# Training-set R^2.
rbf_auto_svr.score(X, y)

0.3261486467798126

In [51]:
# Gamma grid: 0.1, 0.2, ..., 1.9 (nineteen values in steps of one tenth).
gammas = [tenths / 10 for tenths in range(1, 20)]

In [49]:
# Sweep gamma for the default (RBF) kernel on the raw features, printing the
# training-set R^2 for each value.
# NOTE(review): the saved output below lists gamma 1-4, which does not match
# the 0.1-1.9 grid defined in the previous cell - re-run to refresh it.
for g in gammas:
    rbf_svr = svm.SVR(gamma=g).fit(X, y)
    print('Gamma:', g, 'Score:', rbf_svr.score(X, y))

Gamma: 1 Score: 0.368943151353576
Gamma: 2 Score: 0.41341224256907017
Gamma: 3 Score: 0.42854388239491104
Gamma: 4 Score: 0.4295383605326619


In [56]:
# Same gamma sweep for the default (RBF) kernel, this time on the polynomial
# feature expansion; scores are training-set R^2.
for g in gammas:
    rbf_svr = svm.SVR(gamma=g).fit(poly_X, y)
    print('Gamma:', g, 'Score:', rbf_svr.score(poly_X, y))

Gamma: 0.1 Score: 0.3080515114969824
Gamma: 0.2 Score: 0.31533004786976027
Gamma: 0.3 Score: 0.32647681231865855
Gamma: 0.4 Score: 0.33867189346018556
Gamma: 0.5 Score: 0.3516361915373498
Gamma: 0.6 Score: 0.36371318763373417
Gamma: 0.7 Score: 0.37464411077496207
Gamma: 0.8 Score: 0.3835545784721276
Gamma: 0.9 Score: 0.3929252053222799
Gamma: 1.0 Score: 0.3992634688157104
Gamma: 1.1 Score: 0.4054728967320723
Gamma: 1.2 Score: 0.4109572937020375
Gamma: 1.3 Score: 0.4153673798343305
Gamma: 1.4 Score: 0.4188430648212532
Gamma: 1.5 Score: 0.42150595302493565
Gamma: 1.6 Score: 0.4241471433631321
Gamma: 1.7 Score: 0.4253399185244868
Gamma: 1.8 Score: 0.42363171895634677
Gamma: 1.9 Score: 0.4242174681116442


In [59]:
# Gamma sweep for a degree-2 polynomial kernel on the raw features.
# Each pass rebinds poly_svr, so afterwards it holds the last model fitted.
for g in gammas:
    poly_svr = svm.SVR(degree=2, gamma=g, kernel='poly').fit(X, y)
    print('Gamma:', g, 'Score:', poly_svr.score(X, y))

Gamma: 0.1 Score: 0.13779877436171206
Gamma: 0.2 Score: 0.21772480185277476
Gamma: 0.3 Score: 0.29491331225377737
Gamma: 0.4 Score: 0.30126614877691227
Gamma: 0.5 Score: 0.3088962180556457
Gamma: 0.6 Score: 0.30916798498873166
Gamma: 0.7 Score: 0.304996797323816
Gamma: 0.8 Score: 0.3036371434987827
Gamma: 0.9 Score: 0.3036371031823505
Gamma: 1.0 Score: 0.30363712585030556
Gamma: 1.1 Score: 0.30457158845822185
Gamma: 1.2 Score: 0.3032487577305829
Gamma: 1.3 Score: 0.3045715849751257
Gamma: 1.4 Score: 0.3045715956006444
Gamma: 1.5 Score: 0.30324878831717705
Gamma: 1.6 Score: 0.3036371434987827
Gamma: 1.7 Score: 0.3045715841374631
Gamma: 1.8 Score: 0.3036371031823505
Gamma: 1.9 Score: 0.3036143926281656


In [60]:
# Gamma sweep for a degree-2 polynomial kernel on the polynomial feature
# expansion; scores are training-set R^2.
for g in gammas:
    poly_svr = svm.SVR(degree=2, gamma=g, kernel='poly').fit(poly_X, y)
    print('Gamma:', g, 'Score:', poly_svr.score(poly_X, y))

Gamma: 0.1 Score: 0.22792055062810723
Gamma: 0.2 Score: 0.2838257743296596
Gamma: 0.3 Score: 0.28101589793112214
Gamma: 0.4 Score: 0.2811726430700764
Gamma: 0.5 Score: 0.2793114522665754
Gamma: 0.6 Score: 0.28116345396763365
Gamma: 0.7 Score: 0.2817215466244749
Gamma: 0.8 Score: 0.2811726430700764
Gamma: 0.9 Score: 0.28172155642254826
Gamma: 1.0 Score: 0.2793114522665754
Gamma: 1.1 Score: 0.2817215640677113
Gamma: 1.2 Score: 0.28116345396763365
Gamma: 1.3 Score: 0.28172154139280514
Gamma: 1.4 Score: 0.2817215466244749
Gamma: 1.5 Score: 0.2798432201100789
Gamma: 1.6 Score: 0.2811726430700764
Gamma: 1.7 Score: 0.28172153543127354
Gamma: 1.8 Score: 0.28172155642254826
Gamma: 1.9 Score: 0.27984322591825217


### Testing different C values

In [62]:
# Regularisation grid: powers of ten from 10**1 down to 10**-5.
# Non-negative exponents yield ints (10, 1); negative exponents yield floats.
Cs = [10**exponent for exponent in range(1, -6, -1)]

In [63]:
# Sweep C for the default (RBF) kernel on the raw features, printing the
# training-set R^2 for each value.
for c in Cs:
    rbf_svr = svm.SVR(C=c).fit(X, y)
    print('C:', c, 'Score:', rbf_svr.score(X, y))

C: 10 Score: 0.4227663122527535
C: 1 Score: 0.4227663122527535
C: 0.1 Score: 0.4227663122527535
C: 0.01 Score: 0.30598865055900737
C: 0.001 Score: 0.1074054051328458
C: 0.0001 Score: 0.009041280461337564
C: 1e-05 Score: -0.004123866701380319


In [64]:
# Sweep C for the default (RBF) kernel on the polynomial feature expansion;
# scores are training-set R^2.
for c in Cs:
    rbf_svr = svm.SVR(C=c).fit(poly_X, y)
    print('C:', c, 'Score:', rbf_svr.score(poly_X, y))

C: 10 Score: 0.42304437401727923
C: 1 Score: 0.42304437401727923
C: 0.1 Score: 0.42304437401727923
C: 0.01 Score: 0.2961474582512619
C: 0.001 Score: 0.10980809054784446
C: 0.0001 Score: 0.00945330329496119
C: 1e-05 Score: -0.00408197143992739


In [66]:
# Sweep C for a degree-2 polynomial kernel on the raw features.
# NOTE: the variable keeps the name rbf_svr from the earlier cells, but the
# kernel fitted here is 'poly', not RBF.
for c in Cs:
    rbf_svr = svm.SVR(C=c, degree=2, kernel='poly').fit(X, y)
    print('C:', c, 'Score:', rbf_svr.score(X, y))

C: 10 Score: 0.30457160644905346
C: 1 Score: 0.30457160644905346
C: 0.1 Score: 0.30457160644905346
C: 0.01 Score: 0.30761326652891574
C: 0.001 Score: 0.20246725370680962
C: 0.0001 Score: 0.04815666415935016
C: 1e-05 Score: 0.0012875851254353643


In [67]:
# Sweep C for a linear kernel on the raw features.
# NOTE: the variable keeps the name rbf_svr from the earlier cells, but the
# kernel fitted here is 'linear', not RBF.
for c in Cs:
    rbf_svr = svm.SVR(C=c, kernel='linear').fit(X, y)
    print('C:', c, 'Score:', rbf_svr.score(X, y))

C: 10 Score: 0.048800845738672605
C: 1 Score: 0.0487673831110913
C: 0.1 Score: 0.04808598882961834
C: 0.01 Score: 0.04090727451142562
C: 0.001 Score: 0.006721555332966478
C: 0.0001 Score: -0.004276604546944451
C: 1e-05 Score: -0.0054772446682243014


In [68]:
# Sweep C for a linear kernel on the polynomial feature expansion.
# NOTE: the variable keeps the name rbf_svr from the earlier cells, but the
# kernel fitted here is 'linear', not RBF.
for c in Cs:
    rbf_svr = svm.SVR(C=c, kernel='linear').fit(poly_X, y)
    print('C:', c, 'Score:', rbf_svr.score(poly_X, y))

C: 10 Score: 0.30230789845098904
C: 1 Score: 0.30230789845098904
C: 0.1 Score: 0.2884881434118546
C: 0.01 Score: 0.1269753198451241
C: 0.001 Score: 0.024891127534358515
C: 0.0001 Score: -0.0020543626689939476
C: 1e-05 Score: -0.005250643056183613


### Testing different epsilon values

In [72]:
# Epsilon grid: powers of ten from 10**1 down to 10**-5.
# Non-negative exponents yield ints (10, 1); negative exponents yield floats.
eps = [10**exponent for exponent in range(1, -6, -1)]

In [73]:
# Sweep epsilon for the default (RBF) kernel on the raw features, printing
# the training-set R^2 for each value.
for ep in eps:
    svr = svm.SVR(epsilon=ep).fit(X, y)
    print('epsilon:', ep, 'Score:', svr.score(X, y))

epsilon: 10 Score: -0.0027681054458523757
epsilon: 1 Score: -0.0027681054458525978
epsilon: 0.1 Score: 0.4227663122527535
epsilon: 0.01 Score: 0.9857317376654934
epsilon: 0.001 Score: 0.9881714468023718
epsilon: 0.0001 Score: 0.9877193611681947
epsilon: 1e-05 Score: 0.9877833203395003


In [74]:
# Sweep epsilon for the default (RBF) kernel on the polynomial feature
# expansion; scores are training-set R^2.
for ep in eps:
    svr = svm.SVR(epsilon=ep).fit(poly_X, y)
    print('epsilon:', ep, 'Score:', svr.score(poly_X, y))

epsilon: 10 Score: -0.0027681054458523757
epsilon: 1 Score: -0.0027681054458525978
epsilon: 0.1 Score: 0.42304437401727923
epsilon: 0.01 Score: 0.9843000696590908
epsilon: 0.001 Score: 0.9864728584306907
epsilon: 0.0001 Score: 0.985789385705972
epsilon: 1e-05 Score: 0.985846116999631


In [78]:
# Sweep epsilon for a degree-3 polynomial kernel on the polynomial feature
# expansion; scores are training-set R^2.
for ep in eps:
    svr = svm.SVR(degree=3, epsilon=ep, kernel='poly').fit(poly_X, y)
    print('epsilon:', ep, 'Score:', svr.score(poly_X, y))

epsilon: 10 Score: -0.0027681054458523757
epsilon: 1 Score: -0.0027681054458525978
epsilon: 0.1 Score: 0.2548816499592834
epsilon: 0.01 Score: 0.9678597909810893
epsilon: 0.001 Score: 0.9625407029366183
epsilon: 0.0001 Score: 0.9616781257780931
epsilon: 1e-05 Score: 0.9617636241871591


In [80]:
# Sweep epsilon for a linear kernel on the polynomial feature expansion;
# scores are training-set R^2.
for ep in eps:
    svr = svm.SVR(epsilon=ep, kernel='linear').fit(poly_X, y)
    print('epsilon:', ep, 'Score:', svr.score(poly_X, y))

epsilon: 10 Score: -0.0027681054458523757
epsilon: 1 Score: -0.0027681054458525978
epsilon: 0.1 Score: 0.30230789845098904
epsilon: 0.01 Score: 0.7383676075809289
epsilon: 0.001 Score: 0.7423225836955063
epsilon: 0.0001 Score: 0.7419842403590312
epsilon: 1e-05 Score: 0.7415632921907256


### Testing different gamma and C values with the best epsilon (epsilon = 0.001)

In [84]:
# Replace the earlier fractional grid with integer gammas 1 through 9.
# This rebinds `gammas`, so cells above will use this grid if re-run afterwards.
gammas = list(range(1, 10))

In [87]:
# Gamma sweep for the default (RBF) kernel with epsilon fixed at 0.001, on
# the polynomial feature expansion; scores are training-set R^2.
for g in gammas:
    svr = svm.SVR(gamma=g, epsilon=0.001).fit(poly_X, y)
    print('Gamma:', g, 'Score:', svr.score(poly_X, y))

Gamma: 1 Score: 0.968351372231087
Gamma: 2 Score: 0.9833321465416324
Gamma: 3 Score: 0.9896470898138913
Gamma: 4 Score: 0.9922911133227431
Gamma: 5 Score: 0.9942041544651568
Gamma: 6 Score: 0.9949590286625649
Gamma: 7 Score: 0.9959887665057138
Gamma: 8 Score: 0.9967212131481874
Gamma: 9 Score: 0.9970704612083636


In [88]:
# Same gamma sweep with epsilon = 0.001, but with C raised to 10; fitted and
# scored on the polynomial feature expansion.
for g in gammas:
    svr = svm.SVR(C=10, gamma=g, epsilon=0.001).fit(poly_X, y)
    print('Gamma:', g, 'Score:', svr.score(poly_X, y))

Gamma: 1 Score: 0.978780461354092
Gamma: 2 Score: 0.9896040201395292
Gamma: 3 Score: 0.9931634687384273
Gamma: 4 Score: 0.994785091282211
Gamma: 5 Score: 0.9959339722179822
Gamma: 6 Score: 0.9969121599524313
Gamma: 7 Score: 0.997220984328346
Gamma: 8 Score: 0.9976241952596338
Gamma: 9 Score: 0.9979210665509624


### Try high gamma, high or default C, and low epsilon, evaluated with a train/test split

In [89]:
from sklearn import model_selection

In [94]:
# Hold out 20% of the rows for testing.
# random_state pins the shuffle so the split - and therefore the scores
# reported below - are identical on every Restart & Run All.
X_tr, X_test, y_tr, y_test = model_selection.train_test_split(
    X, y, test_size=0.20, random_state=42
)

In [95]:
# Final candidate: default (RBF) kernel with gamma = 8 and epsilon = 0.001,
# fitted on the training portion only. This rebinds `model`.
# NOTE(review): gamma and epsilon were picked from sweeps that were fit and
# scored on the full data set, so the held-out rows influenced the choice.
model = svm.SVR(epsilon=0.001, gamma=8).fit(X_tr, y_tr)
print('Training score:', model.score(X_tr, y_tr))
print('Testing score:', model.score(X_test, y_test))

Training score: 0.9928627879067804
Testing score: 0.9907686012196372
