Question 1.

In [8]:
import finalGenData
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import KFold

# number of samples
N = 1000

# generate data & split it into X (training input) and y (target output)
X, y = finalGenData.genDataSet(N)

# linear regression solution
w=np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)


#penC  <- Penalty parameter C of the error term
#tubEpsilon  <- the epsilon-tube within which no penalty is associated

bestC=0
bestEpsilon=0
bestGamma=0
bestScore=float('-inf')
score=0
for penC in np.logspace(-5, 15, num=11, base=2):
  for tubEpsilon in np.linspace(0, 1, num=11):
    for paramGamma in np.logspace(-15, 3, num=10, base=2):
      kf = KFold(n_splits=10)
      cvscore=[]
      for train, validation in kf.split(X):
        X_train, X_validation, y_train, y_validation = X[train, :], X[validation, :], y[train], y[validation]
        # here we create the SVR
        svr =  SVR(C=penC, epsilon=tubEpsilon, gamma=paramGamma, kernel='rbf', verbose=False)
        # here we train the SVR
        svr.fit(X_train, y_train)
        # now we get E_out for validation set
        score=svr.score(X_validation, y_validation)
        cvscore.append(score)

      # average CV score
      score=sum(cvscore)/len(cvscore)
      if (score > bestScore):
        bestScore=score
        bestC=penC
        bestEpsilon=tubEpsilon
        bestGamma=paramGamma
        print("C " + str(penC) + ", epsilon " + str(tubEpsilon) + ", gamma " + str(paramGamma) + ". Testing set CV score: %f" % score)

# here we get a new training dataset
X, y = finalGenData.genDataSet(N)
# here we create the final SVR
svr =  SVR(C=bestC, epsilon=bestEpsilon, gamma=bestGamma, kernel='rbf', verbose=True)
# here we train the final SVR
svr.fit(X, y)
# E_out in training
print("Training set score: %f" % svr.score(X, y)) 
# here we get a new testing dataset
X, y = finalGenData.genDataSet(N)
# here test the final SVR and get E_out for testing set
ypred=svr.predict(X)
score=svr.score(X, y)
print("Testing set score: %f" % score)
plt.plot(X[:, 0], X[:, 1], '.')
plt.plot(X[:, 0], y, 'rx')
plt.plot(X[:, 0], ypred, '-k')
ypredLR=X.dot(w)
plt.plot(X[:, 0], ypredLR, '--g')
plt.show()

C 0.03125, epsilon 0.0, gamma 3.0517578125e-05. Testing set CV score: -0.336431
C 0.03125, epsilon 0.0, gamma 0.00048828125. Testing set CV score: -0.326790
C 0.03125, epsilon 0.0, gamma 0.03125. Testing set CV score: -0.248916
C 0.03125, epsilon 0.0, gamma 2.0. Testing set CV score: 0.051664
C 0.125, epsilon 0.0, gamma 2.0. Testing set CV score: 0.070207


KeyboardInterrupt: ignored

Question 2

In [0]:
import finalGetDigits
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import KFold

# get digits data X (training input) and y (target output)
X, y, X_te, y_te = finalGetDigits.getDataSet()

#penC  <- Penalty parameter C of the error term
#tubEpsilon  <- the epsilon-tube within which no penalty is associated

bestC=0
bestEpsilon=0
bestGamma=0
bestScore=float('-inf')
score=0
for penC in np.logspace(6, 12, num=7, base=2):
  for tubEpsilon in np.linspace(0.5, 2.5, num=21):
    for paramGamma in np.logspace(-6, -2, num=5, base=2):
      kf = KFold(n_splits=np.random.randint(2,11))
      cvscore=[]
      for train, validation in kf.split(X):
        X_train, X_validation, y_train, y_validation = X[train, :], X[validation, :], y[train], y[validation]
        # here we create the SVR
        svr =  SVR(C=penC, epsilon=tubEpsilon, gamma=paramGamma, kernel='rbf', verbose=False)
        # here we train the SVR
        svr.fit(X_train, y_train)
        # now we get E_out for validation set
        score=svr.score(X_validation, y_validation)
        cvscore.append(score)

      # average CV score
      score=sum(cvscore)/len(cvscore)
      if (score > bestScore):
        bestScore=score
        bestC=penC
        bestEpsilon=tubEpsilon
        bestGamma=paramGamma
        print("BEST! -> C " + str(penC) + ", epsilon " + str(tubEpsilon) + ", gamma " + str(paramGamma) + ". Testing set CV score: %f" % score)
      else:
        print("C " + str(penC) + ", epsilon " + str(tubEpsilon) + ", gamma " + str(paramGamma) + ". Testing set CV score: %f" % score)

# here we create the final SVR
svr =  SVR(C=bestC, epsilon=bestEpsilon, gamma=bestGamma, kernel='rbf', verbose=True)
# here we train the final SVR
svr.fit(X, y)
# E_out in training
print("Training set score: %f" % svr.score(X, y)) 
# here test the final SVR and get E_out for testing set
ypred=svr.predict(X_te)
score=svr.score(X_te, y_te)
print("Testing set score: %f" % score)

x_min, x_max = np.min(X_te, axis=0), np.max(X_te, axis=0)
X_te = (X_te - x_min) / (x_max - x_min)

plt.figure(figsize=(6, 4))
for i in range(X_te.shape[0]):
  plt.text(X_te[i, 0], X_te[i, 1], str(y_te[i]), color=plt.cm.spectral(round(ypred[i]) / 10.), fontdict={'weight': 'bold', 'size': 9})

plt.xticks([])
plt.yticks([])
plt.axis('off')
plt.tight_layout()

plt.show()

BEST! -> C 64.0, epsilon 0.5, gamma 0.015625. Testing set CV score: 0.297933
BEST! -> C 64.0, epsilon 0.5, gamma 0.03125. Testing set CV score: 0.310104
BEST! -> C 64.0, epsilon 0.5, gamma 0.0625. Testing set CV score: 0.318274
BEST! -> C 64.0, epsilon 0.5, gamma 0.125. Testing set CV score: 0.323933
BEST! -> C 64.0, epsilon 0.5, gamma 0.25. Testing set CV score: 0.327161
C 64.0, epsilon 0.6, gamma 0.015625. Testing set CV score: 0.301229
C 64.0, epsilon 0.6, gamma 0.03125. Testing set CV score: 0.309170
C 64.0, epsilon 0.6, gamma 0.0625. Testing set CV score: 0.321021
C 64.0, epsilon 0.6, gamma 0.125. Testing set CV score: 0.312275
C 64.0, epsilon 0.6, gamma 0.25. Testing set CV score: 0.324511
C 64.0, epsilon 0.7, gamma 0.015625. Testing set CV score: 0.306610
C 64.0, epsilon 0.7, gamma 0.03125. Testing set CV score: 0.313166
C 64.0, epsilon 0.7, gamma 0.0625. Testing set CV score: 0.321618
BEST! -> C 64.0, epsilon 0.7, gamma 0.125. Testing set CV score: 0.328288
C 64.0, epsilon 0.7,