# Regression with synthetic data

In [49]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR, LinearSVR
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [50]:
# Synthetic regression problem: 30000 samples, 120 features.
# random_state is fixed so every run (and every kernel session) sees the same
# dataset; without it the scores of the models below are not comparable.
X, y = make_regression(n_samples=30000, n_features=120, random_state=24)

In [51]:
# Hold out a test split, then standardise the features with statistics
# learned from the training split only.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=24)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [52]:
%%time
reg = LinearSVR()
reg.fit(X_train, y_train)

CPU times: user 52.6 ms, sys: 17.3 ms, total: 69.9 ms
Wall time: 68.4 ms


LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True,
          intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
          random_state=None, tol=0.0001, verbose=0)

In [53]:
# Test-set MSE of the linear baseline (displayed as the cell output).
pred = reg.predict(X_test)
mean_squared_error(y_true=y_test, y_pred=pred)

8.379865779058889e-22

In [54]:
%%time
reg_rbf = SVR(kernel='rbf')
reg_rbf.fit(X_train, y_train)

CPU times: user 1min 13s, sys: 232 ms, total: 1min 14s
Wall time: 1min 13s


SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [55]:
# Test-set MSE of the exact RBF-kernel SVR.
pred_rbf = reg_rbf.predict(X_test)
mean_squared_error(y_true=y_test, y_pred=pred_rbf)

10643.092802988971

In [67]:
%%time
d = X.shape[1]
D = 300
mu = np.zeros(d)
sigma = (1/(d*X_train.var(axis=0)))[np.newaxis,:]*np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
Z_train = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_train, w.T)), np.sin(np.dot(X_train, w.T))), axis=1)
Z_test = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_test, w.T)), np.sin(np.dot(X_test, w.T))), axis=1)
reg_rff = LinearSVR()
reg_rff.fit(Z_train, y_train)

CPU times: user 1.5 s, sys: 264 ms, total: 1.76 s
Wall time: 646 ms


LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True,
          intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
          random_state=None, tol=0.0001, verbose=0)

In [70]:
# Sanity check: overall variance of the standardised training features.
np.var(X_train)

0.9999999999999991

In [68]:
# Test-set MSE of the linear SVR trained on the cos/sin random features.
pred_rff = reg_rff.predict(Z_test)
mean_squared_error(y_true=y_test, y_pred=pred_rff)

11191.281260210077

In [11]:
%%time
d = X.shape[1]
D = 300
mu = np.zeros(d)
sigma = (1/(d*X_train.var()))*np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
b = np.random.uniform(0, 2*np.pi, D)
Z_train = np.sqrt(2/D) * np.cos(np.dot(X_train, w.T) + b)
Z_test = np.sqrt(2/D) * np.cos(np.dot(X_test, w.T) + b)
reg_rff_cos = LinearSVR()
reg_rff_cos.fit(Z_train, y_train)

CPU times: user 679 ms, sys: 70.8 ms, total: 750 ms
Wall time: 198 ms


LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True,
          intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
          random_state=None, tol=0.0001, verbose=0)

In [12]:
# Test-set MSE of the linear SVR trained on the cosine-only random features.
pred_rff_cos = reg_rff_cos.predict(Z_test)
mean_squared_error(y_true=y_test, y_pred=pred_rff_cos)

161059.11461381108

# Classification with synthetic data

In [13]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score

In [14]:
# Synthetic 5-class problem: 20000 samples, 120 features of which 50 are informative.
# random_state is fixed so every run sees the same dataset and the accuracies
# of the classifiers below stay comparable across sessions.
X, y = make_classification(n_samples=20000, n_features=120, n_informative=50, n_classes=5, random_state=24)

In [15]:
# Hold out a test split, then standardise the features with statistics
# learned from the training split only.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=24)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [16]:
%%time
clf = LinearSVC(dual=False)
clf.fit(X_train, y_train)

CPU times: user 760 ms, sys: 21 ms, total: 781 ms
Wall time: 779 ms


LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

In [17]:
# Test-set accuracy of the linear baseline (displayed as the cell output).
pred = clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=pred)

0.5436

In [18]:
%%time
clf_rbf = SVC(kernel='rbf')
clf_rbf.fit(X_train, y_train)

CPU times: user 46.6 s, sys: 778 ms, total: 47.3 s
Wall time: 49.4 s


SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [19]:
# Test-set accuracy of the exact RBF-kernel SVC.
pred_rbf = clf_rbf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=pred_rbf)

0.8672

In [24]:
%%time
d = X.shape[1]
D = 300
mu = np.zeros(d)
sigma = (1/(d*X_train.var()))*np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
Z_train = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_train, w.T)), np.sin(np.dot(X_train, w.T))), axis=1)
Z_test = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_test, w.T)), np.sin(np.dot(X_test, w.T))), axis=1)
clf_rff = LinearSVC(dual=False)
clf_rff.fit(Z_train, y_train)

CPU times: user 6.72 s, sys: 208 ms, total: 6.93 s
Wall time: 5.91 s


LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

In [25]:
# Test-set accuracy of the linear SVC trained on the cos/sin random features.
pred_rff = clf_rff.predict(Z_test)
accuracy_score(y_true=y_test, y_pred=pred_rff)

0.5614

In [34]:
%%time
d = X.shape[1]
D = 600
mu = np.zeros(d)
sigma = (1/(d*X_train.var()))*np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
Z_train = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_train, w.T)), np.sin(np.dot(X_train, w.T))), axis=1)
Z_test = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_test, w.T)), np.sin(np.dot(X_test, w.T))), axis=1)
clf_rff = LinearSVC(dual=False)
clf_rff.fit(Z_train, y_train)

CPU times: user 17.5 s, sys: 577 ms, total: 18.1 s
Wall time: 17.1 s


LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

In [35]:
# Test-set accuracy of the linear SVC trained on the cos/sin random features.
pred_rff = clf_rff.predict(Z_test)
accuracy_score(y_true=y_test, y_pred=pred_rff)

0.617

In [38]:
%%time
d = X.shape[1]
D = 900
mu = np.zeros(d)
sigma = (1/(d*X_train.var()))*np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
Z_train = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_train, w.T)), np.sin(np.dot(X_train, w.T))), axis=1)
Z_test = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_test, w.T)), np.sin(np.dot(X_test, w.T))), axis=1)
clf_rff = LinearSVC(dual=False)
clf_rff.fit(Z_train, y_train)

CPU times: user 22.6 s, sys: 710 ms, total: 23.3 s
Wall time: 21.5 s


LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

In [39]:
# Test-set accuracy of the linear SVC trained on the cos/sin random features.
pred_rff = clf_rff.predict(Z_test)
accuracy_score(y_true=y_test, y_pred=pred_rff)

0.6578

In [32]:
%%time
d = X.shape[1]
D = 300
mu = np.zeros(d)
sigma = (1/(d*X_train.var()))*np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
b = np.random.uniform(0, 2*np.pi, D)
Z_train = np.sqrt(2/D) * np.cos(np.dot(X_train, w.T) + b)
Z_test = np.sqrt(2/D) * np.cos(np.dot(X_test, w.T) + b)
clf_rff_cos = LinearSVC(dual=False)
clf_rff_cos.fit(Z_train, y_train)

CPU times: user 3.43 s, sys: 101 ms, total: 3.53 s
Wall time: 2.69 s


LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

In [33]:
# Test-set accuracy of the linear SVC trained on the cosine-only random features.
pred_rff_cos = clf_rff_cos.predict(Z_test)
accuracy_score(y_true=y_test, y_pred=pred_rff_cos)

0.515

In [36]:
%%time
d = X.shape[1]
D = 600
mu = np.zeros(d)
sigma = (1/(d*X_train.var()))*np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
b = np.random.uniform(0, 2*np.pi, D)
Z_train = np.sqrt(2/D) * np.cos(np.dot(X_train, w.T) + b)
Z_test = np.sqrt(2/D) * np.cos(np.dot(X_test, w.T) + b)
clf_rff_cos = LinearSVC(dual=False)
clf_rff_cos.fit(Z_train, y_train)

CPU times: user 8.62 s, sys: 284 ms, total: 8.9 s
Wall time: 7.9 s


LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

In [37]:
# Test-set accuracy of the linear SVC trained on the cosine-only random features.
pred_rff_cos = clf_rff_cos.predict(Z_test)
accuracy_score(y_true=y_test, y_pred=pred_rff_cos)

0.571

In [40]:
%%time
d = X.shape[1]
D = 900
mu = np.zeros(d)
sigma = (1/(d*X_train.var()))*np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
b = np.random.uniform(0, 2*np.pi, D)
Z_train = np.sqrt(2/D) * np.cos(np.dot(X_train, w.T) + b)
Z_test = np.sqrt(2/D) * np.cos(np.dot(X_test, w.T) + b)
clf_rff_cos = LinearSVC(dual=False)
clf_rff_cos.fit(Z_train, y_train)

CPU times: user 10.6 s, sys: 238 ms, total: 10.9 s
Wall time: 9.56 s


LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

In [41]:
# Test-set accuracy of the linear SVC trained on the cosine-only random features.
pred_rff_cos = clf_rff_cos.predict(Z_test)
accuracy_score(y_true=y_test, y_pred=pred_rff_cos)

0.6016