# Regression with synthetic data

In [1]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [2]:
# Synthetic regression problem: 20000 samples, 120 features, 50 of them informative.
# The generator is seeded so the dataset (and every metric computed from it)
# is reproducible under Restart Kernel -> Run All.
X, y = make_regression(n_samples=20000, n_features=120, n_informative=50, random_state=24)

In [3]:
# Split with a fixed seed, then standardise the features.
# The scaler is fitted on the training split only and re-used for the test
# split, so no test-set statistics leak into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=24)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
%%time
reg = SVR(kernel='linear')
reg.fit(X_train, y_train)

CPU times: user 9.6 s, sys: 44.1 ms, total: 9.65 s
Wall time: 9.08 s


SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [5]:
# Test-set mean squared error of the linear-kernel SVR.
pred = reg.predict(X=X_test)
mean_squared_error(y_true=y_test, y_pred=pred)

0.0016690162997531467

In [6]:
%%time
reg_rbf = SVR(kernel='rbf')
reg_rbf.fit(X_train, y_train)

CPU times: user 32.7 s, sys: 176 ms, total: 32.8 s
Wall time: 32.3 s


SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [7]:
# Test-set mean squared error of the RBF-kernel SVR.
pred_rbf = reg_rbf.predict(X=X_test)
mean_squared_error(y_true=y_test, y_pred=pred_rbf)

154343.31413584977

In [8]:
%%time
d = X.shape[1]
D = 300
mu = np.zeros(d)
sigma = np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
Z_train = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_train, w.T)), np.sin(np.dot(X_train, w.T))), axis=1)
Z_test = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_test, w.T)), np.sin(np.dot(X_test, w.T))), axis=1)
reg_rff = SVR(kernel='linear')
reg_rff.fit(Z_train, y_train)

CPU times: user 3min 5s, sys: 2.59 s, total: 3min 8s
Wall time: 3min 12s


SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [9]:
# Test-set mean squared error of the linear SVR trained on the cos/sin
# random Fourier features.
pred_rff = reg_rff.predict(X=Z_test)
mean_squared_error(y_true=y_test, y_pred=pred_rff)

176397.90332969994

In [10]:
%%time
d = X.shape[1]
D = 300
mu = np.zeros(d)
sigma = np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
b = np.random.uniform(0, 2*np.pi, D)
Z_train = np.sqrt(2/D) * np.cos(np.dot(X_train, w.T) + b)
Z_test = np.sqrt(2/D) * np.cos(np.dot(X_test, w.T) + b)
reg_rff_cos = SVR(kernel='linear')
reg_rff_cos.fit(Z_train, y_train)

CPU times: user 1min 13s, sys: 311 ms, total: 1min 13s
Wall time: 1min 12s


SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [11]:
# Test-set mean squared error of the linear SVR trained on the
# cosine-with-phase random Fourier features.
pred_rff_cos = reg_rff_cos.predict(X=Z_test)
mean_squared_error(y_true=y_test, y_pred=pred_rff_cos)

176334.30078476266

# Classification with synthetic data

In [12]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [13]:
# Synthetic 5-class problem: 20000 samples, 120 features, 50 of them informative.
# The generator is seeded so the dataset (and every metric computed from it)
# is reproducible under Restart Kernel -> Run All.
X, y = make_classification(n_samples=20000, n_features=120, n_informative=50, n_classes=5, random_state=24)

In [14]:
# Split with a fixed seed, then standardise the features.
# The scaler is fitted on the training split only and re-used for the test
# split, so no test-set statistics leak into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=24)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [15]:
%%time
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)

CPU times: user 1min 40s, sys: 524 ms, total: 1min 40s
Wall time: 1min 40s


SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [16]:
# Test-set accuracy of the linear-kernel SVC.
pred = clf.predict(X=X_test)
accuracy_score(y_true=y_test, y_pred=pred)

0.653

In [17]:
%%time
clf_rbf = SVC(kernel='rbf')
clf_rbf.fit(X_train, y_train)

CPU times: user 39.2 s, sys: 177 ms, total: 39.4 s
Wall time: 38.8 s


SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [18]:
# Test-set accuracy of the RBF-kernel SVC.
pred_rbf = clf_rbf.predict(X=X_test)
accuracy_score(y_true=y_test, y_pred=pred_rbf)

0.8812

In [19]:
%%time
d = X.shape[1]
D = 300
mu = np.zeros(d)
sigma = np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
Z_train = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_train, w.T)), np.sin(np.dot(X_train, w.T))), axis=1)
Z_test = np.sqrt(1/D) * np.concatenate((np.cos(np.dot(X_test, w.T)), np.sin(np.dot(X_test, w.T))), axis=1)
clf_rff = SVC(kernel='linear')
clf_rff.fit(Z_train, y_train)

CPU times: user 4min 19s, sys: 2.06 s, total: 4min 21s
Wall time: 4min 23s


SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [20]:
# Test-set accuracy of the linear SVC trained on the cos/sin
# random Fourier features.
pred_rff = clf_rff.predict(X=Z_test)
accuracy_score(y_true=y_test, y_pred=pred_rff)

0.1928

In [21]:
%%time
d = X.shape[1]
D = 300
mu = np.zeros(d)
sigma = np.identity(d)
w = np.random.multivariate_normal(mu, sigma, D)
b = np.random.uniform(0, 2*np.pi, D)
Z_train = np.sqrt(2/D) * np.cos(np.dot(X_train, w.T) + b)
Z_test = np.sqrt(2/D) * np.cos(np.dot(X_test, w.T) + b)
clf_rff_cos = SVC(kernel='linear')
clf_rff_cos.fit(Z_train, y_train)

CPU times: user 2min 11s, sys: 962 ms, total: 2min 12s
Wall time: 2min 12s


SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [22]:
# Test-set accuracy of the linear SVC trained on the
# cosine-with-phase random Fourier features.
pred_rff_cos = clf_rff_cos.predict(X=Z_test)
accuracy_score(y_true=y_test, y_pred=pred_rff_cos)

0.1976