# Regression with synthetic data

## Imports

In [1]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR, LinearSVR
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
# Seed NumPy's global RNG so that later calls which fall back on it (those made
# without an explicit random_state) are repeatable when cells run top to bottom.
np.random.seed(42)

## Data Generation

In [2]:
# Synthetic regression problem: 30,000 samples with 120 features each.
# No random_state is passed, so the draw depends on NumPy's global RNG state.
X, y = make_regression(n_samples=30000, n_features=120)

In [3]:
# Hold out a test split (fixed seed), then standardise both splits using
# statistics learned from the training split only.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=24)
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [4]:
from rkhs.nystrom import FALKON

In [5]:
# Fit the project's FALKON estimator on the scaled training data, allowing at
# most 500 iterations. beta_tol is presumably a convergence tolerance on the
# per-iteration improvement — confirm against rkhs.nystrom.
reg = FALKON(max_iter=500, beta_tol=1e-12)
reg.fit(X_train, y_train)

 10%|██████▏                                                    | 52/500 [00:45<06:22,  1.17it/s, improvement=3.85e-13]

FALKON(beta_tol=1e-12, kernel=None, lambda_reg=1e-06, m=None, max_iter=500,
       verbose=True)

In [6]:
# Mean squared error of the fitted FALKON model on the held-out test split.
mean_squared_error(y_test, reg.predict(X_test))

6584.116516900549

In [7]:
# NOTE(review): bare raise with no message — this looks like a deliberate stop
# so that "Run All" halts before the cells below; confirm before removing.
raise Exception

Exception: 

## Nystrom

In [None]:
%%time
from rkhs.nystrom import PlainNystrom

# Nystrom feature maps for both splits, fitted on the training split only.
# Assumes PlainNystrom returns dense (n_samples, m) feature matrices, as the
# later cells that feed Z_* to linear models do — confirm in rkhs.nystrom.
nys = PlainNystrom()
Z_train = np.asarray(nys.fit_transform(X_train))
Z_test = np.asarray(nys.transform(X_test))

# A "precomputed" kernel must be a Gram matrix: (n_train, n_train) for fitting
# and (n_test, n_train) for prediction. Zero-padding the feature matrix up to
# those shapes does not give kernel values (it is not even symmetric), so the
# approximate kernel is built from inner products of the Nystrom features.
# NOTE: E_train is still n_train x n_train floats, so memory use is unchanged.
E_train = Z_train @ Z_train.T
E_test = Z_test @ Z_train.T

In [None]:
# Sanity check: shapes of the precomputed train matrix, the scaled inputs and
# the Nystrom output for the training split.
E_train.shape, X_train.shape, Z_train.shape

In [None]:
# Sanity check: shapes of the precomputed test matrix, the scaled inputs and
# the Nystrom output for the test split.
E_test.shape, X_test.shape, Z_test.shape

In [None]:
from sklearn.svm import SVR

# Train a support vector regressor directly on the precomputed train matrix,
# predict from the precomputed test matrix and report the test MSE.
reg_nys = SVR(kernel="precomputed").fit(E_train, y_train)
pred_nys = reg_nys.predict(E_test)
mse_nys = mean_squared_error(y_test, pred_nys)
print(mse_nys)

In [None]:
# Imports are loop-invariant, so they live at the top of the cell rather than
# being re-executed on every iteration.
from sklearn.linear_model import SGDRegressor
from tqdm import tqdm

from rkhs.nystrom import PlainNystrom

# Sweep the PlainNystrom `m` setting (presumably the number of landmark points
# — confirm in rkhs.nystrom) from 10 upwards in steps of 200. For each value,
# fit a linear SGD regressor on the Nystrom features and record train/test MSE.
scores_test = []
scores_train = []
ms = []
for m in tqdm(range(10, len(X_train), 200)):
    nys = PlainNystrom(m=m)
    Z_train = nys.fit_transform(X_train)
    Z_test = nys.transform(X_test)

    reg_nys = SGDRegressor(fit_intercept=True, max_iter=10000)
    reg_nys.fit(Z_train, y_train)

    pred_nys = reg_nys.predict(Z_test)
    scores_test.append(mean_squared_error(y_test, pred_nys))
    scores_train.append(mean_squared_error(y_train, reg_nys.predict(Z_train)))
    ms.append(m)

In [None]:
import matplotlib.pyplot as plt

# Plot log(MSE) against log(m) for the test and train curves of the sweep.
log_ms = np.log(ms)
for series_label, series_scores in (("test", scores_test), ("train", scores_train)):
    plt.plot(log_ms, np.log(series_scores), label=series_label)
plt.legend()

In [None]:
# NOTE(review): bare raise with no message — this looks like a deliberate stop
# so that "Run All" halts before the classification section; confirm before
# removing.
raise Exception

# Classification with synthetic data
## Imports

In [None]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score

## Data Generation

In [None]:
# Synthetic 5-class classification problem: 20,000 samples, 120 features,
# 50 of them informative.
# No random_state is passed, so the draw depends on NumPy's global RNG state.
X, y = make_classification(n_samples=20000, n_features=120, n_informative=50, n_classes=5)

In [None]:
# Hold out a test split (fixed seed), then standardise both splits using
# statistics learned from the training split only.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=24)
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## Baselines
### Linear

In [None]:
%%time
# Linear baseline. Use the dual formulation only when there are no more
# samples than features; otherwise solve the primal problem.
n_rows, n_cols = X_train.shape
clf = LinearSVC(dual=n_rows <= n_cols)
clf.fit(X_train, y_train)

In [None]:
# Test-set accuracy of the linear SVC baseline.
pred = clf.predict(X_test)
accuracy_score(y_test,pred)

### Radial Basis Function

In [None]:
%%time
# Non-linear baseline: support vector classifier with an RBF kernel, fitted on
# the scaled training split.
clf_rbf = SVC(kernel='rbf')
clf_rbf.fit(X_train, y_train)

In [None]:
# Test-set accuracy of the RBF-kernel SVC baseline.
pred_rbf = clf_rbf.predict(X_test)
accuracy_score(y_test,pred_rbf)

## Nystrom

In [None]:
%%time
from rkhs.nystrom import PlainNystrom
# Fit the project's PlainNystrom transformer (default settings) on the training
# split only, then apply the fitted transformer to the test split.
nys = PlainNystrom()
Z_train = nys.fit_transform(X_train)
Z_test = nys.transform(X_test)

In [None]:
# Show the shape of the transformed test split.
print(Z_test.shape)

In [None]:
%%time
from sklearn.linear_model import SGDClassifier
# Linear classifier trained with SGD on the Nystrom-transformed training split.
# NOTE(review): no intercept is fitted here, whereas the regression sweep in
# this notebook uses fit_intercept=True — confirm the difference is intended.
clf_nys = SGDClassifier(fit_intercept=False, max_iter=10000)
clf_nys.fit(Z_train, y_train)

In [None]:
# Test-set accuracy of the SGD classifier trained on the Nystrom features.
pred_nys = clf_nys.predict(Z_test)
accuracy_score(y_test, pred_nys)