In [5]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import learning_curve
from sklearn.model_selection import validation_curve
from sklearn.model_selection import ShuffleSplit, train_test_split

In [6]:
import cuml
from cuml import make_regression, train_test_split
from cuml.metrics.regression import r2_score
from cuml.linear_model import LinearRegression as cuLR
from sklearn.linear_model import LinearRegression as skLR
import cudf

In [3]:
# Benchmark configuration for the cuML experiments below.
n_samples = 1 << 19  # 524,288 rows (identical to 2**19)
n_features = 399  # number of feature columns to generate
random_state = 23  # seed reused for data generation and the train/test split

In [7]:
%%time
X, y = make_regression(n_samples=n_samples, n_features=n_features, random_state=random_state)

X = cudf.DataFrame(X)
y = cudf.DataFrame(y)[0]

X_cudf, X_cudf_test, y_cudf, y_cudf_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

CPU times: user 1.11 s, sys: 138 ms, total: 1.25 s
Wall time: 1.27 s


In [8]:
# pandas copies of the four cuDF splits, used by the scikit-learn baseline.
# The order of the source tuple matches the order of the target names.
X_train, X_test, y_train, y_test = (
    cudf_split.to_pandas()
    for cudf_split in (X_cudf, X_cudf_test, y_cudf, y_cudf_test)
)

In [9]:
%%time
ols_sk = skLR(fit_intercept=True, normalize=True, n_jobs=1)

ols_sk.fit(X_train, y_train)

CPU times: user 25.7 s, sys: 2.12 s, total: 27.9 s
Wall time: 6.52 s


LinearRegression(n_jobs=1, normalize=True)

In [10]:
%%time
# Predict on the held-out pandas test features with the fitted scikit-learn model.
predict_sk = ols_sk.predict(X_test)

CPU times: user 347 ms, sys: 6.4 ms, total: 354 ms
Wall time: 59.3 ms


In [11]:
%%time
# Score the scikit-learn predictions against the pandas test target.
# NOTE: `r2_score` is the cuML metric (imported from cuml.metrics.regression),
# called here with host-side inputs; the timing is for that call as a whole.
r2_score_sk = r2_score(y_test, predict_sk)

CPU times: user 1.83 ms, sys: 20.8 ms, total: 22.6 ms
Wall time: 20.2 ms


In [12]:
%%time
ols_cuml = cuLR(fit_intercept=True, normalize=True, algorithm='eig')

ols_cuml.fit(X_cudf, y_cudf)

CPU times: user 205 ms, sys: 7.93 ms, total: 213 ms
Wall time: 211 ms


LinearRegression(algorithm='eig', fit_intercept=True, normalize=True, handle=<cuml.raft.common.handle.Handle object at 0x7f2c282e8d70>, verbose=4, output_type='input')

In [13]:
%%time
# Predict on the held-out cuDF test features with the fitted cuML model.
predict_cuml = ols_cuml.predict(X_cudf_test)

CPU times: user 47.8 ms, sys: 2.33 ms, total: 50.1 ms
Wall time: 48.3 ms


In [14]:
%%time
# Score the cuML predictions against the cuDF test target with the cuML R^2 metric.
r2_score_cuml = r2_score(y_cudf_test, predict_cuml)

CPU times: user 2.55 ms, sys: 0 ns, total: 2.55 ms
Wall time: 2.38 ms


In [15]:
# Report both R^2 scores; the labels are padded so the two values line up.
print(
    "R^2 score (SKL):  %s" % r2_score_sk,
    "R^2 score (cuML): %s" % r2_score_cuml,
    sep="\n",
)

R^2 score (SKL):  1.0
R^2 score (cuML): 1.0
