In [None]:
import cuml
import cudf
from cuml import make_regression, train_test_split
from cuml.linear_model import LinearRegression as cuLinearRegression
from cuml.metrics.regression import r2_score
from sklearn.linear_model import LinearRegression as skLinearRegression

In [None]:
########################################################################
#
# RAPIDS test
#
########################################################################

In [None]:
# Problem size and seed for the synthetic regression benchmark.
# NOTE(review): this value was previously `2**2` (4 rows). That contradicts the
# memory advice on the same line and leaves the 20% test split with at most one
# row, so R^2 for a 50-feature model is not meaningful. Restored to 2**20 —
# confirm this matches the intended benchmark size.
n_samples = 2**20 #If you are running on a GPU with less than 16GB RAM, please change to 2**19 or you could run out of memory
n_features = 50
random_state = 23

# Generate the regression data with cuML and wrap it in cuDF containers.
X, y = make_regression(n_samples=n_samples, n_features=n_features, random_state=random_state)
X = cudf.DataFrame(X)
y = cudf.DataFrame(y)[0]  # select column 0 so the target is a single column
# Hold out 20% of the rows for evaluation; seeded for a repeatable split.
X_cudf, X_cudf_test, y_cudf, y_cudf_test = train_test_split(X, y, test_size = 0.2, random_state=random_state)

# Copy dataset from GPU memory to host memory.
# This is done to later compare CPU and GPU results.
X_train = X_cudf.to_pandas()
X_test = X_cudf_test.to_pandas()
y_train = y_cudf.to_pandas()
y_test = y_cudf_test.to_pandas()

In [None]:
# build models
# scikit-learn (CPU) model.
# `normalize` is deliberately not passed here: scikit-learn deprecated it in
# 1.0 and removed it in 1.2, so passing it raises TypeError on current
# releases. For ordinary least squares on a full-rank design, rescaling the
# features does not change the fitted predictions (up to floating-point
# error), so the comparison with the cuML model below stays valid.
ols_sk = skLinearRegression(fit_intercept=True,
                            n_jobs=-1)

ols_sk.fit(X_train, y_train)
predict_sk = ols_sk.predict(X_test)
# Both models are scored with the same cuML r2_score against the same test target.
r2_score_sk = r2_score(y_cudf_test, predict_sk)

# cuML (GPU) model
ols_cuml = cuLinearRegression(fit_intercept=True,
                              normalize=True,
                              algorithm='eig')

ols_cuml.fit(X_cudf, y_cudf)
predict_cuml = ols_cuml.predict(X_cudf_test)
r2_score_cuml = r2_score(y_cudf_test, predict_cuml)

In [None]:
# Report the test-split R^2 of the scikit-learn and cuML fits side by side.
print(f"R^2 score (SKL):  {r2_score_sk!s}")
print(f"R^2 score (cuML): {r2_score_cuml!s}")

In [None]:
########################################################################
#
# AutoGluon test
#
########################################################################

In [None]:
from autogluon.tabular import TabularDataset, TabularPredictor

In [None]:
# Number of rows to keep: a small subsample makes the demo faster;
# try setting this to much larger values.
subsample_size = 500
# Read the training CSV from the AutoGluon S3 bucket and draw a fixed-seed
# random sample of `subsample_size` rows.
train_data = TabularDataset(
    'https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv'
).sample(n=subsample_size, random_state=0)
# Show the first rows of the sampled table as the cell output.
train_data.head()

In [None]:
# Name of the column the predictor will be trained to predict.
label = 'class'
# Print descriptive statistics of the label column.
print(
    "Summary of class variable: \n",
    train_data[label].describe(),
)

In [None]:
# Folder in which the trained models are stored.
save_path = 'agModels-predictClass'
# Build the predictor for the chosen label column, then fit it on the sampled
# training data; `predictor` is bound to whatever `fit` returns.
predictor = TabularPredictor(label=label, path=save_path)
predictor = predictor.fit(train_data)