In [1]:
import cudf as pd
from sklearn.datasets import load_boston
boston = load_boston()
df = pd.DataFrame(data=boston.data, columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'])
df['TARGET'] = boston.target
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,TARGET
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [2]:
import cupy as np
X = df.iloc[:,0:13]
Y = df.iloc[:, 13]
Y = np.array(Y)
Y = Y.reshape(-1,1)

In [3]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X.as_matrix())

In [4]:
X_scaled

array([[-0.41978194,  0.28482986, -1.2879095 , ..., -1.45900038,
         0.44105193, -1.0755623 ],
       [-0.41733926, -0.48772236, -0.59338101, ..., -0.30309415,
         0.44105193, -0.49243937],
       [-0.41734159, -0.48772236, -0.59338101, ..., -0.30309415,
         0.39642699, -1.2087274 ],
       ...,
       [-0.41344658, -0.48772236,  0.11573841, ...,  1.17646583,
         0.44105193, -0.98304761],
       [-0.40776407, -0.48772236,  0.11573841, ...,  1.17646583,
         0.4032249 , -0.86530163],
       [-0.41500016, -0.48772236,  0.11573841, ...,  1.17646583,
         0.44105193, -0.66905833]])

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y, test_size = 0.30, random_state=42)

In [6]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(354, 13)
(152, 13)
(354, 1)
(152, 1)


In [7]:
import cuml
from cuml import Ridge

In [8]:
alpha = np.array([1e-5])
ridge = Ridge(alpha = alpha, fit_intercept = True, normalize = False, solver = "eig")

In [9]:
result_ridge = ridge.fit(X_train, y_train)
print("Coefficients:")
print(result_ridge.coef_)
print("Intercept:")
print(result_ridge.intercept_)

Coefficients:
[-1.14691398  0.83432582  0.33940664  0.79163618 -1.78472664  2.84783956
 -0.30429309 -2.9156248   2.11139957 -1.46519873 -1.96963459  1.07567768
 -3.90310711]
Intercept:
22.509454717075215


In [10]:
preds = result_ridge.predict(X_test)

In [11]:
print("MSE")
print(cuml.metrics.regression.mean_squared_error(y_test, preds))
print("R2 score:")
print(cuml.metrics.regression.r2_score(y_test, preds))
print("MAE:")
print(cuml.metrics.regression.mean_absolute_error(y_test, preds))

MSE
139.59931994445407
R2 score:
0.7112260015709102
MAE:
8.983415496553802


In [12]:
from cuml.linear_model import Lasso
ls = Lasso(alpha=0.1)

In [13]:
result_lasso = ls.fit(X_train, y_train)
print("Coefficients:")
print(result_lasso.coef_)
print("Intercept:")
print(result_lasso.intercept_)

Coefficients:
[-0.82672061  0.39323408  0.          0.80490657 -1.28995831  2.93306839
 -0.11769109 -2.12965495  0.51459017 -0.04896216 -1.8254361   0.95338885
 -3.93544882]
Intercept:
22.513044683878363


In [14]:
preds = result_lasso.predict(X_test)

In [15]:
print("MSE")
print(cuml.metrics.regression.mean_squared_error(y_test, preds))
print("R2 score:")
print(cuml.metrics.regression.r2_score(y_test, preds))
print("MAE:")
print(cuml.metrics.regression.mean_absolute_error(y_test, preds))

MSE
133.07640456326536
R2 score:
0.6943175280788381
MAE:
8.773179529124079
