# Linear Regression

線性回歸是最基本的機器學習模型之一。本範例以同一份合成資料分別訓練 scikit-learn（CPU）與 NCUE（GPU）的線性回歸模型，並比較兩者的執行時間與 R² 分數。

## 1. Import 相關套件

In [1]:
import cudf
from ncue import make_regression, train_test_split
from ncue.linear_model import LinearRegression as cuLinearRegression
from ncue.metrics.regression import r2_score
from sklearn.linear_model import LinearRegression as skLinearRegression

## 2. 定義 Parameters

In [2]:
# Size of the synthetic dataset: 2**20 rows by 399 feature columns.
n_samples = 1 << 20
n_features = 399

# Seed shared by data generation and the train/test split.
random_state = 23

## 3. 產生測試資料

In [3]:
%%time
X, y = make_regression(n_samples=n_samples, n_features=n_features, random_state=random_state)

X = cudf.DataFrame.from_gpu_matrix(X)
y = cudf.DataFrame.from_gpu_matrix(y)[0]

X_cudf, X_cudf_test, y_cudf, y_cudf_test = train_test_split(X, y, test_size = 0.2, random_state=random_state)

CPU times: user 2.44 s, sys: 692 ms, total: 3.13 s
Wall time: 3.13 s


In [4]:
# Copy the data from GPU memory to host RAM so scikit-learn can consume it,
# which makes the final comparison of results possible.
X_train, X_test, y_train, y_test = (
    gpu_obj.to_pandas()
    for gpu_obj in (X_cudf, X_cudf_test, y_cudf, y_cudf_test)
)

## 4. Scikit-learn 模型(CPU)

### 訓練模型

In [5]:
%%time
ols_sk = skLinearRegression(fit_intercept=True,
                            normalize=True,
                            n_jobs=-1)

ols_sk.fit(X_train, y_train)

CPU times: user 2min 47s, sys: 26.6 s, total: 3min 13s
Wall time: 16.6 s


### 預測

In [6]:
%%time
# Predict on the held-out pandas test features with the fitted CPU model.
predict_sk = ols_sk.predict(X_test)

CPU times: user 491 ms, sys: 884 ms, total: 1.38 s
Wall time: 183 ms


### 驗證

In [7]:
%%time
# Score the CPU predictions with the r2_score imported from ncue.metrics.regression.
# NOTE(review): the target passed here is the cuDF split (`y_cudf_test`), not the
# pandas copy `y_test`, while `predict_sk` comes from scikit-learn — confirm that
# r2_score is meant to accept this mix of inputs.
r2_score_sk = r2_score(y_cudf_test, predict_sk)

CPU times: user 21.3 ms, sys: 57.6 ms, total: 78.9 ms
Wall time: 5.9 ms


## 5. NCUE 模型(GPU)

### 訓練模型

In [8]:
%%time
ols_ncue = cuLinearRegression(fit_intercept=True,
                              normalize=True,
                              algorithm='eig')

ols_ncue.fit(X_cudf, y_cudf)
#我們的模型支援 array-like objects，e.g. NumPy arrays, cuDF DataFrames

CPU times: user 519 ms, sys: 552 ms, total: 1.07 s
Wall time: 515 ms


### 預測

In [9]:
%%time
# Predict on the held-out cuDF test features with the fitted GPU model.
predict_ncue = ols_ncue.predict(X_cudf_test)

CPU times: user 185 ms, sys: 8.37 ms, total: 193 ms
Wall time: 192 ms


### 驗證

In [10]:
%%time
# Score the GPU predictions against the cuDF test target.
r2_score_ncue = r2_score(y_cudf_test, predict_ncue)

CPU times: user 999 µs, sys: 176 µs, total: 1.18 ms
Wall time: 1.18 ms


## 6. 比對運算結果(CPU vs. GPU)

In [11]:
# Report both R^2 scores, one per line, for a side-by-side CPU/GPU comparison.
print(
    "R^2 score (CPU):  %s" % r2_score_sk,
    "R^2 score (GPU): %s" % r2_score_ncue,
    sep="\n",
)

R^2 score (CPU):  1.0
R^2 score (GPU): 1.0
