In [None]:
import numpy as np
import pandas as pd
import joblib
from sklearn.metrics import mean_squared_error

### Data

In [None]:
# Feature matrices: each split is a headerless, tab-separated file that is
# read with pandas and converted to a plain NumPy array.
X_train, X_valid, X_test = (
    pd.read_csv(path, sep="\t", header=None).values
    for path in (
        "../result/data/X_train",
        "../result/data/X_valid",
        "../result/data/X_test",
    )
)

# Targets: same file format, flattened to 1-D and multiplied by 1000.
# NOTE(review): the factor 1000 looks like a unit rescaling -- confirm and
# consider naming it as a constant.
Y_train, Y_valid, Y_test = (
    pd.read_csv(path, sep="\t", header=None).values.reshape(-1) * 1000
    for path in (
        "../result/data/Y_train",
        "../result/data/Y_valid",
        "../result/data/Y_test",
    )
)

### Linear

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least-squares baseline, fitted on the training and validation
# rows stacked together (`fit` returns the estimator itself).
lr = LinearRegression().fit(
    np.concatenate((X_train, X_valid), axis=0),
    np.concatenate((Y_train, Y_valid), axis=0),
)

# Predict the held-out test split; the bare MSE expression on the last line
# is what the cell displays.
Y_pred = lr.predict(X_test)
mean_squared_error(Y_test, Y_pred)

### Lasso

In [None]:
from sklearn.linear_model import Lasso

In [None]:
# Candidate Lasso penalties: 0.1 up to (but excluding) 5, in steps of 0.1.
para_list = np.arange(0.1, 5, 0.1)

# For every candidate alpha, fit on the training split only and record the
# validation MSE; mse_list[i] corresponds to para_list[i].
mse_list = []
for alpha in para_list:
    lr = Lasso(alpha=alpha).fit(X_train, Y_train)
    mse_list.append(mean_squared_error(Y_valid, lr.predict(X_valid)))

In [None]:
# Pick the alpha whose validation MSE is smallest (first one wins on ties).
best_para = para_list[np.asarray(mse_list).argmin()]

In [None]:
# Refit Lasso with the selected alpha on the training and validation rows
# stacked together (`fit` returns the estimator itself).
lr = Lasso(alpha=best_para).fit(
    np.concatenate((X_train, X_valid), axis=0),
    np.concatenate((Y_train, Y_valid), axis=0),
)

# Predict the held-out test split; the bare MSE expression on the last line
# is what the cell displays.
Y_pred = lr.predict(X_test)
mean_squared_error(Y_test, Y_pred)

In [None]:
# Persist whichever estimator `lr` currently refers to; when the notebook is
# run top to bottom that is the Lasso model refit with `best_para`.
# NOTE(review): the file is named "LR" although the object saved is a Lasso
# model -- confirm the name is intended.
joblib.dump(value=lr, filename='../result/model/LR.joblib')