In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from itertools import combinations

In [26]:
def regression(x, lags, validate_size, test_size):
    """Fit a linear autoregression on all ``lags`` and print its forecast.

    The series is cut into sliding windows: every sample consists of ``lags``
    consecutive values and its target is the value that directly follows
    them. The samples are split in their original order (no shuffling): the
    tail becomes the validation part, then the tail of what is left becomes
    the test part, and the rest is used for fitting.

    Parameters
    ----------
    x : sequence of numbers
        The series to model; must contain more than ``lags`` values.
    lags : int
        Window length, i.e. how many preceding values form one sample.
    validate_size : float or int
        Forwarded as ``test_size`` to ``train_test_split`` for the first
        (validation) split.
    test_size : float or int
        Forwarded as ``test_size`` to ``train_test_split`` for the second
        split. The rows cut off here are excluded from fitting but are not
        evaluated by this function (presumably so the model is fitted on the
        same rows as in ``regression_combi`` -- confirm with the author).

    Returns
    -------
    None
        The intermediate data, the rounded coefficients (intercept last), the
        validation predictions and the validation MSE are printed.

    Raises
    ------
    ValueError
        If ``lags`` is smaller than 1, if ``x`` has no more than ``lags``
        values, or if ``train_test_split`` cannot produce the requested
        splits from the available samples.
    """
    print("x:", x)
    if lags < 1:
        raise ValueError("lags must be at least 1, got {}".format(lags))
    n_samples = len(x) - lags
    if n_samples < 1:
        raise ValueError(
            "series of length {} is too short for lags={}".format(len(x), lags)
        )

    # One row per window of `lags` values; the target is the next value.
    x_data = [x[i:i + lags] for i in range(n_samples)]
    y_data = [x[i + lags] for i in range(n_samples)]

    # shuffle=False keeps the chronological order, so validation is the tail.
    x_train, x_val, y_train, y_val = train_test_split(
        x_data, y_data, test_size=validate_size, shuffle=False
    )
    x_train = np.array(x_train)
    x_val = np.array(x_val)
    print("x_train:\n", x_train)
    print("y_train:\n", y_train)
    print("x_val:\n", x_val)
    print("y_val:\n", y_val)

    # The held-out test rows are not needed here; only the reduced training
    # part is kept.
    x_train, _, y_train, _ = train_test_split(
        x_train, y_train, test_size=test_size, shuffle=False
    )

    lr = LinearRegression()
    lr.fit(x_train, y_train)
    # Slope coefficients first, intercept appended as the last entry.
    coeffs = [round(coeff, 6) for coeff in (*lr.coef_, lr.intercept_)]

    y_pred = lr.predict(x_val)
    # scikit-learn's signature is mean_squared_error(y_true, y_pred).
    error = round(mean_squared_error(y_val, y_pred), 6)
    print("best_coeffs:\n", coeffs)
    print("y_pred:\n", y_pred)
    # Previously computed but never shown.
    print("error:\n", error)

In [19]:
def regression_combi(x, lags, validate_size, test_size):
    """Select a subset of lags by test error and print its forecast.

    The series is cut into sliding windows: every sample consists of ``lags``
    consecutive values and its target is the value that directly follows
    them. The samples are split in their original order (no shuffling) into
    training, test and validation parts.

    For subset sizes 1, 2, ... a linear model is fitted on every combination
    of window columns of that size and scored by its MSE on the test part.
    The best combination of a size is accepted only if its error is strictly
    lower than the best error so far; the search stops at the first size that
    brings no improvement. The accepted combination is then refitted on the
    training part and used to predict the validation part.

    Parameters
    ----------
    x : sequence of numbers
        The series to model; must contain more than ``lags`` values.
    lags : int
        Window length, i.e. how many preceding values form one sample.
    validate_size : float or int
        Forwarded as ``test_size`` to ``train_test_split`` for the first
        (validation) split.
    test_size : float or int
        Forwarded as ``test_size`` to ``train_test_split`` for the second
        split, which produces the rows used to score the combinations.

    Returns
    -------
    None
        The intermediate data, the selected column indices, their rounded
        coefficients (intercept last) and the validation predictions are
        printed.

    Raises
    ------
    ValueError
        If ``lags`` is smaller than 1, if ``x`` has no more than ``lags``
        values, if ``train_test_split`` cannot produce the requested splits,
        or if no combination yields a finite test error.
    """
    print("x:", x)
    if lags < 1:
        raise ValueError("lags must be at least 1, got {}".format(lags))
    n_samples = len(x) - lags
    if n_samples < 1:
        raise ValueError(
            "series of length {} is too short for lags={}".format(len(x), lags)
        )

    # One row per window of `lags` values; the target is the next value.
    x_data = [x[i:i + lags] for i in range(n_samples)]
    y_data = [x[i + lags] for i in range(n_samples)]

    # shuffle=False keeps the chronological order, so validation is the tail.
    x_train, x_val, y_train, y_val = train_test_split(
        x_data, y_data, test_size=validate_size, shuffle=False
    )
    x_train = np.array(x_train)
    x_val = np.array(x_val)
    print("x_train:\n", x_train)
    print("y_train:\n", y_train)
    print("x_val:\n", x_val)
    print("y_val:\n", y_val)

    x_train, x_test, y_train, y_test = train_test_split(
        x_train, y_train, test_size=test_size, shuffle=False
    )

    lr = LinearRegression()
    n_features = x_train.shape[1]
    # An infinite sentinel guarantees that the first size is always accepted,
    # however large its error is; a finite constant would not.
    last_error = float("inf")
    best_coeffs = []
    best_subset = ()
    for level in range(1, n_features + 1):
        errors = []
        for comb in combinations(range(n_features), level):
            cols = list(comb)  # explicit list index for column selection
            lr.fit(x_train[:, cols], y_train)
            # Slope coefficients first, intercept appended as the last entry.
            coeffs = [round(coeff, 6) for coeff in (*lr.coef_, lr.intercept_)]
            y_pred = lr.predict(x_test[:, cols])
            # scikit-learn's signature is mean_squared_error(y_true, y_pred).
            error = round(mean_squared_error(y_test, y_pred), 6)
            errors.append((error, coeffs, comb))
        # Smallest tuple wins: lowest error, ties broken by the rounded
        # coefficients and then by the column indices (same order as sorting).
        level_error, level_coeffs, level_comb = min(errors)
        if level_error < last_error:
            last_error = level_error
            best_coeffs = level_coeffs
            best_subset = level_comb
        else:
            # A larger subset did not help; stop searching.
            break

    if not best_subset:
        raise ValueError("no lag combination produced a finite test error")

    print("best_polynom:\n", best_subset)
    print("best_coeffs:\n", best_coeffs)
    best_cols = list(best_subset)
    # lr still holds the last combination tried, so refit on the winner.
    y_pred = lr.fit(x_train[:, best_cols], y_train).predict(x_val[:, best_cols])
    print("y_pred:\n", y_pred)

In [29]:
# Demo inputs shared by the two calls below.
x = list(range(1, 11))  # toy series 1, 2, ..., 10
lags = 5  # number of preceding values in each sample window
# Both sizes are passed to train_test_split as its `test_size` argument.
validate_size = 0.33
test_size = 0.33

In [28]:
# Baseline: linear model on all lags of the demo series.
regression(x, lags=lags, validate_size=validate_size, test_size=test_size)

x: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
x_train:
 [[1 2 3 4 5]
 [2 3 4 5 6]
 [3 4 5 6 7]]
y_train:
 [6, 7, 8]
x_val:
 [[4 5 6 7 8]
 [5 6 7 8 9]]
y_val:
 [9, 10]
best_coeffs:
 [0.2, 0.2, 0.2, 0.2, 0.2, 3.0]
y_pred:
 [ 9. 10.]


In [31]:
# Same demo series, but with the lag subset chosen by test error.
regression_combi(x, lags=lags, validate_size=validate_size, test_size=test_size)

x: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
x_train:
 [[1 2 3 4 5]
 [2 3 4 5 6]
 [3 4 5 6 7]]
y_train:
 [6, 7, 8]
x_val:
 [[4 5 6 7 8]
 [5 6 7 8 9]]
y_val:
 [9, 10]
best_polynom:
 (4,)
best_coeffs:
 [1.0, 1.0]
y_pred:
 [ 9. 10.]
