In [1]:
import numpy as np
import matplotlib.pyplot as plt

## Линейная регрессия для нескольких признаков

### Класс линейной регрессии (аналитическое решение)

In [2]:
class linear_regression:
    """Ordinary least-squares linear regression with a closed-form fit.

    No intercept column is added automatically: if a bias term is wanted,
    the caller must include a constant feature in the design matrix.

    Attributes:
        weight: Fitted coefficient array, or ``None`` before ``fit`` is called.
    """

    def __init__(self):
        # Coefficients are unknown until fit() has been called.
        self.weight = None

    def fit(self, x, y):
        """Estimate the least-squares coefficients for ``x @ weight ~= y``.

        Args:
            x: Array-like design matrix of shape (n_samples, n_features).
            y: Array-like targets of shape (n_samples,) or
                (n_samples, n_targets).

        Returns:
            True, once ``self.weight`` has been set.
        """
        X = np.asarray(x).astype('float64')
        Y = np.asarray(y).astype('float64')
        # lstsq minimises ||X w - Y|| directly instead of forming and
        # inverting X^T X: it is better conditioned and returns the
        # minimum-norm solution when the columns are linearly dependent,
        # where an explicit inverse would raise LinAlgError.
        self.weight = np.linalg.lstsq(X, Y, rcond=None)[0]
        return True

    def predict(self, x):
        """Return ``x @ weight`` for an array-like ``x``.

        Raises:
            AttributeError: If the model has not been fitted yet.
        """
        if self.weight is None:
            raise AttributeError(
                "linear_regression has no fitted weight; call fit() first")
        X = np.asarray(x).astype('float64')
        return X.dot(self.weight)

    def mean_squared_error(self, y_true, y_pred):
        """Return the mean of squared differences, averaged over axis 0.

        Both arguments may be any array-like (including plain lists).
        """
        # Coerce both operands so that list - list does not raise TypeError.
        actual = np.asarray(y_true, dtype='float64')
        estimated = np.asarray(y_pred, dtype='float64')
        mse = np.average((actual - estimated) ** 2, axis=0)
        return mse

In [3]:
def transform_features(X, max_power=2):
    """Expand features into all monomials with per-feature degree <= max_power.

    For an input with ``d`` feature columns the result has
    ``(max_power + 1) ** d`` columns, one for every combination of exponents
    ``(p_0, ..., p_{d-1})`` with each ``p_i`` in ``0..max_power``. Columns are
    ordered with the first feature's exponent varying slowest and the last
    feature's exponent varying fastest, so column 0 is the constant 1.

    Args:
        X: Array-like of shape (n_samples, n_features).
        max_power: Largest exponent applied to any single feature (>= 0).

    Returns:
        2-D array of shape (n_samples, (max_power + 1) ** n_features).

    Raises:
        ValueError: If ``X`` is not 2-D or ``max_power`` is negative.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    if max_power < 0:
        raise ValueError("max_power must be non-negative")

    n_samples, n_features = X.shape

    # powers[p] holds X ** p; the zeroth power is an integer block of ones so
    # that integer input stays integer.
    powers = [np.ones(X.shape, dtype='int64')]
    powers.extend(X ** p for p in range(1, max_power + 1))

    # Start from the empty product and fold in one feature at a time. Each
    # existing column is multiplied by every power of the current feature;
    # the row-major reshape keeps earlier features as the slower-varying index.
    expanded = np.ones((n_samples, 1), dtype='int64')
    for i in range(n_features):
        feature_powers = np.stack([block[:, i] for block in powers], axis=1)
        combined = expanded[:, :, None] * feature_powers[:, None, :]
        # Explicit column count (not -1) so zero-row input reshapes cleanly.
        expanded = combined.reshape(
            n_samples, expanded.shape[1] * feature_powers.shape[1])
    return expanded

In [4]:
# Read the tab-separated data file into plain Python lists.
X, Y, X_test = [], [], []
with open('input.txt', 'r') as f:
    # The first 1000 rows are labelled: every column but the last is a
    # feature, and the last column is the target value.
    for _ in range(1000):
        row = [float(token) for token in f.readline().split('\t')]
        X.append(row[:-1])
        Y.extend(row[-1:])
    # The following 1000 rows are kept whole and used as test features.
    for _ in range(1000):
        X_test.append([float(token) for token in f.readline().split('\t')])


In [5]:
# Turn the parsed row lists into arrays before fitting.
X, X_test = np.array(X), np.array(X_test)

# Baseline: fit on the raw features, without any expansion.
features_train, features_test, target_train = X, X_test, Y

model = linear_regression()
model.fit(features_train, target_train)
predict = model.predict(features_test)

# Uncomment to print the model's predictions
# for target in predict:
#     print(target)

In [6]:
# Display the first training sample wrapped as a one-row array.
np.array([X[0]])

array([[0.71783647, 0.18442273, 0.60849903, 0.51268666, 0.27371875]])

In [7]:
# Expand the first sample with max_power=1: every product of a subset of its
# features, so five features give 2 ** 5 = 32 columns.
transform_features(np.array([X[0]]), max_power=1)

array([[1.        , 0.27371875, 0.51268666, 0.14033195, 0.60849903,
        0.16655759, 0.31196933, 0.08539186, 0.18442273, 0.05047996,
        0.09455107, 0.0258804 , 0.11222105, 0.03071701, 0.05753424,
        0.0157482 , 0.71783647, 0.1964853 , 0.36802518, 0.10073539,
        0.43680279, 0.11956111, 0.22394296, 0.06129739, 0.13238536,
        0.03623636, 0.06787221, 0.0185779 , 0.08055636, 0.02204979,
        0.04130017, 0.01130463]])

In [8]:
# Re-wrap the inputs as arrays so this cell does not depend on their type.
X, X_test = np.array(X), np.array(X_test)

# Expand train and test features identically (exponents 0 or 1 per feature).
trans_features_train, trans_features_test = (
    transform_features(part, max_power=1) for part in (X, X_test)
)
target_train = Y

# Refit on the expanded features and predict the test set.
model = linear_regression()
model.fit(trans_features_train, target_train)
predict = model.predict(trans_features_test)

In [9]:
# Load the reference answers for the test set: one float per line.
with open('answers.txt', 'r') as f:
    target_test = [float(line) for line in f]

In [10]:
# Compare the predictions against the reference answers.
test_mse = model.mean_squared_error(target_test, predict)
print('mse\n', test_mse)

mse
 3.0586633893333658e-24
