# Higher-Order Regression

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [40]:
class HigherOrderRegressor():
    """Least-squares polynomial regression on a single input variable.

    The fitted model is ``y = B[0] + B[1]*x + ... + B[degree]*x**degree``.

    Attributes (all ``None`` until ``fit`` is called, except ``degree``):
        degree: Highest power of ``x`` used by the model.
        X: Design (Vandermonde) matrix built from the training inputs, one
            row per sample with columns ``x**0 .. x**degree``.
        Y: Training targets as a float array.
        B: Fitted coefficients, lowest power first.
    """

    def __init__(self, degree):
        """Create an unfitted regressor for a polynomial of ``degree``.

        Raises:
            ValueError: If ``degree`` is negative.
        """
        if degree < 0:
            raise ValueError("degree must be non-negative")
        self.degree = degree
        self.X = None
        self.Y = None
        self.B = None

    def fit(self, X, Y):
        """Estimate the polynomial coefficients from training data.

        Args:
            X: One-dimensional array-like of input values.
            Y: Array-like of targets with one entry per element of ``X``.

        Raises:
            ValueError: If ``X`` is not one-dimensional, is empty, or its
                length does not match ``Y``.
        """
        # Work in floating point so integer inputs cannot overflow when
        # raised to higher powers.
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float)
        if X.ndim != 1:
            raise ValueError("X must be one-dimensional")
        if X.size == 0:
            raise ValueError("cannot fit on empty data")
        if Y.shape[:1] != X.shape:
            raise ValueError("X and Y must contain the same number of samples")

        self.Y = Y
        self.X = np.vander(X, self.degree + 1, increasing=True)
        # Solve the least-squares problem directly rather than inverting
        # X^T X: forming the normal equations squares the condition number
        # and the inverse does not exist for rank-deficient designs.
        self.B, *_ = np.linalg.lstsq(self.X, self.Y, rcond=None)

    def _check_fitted(self):
        """Raise a clear error if the model has no coefficients yet."""
        if self.B is None:
            raise RuntimeError("HigherOrderRegressor must be fitted before predicting")

    def generate_powers(self, x):
        """Return ``[1, x, x**2, ..., x**degree]`` as a NumPy array."""
        powers = [1]
        # Build each power from the previous one (same scheme np.vander uses).
        for _ in range(self.degree):
            powers.append(powers[-1] * x)
        return np.array(powers)

    def predict_one(self, x):
        """Evaluate the fitted polynomial at a single value ``x``.

        Raises:
            RuntimeError: If ``fit`` has not been called.
        """
        self._check_fitted()
        return np.dot(self.generate_powers(x), self.B)

    def predict(self, X):
        """Evaluate the fitted polynomial at every element of ``X``.

        Args:
            X: Scalar or array-like of input values (any shape, may be empty).

        Returns:
            Float array of predictions with the same shape as ``X``.

        Raises:
            RuntimeError: If ``fit`` has not been called.
        """
        self._check_fitted()
        X = np.asarray(X, dtype=float)
        # One matrix-vector product replaces a per-element Python loop and
        # also handles empty inputs.
        design = np.vander(X.ravel(), self.degree + 1, increasing=True)
        return (design @ self.B).reshape(X.shape)


In [25]:
# Load the training set from the working directory.
train_data = pd.read_csv('train.csv')

In [26]:
# Pull the 'x' (input) and 'y' (target) columns out as NumPy arrays.
X = train_data['x'].to_numpy()
Y = train_data['y'].to_numpy()

In [41]:
# Cubic model: polynomial of degree 3.
regressor = HigherOrderRegressor(degree=3)

In [42]:
# Fit on the complete training set.
regressor.fit(X, Y)

In [43]:
# Load the test set from the working directory.
test_data = pd.read_csv('test.csv')

In [44]:
# Test inputs as a NumPy array.
X_test = test_data['x'].to_numpy()

In [45]:
from sklearn.model_selection import train_test_split

In [46]:
# Hold out 20% of the training data for validation; the fixed seed keeps
# the split reproducible between runs.
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [47]:
# Refit on the training split only, so the validation points are unseen.
regressor.fit(X_train, Y_train)

In [49]:
# Predictions for the held-out validation inputs.
data = regressor.predict(X_val)

In [55]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [51]:
# Coefficient of determination (R^2) on the validation split.
r2_score(y_true=Y_val, y_pred=data)

0.8776829081113265

In [53]:
# Mean squared error on the validation split.
mean_squared_error(y_true=Y_val, y_pred=data)

819.8538414941444

In [54]:
# Mean absolute error on the validation split.
mean_absolute_error(y_true=Y_val, y_pred=data)

22.557238245353524