In [70]:
import os
import numpy as np

from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold

# SETTINGS

np.random.seed(42)
np.set_printoptions(precision=14)


In [71]:
def get_data():

    ## train & test paths
    data_dir = "./data/"

    ### csv w/ layout: "y,x1,x2,x3,x4,x5"
    train_file = "train.csv"
    data_path = data_dir + train_file
    
    data_type = np.double # np dtype object for values in X and Y

    X = np.genfromtxt(data_path,
                        dtype = data_type,
                        delimiter = ",",
                        skip_header = 1,
                        usecols = np.arange(2, 5 + 2))

    y = np.genfromtxt(data_path,
                        dtype = data_type,
                        delimiter = ",",
                        skip_header = 1,
                        usecols = np.arange(1, 1 + 1))

    return X, y

In [72]:
def save_submission(submission):

    ## prediction submission paths
    sub_dir = "./submission/"
    sub_file = "submission.csv"
    sub_path = sub_dir + sub_file


    if not os.path.isdir(sub_dir):
        os.mkdir(sub_dir)


    ## submission csv header
    sub_header = ""
    sub_format = "%.13f"
    np.savetxt(sub_path,
               submission,
               fmt = sub_format,
               delimiter = ',',
               header = sub_header,
               comments = '')

In [73]:
X, y = get_data()



def get_phi(X):
    phi_1_5 = lambda x: x
    phi_6_10 = lambda x: np.square(x)
    phi_11_15 = lambda x: np.exp(x)
    phi_16_20 = lambda x: np.cos(x)
    phi_21 = lambda x: np.array([1])

    phi = []
    for x in X:
        phi_x = np.concatenate([phi_(x) for phi_ in [phi_1_5, phi_6_10, phi_11_15, phi_16_20, phi_21]])
        # print(phi_x)
        phi.append(phi_x)

    return phi

phi = get_phi(X)

alpha = 0
model = Ridge(alpha = alpha)

model.fit(phi, y)

print("model.coef_", model.coef_)

save_submission(model.coef_)
y_pred = model.predict(phi)

print(y_pred)


score = model.score(phi, y)
print(score)

train_error = mean_squared_error(y, y_pred, squared=False)
print(train_error)

model.coef_ [   87.19891672193933   -67.2701086799054     28.53822964871449
   158.0755039245784   -819.6964295673276    200.04958806703658
  -393.5417564245975   1953.2621601196656   -523.6328881716265
  1518.9342216717039    -86.60565699120242    65.34027497660219
   -30.99213080209313  -155.77167728747193   822.1080966769574
   327.10998800300666  -737.6402033879466   3890.670312200643
 -1211.2283119665387   3799.821751507599       0.              ]
[-6.42055426261413 -6.62196076232249 -6.42819811233676 -6.1974166252985
 -6.17860663950341 -6.07286772071075 -6.06618491857716 -6.22251477946702
 -6.79591820223595 -6.20282842999677 -6.09333175083702 -6.17783169740687
 -6.44229239819651 -6.04712967000705 -6.26012974324476 -6.28551786116441
 -6.07537491741368 -6.29609656570938 -5.88322750273437 -6.50656185523621
 -6.03868000544207 -6.15997771510956 -6.54560885168212 -6.49258770633969
 -6.33046570866281 -6.35966991057285 -5.93705448119363 -6.34754122190043
 -6.62695025887842 -6.23807554755