In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.linear_model import LinearRegression, Ridge

# Seed NumPy's legacy global RNG with a fixed value so that steps drawing from
# it produce the same results on every fresh run of the notebook.
np.random.seed(1337)

# Data

In [2]:
# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

# Location of the training set, relative to the notebook's working directory.
data_filepath = 'data/train.csv'

# Parse the CSV using pandas' round-trip float converter.
raw_df = pd.read_csv(data_filepath, float_precision="round_trip")
display(raw_df.head())

# Randomly permute the rows; frac=1 keeps every row.
shuffled_df = raw_df.sample(frac=1)

# Positional split (both are still pandas objects): column 1 is taken as the
# label, columns 2 onwards as the features.
X_df, y_df = shuffled_df.iloc[:, 2:], shuffled_df.iloc[:, 1]

display(X_df.head())
display(y_df.head())

# Convert to NumPy arrays for the modelling steps below.
X, y = X_df.to_numpy(), y_df.to_numpy()

print(X.shape, y.shape)

Unnamed: 0,Id,y,x1,x2,x3,x4,x5
0,0,3.57962,0.02,0.05,-0.09,-0.43,-0.08
1,1,0.734869,-0.13,0.11,-0.08,-0.29,-0.03
2,2,4.287433,0.08,0.06,-0.07,-0.41,-0.03
3,3,8.347827,0.02,-0.12,0.01,-0.43,-0.02
4,4,-0.052718,-0.14,-0.12,-0.08,-0.02,-0.08


Unnamed: 0,x1,x2,x3,x4,x5
107,-0.26,0.0,-0.01,-0.38,-0.06
622,-0.2,-0.18,0.0,-0.12,-0.08
47,-0.07,-0.17,-0.04,-0.27,-0.08
492,0.14,0.17,-0.18,-0.2,-0.02
25,-0.05,-0.05,-0.04,-0.05,-0.02


107     2.144951
622    10.414371
47      6.156960
492     1.748840
25      1.663411
Name: y, dtype: float64

(700, 5) (700,)


## Feature transformations

* Linear

  $$\phi_1(\mathbf{x}) = x_1,~ \phi_2(\mathbf{x}) = x_2,~ \phi_3(\mathbf{x}) = x_3,~ \phi_4(\mathbf{x}) = x_4,~ \phi_5(\mathbf{x}) = x_5$$
  
* Quadratic

  $$\phi_6(\mathbf{x}) = x^2_1,~ \phi_7(\mathbf{x}) = x^2_2,~ \phi_8(\mathbf{x}) = x^2_3,~ \phi_9(\mathbf{x}) = x^2_4,~ \phi_{10}(\mathbf{x}) = x^2_5$$

* Exponential

    $$\phi_{11}(\mathbf{x}) = e^{x_1},~ \phi_{12}(\mathbf{x}) = e^{x_2},~ \phi_{13}(\mathbf{x}) = e^{x_3},~ \phi_{14}(\mathbf{x}) = e^{x_4},~ \phi_{15}(\mathbf{x}) = e^{x_5}$$
    
* Cosine

    $$\phi_{16}(\mathbf{x}) = \cos(x_1),~ \phi_{17}(\mathbf{x}) = \cos(x_2),~ \phi_{18}(\mathbf{x}) = \cos(x_3),~ \phi_{19}(\mathbf{x}) = \cos(x_4),~ \phi_{20}(\mathbf{x}) = \cos(x_5)$$

* Constant

    $$\phi_{21}(\mathbf{x})=1$$

In [3]:
def transform_features(data):
    """Expand raw features into the linear/quadratic/exp/cos/constant basis.

    For an input with ``k`` columns the output columns are, in order:
    the ``k`` original values, their squares, their exponentials, their
    cosines, and one trailing column of ones.

    Parameters
    ----------
    data : array-like of shape (n_samples, k)
        Raw feature matrix. Any number of rows is accepted.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 4 * k + 1)
        The transformed feature matrix.

    Raises
    ------
    ValueError
        If ``data`` is not two-dimensional.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(
            f"expected a 2-D array of shape (n_samples, n_features), got ndim={data.ndim}"
        )

    n_samples = data.shape[0]

    linear = data
    quadratic = np.multiply(data, data)
    exponential = np.exp(data)
    cosine = np.cos(data)
    # Size the constant column from the input rather than a fixed row count,
    # so the same function works for data sets of any length.
    constant = np.ones((n_samples, 1))

    # hstack allocates a new array, so the caller's input is never aliased.
    return np.hstack((linear, quadratic, exponential, cosine, constant))

In [4]:
# Build the model matrix from the untransformed feature frame rather than from
# X itself: re-running this cell then always yields the same (n, 21) matrix
# instead of feeding already-expanded features back through the transform.
X = transform_features(X_df.to_numpy())

display(X.shape)
display(X[0, :])

(700, 21)

array([-0.26      ,  0.        , -0.01      , -0.38      , -0.06      ,
        0.0676    ,  0.        ,  0.0001    ,  0.1444    ,  0.0036    ,
        0.77105159,  1.        ,  0.99004983,  0.68386141,  0.94176453,
        0.96638998,  1.        ,  0.99995   ,  0.92866464,  0.99820054,
        1.        ])

# Linear regression (ridge-regularized)
## Training

In [5]:
# Ridge regression with regularisation strength 1 and no separately fitted
# intercept (presumably because the feature matrix already carries a constant
# column -- confirm against the transform cell).
reg = Ridge(alpha=1, fit_intercept=False)
# fit() returns the estimator itself; assigning keeps the cell output empty.
reg = reg.fit(X, y)

In [6]:
# Show the learned weights, then the score of the model on the same data it
# was fitted on (for scikit-learn regressors, score() is the R^2 coefficient).
display(reg.coef_, reg.score(X, y))

array([ 1.29212527, -0.34609352,  1.08139671,  1.75272917,  1.26865007,
       -1.455314  , -2.54022248, -0.04567181,  0.72561501,  1.28742921,
        0.23309222, -1.74835185,  0.75830334,  2.0344196 ,  1.53017826,
        0.44611499,  0.98755929, -0.24532562, -0.63113936, -0.89821635,
       -0.27009702])

0.01604841748054553

In [7]:
# Write the fitted coefficients to disk, formatted with 12 decimal places.
np.savetxt(
    fname='submission.csv',
    X=reg.coef_,
    fmt="%.12f",
    delimiter='\n',
)