### RMSE score using random seed 9

In [186]:
import pandas as pd
import numpy as np

import seaborn as sns
from matplotlib import pyplot as plt

In [187]:
# Load the laptops dataset from a CSV file located relative to the working directory.
df = pd.read_csv('laptops.csv')

In [188]:
# Normalize column names: lowercase them and replace spaces with underscores.
df.columns = df.columns.str.lower().str.replace(' ', '_')

In [189]:
# Keep only the three feature columns and the target column.
# The explicit .copy() yields an independent frame, so later column
# assignments on `df` do not raise pandas' SettingWithCopyWarning.
df = df[['ram', 'storage', 'screen', 'final_price']].copy()
df

Unnamed: 0,ram,storage,screen,final_price
0,8,512,15.6,1009.00
1,8,256,15.6,299.00
2,8,256,15.6,789.00
3,16,1000,15.6,1199.00
4,16,512,15.6,669.01
...,...,...,...,...
2155,16,1000,17.3,2699.99
2156,16,1000,17.3,2899.99
2157,32,1000,17.3,3399.99
2158,16,1000,13.4,1899.99


In [190]:
# Fill missing values in the `screen` column with zero.
df['screen'] = df['screen'].fillna(0)

In [191]:
# Reproducible split: 20% validation, 20% test, the remainder for training.
np.random.seed(9)

n = len(df)
n_val = n_test = int(0.2 * n)
n_train = n - n_val - n_test

# Shuffle the row positions, then cut the shuffled frame into three
# consecutive slices (train, validation, test).
idx = np.arange(n)
np.random.shuffle(idx)
df_shuffled = df.iloc[idx]

val_end = n_train + n_val
df_train = df_shuffled.iloc[:n_train].copy()
df_val = df_shuffled.iloc[n_train:val_end].copy()
df_test = df_shuffled.iloc[val_end:].copy()

In [192]:
# Inspect the training split; row labels are the shuffled original indices.
df_train

Unnamed: 0,ram,storage,screen,final_price
2003,32,1000,15.6,1592.89
502,8,512,13.3,1349.00
896,8,512,15.6,549.00
356,32,1000,17.3,2999.00
499,16,1000,16.0,1889.00
...,...,...,...,...
993,16,1000,15.6,1849.00
1815,16,512,15.6,794.74
349,8,128,15.6,429.01
1150,16,1000,17.3,1599.00


In [193]:
# Read the target column from each split. The *_orig arrays and the working
# arrays are taken from the same column with no transformation applied.
y_train_orig, y_val_orig, y_test_orig = (
    part.final_price.values for part in (df_train, df_val, df_test)
)
y_train, y_val, y_test = (
    part.final_price.values for part in (df_train, df_val, df_test)
)

# Remove the target from each split in place so only feature columns remain.
for split_df in (df_train, df_val, df_test):
    del split_df['final_price']

# Feature matrices as NumPy arrays.
X_train, X_val, X_test = (
    part.values for part in (df_train, df_val, df_test)
)

In [194]:
def train_linear_regression_reg(X, y, r=0.0):
    """Fit a regularized linear regression using the normal equations.

    A column of ones is prepended to ``X`` so the first weight acts as the
    bias term, then ``(X^T X + r * I) w = X^T y`` is solved for ``w``. The
    value ``r`` is added to every diagonal entry of ``X^T X``, so the bias
    term is regularized along with the feature weights.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        Feature matrix (must expose ``.shape``).
    y : array-like of shape (n_samples,)
        Target values.
    r : float, default 0.0
        Regularization strength added to the diagonal of ``X^T X``.

    Returns
    -------
    tuple
        ``(w_0, w)`` where ``w_0`` is the bias and ``w`` holds the remaining
        weights, one per feature column.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the regularized matrix ``X^T X + r * I`` is singular.
    """
    ones = np.ones(X.shape[0])
    X = np.column_stack([ones, X])

    XTX = X.T.dot(X)
    XTX = XTX + r * np.eye(XTX.shape[0])

    # Solve the linear system directly rather than forming an explicit
    # inverse: same solution, better numerical accuracy, and less work.
    w = np.linalg.solve(XTX, X.T.dot(y))

    return w[0], w[1:]

In [195]:
def rmse(y, y_pred):
    """Return the root mean squared error between targets and predictions.

    Computes ``sqrt(mean((y - y_pred) ** 2))``. The inputs must support
    element-wise subtraction (e.g. NumPy arrays of matching shape).
    """
    squared_error = np.square(y - y_pred)
    return np.sqrt(squared_error.mean())

In [196]:
# Stack the train and validation feature matrices. Using the NumPy arrays
# (X_train, X_val) instead of the DataFrames keeps this consistent with
# X_test / y_full and avoids relying on implicit DataFrame-to-array coercion.
X_full = np.concatenate([X_train, X_val])

In [197]:
# Targets for the combined train + validation rows (train first, then validation).
y_full = np.concatenate((y_train, y_val))

In [198]:
# Fit on the combined train + validation data with regularization r=0.01.
w_0, w = train_linear_regression_reg(X_full, y_full, r=0.01)

In [199]:
# Predict on the test split: feature matrix times weights, plus the bias.
y_pred = X_test.dot(w) + w_0

In [200]:
# RMSE of the predictions on the test split.
rmse(y_test, y_pred)

608.607313988294