In [21]:
using DelimitedFiles, Statistics, Random

In [22]:
# Fetch the housing data file from the UCI repository and store it in the
# current working directory under the name "housingdata".
let url = "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data",
    dest = "housingdata"

    download(url, dest)
end

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 49082  100 49082    0     0  75125      0 --:--:-- --:--:-- --:--:-- 75163


"housingdata"

In [23]:
# Read the whitespace-delimited file into a matrix with one row per sample
# and 14 columns (506x14 for this dataset).
housingdata = readdlm("housingdata")

# Put samples along the columns (14 x N). `permutedims` is used rather than
# `transpose` because this is a data-layout change, not a linear-algebra
# operation; it also yields an ordinary Matrix instead of a lazy wrapper.
housingdata = permutedims(housingdata)

# Row 14 is used as the regression target. Indexing with the range 14:14
# keeps the row dimension, giving a 1 x N matrix without hard-coding N.
y = housingdata[14:14, :]

# Rows 1-13 are the input features, a 13 x N matrix.
x = housingdata[1:13, :]

13×506 Array{Float64,2}:
   0.00632    0.02731    0.02729  …    0.06076    0.10959    0.04741
  18.0        0.0        0.0           0.0        0.0        0.0    
   2.31       7.07       7.07         11.93      11.93      11.93   
   0.0        0.0        0.0           0.0        0.0        0.0    
   0.538      0.469      0.469         0.573      0.573      0.573  
   6.575      6.421      7.185    …    6.976      6.794      6.03   
  65.2       78.9       61.1          91.0       89.3       80.8    
   4.09       4.9671     4.9671        2.1675     2.3889     2.505  
   1.0        2.0        2.0           1.0        1.0        1.0    
 296.0      242.0      242.0         273.0      273.0      273.0    
  15.3       17.8       17.8      …   21.0       21.0       21.0    
 396.9      396.9      392.83        396.9      393.45     396.9    
   4.98       9.14       4.03          5.64       6.48       7.88   

In [24]:
#3: Normalize x
# Standardize each feature (row of x): subtract the row mean and divide by the
# row standard deviation, both taken across all samples (dims=2).
means = mean(x; dims=2)
stds = std(x; dims=2)
x = (x .- means) ./ stds

13×506 Array{Float64,2}:
 -0.419367  -0.416927  -0.416929  …  -0.413038  -0.407361  -0.41459 
  0.284548  -0.48724   -0.48724      -0.48724   -0.48724   -0.48724 
 -1.28664   -0.592794  -0.592794      0.115624   0.115624   0.115624
 -0.272329  -0.272329  -0.272329     -0.272329  -0.272329  -0.272329
 -0.144075  -0.73953   -0.73953       0.157968   0.157968   0.157968
  0.413263   0.194082   1.28145   …   0.983986   0.724955  -0.362408
 -0.119895   0.366803  -0.265549      0.796661   0.736268   0.434302
  0.140075   0.556609   0.556609     -0.772919  -0.667776  -0.61264 
 -0.981871  -0.867024  -0.867024     -0.981871  -0.981871  -0.981871
 -0.665949  -0.986353  -0.986353     -0.802418  -0.802418  -0.802418
 -1.45756   -0.302794  -0.302794  …   1.1753     1.1753     1.1753  
  0.440616   0.440616   0.396035      0.440616   0.402826   0.440616
 -1.0745    -0.491953  -1.20753      -0.982076  -0.864446  -0.668397

In [25]:
#4: Randomly split data into training and testing sets
# Seed the global RNG so the permutation, and therefore the split, is
# reproducible from run to run.
Random.seed!(1)

# Shuffle the sample (column) indices. The count is taken from `x` itself
# instead of a literal so the split stays consistent with the loaded data.
perms = randperm(size(x, 2))

# The first 400 shuffled samples form the training set ...
xtrn = x[:, perms[1:400]]
ytrn = y[:, perms[1:400]]

# ... and every remaining sample is held out for testing.
xtst = x[:, perms[401:end]]
ytst = y[:, perms[401:end]]

1×106 Array{Float64,2}:
 20.3  27.5  22.0  30.7  19.4  24.5  …  24.8  42.3  16.3  19.1  20.3  29.8

In [26]:
#5: Initialize weight matrix
# Draw a 1 x 13 row of standard-normal values and scale it by `w_std`, so the
# initial weights are small random numbers.
w_std = 0.1
w = randn(1, 13) .* w_std

1×13 Array{Float64,2}:
 0.0426637  0.0378471  -0.169487  …  -0.0208622  -0.066244  -0.0257344

In [27]:
#6: Predict
"""
    predict(weight, input)

Return the linear model output `weight * input`.

With `weight` of size 1 x D and `input` of size D x N (one sample per
column), the result is a 1 x N row of predictions.
"""
predict(weight, input) = weight * input

# Model outputs for the training inputs using the current weights `w`.
ypred = predict(w, xtrn)

1×400 Array{Float64,2}:
 0.227025  -0.0623266  -0.0604358  …  0.207288  0.0592333  -0.0221864

In [28]:
#7: Loss Function
"""
    loss(weight, input, truth)

Return the sum of squared differences between `predict(weight, input)` and
`truth`, divided by `2N`, where `N` is the number of columns of `truth`.
"""
function loss(weight, input, truth)
    nsamples = size(truth, 2)
    residual = predict(weight, input) - truth
    return sum(residual .^ 2) / (2 * nsamples)
end

# Loss of the current weights `w` on the training split and on the test
# split, returned together as a (train, test) tuple.
loss(w, xtrn, ytrn), loss(w, xtst, ytst)

(297.3036503276775, 299.0172855668773)

In [29]:
#8: count
"""
    counter(predictions, truth; threshold=sqrt(loss(w, xtrn, ytrn)))

Count how many entries of `predictions` lie strictly within `threshold` of
the corresponding entry of `truth`, i.e. `abs(truth - prediction) < threshold`.

# Keywords
- `threshold`: the tolerance. When omitted it is evaluated at call time as
  `sqrt(loss(w, xtrn, ytrn))`, which reads the globals `w`, `xtrn` and `ytrn`
  (the behavior of the original two-argument form). Pass it explicitly to
  evaluate against a different tolerance or to avoid relying on those globals.

# Returns
The number of matching entries as an `Int`.
"""
function counter(predictions, truth; threshold=sqrt(loss(w, xtrn, ytrn)))
    # Count directly over the residuals instead of materializing a Bool array
    # and then counting its `true` entries.
    return count(err -> abs(err) < threshold, truth - predictions)
end

# Number of training predictions whose absolute error is below the threshold
# computed inside `counter`.
counter(ypred, ytrn)

108