In [1]:
import DelimitedFiles, Statistics, Random

In [10]:
rng = Random.MersenneTwister(convert(Int64, rand(1:2e10)));

In [3]:
data = DelimitedFiles.readdlm("housing.data")

506×14 Array{Float64,2}:
 0.00632  18.0   2.31  0.0  0.538  6.575  …  296.0  15.3  396.9    4.98  24.0
 0.02731   0.0   7.07  0.0  0.469  6.421     242.0  17.8  396.9    9.14  21.6
 0.02729   0.0   7.07  0.0  0.469  7.185     242.0  17.8  392.83   4.03  34.7
 0.03237   0.0   2.18  0.0  0.458  6.998     222.0  18.7  394.63   2.94  33.4
 0.06905   0.0   2.18  0.0  0.458  7.147     222.0  18.7  396.9    5.33  36.2
 0.02985   0.0   2.18  0.0  0.458  6.43   …  222.0  18.7  394.12   5.21  28.7
 0.08829  12.5   7.87  0.0  0.524  6.012     311.0  15.2  395.6   12.43  22.9
 0.14455  12.5   7.87  0.0  0.524  6.172     311.0  15.2  396.9   19.15  27.1
 0.21124  12.5   7.87  0.0  0.524  5.631     311.0  15.2  386.63  29.93  16.5
 0.17004  12.5   7.87  0.0  0.524  6.004     311.0  15.2  386.71  17.1   18.9
 0.22489  12.5   7.87  0.0  0.524  6.377  …  311.0  15.2  392.52  20.45  15.0
 0.11747  12.5   7.87  0.0  0.524  6.009     311.0  15.2  396.9   13.27  18.9
 0.09378  12.5   7.87  0.0  0.524  5.88

In [7]:
median_house_price = transpose(data[:,end])

1×506 LinearAlgebra.Transpose{Float64,Array{Float64,1}}:
 24.0  21.6  34.7  33.4  36.2  28.7  …  16.8  22.4  20.6  23.9  22.0  11.9

In [6]:
housing_data = transpose(data[:,1:end-1])

13×506 LinearAlgebra.Transpose{Float64,Array{Float64,2}}:
   0.00632    0.02731    0.02729  …    0.06076    0.10959    0.04741
  18.0        0.0        0.0           0.0        0.0        0.0    
   2.31       7.07       7.07         11.93      11.93      11.93   
   0.0        0.0        0.0           0.0        0.0        0.0    
   0.538      0.469      0.469         0.573      0.573      0.573  
   6.575      6.421      7.185    …    6.976      6.794      6.03   
  65.2       78.9       61.1          91.0       89.3       80.8    
   4.09       4.9671     4.9671        2.1675     2.3889     2.505  
   1.0        2.0        2.0           1.0        1.0        1.0    
 296.0      242.0      242.0         273.0      273.0      273.0    
  15.3       17.8       17.8      …   21.0       21.0       21.0    
 396.9      396.9      392.83        396.9      393.45     396.9    
   4.98       9.14       4.03          5.64       6.48       7.88   

In [24]:
rows, cols = size(housing_data)
for r in 1:rows
    row_mean = Statistics.mean(housing_data[r,:])
    row_std = Statistics.std(housing_data[r,:])
    housing_data[r,:] = (housing_data[r,:] .- row_mean) ./ row_std
end

In [314]:
n_train = 400

# Random.seed!(1)
idx_perm = Random.randperm(rng, cols)
idx_train = idx_perm[1:n_train]
idx_test = idx_perm[n_train+1:end]

housing_data_train = housing_data[:,idx_train];
housing_data_test = housing_data[:,idx_test];
median_house_price_train = median_house_price[idx_train];
median_house_price_test = median_house_price[idx_test];

In [315]:
weights_and_bias = (transpose(0.1 .* randn(13)), [0.0])

([0.142606 0.0952386 … -0.0170649 -0.0645505], [0.0])

In [316]:
function predict(housing_data, weights_and_bias)
    weights = weights_and_bias[1]
    bias = weights_and_bias[2]
    return (weights * housing_data) .+ bias
end

predict (generic function with 1 method)

In [317]:
median_house_price_pred = predict(housing_data_train, weights_and_bias)

1×400 Array{Float64,2}:
 -0.247671  -0.00529168  -0.467821  …  0.187936  0.13619  0.0992343

In [333]:
function loss(input, output, weights_and_bias)
    N = size(input)[2]
    return (1 / (2N)) * sum((predict(input, weights_and_bias) .- transpose(output)).^2)
end

loss (generic function with 2 methods)

In [319]:
@show loss(housing_data_train, median_house_price_train, weights_and_bias);
@show loss(housing_data_test, median_house_price_test, weights_and_bias);

loss(housing_data_train, median_house_price_train, weights_and_bias) = 291.91905130729555
loss(housing_data_test, median_house_price_test, weights_and_bias) = 316.2400623922435


In [320]:
loss_value = √(loss(housing_data_train, median_house_price_train, weights_and_bias));
abs_err = abs.(median_house_price_pred - transpose(median_house_price_train));
n_better = sum(map(x -> x<loss_value, abs_err))

@show n_better;

n_better = 100


In [201]:
using Printf
using Knet
@doc Knet.AutoGrad

Usage:

```
x = Param([1,2,3])          # user declares parameters
x => P([1,2,3])             # they are wrapped in a struct
value(x) => [1,2,3]         # we can get the original value
sum(abs2,x) => 14           # they act like regular values outside of differentiation
y = @diff sum(abs2,x)       # if you want the gradients
y => T(14)                  # you get another struct
value(y) => 14              # which represents the same value
grad(y,x) => [2,4,6]        # but also contains gradients for all Params
```

`Param(x)` returns a struct that acts like `x` but marks it as a parameter you want to compute gradients with respect to.

`@diff expr` evaluates an expression and returns a struct that contains its value (which should be a scalar) and gradient information.

`grad(y, x)` returns the gradient of `y` (output by @diff) with respect to any parameter `x::Param`, or  `nothing` if the gradient is 0.

`value(x)` returns the value associated with `x` if `x` is a `Param` or the output of `@diff`, otherwise returns `x`.

`params(x)` returns an array of Params found by a recursive search of object `x`.

Alternative usage:

```
x = [1 2 3]
f(x) = sum(abs2, x)
f(x) => 14
grad(f)(x) => [2 4 6]
gradloss(f)(x) => ([2 4 6], 14)
```

Given a scalar valued function `f`, `grad(f,argnum=1)` returns another function `g` which takes the same inputs as `f` and returns the gradient of the output with respect to the argnum'th argument. `gradloss` is similar except the resulting function also returns f's output.


In [344]:
weights_and_bias = (transpose(0.1 .* randn(13)), [0.0])

η = 0.1  # Learning rate
# n = 25   # Mini-batch size
N = size(housing_data_train)[2]
epoch = 0
max_epoch = 50

while loss(housing_data_test, median_house_price_test, weights_and_bias) ≥ 8.5
    weights = weights_and_bias[1]
    bias = weights_and_bias[2]
    
    idx_rand = rand(rng, 1:N)
    x = Param(housing_data_train[:,idx_rand])
    y = @diff abs(predict(x, weights_and_bias)[1] - median_house_price_train[idx_rand])
    weights = weights - η*transpose(grad(y,x))
    weights_and_bias = (weights, bias)
    
    train_loss = loss(housing_data_train, median_house_price_train, weights_and_bias);
    test_loss = loss(housing_data_test, median_house_price_test, weights_and_bias);
    @printf("epoch=%3d \t train_loss=%6g \t test_loss=%6g\n", epoch, train_loss, test_loss)
    
    epoch ≥ max_epoch && break
    epoch = epoch+1
end

epoch=  0 	 train_loss=290.166 	 test_loss=317.698
epoch=  1 	 train_loss=290.132 	 test_loss=317.786
epoch=  2 	 train_loss=290.096 	 test_loss=317.884
epoch=  3 	 train_loss=290.059 	 test_loss=317.993
epoch=  4 	 train_loss=290.019 	 test_loss=318.115
epoch=  5 	 train_loss=289.977 	 test_loss=318.251
epoch=  6 	 train_loss=289.935 	 test_loss=318.403
epoch=  7 	 train_loss=289.891 	 test_loss=318.573
epoch=  8 	 train_loss=289.846 	 test_loss=318.763
epoch=  9 	 train_loss=289.802 	 test_loss=318.977
epoch= 10 	 train_loss=289.76 	 test_loss=319.217
epoch= 11 	 train_loss=289.72 	 test_loss=319.488
epoch= 12 	 train_loss=289.685 	 test_loss=319.793
epoch= 13 	 train_loss=289.656 	 test_loss=320.138
epoch= 14 	 train_loss=289.637 	 test_loss=320.528
epoch= 15 	 train_loss=289.632 	 test_loss=320.97
epoch= 16 	 train_loss=289.643 	 test_loss=321.473
epoch= 17 	 train_loss=289.679 	 test_loss=322.045
epoch= 18 	 train_loss=289.744 	 test_loss=322.699
epoch= 19 	 train_loss=289.848 	 t

Obviously I dont have the objective function or something...