In [1]:
#imports
using Flux, RDatasets, Plots
plotly()

┌ Info: CUDAdrv.jl failed to initialize, GPU functionality unavailable (set JULIA_CUDA_SILENT or JULIA_CUDA_VERBOSE to silence or expand this message)
└ @ CUDAdrv C:\Users\Anush kumar.v\.julia\packages\CUDAdrv\aBgcd\src\CUDAdrv.jl:69
┌ Info: For saving to png with the Plotly backend ORCA has to be installed.
└ @ Plots C:\Users\Anush kumar.v\.julia\packages\Plots\qZHsp\src\backends.jl:363


Plots.PlotlyBackend()

In [2]:
# load dataset. Here we will use built-in Boston housing prices dataset using RDatasets package
boston_housing_prices = dataset("MASS","Boston")
println("Number of columns ",ncol(boston_housing_prices)) # print no of columns
println("Number of rows ",nrow(boston_housing_prices)) # print no of rows
println(first(boston_housing_prices[:,1:7],5)) # Lets print the first 5 rows
println("All column names ",names(boston_housing_prices)) # This will give us the names of all the columns
println(describe(boston_housing_prices)) # Initial exploration

Number of columns 14
Number of rows 506
5×7 DataFrame
│ Row │ Crim    │ Zn      │ Indus   │ Chas  │ NOx     │ Rm      │ Age     │
│     │ [90mFloat64[39m │ [90mFloat64[39m │ [90mFloat64[39m │ [90mInt64[39m │ [90mFloat64[39m │ [90mFloat64[39m │ [90mFloat64[39m │
├─────┼─────────┼─────────┼─────────┼───────┼─────────┼─────────┼─────────┤
│ 1   │ 0.00632 │ 18.0    │ 2.31    │ 0     │ 0.538   │ 6.575   │ 65.2    │
│ 2   │ 0.02731 │ 0.0     │ 7.07    │ 0     │ 0.469   │ 6.421   │ 78.9    │
│ 3   │ 0.02729 │ 0.0     │ 7.07    │ 0     │ 0.469   │ 7.185   │ 61.1    │
│ 4   │ 0.03237 │ 0.0     │ 2.18    │ 0     │ 0.458   │ 6.998   │ 45.8    │
│ 5   │ 0.06905 │ 0.0     │ 2.18    │ 0     │ 0.458   │ 7.147   │ 54.2    │
All column names Symbol[:Crim, :Zn, :Indus, :Chas, :NOx, :Rm, :Age, :Dis, :Rad, :Tax, :PTRatio, :Black, :LStat, :MedV]
14×8 DataFrame
│ Row │ variable │ mean     │ min     │ median  │ max     │ nunique │ nmissing │ eltype   │
│     │ [90mSymbol[39m   │ [90mFloat64

In [3]:
# To learn Regression, for now, lets consider 2 columns in the above dataset
x = boston_housing_prices[:,6]
y = boston_housing_prices[:,14]
# types of variables
println(typeof(boston_housing_prices)) 
println(typeof(x))
println(typeof(y))
# Lets plot x and y
plot(x,y,seriestype=:scatter,title="Room size vs avg price")

DataFrame
Array{Float64,1}
Array{Float64,1}


In [None]:
"""
Regression is similar to interpolation and extrapolation we learnt in mathematics. Its basically curve fitting
It helps in identifying the pattern of the underlying data
Flux is an ML library in Julia which I would like to call a neural network programming language
It helps is finding autmatically finding out gradients of functions and taking gradients is one of the main reason why ML exists
For the given diagram above lets try to fit a simple line
We all know that the equation of a line is given by y = Mx + C
"""

In [111]:
# initialize weights with random numbers
w = [rand()]
b = [rand()]
println("w ",w," b ",b)

# prediction function
predict(x) = w .* x .+ b

# mean square error loss function
function loss_custom(x,y)
    y_hat = predict(x)
    sum((y .- y_hat).^2)
end

# collect gradients
grads = gradient(() -> loss_custom([x[1]], [y[1]]), params(w, b))
println("gradient of w wrt mse ",grads[w])
println("gradient of b wrt mse ",grads[b])

w [0.585015] b [0.558916]
gradient of w wrt mse [-257.669]
gradient of b wrt mse [-39.1892]


In [112]:
function plot_graph(msg)
    x_generated = [item for item in 4:0.1:9]
    p = prediction(x_generated)
    scatter(x,y)
    plot!(x_generated,p,linewidth=3,title=msg)
end
plot_graph("Initial fit before training")

In [113]:
epochs = 50
learning_rate = 0.01
println("before training")
println("w ",w," b ",b)
for i in epochs
    for item in zip(x,y)
        # collect gradients
        grads = gradient(() -> loss_custom([item[1]], [item[2]]), params(w, b))
        # update gradients
        w = w .- learning_rate .* grads[w]
        b = b .- learning_rate .* grads[b]
    end
end
println("after training")
println("w ",w," b ",b)

before training
w [0.585015] b [0.558916]
after training
w [2.30029] b [-0.0230149]


In [114]:
plot_graph(" fit after training")