In [8]:
using IJulia, Plots, Distributed
using CSV, LinearAlgebra, OnlineStats, Random, Distributions
using SparseArrays, SharedArrays
import Base.zero
import SparseArrays.sparse

In [9]:
f = CSV.File("/Users/guillaume/Downloads/ml-latest-small/ratings.csv", use_mmap=true)

CSV.File("/Users/guillaume/Downloads/ml-latest-small/ratings.csv", rows=100836):
Tables.Schema:
 :userId     Union{Missing, Int64}  
 :movieId    Union{Missing, Int64}  
 :rating     Union{Missing, Float64}
 :timestamp  Union{Missing, Int64}  

In [None]:
fbig = CSV.File("/Users/guillaume/Downloads/ml-latest/ratings.csv", use_mmap=true)

In [10]:
mutable struct Rating
    itemId::Int64
    userId::Int64
    value::Float64
end

In [11]:
mutable struct Item
    weights::Vector{Float64}
    bias::Float64
end

mutable struct User
    weights::Vector{Float64}
    bias::Float64
end

In [12]:
mutable struct ITem{N}
    weights::Vector{Float64}
    bias::Float64
end

mutable struct USer{N}
    weights::Vector{Float64}
    bias::Float64
end

function Base.zero(t::Type{ITem{N}}) where {N}
    ITem{N}(zeros(Float64, N), zero(Float64))
end

function Base.zero(t::Type{USer{N}}) where {N}
    USer{N}(zeros(Float64, N), zero(Float64))
end

function Base.iszero(u::USer{N}) where {N}
    u.weights
end

# SGD

### WINNER!!!

In [None]:
function sgd(ratings::Vector{Rating}, k::Int64=10;
        nepochs::Int64=10,
        lr::Float64=0.01,
        reg::Float64=0.0,
        cb::Union{Nothing, Function}=nothing)
    
    μ::Float64 = value(fit!(Mean(), r.value for r in ratings))
    items::Dict{Int64,Item} = Dict{Int64,Item}()
    users::Dict{Int64,User} = Dict{Int64,User}()

    for epoch::Int64 in 1:nepochs
        cost::Float64=0.0
        for r::Rating in ratings
            if epoch == 1
                items[r.itemId] = Item(rand(Normal(0.0, 1e-4), k), 0.0)
                users[r.userId] = User(rand(Normal(0.0, 1e-4), k), 0.0)
            end
            item::Item = items[r.itemId]
            user::User = users[r.userId]

            e::Float64 = μ + item.bias + user.bias + dot(item.weights,user.weights) - r.value
            cost += abs2(e)
            
            item.weights -= lr*2*(e*user.weights + reg*item.weights)
            user.weights -= lr*2*(e*item.weights + reg*user.weights)
            item.bias -= lr*2*(e + reg*item.bias)
            user.bias -= lr*2*(e + reg*user.bias)
        end
        
        if cb !== nothing
            cb(epoch, cost)
        end
    end
    
    return items, users
end

In [None]:
function sgd2(ratings::Vector{Rating}, k::Int64=10;
        nepochs::Int64=10,
        lr::Float64=0.01,
        reg::Float64=0.0,
        cb::Union{Nothing, Function}=nothing)
    
    o = fit!(Group(Mean(), Extrema(Int64), Extrema(Int64)),
        ((r.value, r.itemId, r.userId) for r in ratings))
    
    μ::Float64 = value(o.stats[1])
    nitems = last(value(o.stats[2]))
    nusers = last(value(o.stats[3]))
    
    P::Matrix{Float64} = rand(Normal(0.0, 1e-4), nitems, k)
    Q::Matrix{Float64} = rand(Normal(0.0, 1e-4), nusers, k)
    
    bi::Vector{Float64} = zeros(Float64, nitems)
    bu::Vector{Float64} = zeros(Float64, nusers)

    for epoch::Int64 in 1:nepochs
        cost::Float64=0.0
        for r::Rating in ratings
            e::Float64 = μ + bi[r.itemId] + bu[r.userId] + dot(P[r.itemId,:],Q[r.userId,:]) - r.value
            cost += abs2(e)
            
            P[r.itemId,:] .-= lr*2*(e*Q[r.userId,:] + reg*P[r.itemId,:])
            Q[r.userId,:] .-= lr*2*(e*P[r.itemId,:] + reg*Q[r.userId,:])
            bi[r.itemId] -= lr*2*(e + reg*bi[r.itemId])
            bu[r.userId] -= lr*2*(e + reg*bu[r.userId])
        end
        
        if cb !== nothing
            cb(epoch, cost)
        end
    end
    
    return P, Q
end

In [None]:
function sgd3(ratings::Vector{Rating}, k::Int64=10;
        nepochs::Int64=10,
        lr::Float64=0.01,
        reg::Float64=0.0,
        cb::Union{Nothing, Function}=nothing)
    
    o = fit!(Group(Mean(), Extrema(Int64), Extrema(Int64)),
        ((r.value, r.itemId, r.userId) for r in ratings))
    
    μ::Float64 = value(o.stats[1])
    nitems = last(value(o.stats[2]))
    nusers = last(value(o.stats[3]))
    
    items::SparseVector{ITem{k},Int64} = spzeros(ITem{k}, nitems)
    users::SparseVector{USer{k},Int64} = spzeros(USer{k}, nusers)

    for epoch::Int64 in 1:nepochs
        cost::Float64=0.0
        for r::Rating in ratings
            if epoch == 1
                items[r.itemId] = ITem{k}(rand(Normal(0.0, 1e-4), k), 0.0)
                users[r.userId] = USer{k}(rand(Normal(0.0, 1e-4), k), 0.0)
            end
            item::ITem{k} = items[r.itemId]
            user::USer{k} = users[r.userId]

            e::Float64 = μ + item.bias + user.bias + dot(item.weights,user.weights) - r.value
            cost += abs2(e)
            
            item.weights -= lr*2*(e*user.weights + reg*item.weights)
            user.weights -= lr*2*(e*item.weights + reg*user.weights)
            item.bias -= lr*2*(e + reg*item.bias)
            user.bias -= lr*2*(e + reg*user.bias)
        end
        
        if cb !== nothing
            cb(epoch, cost)
        end
    end
    
    return items, users
end

In [None]:
let costs = [],
    ratings = [Rating(r.movieId, r.userId, r.rating) for r in f]
    
    @time items, users = sgd(ratings, 100;
        nepochs=100,
        lr=0.01,
        cb=(epoch, cost)->begin
            IJulia.clear_output(true)
            println("epoch: $(epoch), cost: $(cost)")
            push!(costs, cost)
            end);

    plot(costs)
end

In [None]:
let costs = [],
    ratings = [Rating(r.movieId, r.userId, r.rating) for r in f]

    @time P, Q = sgd2(ratings, 100;
        nepochs=100,
        lr=0.01,
        cb=(epoch, cost)->begin
            IJulia.clear_output(true)
            println("epoch: $(epoch), cost: $(cost)")
            push!(costs, cost)
            end);

    plot(costs)
end

In [None]:
let costs = [],
    ratings = [Rating(r.movieId, r.userId, r.rating) for r in f]

    @time items, users = sgd3(ratings, 100;
        nepochs=100,
        lr=0.01,
        cb=(epoch, cost)->begin
            IJulia.clear_output(true)
            println("epoch: $(epoch), cost: $(cost)")
            push!(costs, cost)
            end);

    plot(costs)
end

# OnlineStatsBase Interface

In [None]:
mutable struct LatentFactors <: OnlineStat{Rating}
    k::Int64
    rate::Float64
    reg::Float64
    std::Float64
    μ::Mean{EqualWeight}
    items::Dict{Int64,Item}
    users::Dict{Int64,User}
    loss::Float64
    n::Int
end

function LatentFactors(k::Int64;
        rate::Float64=0.01,
        reg::Float64=0.0,
        std::Float64=1e-4)
    
    μ::Mean{EqualWeight} = Mean()
    items::Dict{Int64,Item} = Dict{Int64,Item}()
    users::Dict{Int64,User} = Dict{Int64,User}()
    loss::Float64=zero(Float64)
    n::Int = 0
    
    LatentFactors(k, rate, reg, std, μ, items, users, loss, n)
end

function OnlineStatsBase._fit!(o::LatentFactors, r::Rating)
    fit!(o.μ, r.value)
    μ = value(o.μ)
    
    o.n += 1
    
    if !in(r.itemId, keys(o.items))
        o.items[r.itemId] = Item(rand(Normal(0.0, o.std), o.k), 0.0)
    end
    if !in(r.userId, keys(o.users))
        o.users[r.userId] = User(rand(Normal(0.0, o.std), o.k), 0.0)
    end
    
    item::Item = o.items[r.itemId]
    user::User = o.users[r.userId]

    e::Float64 = μ + item.bias + user.bias + dot(item.weights,user.weights) - r.value
    o.loss += abs2(e)

    item.weights -= o.rate*2*(e*user.weights + o.reg*item.weights)
    user.weights -= o.rate*2*(e*item.weights + o.reg*user.weights)
    item.bias -= o.rate*2*(e + o.reg*item.bias)
    user.bias -= o.rate*2*(e + o.reg*user.bias)
end

In [None]:
let costs=[],
    ratings = (Rating(r.movieId, r.userId, r.rating) for r in f),
    o = LatentFactors(100, rate=0.01)
    
    @time for epoch in 1:100
        o.loss = 0.0
        fit!(o, ratings)
        
        IJulia.clear_output(true)
        println("epoch: $(epoch), cost: $(o.loss)")
        push!(costs, o.loss)
    end
    
    plot(costs)
end

# ALS

### Winner!

In [6]:
function als(ratings, k::Int64=10;
        nepochs::Int64=10,
        reg::Float64=0.0,
        cb::Union{Nothing, Function}=nothing)
    
    #o = fit!(Group(Mean(), Series(Extrema(Int64), CountMap(Int64)), Series(Extrema(Int64), CountMap(Int64))),
    #    ((r.value, r.itemId, r.userId) for r in ratings))
    #stats = o.stats
    #
    #μ::Float64 = value(stats[1])
    #nitems::Int64 = last(value(stats[2][1]))
    #rated_items::Vector{Int64} = sort(collect(keys(stats[2][2])))
    #nusers::Int64 = last(value(o.stats[3][1]))
    #rating_users::Vector{Int64} = sort(collect(keys(stats[3][2])))
    
    R::SparseMatrixCSC{Float64,Int64} = let items::Vector{Int64} = Int64[],
                                            users::Vector{Int64} = Int64[],
                                            values::Vector{Float64} = Float64[]
        
        for r in ratings
            push!(items, r.itemId)
            push!(users, r.userId)
            push!(values, r.value)
        end
        sparse(items, users, values)
    end
    
    P::SharedMatrix{Float64} = SharedMatrix{Float64}(rand(Normal(0.0, 1e-4), R.m, k))
    Q::SharedMatrix{Float64} = SharedMatrix{Float64}(rand(Normal(0.0, 1e-4), R.n, k))
    
    items_bias::SharedVector{Float64} = SharedVector{Float64}(zeros(Float64, R.m))
    users_bias::SharedVector{Float64} = SharedVector{Float64}(zeros(Float64, R.n))
    
    rated_items::Vector{Int64} = unique(sort(findnz(R)[1]))
    rating_users::Vector{Int64} = unique(sort(findnz(R)[2]))
    
    #user_channel = RemoteChannel(()->Channel{Int64}(32))
    #item_channel = RemoteChannel(()->Channel{Int64}(32))
    
    #=@async=# for epoch::Int64 in 1:nepochs
        cost::Float64=0.0
        
        @sync @distributed for u in rating_users
            items_rated_by_user = R[:,u].nzind
            ratings_given_by_user = R[:,u].nzval
            A = P[items_rated_by_user,:]
            b = ratings_given_by_user
            Q[u,:] = Symmetric(A'A + reg*I) \ (A'b)
            #put!(user_channel, u)
        end
        
        @sync @distributed for i in rated_items
            users_who_rated_item = R[i,:].nzind
            ratings_given_to_item = R[i,:].nzval
            A = Q[users_who_rated_item,:]
            b = ratings_given_to_item
            P[i,:] = Symmetric(A'A + reg*I) \ (A'b)
            #put!(item_channel, i)
        end
        
        i, u, v = findnz(R)
        for n in 1:nnz(R)
            cost += abs2(dot(P[i[n],:], Q[u[n],:]) - v[n])
        end
        
        if cb !== nothing
            cb(epoch, cost)
        end
    end
    
    #=begin
        for epoch::Int64 in 1:nepochs
            count_users::Int64 = 0
            total_users::Int64 = length(rating_users)
            for u in rating_users
                done::Int64 = take!(user_channel)
                count_users += 1
                IJulia.clear_output(true)
                println("epoch: $(epoch), processed users: $(count_users)/$(total_users)")
            end

            count_items::Int64 = 0
            total_items::Int64 = length(rated_items)
            for i in rated_items
                done::Int64 = take!(item_channel)
                count_items += 1
                IJulia.clear_output(true)
                println("epoch: $(epoch), processed items: $(count_items)/$(total_items)")
            end
        end
    end=#
    
    return P, Q
end

als (generic function with 2 methods)

### TODO

- Constraint P to length of rated_items
- Constraint Q to length of rating_users

In [13]:
function alsbiased(ratings, k::Int64=10;
        nepochs::Int64=10,
        reg::Float64=0.0,
        cb::Union{Nothing, Function}=nothing)
    
    #o = fit!(Group(Mean(), Series(Extrema(Int64), CountMap(Int64)), Series(Extrema(Int64), CountMap(Int64))),
    #    ((r.value, r.itemId, r.userId) for r in ratings))
    #stats = o.stats
    #
    #μ::Float64 = value(stats[1])
    #nitems::Int64 = last(value(stats[2][1]))
    #rated_items::Vector{Int64} = sort(collect(keys(stats[2][2])))
    #nusers::Int64 = last(value(o.stats[3][1]))
    #rating_users::Vector{Int64} = sort(collect(keys(stats[3][2])))
    
    o = Mean()
    R::SparseMatrixCSC{Float64,Int64} = let items::Vector{Int64} = Int64[],
                                            users::Vector{Int64} = Int64[],
                                            values::Vector{Float64} = Float64[]
        
        for r in ratings
            push!(items, r.itemId)
            push!(users, r.userId)
            push!(values, r.value)
            fit!(o, r.value)
        end
        sparse(items, users, values)
    end
    μ::Float64 = value(o)
    
    P::SharedMatrix{Float64} = SharedMatrix{Float64}(rand(Normal(0.0, 1e-4), R.m, k))
    Q::SharedMatrix{Float64} = SharedMatrix{Float64}(rand(Normal(0.0, 1e-4), R.n, k))
    
    items_bias::SharedVector{Float64} = SharedVector{Float64}(zeros(Float64, R.m))
    users_bias::SharedVector{Float64} = SharedVector{Float64}(zeros(Float64, R.n))
    
    rated_items::Vector{Int64} = unique(sort(findnz(R)[1]))
    rating_users::Vector{Int64} = unique(sort(findnz(R)[2]))
    
    println((R.m, maximum(rated_items), length(rated_items), size(P, 1)))
    
    #user_channel = RemoteChannel(()->Channel{Int64}(32))
    #item_channel = RemoteChannel(()->Channel{Int64}(32))
    
    #=@async=# for epoch::Int64 in 1:nepochs
        cost::Float64=0.0
        
        @sync @distributed for u in rating_users
            items_rated_by_user = R[:,u].nzind
            ratings_given_by_user = R[:,u].nzval
            A = [ones(length(items_rated_by_user)) P[items_rated_by_user,:]]
            b = ratings_given_by_user .- μ .- items_bias[items_rated_by_user]
            x = Symmetric(A'A + reg*I) \ (A'b)
            users_bias[u] = x[1]
            Q[u,:] .= x[2:end]
            #put!(user_channel, u)
        end
        
        @sync @distributed for i in rated_items
            users_who_rated_item = R[i,:].nzind
            ratings_given_to_item = R[i,:].nzval
            A = [ones(length(users_who_rated_item)) Q[users_who_rated_item,:]]
            b = ratings_given_to_item .- μ .- users_bias[users_who_rated_item]
            x = Symmetric(A'A + reg*I) \ (A'b)
            items_bias[i] = x[1]
            P[i,:] .= x[2:end]
            #put!(item_channel, i)
        end
        
        i, u, v = findnz(R)
        for n in 1:nnz(R)
            cost += abs2(dot(P[i[n],:], Q[u[n],:]) + μ + items_bias[i[n]] + users_bias[u[n]] - v[n])
        end
        
        if cb !== nothing
            cb(epoch, cost)
        end
    end
    
    #=begin
        for epoch::Int64 in 1:nepochs
            count_users::Int64 = 0
            total_users::Int64 = length(rating_users)
            for u in rating_users
                done::Int64 = take!(user_channel)
                count_users += 1
                IJulia.clear_output(true)
                println("epoch: $(epoch), processed users: $(count_users)/$(total_users)")
            end

            count_items::Int64 = 0
            total_items::Int64 = length(rated_items)
            for i in rated_items
                done::Int64 = take!(item_channel)
                count_items += 1
                IJulia.clear_output(true)
                println("epoch: $(epoch), processed items: $(count_items)/$(total_items)")
            end
        end
    end=#
    
    return P, Q
end

alsbiased (generic function with 2 methods)

In [None]:
function als2(ratings, k::Int64=10;
        nepochs::Int64=10,
        reg::Float64=0.0,
        cb::Union{Nothing, Function}=nothing)
    
    #o = fit!(Group(Mean(), Series(Extrema(Int64), CountMap(Int64)), Series(Extrema(Int64), CountMap(Int64))),
    #    ((r.value, r.itemId, r.userId) for r in ratings))
    #stats = o.stats
    #
    #μ::Float64 = value(stats[1])
    #nitems::Int64 = last(value(stats[2][1]))
    #rated_items::Vector{Int64} = sort(collect(keys(stats[2][2])))
    #nusers::Int64 = last(value(o.stats[3][1]))
    #rating_users::Vector{Int64} = sort(collect(keys(stats[3][2])))
    
    R::SparseMatrixCSC{Float64,Int64} = let items::Vector{Int64} = Int64[],
                                            users::Vector{Int64} = Int64[],
                                            values::Vector{Float64} = Float64[]
        
        for r in ratings
            push!(items, r.itemId)
            push!(users, r.userId)
            push!(values, r.value)
        end
        sparse(items, users, values)
    end
    
    P::SharedArray{Float64,2} = SharedMatrix{Float64}(rand(Normal(0.0, 1e-4), R.m, k))
    Q::SharedArray{Float64,2} = SharedMatrix{Float64}(rand(Normal(0.0, 1e-4), R.n, k))
    
    rated_items::Vector{Int64} = unique(sort(findnz(R)[1]))
    rating_users::Vector{Int64} = unique(sort(findnz(R)[2]))
    
    user_channel = RemoteChannel(()->Channel{Int64}(32))
    item_channel = RemoteChannel(()->Channel{Int64}(32))
    
    @async for epoch::Int64 in 1:nepochs
        cost::Float64=0.0
        
        @sync @distributed for u in rating_users
            items_rated_by_user = R[:,u].nzind
            ratings_given_by_user = R[:,u].nzval
            o = fit!(LinReg(), (P[items_rated_by_user,:], ratings_given_by_user))
            Q[u,:] = coef(o)
            put!(user_channel, u)
        end
        
        @sync @distributed for i in rated_items
            users_who_rated_item = R[i,:].nzind
            ratings_given_to_item = R[i,:].nzval
            o = fit!(LinReg(), (Q[users_who_rated_item,:], ratings_given_to_item))
            P[i,:] = coef(o)
            put!(item_channel, i)
        end
        
        i, u, v = findnz(R)
        for n in 1:nnz(R)
            cost += abs2(dot(P[i[n],:], Q[u[n],:]) - v[n])
        end
        
        if cb !== nothing
            cb(epoch, cost)
        end
    end
    
    begin
        for epoch::Int64 in 1:nepochs
            count_users::Int64 = 0
            total_users::Int64 = length(rating_users)
            for u in rating_users
                done::Int64 = take!(user_channel)
                count_users += 1
                IJulia.clear_output(true)
                println("epoch: $(epoch), processed users: $(count_users)/$(total_users)")
            end

            count_items::Int64 = 0
            total_items::Int64 = length(rated_items)
            for i in rated_items
                done::Int64 = take!(item_channel)
                count_items += 1
                IJulia.clear_output(true)
                println("epoch: $(epoch), processed items: $(count_items)/$(total_items)")
            end
        end
    end
    
    return P, Q
end

In [None]:
function als3(ratings, k::Int64=10;
        nepochs::Int64=10,
        reg::Float64=0.0,
        cb::Union{Nothing, Function}=nothing)
    
    R::SparseMatrixCSC{Float64,Int64} = let items::Vector{Int64} = Int64[],
                                            users::Vector{Int64} = Int64[],
                                            values::Vector{Float64} = Float64[]
        
        for r in ratings
            push!(items, r.itemId)
            push!(users, r.userId)
            push!(values, r.value)
        end
        sparse(items, users, values)
    end
    
    @everywhere R = $R
    
    P::SharedArray{Float64,2} = SharedMatrix{Float64}(rand(Normal(0.0, 1e-4), R.m, k))
    Q::SharedArray{Float64,2} = SharedMatrix{Float64}(rand(Normal(0.0, 1e-4), R.n, k))
    
    rated_items::Vector{Int64} = unique(sort(findnz(R)[1]))
    rating_users::Vector{Int64} = unique(sort(findnz(R)[2]))
    
    function ls_users(u::Int64)
        items_rated_by_user = R[:,u].nzind
        ratings_given_by_user = R[:,u].nzval
        A = P[items_rated_by_user,:]
        b = ratings_given_by_user
        Q[u,:] = Symmetric(A'A + reg*I) \ (A'b)
    end
    
    function ls_items(i::Int64)
        users_who_rated_item = R[i,:].nzind
        ratings_given_to_item = R[i,:].nzval
        A = Q[users_who_rated_item,:]
        b = ratings_given_to_item
        P[i,:] = Symmetric(A'A + reg*I) \ (A'b)
    end
    
    for epoch::Int64 in 1:nepochs
        pmap(ls_users, rating_users)
        pmap(ls_items, rated_items)
        
        cost::Float64=0.0
        i, u, v = findnz(R)
        for n in 1:nnz(R)
            cost += abs2(dot(P[i[n],:], Q[u[n],:]) - v[n])
        end
        
        if cb !== nothing
            cb(epoch, cost)
        end
    end
    
    return P, Q
end

In [None]:
rmprocs(workers())

In [None]:
addprocs(4)
println(workers())

@everywhere using SparseArrays, SharedArrays, LinearAlgebra, Distributed

costs = []
ratings = (Rating(r.movieId, r.userId, r.rating) for r in f)
    
@time P, Q = als(ratings, 100;
    nepochs=10,
    reg=0.001,
    cb=(epoch, cost)->begin
        IJulia.clear_output(true)
        println("epoch: $(epoch), cost: $(cost)")
        push!(costs, cost)
        end)

#plot(costs)

rmprocs(workers());

In [None]:
plot(costs)

In [16]:
rmprocs(workers())

└ @ Distributed /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v1.1/Distributed/src/cluster.jl:928


Task (done) @0x0000000112db9690

In [20]:
addprocs(4)
println(workers())

@everywhere using SparseArrays, SharedArrays, LinearAlgebra, Distributed

costs = []
ratings = (Rating(r.movieId, r.userId, r.rating) for r in f)
    
@time P, Q = alsbiased(ratings, 100;
    nepochs=10,
    reg=0.000000001,
    cb=(epoch, cost)->begin
        IJulia.clear_output(true)
        println("epoch: $(epoch), cost: $(cost)")
        push!(costs, cost)
        end)

#plot(costs)

rmprocs(workers());

epoch: 10, cost: 422.94189729920004
 28.980796 seconds (3.19 M allocations: 1.904 GiB, 1.84% gc time)


In [None]:
plot(costs)

In [None]:
addprocs(4)
println(workers())

@everywhere using SparseArrays, SharedArrays, LinearAlgebra, Distributed, OnlineStats

costs = []
ratings = (Rating(r.movieId, r.userId, r.rating) for r in f)
    
@time P, Q = als2(ratings, 100;
    nepochs=4,
    reg=0.001,
    cb=(epoch, cost)->begin
        IJulia.clear_output(true)
        println("epoch: $(epoch), cost: $(cost)")
        push!(costs, cost)
        end)

#plot(costs)

rmprocs(workers());

In [None]:
addprocs(4)
println(workers())

@everywhere using SparseArrays, SharedArrays, LinearAlgebra, Distributed, OnlineStats

costs = []
ratings = (Rating(r.movieId, r.userId, r.rating) for r in f)
    
@time P, Q = als3(ratings, 100;
    nepochs=4,
    reg=0.001,
    cb=(epoch, cost)->begin
        IJulia.clear_output(true)
        println("epoch: $(epoch), cost: $(cost)")
        push!(costs, cost)
        end)

#plot(costs)

rmprocs(workers());

### Sparse from Ratings

In [None]:
function sparse(ratings)
    A::SparseMatrixCSC{Float64,Int64} = spzeros(0,0)
    for r in ratings
        if (r.itemId > A.m) || (r.userId > A.n)
            B::SparseMatrixCSC{Float64,Int64} = spzeros(max(A.m, r.itemId), max(A.n, r.userId))
            B[1:A.m, 1:A.n] = A
            A = B
        end
        A[r.itemId, r.userId] = r.value
    end
    A
end

In [None]:
@time sparse(Rating(r.movieId, r.userId, r.rating) for r in f)

In [None]:
function sparse2(ratings)
    R::SparseMatrixCSC{Float64,Int64} = let items::Vector{Int64} = Int64[],
                                            users::Vector{Int64} = Int64[],
                                            values::Vector{Float64} = Float64[]
        
        for r in ratings
            push!(items, r.itemId)
            push!(users, r.userId)
            push!(values, r.value)
        end
        sparse(items, users, values)
    end
    R
end

In [None]:
@time sparse2(Rating(r.movieId, r.userId, r.rating) for r in f)

In [None]:
@time sparse2(Rating(r.movieId, r.userId, r.rating) for r in fbig)