# Bag of Words (BOW) Model

# Imports

In [1]:
using Knet: Param, @diff, grad, params
using Random: shuffle!
# using Pkg; Pkg.add("CUDA")
using CUDA: CUDA, CuArray, functional

# Data Pre-processing

In [2]:
# Read a labelled corpus from `file` (anything `eachline` accepts).
#
# Every line must look like `tag ||| sentence`. The line is lowercased and
# stripped, the tag is mapped through `t2i` and every whitespace-separated
# token of the sentence through `w2i`.
#
# Returns a `Vector{Tuple{Vector{Int},Int}}` of `(wordids, tagid)` pairs, in
# file order. Throws an `ArgumentError` naming the offending line when the
# ` ||| ` separator is missing.
function readdata(file)
    data = Tuple{Vector{Int},Int}[]
    for (lineno, line) in enumerate(eachline(file))
        # limit=2: only the first separator splits tag from text, so a sentence
        # that itself contains " ||| " is kept whole instead of being truncated.
        fields = split(strip(lowercase(line)), " ||| "; limit=2)
        length(fields) == 2 || throw(ArgumentError(
            "$(file):$(lineno): expected \"tag ||| sentence\", got $(repr(line))"))
        tag, sentence = fields
        wordids = Int[w2i(word) for word in split(sentence)]
        push!(data, (wordids, t2i(tag)))
    end
    return data
end

readdata (generic function with 1 method)

In [3]:
# Vocabularies: word -> id and tag -> id.
# Typed dictionaries keep keys as `String` (a `SubString` produced by `split`
# is converted on insertion, so it no longer pins its parent line) and ids as `Int`.
wdict = Dict{String,Int}()
tdict = Dict{String,Int}()

# Growing lookups: a key seen for the first time is assigned the next free id,
# i.e. one more than the current dictionary size, so ids start at 1.
w2i(x) = get!(wdict, x, 1 + length(wdict))
t2i(x) = get!(tdict, x, 1 + length(tdict))

# Register the unknown-word token first so that it receives id 1.
UNK = w2i("<unk>")

1

In [4]:
# Read the training split, then show its first (wordids, tagid) example.
trn = readdata("/Users/emrecanacikgoz/Desktop/Comp442/data/classes/train.txt")
trn |> first

([2, 3, 4, 5, 6, 7, 2, 8, 9, 10  …  25, 26, 27, 28, 29, 30, 31, 32, 33, 34], 1)

In [5]:
# The dev/test data must not extend the vocabularies, so both lookups are
# replaced by read-only versions before those files are read.

# Word lookup: a word missing from `wdict` maps to the UNK id.
function w2i(word)
    return get(wdict, word, UNK)
end

# Tag lookup: plain indexing, so a tag missing from `tdict` raises an error.
function t2i(tag)
    return tdict[tag]
end

t2i (generic function with 1 method)

In [6]:
# Read the development and test splits, then report the size of all three sets.
dev = readdata("/Users/emrecanacikgoz/Desktop/Comp442/data/classes/dev.txt")
tst = readdata("/Users/emrecanacikgoz/Desktop/Comp442/data/classes/test.txt")
map(length, (trn, dev, tst))

(8544, 1101, 2210)

# Check GPU

In [7]:
# `param(dims...)` builds a trainable `Param` of the given size, filled with
# Float32 normal noise scaled by 0.01. Which array type backs it is decided
# once, when this cell runs: `CuArray` if CUDA reports itself functional,
# otherwise a plain `Array`.
if CUDA.functional()
    function param(dims...)
        return Param(CuArray(randn(Float32, dims...) * 0.01f0))
    end
else
    function param(dims...)
        return Param(Array(randn(Float32, dims...) * 0.01f0))
    end
end

param (generic function with 1 method)

# Model

In [8]:
# The model size follows the vocabularies collected so far.
num_words, num_tags = length(wdict), length(tdict)

# W: one row per tag id and one column per word id; b: one bias per tag id.
W = param(num_tags, num_words)
b = param(num_tags)

5-element Param{Vector{Float32}}:
 -0.0015436991
 -0.010080987
  0.003832759
  0.012655715
  0.017913343

# Training

In [9]:
# Prediction function for the BOW model: select the columns of `W` listed in
# `words` (repeated ids are selected repeatedly), add them up along the second
# dimension and add the bias `b`. The result has one score per tag.
function pred(words)
    wordscores = sum(W[:, words]; dims=2)
    return b .+ wordscores
end

pred (generic function with 1 method)

In [10]:
# Loss function for the BOW model: the negative log-probability of `tag`
# under a softmax over the scores returned by `pred(words)`.
function loss(words, tag)
    scores = pred(words)
    # Shift by the largest score before exponentiating. The log-softmax value is
    # mathematically unchanged, but every exponent is now <= 0, so `exp` cannot
    # overflow to Inf (which would turn the loss into Inf/NaN) for large scores.
    shifted = scores .- maximum(scores)
    logprobs = shifted .- log(sum(exp.(shifted)))
    return -logprobs[tag]
end

loss (generic function with 1 method)

In [11]:
# Report accuracy during training: the fraction of `(words, tag)` examples in
# `data` for which the highest-scoring row of `pred(words)` equals `tag`.
# `count` (unlike `sum` over a generator) is defined for an empty collection,
# so empty `data` yields NaN (0/0) instead of a reduction error.
function accuracy(data)
    ncorrect = count(argmax(pred(words))[1] == tag for (words, tag) in data)
    return ncorrect / length(data)
end

accuracy (generic function with 1 method)

In [12]:
# Train the model with per-example gradient descent.
#
# Keywords:
#   nepochs - number of passes over the global `trn` data
#   lr      - step size applied to every gradient
#
# Each epoch shuffles `trn` in place, updates every parameter reported by
# `params` after each example, and prints the accuracy on `trn` and `dev`.
function train(; nepochs = 10, lr = 0.01)
    for epoch = 1:nepochs
        shuffle!(trn)
        for (words, tag) in trn
            J = @diff loss(words, tag)
            for w in params(J)
                g = grad(J, w)
                w .= w - lr * g    # gradient step, written back into the parameter
            end
        end
        trnacc = accuracy(trn)
        devacc = accuracy(dev)
        println((epoch = epoch, trn = trnacc, dev = devacc))
    end
end

train (generic function with 1 method)

In [13]:
# Run training with the defaults declared by `train` (nepochs = 10, lr = 0.01).
train()

(epoch = 1, trn = 0.43562734082397003, dev = 0.3369663941871026)
(epoch = 2, trn = 0.49929775280898875, dev = 0.3751135331516803)
(epoch = 3, trn = 0.5568820224719101, dev = 0.3760217983651226)
(epoch = 4, trn = 0.5775983146067416, dev = 0.3605812897366031)
(epoch = 5, trn = 0.6216058052434457, dev = 0.36693914623069934)
(epoch = 6, trn = 0.5786516853932584, dev = 0.38419618528610355)
(epoch = 7, trn = 0.6558988764044944, dev = 0.3860127157129882)
(epoch = 8, trn = 0.6891385767790262, dev = 0.3760217983651226)
(epoch = 9, trn = 0.7112593632958801, dev = 0.3760217983651226)
(epoch = 10, trn = 0.6876170411985019, dev = 0.3760217983651226)
