# Detect Sentyment

Nural Network for evaluating sentyment of movie reviews.

Activate the environment.

In [1]:
using Pkg
Pkg.activate(".")

[32m[1m  Activating[22m[39m project at `~/LocalDocuments/Studia/myAutomaticDifferentiation/SimpleNuralNetwork`


Load supporting code.

In [2]:
include("./src/ComputationalGraph.jl")
include("./src/Printing.jl")
include("./src/GraphBuilding.jl")
include("./src/ForwardPass.jl")
include("./src/BackwardPass.jl")
include("./src/ScalarOperations.jl")
include("./src/BroadcastOperations.jl")
include("./src/other_rules.jl")

backward (generic function with 13 methods)

# Testing

In [3]:
x = Variable(5.0, name="x")
two = Constant(2.0)
squared = x^two
sine = sin(squared)

order = topological_sort(sine)

4-element Vector{Any}:
 var x
 ┣━ ^ Float64
 ┗━ ∇ Nothing
 const 2.0
 op ?(typeof(^))
 op ?(typeof(sin))

In [4]:
y = forward!(order)

-0.13235175009777303

In [5]:
backward!(order)

In [6]:
x.gradient

9.912028118634735

# Nural Network

In [7]:
function dense(w, b, x, activation) return activation(w * x .+ b) end
function dense(w, x, activation) return activation(w * x) end
function dense(w, x) return w * x end

dense (generic function with 3 methods)

In [9]:
using JLD2
X_train = load("./data/imdb_dataset_prepared.jld2", "X_train")
y_train = load("./data/imdb_dataset_prepared.jld2", "y_train")
X_test = load("./data/imdb_dataset_prepared.jld2", "X_test")
y_test = load("./data/imdb_dataset_prepared.jld2", "y_test")
nothing

In [11]:
input_neurons = size(X_train, 1)
hidden_neurons = 32
output_neurons = 1

1

In [31]:
using LinearAlgebra

In [16]:
wh = Variable(randn(hidden_neurons, input_neurons), name="wh")
wo = Variable(randn(output_neurons, hidden_neurons), name="wo")
bh = Variable(zeros(hidden_neurons), name="bh")
bo = Variable(zeros(output_neurons), name="bo")
x = Variable(zeros(input_neurons), name="x")
y = Variable(zeros(output_neurons), name="y")

epochs = 5
nothing

For now instead of using Adam optimizer I will use a simple gradient descent optimizer.

In [17]:
η = 0.001

0.001

In [15]:
binary_cross_entropy_loss(y, ŷ) = sum(Constant(-1.0) .* (y .* log.(ŷ) .+ (Constant(1.0) .- y) .* log.(Constant(1.0) .- ŷ)))
ReLU(x) = max.(Constant(0.0), x)

ReLU (generic function with 1 method)

In [18]:
function net(x, wh, bh, wo, bo)
  x̂ = dense(wh, bh, x, ReLU)
  x̂.name = "x̂"
  ŷ = dense(wo, bo, x̂, σ)
  ŷ.name = "ŷ"
  return ŷ
end

net (generic function with 1 method)

In [22]:
function loss(x, y, wh, bh, wo, bo)
  ŷ = net(x, wh, bh, wo, bo)
  E = binary_cross_entropy_loss(y, ŷ); E.name = "loss"
  return E
end

loss (generic function with 1 method)

In [26]:
graph = topological_sort(loss(x, y, wh, bh, wo, bo))
nothing

In [28]:
using Flux, Printf, Statistics
dataset = Flux.DataLoader((X_train, y_train), batchsize=64, shuffle=true)

125-element DataLoader(::Tuple{LinearAlgebra.Adjoint{Float32, Matrix{Float32}}, BitMatrix}, shuffle=true, batchsize=64)
  with first element:
  (17703×64 Matrix{Float32}, 1×64 BitMatrix,)

In [None]:
function test(X, Y, indices)
  L = 0.0
  for j in indices
    x.output .= X[:, j]
    y.output .= [Y[j]]
    L += forward!(graph)
  end
  return L / length(indices)
end

function loss_fn(m_func, x_val, y_val)
    x.output .= x_val
    y.output .= y_val
    return forward!(graph)
end

function accuracy(x_vals, y_vals)
    correct = 0
    total = size(x_vals, 2)

    for i in 1:total
        x.output .= x_vals[:, i]
        forward!(graph)

        prediction = graph[end].output[1] > 0.5
        actual = y_vals[i] > 0.5
        if prediction == actual
            correct += 1
        end
    end

    return correct / total
end

function train_batch(X_batch, y_batch)
    batch_size = size(X_batch, 2)
    batch_loss = 0.0

    ∇wh = zeros(hidden_neurons, input_neurons)
    ∇wo = zeros(output_neurons, hidden_neurons)
    ∇bh = zeros(hidden_neurons)
    ∇bo = zeros(output_neurons)

    for j = 1:batch_size
        current_loss = loss_fn(nothing, X_batch[:, j], [y_batch[j]])
        batch_loss += current_loss

        backward!(graph)

        ∇wh .+= wh.gradient
        ∇wo .+= wo.gradient
        ∇bh .+= bh.gradient
        ∇bo .+= bo.gradient
    end

    wh.output .-= η / batch_size * ∇wh
    bh.output .-= η / batch_size * ∇bh
    wo.output .-= η / batch_size * ∇wo
    bo.output .-= η / batch_size * ∇bo

    return batch_loss / batch_size
end

for epoch in 1:epochs
    total_loss = 0.0
    total_acc = 0.0
    num_batches = 0

    t = @elapsed begin
        for (x_batch, y_batch) in dataset
            batch_loss = train_batch(x_batch, y_batch)
            batch_acc = accuracy(x_batch, y_batch)

            total_loss += batch_loss
            total_acc += batch_acc
            num_batches += 1
        end

        train_loss = total_loss / num_batches
        train_acc = total_acc / num_batches

        test_acc = accuracy(X_test, y_test)

        test_loss = 0.0
        for i in test_indices
            test_loss += loss_fn(nothing, X_test[:, i], [y_test[i]])
        end
        test_loss /= length(test_indices)
    end

    @printf("Epoch: %d (%.2fs) \tTrain: (l: %.4f, a: %.4f) \tTest: (l: %.4f, a: %.4f)\n", 
        epoch, t, train_loss, train_acc, test_loss, test_acc)
end

Epoch: 1 (29.38s) 	Train: (l: 1.1338, a: 0.4993) 	Test: (l: 1.0614, a: 0.5020)
Epoch: 2 (30.90s) 	Train: (l: 1.1277, a: 0.5028) 	Test: (l: 1.0556, a: 0.5025)
Epoch: 3 (30.17s) 	Train: (l: 1.1218, a: 0.5016) 	Test: (l: 1.0499, a: 0.5050)
Epoch: 4 (30.48s) 	Train: (l: 1.1161, a: 0.5054) 	Test: (l: 1.0442, a: 0.5020)
Epoch: 5 (31.66s) 	Train: (l: 1.1105, a: 0.5101) 	Test: (l: 1.0386, a: 0.5030)
Epoch: 3 (30.17s) 	Train: (l: 1.1218, a: 0.5016) 	Test: (l: 1.0499, a: 0.5050)
Epoch: 4 (30.48s) 	Train: (l: 1.1161, a: 0.5054) 	Test: (l: 1.0442, a: 0.5020)
Epoch: 5 (31.66s) 	Train: (l: 1.1105, a: 0.5101) 	Test: (l: 1.0386, a: 0.5030)
