Skip to content

Commit

Permalink
Merge pull request #29 from oltarasenko/24-improve-tests
Browse files Browse the repository at this point in the history
standardized existing pipeline testing
  • Loading branch information
oltarasenko committed Dec 6, 2019
2 parents c79d714 + e40c02f commit e04dbe6
Show file tree
Hide file tree
Showing 7 changed files with 162 additions and 142 deletions.
File renamed without changes.
126 changes: 2 additions & 124 deletions test/data_storage_worker_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ defmodule DataStorageWorkerTest do

on_exit(fn ->
:meck.unload()

:ok =
DynamicSupervisor.terminate_child(Crawly.DataStorage.WorkersSup, pid)
end)
Expand All @@ -25,7 +26,7 @@ defmodule DataStorageWorkerTest do
{:stored_items, 1} = Crawly.DataStorage.stats(context.crawler)
end

test "Duplicates are not stored", context do
test "Dropped item are not stored", context do
Crawly.DataStorage.store(context.crawler, %{
title: "test title",
author: "me",
Expand All @@ -43,58 +44,6 @@ defmodule DataStorageWorkerTest do
{:stored_items, 1} = Crawly.DataStorage.stats(context.crawler)
end

test "Items without all required fields are dropped", context do
Crawly.DataStorage.store(context.crawler, %{
author: "me",
time: "Now",
url: "http://example.com"
})

{:stored_items, 0} = Crawly.DataStorage.stats(context.crawler)
end

test "Items without all required fields are dropped nils", context do
Crawly.DataStorage.store(context.crawler, %{
title: "title",
author: nil,
time: "Now",
url: "http://example.com"
})

{:stored_items, 0} = Crawly.DataStorage.stats(context.crawler)
end

test "Items are stored in JSON after json_encoder pipeline", context do
test_pid = self()
:meck.new(EchoPipeline, [:non_strict])
:meck.expect(
EchoPipeline,
:run,
fn item, state ->
send(test_pid, item)
{item, state}
end)

Application.put_env(:crawly, :pipelines, [
Crawly.Pipelines.Validate,
Crawly.Pipelines.DuplicatesFilter,
Crawly.Pipelines.JSONEncoder,
EchoPipeline
])

item = %{
title: "test_title",
author: "me",
time: "Now",
url: "http://example.com"
}

:ok = Crawly.DataStorage.store(context.crawler, item)
_expected_item = Poison.encode!(item)

assert_receive("{\"url\":\"http://example.com\",\"title\":\"test_title\",\"time\":\"Now\",\"author\":\"me\"}")
end

test "Starting child worker twice", context do
result = Crawly.DataStorage.start_worker(context.crawler)
assert result == {:error, :already_started}
Expand All @@ -104,75 +53,4 @@ defmodule DataStorageWorkerTest do
result = Crawly.DataStorage.stats(:unkown)
assert result == {:error, :data_storage_worker_not_running}
end

test "Duplicates pipline is inactive when item_id is not set", context do
:meck.expect(Application, :get_env, fn :crawly, :item_id -> :undefined end)

Crawly.DataStorage.store(context.crawler, %{
title: "test title",
author: "me",
time: "Now",
url: "http://example.com"
})

Crawly.DataStorage.store(context.crawler, %{
title: "test title",
author: "me",
time: "Now",
url: "http://example.com"
})

Process.sleep(1000)
{:stored_items, 2} = Crawly.DataStorage.stats(context.crawler)
:meck.unload(Application)
end

describe "CSV encoder test" do
setup do
name = :test_crawler_csv
{:ok, pid} = Crawly.DataStorage.start_worker(name)

on_exit(fn ->
Application.put_env(:crawly, :pipelines, [
Crawly.Pipelines.Validate,
Crawly.Pipelines.DuplicatesFilter,
Crawly.Pipelines.JSONEncoder
])

:ok =
DynamicSupervisor.terminate_child(Crawly.DataStorage.WorkersSup, pid)
end)

{:ok, %{crawler: name}}
end

test "Items are stored in CSV after csv pipeline", context do
test_pid = self()
:meck.new(EchoPipeline, [:non_strict])
:meck.expect(
EchoPipeline,
:run,
fn item, state ->
send(test_pid, item)
{item, state}
end)

Application.put_env(:crawly, :pipelines, [
Crawly.Pipelines.Validate,
Crawly.Pipelines.DuplicatesFilter,
Crawly.Pipelines.CSVEncoder,
EchoPipeline
])

item = %{
title: "test_title",
author: "me",
time: "Now",
url: "url"
}

:ok = Crawly.DataStorage.store(context.crawler, item)
assert_receive("\"test_title\",\"me\",\"Now\",\"url\"")
end
end
end
23 changes: 23 additions & 0 deletions test/pipelines/csv_encoder_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
defmodule Pipelines.CSVEncoderTest do
  use ExUnit.Case, async: false

  # A flat (single-level) item matching the :item fields set in the test.
  @valid %{first: "some", second: "data"}

  setup do
    on_exit(fn ->
      # Restore the application-wide default field list so other test
      # modules see the configuration they expect.
      Application.put_env(:crawly, :item, [:title, :author, :time, :url])
    end)
  end

  test "Converts a single-level map to a csv string" do
    Application.put_env(:crawly, :item, [:first, :second])

    {encoded, _new_state} =
      Crawly.Utils.pipe([Crawly.Pipelines.CSVEncoder], @valid, %{})

    # The pipeline should replace the map with a quoted, comma-separated line.
    assert is_binary(encoded)
    assert encoded == ~S("some","data")
  end
end
45 changes: 45 additions & 0 deletions test/pipelines/duplicates_filter_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
defmodule Pipelines.DuplicatesFilterTest do
  use ExUnit.Case, async: false

  # Item with a nested payload plus an :id field used for deduplication.
  @valid %{data: [%{some: "nested_data"}], id: "my_id"}

  setup do
    on_exit(fn ->
      # Restore the default :item_id so other test modules are unaffected.
      Application.put_env(:crawly, :item_id, :title)
    end)
  end

  test "Drops duplicate items with the same item_id value" do
    Application.put_env(:crawly, :item_id, :id)
    pipelines = [Crawly.Pipelines.DuplicatesFilter]
    item = @valid
    state = %{}

    {item, state} = Crawly.Utils.pipe(pipelines, item, state)

    # filter state is updated
    assert %{"my_id" => true} = state.duplicates_filter
    # unchanged
    assert item == @valid

    # Running again with the same item and the updated state drops the item.
    # Bind the trailing state as _state: it is not inspected further (the
    # original binding produced an "unused variable" compiler warning).
    assert {false, _state} = Crawly.Utils.pipe(pipelines, item, state)
  end

  test "Inactive when item_id is not set" do
    # NOTE(review): relies on :item_id being absent/defaulted at this point;
    # the filter is expected to pass items through untouched in that case.
    pipelines = [Crawly.Pipelines.DuplicatesFilter]
    item = @valid
    state = %{}

    {item, state} = Crawly.Utils.pipe(pipelines, item, state)

    # filter state is not updated
    assert Map.has_key?(state, :duplicates_filter) == false

    # run with same item and updated state should not drop the item
    assert {%{} = item, state} = Crawly.Utils.pipe(pipelines, item, state)
    assert Map.has_key?(state, :duplicates_filter) == false

    # unchanged
    assert item == @valid
  end
end
18 changes: 18 additions & 0 deletions test/pipelines/json_encoder_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
defmodule Pipelines.JSONEncoderTest do
  use ExUnit.Case, async: false

  # Map with a nested list-of-maps payload to exercise deep encoding.
  @valid %{data: [%{some: "nested_data"}]}

  test "Converts a given map to a json string" do
    {encoded, _final_state} =
      Crawly.Utils.pipe([Crawly.Pipelines.JSONEncoder], @valid, %{})

    assert is_binary(encoded)

    # The JSON output must contain both the keys and the nested value.
    nested_value = @valid.data |> hd() |> Map.get(:some)
    assert encoded =~ nested_value
    assert encoded =~ "data"
    assert encoded =~ "some"
  end
end
49 changes: 49 additions & 0 deletions test/pipelines/validate_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
defmodule Pipelines.ValidateTest do
  use ExUnit.Case, async: false

  # Item carrying both required fields.
  @valid %{
    title: "title",
    author: "data"
  }
  # Item missing the :author field entirely.
  @invalid_missing %{
    title: "title"
  }
  # Item where a required field is present but nil.
  @invalid_nil %{
    title: "title",
    author: nil
  }

  setup do
    # Every test in this module validates against the same two required
    # fields, so configure them once here instead of repeating the
    # Application.put_env call in each test body.
    Application.put_env(:crawly, :item, [:title, :author])

    on_exit(fn ->
      # Restore the default required-field list for other test modules.
      Application.put_env(:crawly, :item, [:title, :author, :time, :url])
    end)
  end

  test "Returns item unchanged when has required fields" do
    pipelines = [Crawly.Pipelines.Validate]

    {item, _state} = Crawly.Utils.pipe(pipelines, @valid, %{})
    assert item == @valid
  end

  test "Drops items when missing required fields" do
    pipelines = [Crawly.Pipelines.Validate]

    # The pipeline signals a dropped item by returning false in place of it.
    assert {false, _state} = Crawly.Utils.pipe(pipelines, @invalid_missing, %{})
  end

  test "Drops items when required fields are equal to nil" do
    pipelines = [Crawly.Pipelines.Validate]

    assert {false, _state} = Crawly.Utils.pipe(pipelines, @invalid_nil, %{})
  end
end
43 changes: 25 additions & 18 deletions test/pipelines/write_to_file_test.exs
Original file line number Diff line number Diff line change
@@ -1,11 +1,30 @@
defmodule Pipelines.WriteToFileTest do
use ExUnit.Case, async: false

# creates a string with a unique timestamp
@binary "some binary to write to the csv #{:os.system_time(:seconds)}"
@binary "Some binary"

test "WriteToFile writes a given item to a file", _context do
Application.put_env(:crawly, Crawly.Pipelines.WriteToFile,
setup do
on_exit(
fn ->
Application.put_env(:crawly, :'Crawly.Pipelines.WriteToFile', nil)
end
)
end

test "Writes a given item to a file", _context do
test_pid = self()
:meck.expect(
IO,
:write,
fn (_, item) ->
send(test_pid, item)
:ok
end
)

Application.put_env(
:crawly,
Crawly.Pipelines.WriteToFile,
folder: "/tmp",
extension: "csv"
)
Expand All @@ -19,21 +38,9 @@ defmodule Pipelines.WriteToFileTest do
state = %{spider_name: MySpider}

# run the pipeline
{item, %{write_to_file_fd: fd} = state} =
_result =
Crawly.Utils.pipe(pipelines, item, state)

# write changes to the file
File.close(fd)

# returns the same item
assert item == @binary

# file descriptor is set in state
assert state.write_to_file_fd

# assert changes to the file
tmp_dir = System.tmp_dir!()
output_file_path = Path.join(tmp_dir, "MySpider.csv")
assert File.read!(output_file_path) =~ @binary
assert_receive @binary
end
end

0 comments on commit e04dbe6

Please sign in to comment.