Skip to content

Commit

Permalink
add function to ETL quotes from various sources. remove duplicates, e…
Browse files Browse the repository at this point in the history
…liminate null, nil and "unknown" #1
  • Loading branch information
nelsonic committed Oct 12, 2019
1 parent 8758f01 commit c938ebc
Showing 1 changed file with 21 additions and 0 deletions.
21 changes: 21 additions & 0 deletions lib/clean.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
defmodule Clean do
  @moduledoc """
  Cleans a raw JSON collection of quotes.

  Exact duplicates and quotes without an author are removed, and the
  remaining quotes are sorted by author.
  """

  @default_input "quotes_raw.json"
  @default_output "quotes.json"

  @doc """
  Reads the quotes stored as JSON in `input_path`, cleans them with
  `clean_quotes/1` and writes the result as JSON to `output_path`.

  The number of quotes before and after cleaning is printed to standard
  output.

  Returns `:ok`. Raises `File.Error` if a file cannot be read or written;
  a failure to decode the input surfaces as the exception raised by
  `Jason.decode!/1`.
  """
  @spec remove_noise(Path.t(), Path.t()) :: :ok
  def remove_noise(input_path \\ @default_input, output_path \\ @default_output) do
    quotes =
      input_path
      |> File.read!()
      |> Jason.decode!()

    IO.inspect(Enum.count(quotes))

    cleaned = clean_quotes(quotes)

    IO.inspect(Enum.count(cleaned))
    File.write!(output_path, Jason.encode!(cleaned))
  end

  @doc """
  Removes duplicate quotes and quotes whose `"author"` is missing, `nil`
  or an empty string, then sorts the remainder by `"author"`.

  The sort is stable, so quotes by the same author keep their relative
  order.

  ## Examples

      iex> Clean.clean_quotes([%{"author" => "B"}, %{"author" => ""}, %{"author" => "A"}])
      [%{"author" => "A"}, %{"author" => "B"}]

  """
  @spec clean_quotes([map()]) :: [map()]
  def clean_quotes(quotes) do
    # NOTE(review): authors equal to "unknown" are kept as-is; confirm
    # whether they should be dropped as well.
    quotes
    |> Enum.uniq()
    |> Enum.filter(&author?/1)
    |> Enum.sort_by(fn q -> q["author"] end)
  end

  # True when the quote carries a non-empty author. A missing "author" key
  # reads as nil through the Access syntax and is therefore rejected too.
  defp author?(q), do: q["author"] not in ["", nil]
end

0 comments on commit c938ebc

Please sign in to comment.