# DocumentSet Class using mutable struct

In [11]:
function readFromFile(path::String)
    open(path) do file
        for line in eachline(file)
                addDocument(line)
        end
    end
end

function addDocument(line)
    if isempty(line)
        return nothing
    end
    words = split(line)
    println(words)
    codes = []
    for i in words
        if haskey(ds.vocabulary, i)
            push!(codes, ds.vocabulary[i])
        else 
            ds.vocab_count+=1
            ds.vocabulary[i] = ds.vocab_count
            push!(codes, ds.vocab_count)
        end
    end
    push!(ds.documents, codes)
    ds.document_size+=1
end
  
function getTermFreq()
    tf = [0 for i=1:ds.vocab_count]
    for doc in ds.documents
        for w in doc
            tf[w]+=1 
        end
    end
    return tf
end
    
mutable struct DocumentSet
    documents::Array
    document_size::Int
    vocab_count::Int
    vocabulary::Dict{}            
end

DocumentSet() = DocumentSet([],0, 0, Dict())
global ds = DocumentSet()

DocumentSet(Any[], 0, 0, Dict{Any,Any}())

In [12]:
readFromFile("dummy.txt")

SubString{String}["This", "is", "dummy", "line", "number", "1"]
SubString{String}["This", "is", "dummy", "line", "numebr", "2"]
SubString{String}["This", "is", "dummy", "line", "numebr", "3"]
SubString{String}["4th", "dummy", "line"]
SubString{String}["5th", "dummy", "line"]


In [13]:
ds.vocab_count

11

In [16]:
println(getTermFreq())

[3, 3, 5, 5, 1, 1, 2, 1, 1, 1, 1]


In [15]:
ds.documents

5-element Array{Any,1}:
 Any[1, 2, 3, 4, 5, 6]
 Any[1, 2, 3, 4, 7, 8]
 Any[1, 2, 3, 4, 7, 9]
 Any[10, 3, 4]
 Any[11, 3, 4]

In [13]:
a = Dict("a"=>1, "b"=>2, "c"=>3)

Dict{String,Int64} with 3 entries:
  "c" => 3
  "b" => 2
  "a" => 1

In [23]:
collect(a)

3-element Array{Pair{String,Int64},1}:
 "c" => 3
 "b" => 2
 "a" => 1

In [25]:
size(collect(a))[1]

3

In [None]:
haskey(mydict, "One")   