Skip to content

Commit

Permalink
Merge pull request #2 from oxinabox/master
Browse files Browse the repository at this point in the history
Add Sensekeys
  • Loading branch information
jbn committed Aug 31, 2016
2 parents 3f75a37 + f1ead46 commit 2943968
Show file tree
Hide file tree
Showing 12 changed files with 74 additions and 9 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
Expand Up @@ -4,8 +4,8 @@ os:
- linux
- osx
julia:
- 0.3
- 0.4
- release
- nightly
notifications:
email: false
Expand All @@ -16,4 +16,4 @@ script:
- julia -e 'Pkg.test("WordNet"; coverage=true)'
after_success:
- julia -e 'cd(Pkg.dir("WordNet")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'


18 changes: 18 additions & 0 deletions README.md
Expand Up @@ -52,6 +52,24 @@ expanded_hypernyms(db, synsets(db, db['n', "DOG"])[1])
> (n) physical entity (an entity that has physical existence)
> (n) entity (that which is perceived or known or inferred to have its own distinct existence (living or nonliving))
```julia
sensekey(db,synsets(db,db['n',"cat"])[1], db['n',"cat"])
```
> "cat%1:05:00::"
```julia
sensekeys(db,db['n',"cat"])
```
>8-element Array{SubString{String},1}:
>"cat%1:05:00::"
>"cat%1:18:01::"
>"cat%1:18:00::"
>"cat%1:06:02::"
>"cat%1:06:00::"
>"cat%1:06:01::"
>"cat%1:05:02::"
>"cat%1:04:00::"

## Design consideration

Expand Down
2 changes: 1 addition & 1 deletion REQUIRE
@@ -1,3 +1,3 @@
julia 0.3
julia 0.4
Compat
FactCheck
6 changes: 4 additions & 2 deletions appveyor.yml
@@ -1,7 +1,9 @@
environment:
matrix:
- JULIAVERSION: "julialang/bin/winnt/x86/0.3/julia-0.3-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.3/julia-0.3-latest-win64.exe"
- JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe"
- JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
- JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe"

Expand Down
3 changes: 2 additions & 1 deletion src/WordNet.jl
Expand Up @@ -8,5 +8,6 @@ include("constants.jl")
include("synset.jl")
include("db.jl")
include("operations.jl")
include("sensekeys.jl")

end
end
25 changes: 23 additions & 2 deletions src/db.jl
Expand Up @@ -3,15 +3,18 @@ export DB
# In-memory WordNet database: the lookup tables loaded from a WordNet directory.
immutable DB
# Part-of-speech character => (word => Lemma). `getindex` lowercases the word
# before looking it up here.
lemmas::Dict{Char, Dict{AbstractString, Lemma}}
# Presumably part-of-speech character => (synset offset => Synset);
# TODO confirm against `load_synsets`.
synsets::Dict{Char, Dict{Int, Synset}}
# (synset offset, lemma name) => full sense key, as built by `load_sensekeys`.
sensekeys::Dict{Tuple{Int,AbstractString}, AbstractString}
end

"""
    DB(base_dir)

Build a `DB` from the WordNet files found under `base_dir` by loading the
lemma, synset and sense-key tables.
"""
function DB(base_dir::AbstractString)
    # All three tables are required: the type has three fields, so the former
    # two-argument call no longer matches any constructor.
    return DB(load_lemmas(base_dir),
              load_synsets(base_dir),
              load_sensekeys(base_dir))
end

# Compact display: print a fixed tag rather than the contents of the tables.
function Base.show(io::IO, db::DB)
    print(io, "WordNet.DB")
end

# Lemma lookup by part of speech and word, e.g. `db['n', "DOG"]`.
# The word is lowercased before it is used as a key, so the caller's
# capitalisation does not matter.
function Base.getindex(db::DB, pos::Char, word::AbstractString)
    return db.lemmas[pos][lowercase(word)]
end

Expand Down Expand Up @@ -57,6 +60,24 @@ function load_synsets(base_dir)
synsets
end


"""
    load_sensekeys(basedir)

Read `dict/index.sense` under `basedir` and return a dictionary that maps
`(synset_offset, lemma_name)` to the full sense key.

Every non-blank line must hold at least four whitespace-separated fields:
sense key, synset offset, sense number and tag count. Only the first two are
used. The lemma name is the part of the sense key before the first `%`.
Blank lines are skipped. An error is raised for a malformed line or when two
lines resolve to the same `(offset, lemma)` pair.
"""
function load_sensekeys(basedir)
    path = joinpath(basedir, "dict", "index.sense")
    # `Int` rather than `Int64`, so the table has exactly the type of the
    # `DB.sensekeys` field on 32-bit builds as well.
    sensekeys = Dict{Tuple{Int,AbstractString},AbstractString}()

    for line in eachline(path)
        fields = split(line)
        isempty(fields) && continue  # tolerate blank lines
        length(fields) < 4 && error("malformed line in $path: $line")

        full_key = fields[1]
        lemma_name = first(split(full_key, '%'))
        index = (parse(Int, fields[2]), lemma_name)

        # Data validation must not depend on `@assert`, which is meant for
        # internal invariants only.
        haskey(sensekeys, index) && error("duplicate sense key entry for $index in $path")
        sensekeys[index] = full_key
    end

    return sensekeys
end


"""
    path_to_data_file(base_dir, pos)

Return the path of the synset data file for part of speech `pos`, i.e.
`<base_dir>/dict/data.<suffix>` where the suffix is `SYNSET_TYPES[pos]`.
"""
function path_to_data_file(base_dir, pos)
    filename = string("data.", SYNSET_TYPES[pos])
    return joinpath(base_dir, "dict", filename)
end
Expand Down
9 changes: 9 additions & 0 deletions src/sensekeys.jl
@@ -0,0 +1,9 @@
export sensekey, sensekeys

"""
    sensekey(db, ss, lem)

Return the sense key stored in `db` for lemma `lem` in synset `ss`.
The lookup key is `(ss.offset, lem.word)`; a missing entry raises the
dictionary's usual `KeyError`.
"""
function sensekey(db::DB, ss::Synset, lem::Lemma)
    key = (ss.offset, lem.word)
    return db.sensekeys[key]
end

"""
    sensekeys(db, lem)

Return the sense keys of lemma `lem`, one per entry of `lem.synset_offsets`
and in the same order.
"""
function sensekeys(db::DB, lem::Lemma)
    word = lem.word
    table = db.sensekeys
    return [table[(offset, word)] for offset in lem.synset_offsets]
end
1 change: 1 addition & 0 deletions test/mock_db/dict/index.noun
Expand Up @@ -28,3 +28,4 @@
28 any associated documentation shall at all times remain with
29 Princeton University and LICENSEE agrees to preserve same.
'hood n 1 2 @ ; 1 0 08641944
section n 1 2 @ ; 1 0 08648322
1 change: 1 addition & 0 deletions test/mock_db/dict/index.sense
@@ -0,0 +1 @@
section%1:15:01:: 08648322 3 11
1 change: 1 addition & 0 deletions test/runtests.jl
Expand Up @@ -13,3 +13,4 @@ include("test_db.jl")
include("test_lemma.jl")
include("test_synset.jl")
include("test_operations.jl")
include("test_sensekeys.jl")
3 changes: 2 additions & 1 deletion test/test_db.jl
@@ -1,7 +1,8 @@
facts("DB") do
const mock_db = DB(
Dict{Char, Dict{AbstractString, Lemma}}(),
Dict{Char, Dict{Int, Synset}}()
Dict{Char, Dict{Int, Synset}}(),
Dict{Tuple{Int,AbstractString}, AbstractString}()
)

context("path_to_data_file") do
Expand Down
10 changes: 10 additions & 0 deletions test/test_sensekeys.jl
@@ -0,0 +1,10 @@
# Checks sense-key lookup against the single-entry mock database on disk.
facts("sensekeys") do
    const mock_db = DB(joinpath(dirname(@__FILE__), "mock_db"))

    # `getindex` is defined as (db, pos::Char, word): part of speech first.
    lem = mock_db['n', "section"]
    ss = synsets(mock_db, lem)
    @fact sensekeys(mock_db, lem) --> ["section%1:15:01::"]
    @fact sensekey(mock_db, ss[1], lem) --> "section%1:15:01::"
end

0 comments on commit 2943968

Please sign in to comment.