## 路径还原与问题提交

`include("../src/submit.jl")` 导入工具

调用 `submit(predict)` 将预测数据转化为提交格式

In [2]:
using DataStructures

In [3]:
include("../src/translatedata.jl")
# include("../src/deduction.jl")
include("../src/submit.jl")
mkpath("submit")

"submit"

In [None]:
# 读取预测结果
submit_id = 13
sols = split(strip(read(open("predict_data.txt", "r"), String)), '\n')
temp = submit(sols[1])
fails = String[]
open("submit/submit_$(submit_id).txt", "w") do io
    println(io, "id\tans_path")
    for (i, predict) in enumerate(sols)
        line = temp
        try
            line = submit(predict)
        catch
            # 没有合适解的情况
            push!(fails, predict)
        end
        println(io, i-1, '\t', line)
    end
end

## 调试内容

### 三元组 => 提交形式

#### 函数

In [2]:
# 三元组还原
en_standard = Dict((=>).(en_triples_lower, raw_en_triples))
zh_standard = Dict((=>).(zh_triples_lower, raw_zh_triples))

Dict{Tuple{String, String, String}, Tuple{String, String, String}} with 104903 entries:
  ("amhara", "settlementtype",… => ("阿姆哈拉州", "settlementType", "埃塞俄比亚…
  ("dai_cunyi", "parents", "hu… => ("戴存义", "parents", "戴德生")
  ("dunn_center,_north_dakota"… => ("邓恩森特_(北达科他州)", "subdivisionType",…
  ("giorgio_moroder", "genre",… => ("乔吉奥·莫罗德尔", "genre", "Dance_(music)")
  ("south_china_football_team"… => ("南華足球隊", "league", "香港超級聯賽")
  ("zulfikar_ali_bhutto", "cou… => ("佐勒菲卡尔·阿里·布托", "country", "巴基斯坦…
  ("ariarathes_ii_of_cappadoci… => ("阿里阿拉特二世", "after", "阿里阿拉內斯二世…
  ("catholic_archdiocese_of_sa… => ("天主教聖路易斯波托西總教區", "territory", …
  ("paul_easton", "monarch", "… => ("保羅·伊斯臣", "monarch", "威廉三世_(尼德蘭…
  ("timothy_harris", "office",… => ("提摩西·哈里斯", "office", "圣基茨和尼维斯总…
  ("edward_viii", "successor",… => ("爱德华八世", "successor", "乔治六世")
  ("zhong_wuyan_(movie)", "sta… => ("鍾無艷_(電影)", "starring", "梅艳芳")
  ("bard_college", "logo", "fi… => ("巴德学院", "logo", "File:Bard_logo.png")
  ("pegatron_united_t

In [3]:
"三元组 => 提交形式"
function triple2link(triple, lang)
    sub, rel, obj = triple
    if lang == 'e'
        "<http://dbpedia.org/resource/$sub>#" * 
        "<http://dbpedia.org/property/$rel>#" *
        "<http://dbpedia.org/resource/$obj>"
    elseif lang == 'z'
        "<http://zh.dbpedia.org/resource/$sub>#" * 
        "<http://zh.dbpedia.org/property/$rel>#" *
        "<http://zh.dbpedia.org/resource/$obj>"
    end
end

triple2link

#### 测试

In [4]:
triple = rand(triples)
println("检索图谱：\n", triple)
zh = haskey(zh_standard, triple) ? zh_standard[triple] : ""
en = haskey(en_standard, triple) ? en_standard[triple] : ""
isempty(zh) || println("中文图谱匹配结果：\n", zh)
isempty(en) || println("英文图谱匹配结果：\n", en)
print("提交链接-")
isempty(zh) || println("中文图谱：\n", triple2link(triple, 'z'))
isempty(en) || println("英文图谱：\n", triple2link(triple, 'e'))

检索图谱：
("leopold_iii_of_belgium", "title", "duke_of_brabant")
英文图谱匹配结果：
("Leopold_III_of_Belgium", "title", "Duke_of_Brabant")
提交链接-英文图谱：
<http://dbpedia.org/resource/leopold_iii_of_belgium>#<http://dbpedia.org/property/title>#<http://dbpedia.org/resource/duke_of_brabant>


### 寻找路径（不考虑路径推理）

#### 函数

In [7]:
# 初始化边集
edges = DefaultDict{String, Vector{Tuple}}(Vector{Tuple})
for (sub, rel, obj) in triples
    push!(edges[sub], (rel, obj))
end

In [8]:
"寻找从 ner 出发以 rels 为关系的子图（不做路径推理）"
function find_paths(ner, rels)
    paths = [[(ner, "", ner)]] # 初始路径
    for rel in rels, _ in eachindex(paths)
        path = popfirst!(paths) 
        ner = path[end][end] # 取路径终点
        for (newrel, obj) in edges[ner]
            newrel == rel && push!(paths, vcat(path, (ner, newrel, obj)))
        end
    end
    popfirst!.(paths) # 去除路径的初始节点（平凡）
    paths
end

find_paths

#### 测试

In [9]:
sol = rand(train_ques_sols)
que, ner, rels = sol[1], sol[2], sol[3:end]
println("头结点和关系：")
println(ner, '\t', join(rels, '\t'))
paths = find_paths(ner, rels)
for (i, path) in enumerate(paths)
    println("\n第 $i 条路径")
    for triple in path
        println(triple)
    end
end

头结点和关系：
telegonos	parents	parents

第 1 条路径
("telegonos", "parents", "odysseus")
("odysseus", "parents", "laertes")

第 2 条路径
("telegonos", "parents", "odysseus")
("odysseus", "parents", "anticlea")


### 提取跨语言路径

#### 函数

In [1]:
"翻译的三元组路径 => 原始三元组路径 + 三元组语言信息"
function valid_path(path)
    signs = [""]
    # BFS 搜索可能的符号信息
    for triple in path, _ in eachindex(signs)
        s = popfirst!(signs) # 当前符号路径
        haskey(zh_standard, triple) && push!(signs, s * "z")
        haskey(en_standard, triple) && push!(signs, s * "e")
    end
    [[sign, [s == 'z' ? zh_standard[t] : en_standard[t] 
            for (s,t) in zip(sign, path)]] for sign in signs if length(unique(sign)) > 1]
end

valid_path

In [11]:
"三元组路径 => 提交形式"
submit_format(triples, langs) = join([triple2link(triple, lang)
        for (triple, lang) in zip(triples, langs)], '#')

submit_format

#### 测试

In [12]:
sol = rand(train_ques_sols)
que, ner, rels = sol[1], sol[2], sol[3:end]
println("问题，命名实体，关系：\n", que)
println(ner, '\t', join(rels, '\t'))

paths = find_paths(ner, rels)
for path in paths, (sign, tris) in valid_path(path)
    println("\n可行路径:")
    println(submit_format(tris, sign))
end

问题，命名实体，关系：
what is the title of the person whose name is used to name the NER?
nansen_(lunar_crater)	eponym	title

可行路径:
<http://dbpedia.org/resource/Nansen_(lunar_crater)>#<http://dbpedia.org/property/eponym>#<http://dbpedia.org/resource/Fridtjof_Nansen>#<http://zh.dbpedia.org/resource/弗里乔夫·南森>#<http://zh.dbpedia.org/property/title>#<http://zh.dbpedia.org/resource/Rector_of_the_University_of_St_Andrews>

可行路径:
<http://dbpedia.org/resource/Nansen_(lunar_crater)>#<http://dbpedia.org/property/eponym>#<http://dbpedia.org/resource/Fridtjof_Nansen>#<http://dbpedia.org/resource/Fridtjof_Nansen>#<http://dbpedia.org/property/title>#<http://dbpedia.org/resource/Rector_of_the_University_of_St_Andrews>

可行路径:
<http://dbpedia.org/resource/Nansen_(lunar_crater)>#<http://dbpedia.org/property/eponym>#<http://dbpedia.org/resource/Fridtjof_Nansen>#<http://zh.dbpedia.org/resource/弗里乔夫·南森>#<http://zh.dbpedia.org/property/title>#<http://zh.dbpedia.org/resource/List_of_diplomats_from_Norway_to_the_United_

### 问题提交

#### 函数

In [26]:
valid_submit(sol::AbstractString) = valid_submit(split(sol, '\t'))
function valid_submit(sol::AbstractVector)
    que, ner, rels = sol[1], sol[2], sol[3:end]
    paths = find_paths(ner, rels)
    valids = String[]
    for path in paths, (sign, tris) in valid_path(path)
        push!(valids, submit_format(tris, sign))
    end
    valids
end

valid_submit (generic function with 3 methods)