# 路径还原与问题提交

代码参见 `../src/submit.jl`，调用 `submit(predict)` 将预测数据转化为提交格式

函数说明：
- `standard_triple(triple, lang)` 还原单个带链接的三元组
- `submit_format(triples, langs)` 将多个三元组转化为标准格式
- `standard_path(path)` 将路径中的三元组转化为翻译前的三元组，同时返回语言信息
- `find_paths(ner, rels)` 寻找以 `ner` 为起点，`rels` 为路径的子图
- `submit(sol; check = false)` 根据预测数据检索路径，`check` 指定是否根据跳数及跨语言进行筛选

补充说明：
- `find_paths`、`submit` 依赖变量 `edges`
- `recover_paths` 依赖变量 `MT_zh_new2raw`、`MT_en_new2raw`
- 这三个函数与翻译相关

In [212]:
# NOTE(review): presumably a score (0.2647) scaled by a question count (1500) — confirm.
1500  * 0.2647

397.05

In [131]:
# Load the project source files this notebook builds on.
include("../src/translatedata.jl")
include("../src/submit.jl")
include("../src/deduction.jl")
# Make sure the output directory for the `submit/submit_<id>.txt` files exists.
mkpath("submit")

"submit"

In [137]:
# Initialise the lookup dictionary: question => distinct relation tuples seen for it.
train_rels = DefaultDict{String, Vector{Tuple}}(Vector{Tuple})
for sols in unique(train_ques_sols)
    # presumably each entry is [question, entity, rel1, rel2, ...] — TODO confirm
    que = first(sols)
    rels = Tuple(@view(sols[3:end]))  # everything from the third field onwards
    # Store each relation tuple at most once per question.
    rels ∉ train_rels[que] && push!(train_rels[que], rels)
end
# `top10` is later called as `top10(que)` and destructured into (questions, scores).
"Match the 10 nearest questions"
top10 = nearby(unique(train_ques))

top10

### 问题提交

In [177]:
# Try the pipeline on the first validation item: predict relations from the question,
# then search a path from the entity with `find_paths_vague`.
sol = valid_ques_ner[1]
que, ner = sol
rels = find_rels(que)
find_paths_vague(ner, rels)

2-element Vector{Any}:
 "ez"
 [("Felix_Chung", "alongside", "Vincent_Fang_(entrepreneur)"), ("方剛", "constituency", "批發及零售界功能界別")]

In [151]:
# Basic functions
"""
    submit(sol)

Turn one tab-separated prediction line (`question<TAB>ner<TAB>rel...`) into the string
produced by `submit_format`.

Paths returned by `find_paths(ner, rels)` are tried first and the last one whose sign has
more than one distinct character is used (presumably: a path spanning more than one
language). When there is none, relations are re-predicted with
`find_rels(refine_que(que))` and the path comes from `find_paths_vague`.
"""
function submit(sol)
    fields = split(sol, '\t')
    que, ner = fields[1], fields[2]
    relations = fields[3:end]
    # Keep only the candidates whose sign contains at least two different characters.
    candidates = filter(p -> length(unique(p[1])) > 1, find_paths(ner, relations))
    if isempty(candidates)
        # No exact path: fall back to predicted relations and fuzzy alignment.
        sign, path = find_paths_vague(ner, find_rels(refine_que(que)))
    else
        sign, path = last(candidates)
    end
    return submit_format(path, sign)
end

submit

In [129]:
"""
    find_rels(que)

Predict a two-relation path for the question `que`.

The relation tuples stored in `train_rels` for the questions returned by `top10(que)` are
pooled; the first entry of `sort_count(counter(...))` is taken separately for the first
and for the second relation (presumably the most frequent one — confirm the ordering of
`sort_count`). Returns `[rel1, rel2]`.
"""
function find_rels(que)
    neighbours, _ = top10(que)
    pooled = vcat(map(q -> train_rels[q], neighbours)...)
    second = take(2)
    best_first = first(sort_count(counter(first.(pooled))))
    best_second = first(sort_count(counter(second.(pooled))))
    return [best_first[1], best_second[1]]
end

find_rels (generic function with 1 method)

In [176]:
# Return the first triple of the global `triples` with the given subject and relation.
# Stops at the first hit instead of filtering the whole list.
function _first_triple(subject, rel)
    idx = findfirst(t -> t[1] == subject && t[2] == rel, triples)
    idx === nothing && throw(
        ArgumentError("no triple with subject $(repr(subject)) and relation $(repr(rel))"),
    )
    return triples[idx]
end

"""
    find_paths_vague(ner, rels)

Find a path that starts at `ner` and follows `rels`, aligning consecutive hops through
`nearby` instead of requiring the object of one hop to equal the subject of the next.

The first hop is the first triple in the global `triples` with subject `ner` and relation
`rels[1]`. For every further relation, the subjects of all triples carrying that relation
are passed to `nearby(candidates, previous_object; char=false)`; the first returned
candidate (presumably the closest match — confirm against `nearby`) becomes the subject
of the next hop.

Returns the first element of `standard_path(hops)`; callers in this notebook destructure
it as `(sign, path)`.

# Throws
- `ArgumentError` if `rels` is empty, if no triple matches a hop, or if no triple carries
  one of the relations.
"""
function find_paths_vague(ner, rels)
    isempty(rels) && throw(ArgumentError("`rels` must contain at least one relation"))
    hops = [_first_triple(ner, first(rels))]
    for rel in Iterators.drop(rels, 1)
        candidates = first.(filter(t -> t[2] == rel, triples))
        isempty(candidates) &&
            throw(ArgumentError("no triple carries relation $(repr(rel))"))
        # Align the previous hop's object with the candidate subjects.
        matches, _ = nearby(candidates, hops[end][3]; char=false)
        # Rebuild with a vector literal so the element type matches `[tri1, tri2, ...]`.
        hops = [hops..., _first_triple(matches[1], rel)]
    end
    return standard_path(hops)[1]
end

find_paths_vague

In [34]:
"""
    find_paths(ner, rels)

Find the exact subgraph: all paths in the global `edges` that start at `ner` and follow
the relations `rels` in order, returned as triples.

Each surviving path is passed through `standard_path`; only results whose first element
has more than one distinct character are kept (presumably: paths that span more than one
language).
"""
function find_paths(ner, rels)
    # Seed with a placeholder triple so that "last object of the path" is always defined.
    frontier = [[(ner, "", ner)]]
    for rel in rels
        extended = empty(frontier)
        for path in frontier
            tail = path[end][end]  # end point of the path so far
            haskey(edges, tail) || continue
            for (label, target) in edges[tail]
                label == rel || continue
                push!(extended, vcat(path, (tail, label, target)))
            end
        end
        # Paths that could not be extended by `rel` are dropped here.
        frontier = extended
    end
    foreach(popfirst!, frontier)  # remove the placeholder triple from every path
    expanded = vcat(map(standard_path, frontier)...)
    return filter(item -> length(unique(item[1])) > 1, expanded)
end

find_paths

In [None]:
# Show the first validation (question, ner) pair.
valid_ques_ner[1]

2-element Vector{SubString{String}}:
 "where is the constituency of the one who is alongside NER from?"
 "felix_chung"

In [None]:
# NOTE(review): every other call in this notebook passes the question text to
# `find_rels`; passing `ner` here looks unintended — confirm.
find_rels(ner)

2-element Vector{SubString{String}}:
 "eponym"
 "leader"

In [193]:
# End-to-end check on the first validation item: predict relations, search a path with
# `find_paths_vague`, and render it with `submit_format`.
que, ner = valid_ques_ner[1]
rels = find_rels(que)
sign, path = find_paths_vague(ner, rels)
submit_format(path, sign)

"<http://dbpedia.org/resource/Felix_Chung>#<http://dbpedia.org/property/alongside>#<http://dbpedia.org/resource/Vincent_Fang_(entrepreneur)>#<http://zh.dbpedia.org/resource/方剛>#<http://zh.dbpedia.org/property/constituency>#<http://zh.dbpedia.org/resource/批發及零售界功能界別>"

In [206]:
# Write `submit/submit_<submit_id>.txt` from the validation questions.
# Requires `temp` (default answer line) and `sols` (prediction lines) to be defined by
# the prediction-loading cell.
submit_id = 12
fails = String[]
open("submit/submit_$(submit_id).txt", "w") do io
    println(io, "id\tans_path")
    for (i, (que, ner)) in enumerate(valid_ques_ner)
        line = temp  # default answer, written when every strategy below fails
        try
            # Relation prediction sits inside the guarded region so that one failing
            # question cannot abort the run and leave a truncated file.
            rels = find_rels(que)
            sign, path = find_paths_vague(ner, rels)
            line = submit_format(path, sign)
        catch
            # No suitable solution from the fuzzy search: fall back to the prediction line.
            try
                line = submit(sols[i])
            catch err
                @warn("fallback `submit` failed; writing the default line",
                      index = i - 1, exception = err)
            end
            push!(fails, que)
        end
        println(io, i-1, '\t', line)
    end
end

In [181]:
# Read the prediction results (one tab-separated line per question) and write the
# submission file. `read(path, String)` opens and closes the file itself, so no handle
# is left open.
sols = split(strip(read("predict_data.txt", String)), '\n')
temp = submit(sols[1])  # default answer line, used when a prediction cannot be resolved
fails = String[]
submit_id = 12
open("submit/submit_$(submit_id).txt", "w") do io
    println(io, "id\tans_path")
    for (i, predict) in enumerate(sols)
        line = temp
        try
            line = submit(predict)
        catch
            # No suitable solution: keep the default line and remember the input.
            push!(fails, predict)
        end
        println(io, i-1, '\t', line)
    end
end

In [None]:
# NOTE(review): presumably a score (0.2647) scaled by a question count (1500) — confirm.
0.2647 * 1500

397.05

In [4]:
# Write `submit/submit_8.txt` using `submit(...; check=true)`, recording inputs that
# yield several answers (`multisol`) or none (`fails`).
# NOTE(review): this relies on a `submit` method that accepts `check` and returns a
# collection of lines; the one-argument `submit(sol)` defined in this notebook does
# neither — confirm which definition is active when this cell runs.
fails = String[]
multisol = String[]
submit_id = 8
open("submit/submit_$(submit_id).txt", "w") do io
    println(io, "id\tans_path")
    for (i, predict) in enumerate(sols)
        lines = submit(predict; check=true)
        if isempty(lines)
            # No suitable solution: retry without the check and remember the input.
            lines = submit(predict)
            push!(fails, predict)
        elseif length(lines) > 1
            # More than one solution.
            push!(multisol, predict)
        end
        println(io, i-1, '\t', last(lines))
    end
end