# 概要

JuliaとAwabi.jlによる形態素解析の実験

## データの取得

In [37]:
using HTTP, JSON, CSV, TableOperations, DataFrames # ライブラリをインポート


"""
    get_and_write(url)

Fetch `url` with `HTTP.get` and write the response body to a local file.

The file name is `url` with every `/` replaced by `_`; that name is returned.
Raises an error through `error` when the response status is not 200.
"""
function get_and_write(url)
    resp = HTTP.get(url)
    if resp.status != 200
        # `error` throws, so nothing after this line runs on a non-200 status.
        error("Error: $(resp.status)")
    end

    # Flatten the URL into a single path component to use as the file name.
    destination = replace(url, "/" => "_")
    body_text = String(resp.body)
    write(destination, body_text)
    return destination
end

"""
    collect_nico_data(ranking_type, date, file_type, endpoint="https://dcdn.cdn.nimg.jp/nicovideo/old-ranking")

Download one Niconico ranking data file and save it locally.

The request URL is `endpoint/ranking_type/date/file_type`. The download and the
file write are delegated to `get_and_write`, whose return value (the local file
name) is returned unchanged.

Reference: https://dwango.github.io/niconico/genre_ranking/ranking_log
"""
function collect_nico_data(ranking_type, date, file_type, endpoint="https://dcdn.cdn.nimg.jp/nicovideo/old-ranking")
    # Join the four path pieces with "/" to form the full URL.
    target_url = string(endpoint, "/", ranking_type, "/", date, "/", file_type)
    saved_name = get_and_write(target_url)
    return saved_name
end

"""
    search_tag(tag, file_name)

Find the ranking file registered for `tag` in a file-name list.

`file_name` is the path of a JSON file that is parsed with `JSON.parsefile` and
iterated entry by entry. The `"file"` value of the first entry whose `"tag"` value
equals `tag` is returned. Raises an error through `error` when no entry matches.
"""
function search_tag(tag, file_name)
    entries = JSON.parsefile(file_name)
    for entry in entries
        if entry["tag"] == tag
            return entry["file"]
        end
    end
    # Reached only when the loop finished without finding a matching entry.
    return error("Error: $(tag) is not found")
end

# Download the file-name list of the 2023-09-23 daily ranking; the value is the local file name.
file_name_list_name = collect_nico_data("daily", "2023-09-23", "file_name_list.json")
# Look up which ranking file the list associates with the tag "アーマードコア6".
ac6_file_name       = search_tag("アーマードコア6", file_name_list_name)
# Download that ranking file for the same date; the value is the local file name it was saved under.
ac6_result_file     = collect_nico_data("daily", "2023-09-23", ac6_file_name)

file_data_list = JSON.parsefile(file_name) = Any[Dict{String, Any}("genre" => "全ジャンル", "tag" => nothing, "file" => "all.json"), Dict{String, Any}("genre" => "エンターテイメント", "tag" => nothing, "file" => "entertainment.json"), Dict{String, Any}("genre" => "ラジオ", "tag" => nothing, "file" => "radio.json"), Dict{String, Any}("genre" => "音楽・サウンド", "tag" => nothing, "file" => "music_sound.json"), Dict{String, Any}("genre" => "ダンス", "tag" => nothing, "file" => "dance.json"), Dict{String, Any}("genre" => "動物", "tag" => nothing, "file" => "animal.json"), Dict{String, Any}("genre" => "自然", "tag" => nothing, "file" => "nature.json"), Dict{String, Any}("genre" => "料理", "tag" => nothing, "file" => "cooking.json"), Dict{String, Any}("genre" => "旅行・アウトドア", "tag" => nothing, "file" => "traveling_outdoor.json"), Dict{String, Any}("genre" => "乗り物", "tag" => nothing, "file" => "vehicle.json"), Dict{String, Any}("genre" => "スポーツ", "tag" => nothing, "file" => "sports.json"), Dict{String, Any}("genre" => "社会・政治・




"https:__dcdn.cdn.nimg.jp_nicovideo_old-ranking_daily_2023-09-23_game_04.json"

## 形態素解析の実験


In [18]:
using Awabi, PrettyPrint

# Sample sentence to run through the tokenizer.
text      = "すもももももももものうち"
# Build the tokenizer with an explicit dictionary directory.
# NOTE(review): this dicdir path looks specific to a Linuxbrew MeCab install — adjust per machine.
tokenizer = Tokenizer(Dict("dicdir"=>"/home/linuxbrew/.linuxbrew/lib/mecab/dic/ipadic"))
# Tokenize the sample sentence with that tokenizer.
tokens    = tokenize(tokenizer, text)

# Pretty-print the tokenization result.
pprintln(tokens)


[
  ("すもも", "名詞,一般,*,*,*,*,すもも,スモモ,スモモ"),
  ("も", "助詞,係助詞,*,*,*,*,も,モ,モ"),
  ("もも", "名詞,一般,*,*,*,*,もも,モモ,モモ"),
  ("も", "助詞,係助詞,*,*,*,*,も,モ,モ"),
  ("もも", "名詞,一般,*,*,*,*,もも,モモ,モモ"),
  ("の", "助詞,連体化,*,*,*,*,の,ノ,ノ"),
  ("うち", "名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ"),
]
