# Prepare template to input NJECD core meanings

In [1]:
import pandas as pd

## Load data

Kanjidic

In [2]:
# Read the Kanjidic table and rename its `frequency` column to
# `kanji_frequency`.
kanji = (
    pd.read_parquet('./data/kanjidic.parquet')
    .rename(columns={'frequency': 'kanji_frequency'})
)
kanji.head()

Unnamed: 0,kanji,jis208,jis212,jis213,unicode,radical,radical_name,stroke_count,grade,kanji_frequency,...,skip_code,onyomi,kunyomi,nanori,meanings,n_onyomi,n_nanori,n_kunyomi,n_kunyomi_distinct,jis_level
0,亜,16-01,,,4E9C,7,,7,8.0,,...,4-7-1,[ア],[つ.ぐ],"[や, つぎ, つぐ]","[Asia, rank next, come after, -ous]",1,3,1,1,1
1,唖,16-2,,,5516,30,,10,,,...,1-3-7,"[ア, アク]",[おし],,"[mute, dumb]",2,0,1,1,1
2,娃,16-3,,,5A03,38,,9,9.0,,...,1-3-6,"[ア, アイ, ワ]",[うつく.しい],[い],[beautiful],3,1,1,1,1
3,阿,16-4,,,963F,170,,8,9.0,,...,1-3-5,"[ア, オ]","[おもね.る, くま]","[ほとり, あず, あわ, おか, きた, な]","[Africa, flatter, fawn upon, corner, nook, rec...",2,6,2,2,1
4,哀,16-5,,,54C0,30,,9,8.0,,...,2-2-7,[アイ],"[あわ.れ, あわ.れむ, かな.しい]",,"[pathetic, grief, sorrow, pathos, pity, sympat...",1,0,3,2,1


In [3]:
# Dictionary reference numbers: replace missing values with 0 and store
# the columns as plain integers.
for ref_col in ('njecd_no', 'kkld_no', 'kkd_no'):
    kanji[ref_col] = kanji[ref_col].fillna(0).astype(int)

## Get relevant data

In [28]:
# Keep only the rows whose `kkd_no` is positive (0 is the fill value used for
# a missing number), restricted to the columns that will be joined onto the
# core-meanings table. `.copy()` detaches the result from `kanji`.
has_kkd_entry = kanji['kkd_no'] > 0
template_columns = ['kanji', 'grade', 'unicode', 'radical']
template = kanji.loc[has_kkd_entry, template_columns].copy()

template

Unnamed: 0,kanji,grade,unicode,radical
0,亜,8,4E9C,7
2,娃,9,5A03,38
3,阿,9,963F,170
4,哀,8,54C0,30
5,愛,4,611B,61
...,...,...,...,...
13103,辶,,FA66,162
13104,逸,10,FA67,162
13105,難,10,FA68,172
13106,響,10,FA69,180


## Merge data that was already input

In [29]:
# Load the core meanings entered so far and discard their `grade` column;
# a `grade` column is supplied by `template` when the two are merged.
data = (
    pd.read_csv('./data/njecd_core_meanings.csv')
    .drop(columns='grade')
)

data.head()

Unnamed: 0,kanji,jlpt_level,njecd_no,kkd_no,kkld_no,skip_code,core_meaning_1,core_meaning_2,core_meaning_3,variant_of
0,刂,,1.0,1,0.0,1-1-1,radical rittō,,,3642.0
1,丷,,,2,0.0,1-1-1,radical hachigashira,,,3644.0
2,儿,,4.0,3,0.0,1-1-1,radical ninnyō (or hitoashi),,,
3,,,,4,0.0,1-1-1,eight,,,3644.0
4,⺌,,,5,0.0,1-1-2,radical naogashira,,,7.0


In [30]:
# Left join: every row of `data` is kept, and `grade`, `unicode` and `radical`
# are filled in from `template` where the `kanji` value matches (NaN otherwise).
# `validate='many_to_one'` makes pandas raise a MergeError if `template`
# contains the same `kanji` more than once; without it such duplicates would
# silently multiply the matching rows of `data`.
data = data.merge(template, on='kanji', how='left', validate='many_to_one')

In [31]:
# Number of rows in the merged table.
data.shape[0]

4408

## Save the final result

In [32]:
# Preview the first five rows of the merged table before it is written out.
data.head(5)

Unnamed: 0,kanji,jlpt_level,njecd_no,kkd_no,kkld_no,skip_code,core_meaning_1,core_meaning_2,core_meaning_3,variant_of,grade,unicode,radical
0,刂,,1.0,1,0.0,1-1-1,radical rittō,,,3642.0,,5202,18.0
1,丷,,,2,0.0,1-1-1,radical hachigashira,,,3644.0,,,
2,儿,,4.0,3,0.0,1-1-1,radical ninnyō (or hitoashi),,,,,513F,10.0
3,,,,4,0.0,1-1-1,eight,,,3644.0,,,
4,⺌,,,5,0.0,1-1-2,radical naogashira,,,7.0,,,


In [34]:
# Write the merged table to CSV; `index=False` leaves the row index out of
# the file.
output_path = './data/njecd_core_meanings_template.csv'
data.to_csv(output_path, index=False)