In [36]:
# Template for adding a dictionary entry for a Buddhist figure
import re

import pandas as pd
import numpy as np

import curation_util
from korean import getkoreanid
from taisho import geturl
from taisho import saveScrolls

# Load the dictionary words table into a DataFrame indexed by (headword, id)
colnames = [
    'id', 'simplified', 'traditional', 'pinyin',
    'english', 'grammar',
    'concept_cn', 'concept_en',
    'domain_cn', 'domain_en',
    'subdomain_cn', 'subdomain_en',
    'image', 'mp3', 'notes', 'headword',
]
# Start with every column read as a generic object, then override the two
# numeric key columns and the two Chinese text columns.
types = dict.fromkeys(colnames, np.object)
types.update({
    'id': np.uint32,
    'headword': np.uint32,
    'simplified': unicode,
    'traditional': unicode,
})
index_col = ['headword', 'id']
df = pd.read_table(
    '../data/dictionary/words.txt',
    names=colnames,
    dtype=types,
    index_col=index_col,
)

# Spot check: display the key text columns for one known simplified form
cols = ["simplified", "traditional", "pinyin", "english", "notes"]
df.loc[df.simplified == '脡', cols]

Unnamed: 0_level_0,Unnamed: 1_level_0,simplified,traditional,pinyin,english,notes
headword,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
46847,46847,脡,\N,tǐng,strips of dried meat / jerky,(Unihan '脡')
46847,46863,脡,\N,tǐng,stiff and straight,(Unihan '脡')


In [37]:
# Load an entry from the Taisho canon and derive the simplified, traditional,
# pinyin and English fields for a new headword from the traditional name.
# Input
tid = ""
trad_name = u"菩提多羅"
sanskrit = u""
english = u"Bodhidharma"
grammar = "proper noun"
# Pick exactly one subdomain; keep the alternatives commented out
#subdomain = u"\\N\t\\N"
subdomain = u"中国佛教\tChinese Buddhism"
#subdomain = u"日本佛教\tJapanese Buddhism"

# Generated
entry = curation_util.GetEntry(tid)
title_cn = entry["title"]
print(title_cn)
# Next id: number of non-null values in the first column of df, plus one.
# NOTE(review): assumes existing ids are contiguous from 1 -- confirm.
luid = df.count()[0] + 1
print(luid)
tradArr = curation_util.ExtractWords(trad_name)
# A single extracted segment is reported as a word already in the dictionary
newWord = True
if len(tradArr) == 1:
  print("Existing word")
  newWord = False
else:
  print("New word")
domain = u"佛教\tBuddhism"
#domain = u"古文\tClassical Chinese"
traditional = ""
simplified = ""
pinyin = ""
for t in tradArr:
  # First try the segment as a traditional form; filter the table once and
  # read all three fields from the same matching rows.
  hits = df[df.traditional == t]
  if hits['simplified'].count() > 0:
    simplified += hits['simplified'].iloc[0].decode('utf-8')
    traditional += hits['traditional'].iloc[0].decode('utf-8')
    pinyin += hits['pinyin'].iloc[0].decode('utf-8') + " "
  else:
    # Fall back to matching the simplified column; in that case the
    # simplified text is used for the traditional form as well.
    hits = df[df.simplified == t]
    if hits['simplified'].count() > 0:
      sameForm = hits['simplified'].iloc[0].decode('utf-8')
      simplified += sameForm
      traditional += sameForm
      pinyin += hits['pinyin'].iloc[0].decode('utf-8') + " "
    else:
      # The segment is skipped, so the generated fields will be incomplete
      print("%s not found" % t)
pinyin = pinyin.strip()
print(simplified)
# The traditional field is written as \N when it is identical to simplified
trad = traditional
if simplified == traditional:
  trad = "\\N"
if grammar == "proper noun":
  pinyin = pinyin.title()

# Do some guessing on the best English name
name_en = u""
if english == "":
  english = curation_util.P2englishPN(pinyin)
if sanskrit != "":
  if english != "":
    # Both right-hand sides are evaluated with the English name as it was
    # before this statement, so title_en is not built from the combined form.
    english, title_en = u"%s / %s" % (sanskrit, english), u"%s (%s)" % (sanskrit, english)
  else:
    english = u"%s" % sanskrit
# NOTE(review): english can still be empty at this point (no English name
# supplied or guessed and no Sanskrit); no further fallback is applied.

print(trad)
print(pinyin)
print(english)

Translator  not in dictionary
GetTranslatorEn: Translator '' not found
Translator  not in dictionary
GetTranslatorEn: Translator '' not found
大沙門百一羯磨法
51333
New word
菩提多罗
菩提多羅
Pútí Duōluó
Bodhidharma


In [38]:
# Compose the notes and the tab-separated dictionary line for the new entry,
# then append it to the words file.
# Input
pali = u""
japanese = u""
kid = getkoreanid(tid)
concept = u"法师\tMonastic"
#concept = u"翻译\tTranslator"
#concept = u"作者\tWriter"
#concept = u"\\N\t\\N"
daterange = u"Tang "

# References: one citation string per source; only those listed in refArr
# below are actually written to the notes.
abc = u"ABC '%s'" % pinyin
bingenheimer = "Bingenheimer 2016"
bl = u"BL '%s'" % english
bsad = u"BSAD ID：A000617"
bscd = u"BCSD '%s'" % traditional
ccd = u"CCD '%s'" % simplified
fe = u"FE '%s'" % traditional
fgdb = u"FGDB '%s'" % traditional
gced = u"GCED '%s'" % traditional
ghdc = u"GHDC '%s'" % simplified
jebd = u"JEBD '%s'" % japanese
kdc = u"KDC %s" % kid
mw = u"MW '%s'" % sanskrit
ncced = u"NCCED '%s'" % simplified
t = u"T %s" % tid
# NOTE(review): this compares kid with the string "0"; presumably
# getkoreanid returns "0" when there is no Korean canon id -- confirm.
if kid != "0":
  refArr = [fgdb, kdc, t] # Add based on references checked
elif tid != "":
  refArr = [fgdb, t]
else:
  refArr = [fgdb]

# Generated
# Only proper nouns keep the spaces between pinyin syllable groups
if grammar != "proper noun":
  pinyin = pinyin.replace(" ", "")

# Sanskrit, Pali, and Japanese equivalents
# Collect whichever equivalents are present so the result never starts with
# a stray ", " when Sanskrit is empty but Pali or Japanese is given.
langParts = []
if sanskrit != "":
  langParts.append(u"Sanskrit: %s" % sanskrit)
if pali != "":
  langParts.append(u"Pali: %s" % pali)
if japanese != "":
  langParts.append(u"Japanese: %s" % japanese)
fromLang = ""
if langParts:
  fromLang = u"From " + u", ".join(langParts) + u"; "

# References: non-empty citations joined with "; " and wrapped in parentheses
ref = u"; ".join([r for r in refArr if r != ""])
if ref != "":
  ref = u"(%s)" % ref

#notes = u"%sSong dynasty Chinese Buddhist monastic %s" % (daterange, ref)
#notes = u"%sBuddhist monastic credited with compiling 《%s》, included in the Chinese Buddhist canon %s" % (daterange, title_cn, ref)
notes = u"See 菩提達磨"
# The line layout follows the words table columns; image and mp3 are written
# as \N and luid is used for both the id and the headword.
line = u"%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\\N\t\\N\t%s\t%d" % (
    luid, simplified, trad, pinyin, english, grammar, concept, domain, subdomain, notes, luid)
# NOTE(review): running this cell again writes the same line again -- confirm
# whether WriteWordEntry guards against duplicates.
curation_util.WriteWordEntry(line)
print(line)

Wrote line to words file
51333	菩提多罗	菩提多羅	Pútí Duōluó	Bodhidharma	proper noun	法师	Monastic	佛教	Buddhism	中国佛教	Chinese Buddhism	\N	\N	See 菩提達磨	51333
