In [360]:
import json
import pandas as pd

# Let's load Wilhelm's Translation of the I-Ching, and let's poor it into a JSON & CSV file

In `./iching_fulltext.md` we have a full english translation of the Wilhelm I-Ching translation. We want to write a simple parser which will take out all required pieces from the text and store it inside ann appropriately constructed JSON/CSV.

In [349]:
count = 0
with open('./data/iching_willhelm_markdown.md') as f:
    lines = f.readlines()

with open('./data/i-ching-basic.js') as json_file:
    iching_basic = json.load(json_file)

iching_full_text = ''.join(lines)

def iching_raw_text_section_split():
    """ Split markdown into \n-splitted sections, each on a hexagram. Take section 10-74 of the markdown for this as Hexagram Texts """
    start_ii = 10 # hex 1
    end_ii = 74 # hex 64

    # dictionary prep
    iching_raw = {}
    iching_raw['hexagrams'] = {}

    # Split translation by subsectoin (## )
    split = iching_full_text.split("## ")

    # Now join sections back together, but split them on new lines
    for i in range(start_ii, end_ii):
        iching_raw['hexagrams'][i-start_ii] = [p.split("\n") for p in split][i]

    return iching_raw

iching_raw = iching_raw_text_section_split()

In [350]:
# preview
iching_raw['hexagrams'][55][:20]

['56. Lü / The Wanderer',
 '[Back to Index](#index)',
 '',
 'above LI THE CLINGING, FIRE',
 '',
 'below KêN KEEPING STILL, MOUNTAIN',
 '',
 "The mountain, Kên, stands still; above it fire, Li, flames up and does not tarry. Therefore the two trigrams do not stay together. Strange lands and separation are the wanderer's lot.",
 '',
 'THE JUDGMENT',
 '',
 'The Wanderer. Success through smallness.',
 'Perseverance brings good fortune to the wanderer.',
 '',
 'WHEN A man is a wanderer and stranger, he should not be gruff nor overbearing. He has no large circle of acquaintances, therefore he should not give himself airs. He must be cautious and reserved; in this way he protects himself from evil. If he is obliging toward others, he wins success.',
 '',
 'A wanderer has no fixed abode; his home is the road. Therefore he must take care to remain upright and steadfast, so that he sojourns only in the proper places, associating only with good people. Then he has good fortune and can go his way u

In [351]:
keywords = [ 'above', 'below', 'THE JUDGMENT', 'THE IMAGE', 'THE LINES' ]

We want to do the following:
- identify lines with above keywords
- Extract parts of text into json structure (if applicable)

In [352]:
def wilhelm_hexagram(iching_hexagrams, idx):
    """ Get keywords positions (line number) and hexagram text of hexagram `idx` (eg 63) from iching_hexagrams
        Note that here: Hexagram 1 = iching_hexagrams[0], Hexagram 64 = iching_hexagrams[63]
    """

    # get a single iching hexagram
    hexg = iching_hexagrams[idx]

    # create indice arrray where keywords occur
    idx_array = {}

    # iteratie through lines in hexagram
    for i, l in enumerate(hexg):

        # now we're gonna check if the line contains a special keyword
        for k in keywords:
            
            # if keyword in line
            if k in l[:len(k)]:
                idx_array[k] = i

    return hexg, idx_array

Todo:
- Get started by adding to a dictionary of symbolic representation of the hexagram.
- Judgment section could perhaps be renamed to "Law", and so is everyone writing their own Law, if allowed to have their own interpretation.

In [353]:
# key-map values on hex-value
iching_extented_start = { item['hex']:item for ii, item in enumerate(iching_basic) }

## Let's define core functions

In [354]:
def get_basic_info(hex_text, indices):
    """ Retrieve basic info / name of hexagram, given hex_text and keyword indices"""
    return hex_text[0]

def retrieve_above(hex_text, indices):
    """ Retrieve above, and interpretation, given hex_text and keyword indices"""
    above_text = hex_text[indices['above']]

    split = above_text.split(' ')
    chinese = split[1]
    symbolic = ' '.join(split[2:4])
    alchemical = split[4]

    # print ( split )
    # print ('chinese -->', chinese )
    # print ('symbolic -->', symbolic )
    # print ('alchemical -->', alchemical )

    return {
        'chinese': chinese,
        'symbolic': symbolic,
        'alchemical': alchemical,
    }

def retrieve_below(hex_text, indices):
    """ Retrieve below, and interpretation, given hex_text and keyword indices"""
    below_text = hex_text[indices['below']]

    split = below_text.split(' ')
    chinese = split[1]
    symbolic = ' '.join(split[2:4])
    alchemical = split[4]

    return {
        'chinese': chinese,
        'symbolic': symbolic,
        'alchemical': alchemical,
    }

def retrieve_symbolic(hex_text, indices):
    """ Retrieve symbolc, and interpretation, given hex_text and keyword indices"""
    symbolic_text = '\n'.join( hex_text[indices['below']+1: indices['THE JUDGMENT']])

    split = symbolic_text.lstrip('\n').rstrip('\n')

    return split

def retrieve_judgment(hex_text, indices):
    """ Retrieve judgment, and interpretation, given hex_text and keyword indices"""
    judgment_text = '\n'.join( hex_text[indices['THE JUDGMENT']+1: indices['THE IMAGE']] )

    split = judgment_text.lstrip('\n').rstrip('\n').split('\n')
    text = '\n'.join( split[:-1] ).lstrip('\n').rstrip('\n') # all but last section
    comments = '\n'.join( split[-1:] ).lstrip('\n').rstrip('\n') # last part of this section

    return {
        'text': text,
        'comments': comments
    }

def retrieve_image(hex_text, indices):
    """ Retrieve image, and interpretation, given hex_text and keyword indices"""
    image_text = '\n'.join( hex_text[indices['THE IMAGE']+1: indices['THE LINES']])

    split = image_text.lstrip('\n').rstrip('\n').split('\n')
    
    text = '\n'.join( split[:-1] ).lstrip('\n').rstrip('\n') # all but last section
    comments = '\n'.join( split[-1:] ).lstrip('\n').rstrip('\n') # last part of this section

    return {
        'text': text,
        'comments': comments
    }

def get_single_line(line_text_rr, keys, key_indices, line):

    if (len(key_indices) < 6):
        print ("Line text does not have 6 keys:",key_indices)
        return [""]

    if line >= len(key_indices):
        # return {
        #     "comments": "\n".join( line_text_rr[key_indices[keys[line-1]]:] ).lstrip('\n').rstrip('\n')
        # }
        info = line_text_rr[key_indices[keys[line-1]]:]
        
    else:
        info = line_text_rr[key_indices[keys[line-1]]:key_indices[keys[line]]]
    

    split = '\n'.join(info[1:]).lstrip('\n').rstrip('\n').split('\n\n', 1)
    text = '\n'.join( split[:-1] ).lstrip('\n').rstrip('\n') # all but last section
    comments = '\n'.join( split[-1:] ).lstrip('\n').rstrip('\n') # last part of this section

    # print ("-----{}----".format(line))
    # print ("text -->", text)
    # print ("comments -->", comments)
    # print ("")

    return {
        'text': text,
        'comments': comments
    }

    # return '\n'.join(info[1:]).lstrip('\n').rstrip('\n')


def retrieve_lines(hex_text, indices):
    """ Retrieve lines, and interpretation, given hex_text and keyword indices"""
    # lines_text = '\n'.join( hex_text[indices['THE LINES']+1:])
    lines_txt_array = hex_text[indices['THE LINES']+1:]

    keys = ["at the beginning means", "in the second place means", "in the third place means", "in the fourth place means", "in the fifth place means", "at the top means","all the lines"]

    # print (lines_txt_array)

    ### create indice arrray where keywords occur
    keys_ii_array = {}

    # iteratie through lines in text
    for i, l in enumerate(lines_txt_array):

        # now we're gonna check if the line contains a special keyword
        for k in keys:
            
            # if keyword in line
            if k in l: #l[:len(k)]:
                keys_ii_array[k] = i

    ## Retrieve text for every single line (1-6)
    lines = {}
    for _l_number in range(1,7):
        lines[_l_number] = get_single_line(lines_txt_array, keys, keys_ii_array, _l_number)

    # print (lines)

    return lines

Now go through all hexagrams in our basic-hex kit, and enricht it with Wilhelm

In [355]:
for kk, hexagram in iching_extented_start.items():

    # if kk != 5:
        # continue

    # retrieve hexagram text kk (64 hexagrams total), and markers with core info in text
    hex_wilhelm, markers = wilhelm_hexagram(iching_raw['hexagrams'], kk-1)

    print ('hex',hexagram['hex'])

    # Enrich hexagram with `hex_wilhelm` info
    hexagram['wilhelm_above'] = retrieve_above(hex_wilhelm, markers)
    hexagram['wilhelm_below'] = retrieve_below(hex_wilhelm, markers)
    hexagram['wilhelm_symbolic'] = retrieve_symbolic(hex_wilhelm, markers)
    hexagram['wilhelm_judgment'] = retrieve_judgment(hex_wilhelm, markers)
    hexagram['wilhelm_image'] = retrieve_image(hex_wilhelm, markers)
    hexagram['wilhelm_lines'] = retrieve_lines(hex_wilhelm, markers)

hex 1
hex 2
hex 3
hex 4
hex 5
hex 6
hex 7
hex 8
hex 9
hex 10
hex 11
hex 12
hex 13
hex 14
hex 15
hex 16
hex 17
hex 18
hex 19
hex 20
hex 21
hex 22
hex 23
hex 24
hex 25
hex 26
hex 27
hex 28
hex 29
hex 30
hex 31
hex 32
hex 33
hex 34
hex 35
hex 36
hex 37
hex 38
hex 39
hex 40
hex 41
hex 42
hex 43
hex 44
hex 45
hex 46
hex 47
hex 48
hex 49
hex 50
hex 51
hex 52
hex 53
hex 54
hex 55
hex 56
hex 57
hex 58
hex 59
hex 60
hex 61
hex 62
hex 63
hex 64


In [356]:
iching_extented_start[5]

{'hex': 5,
 'hex_font': '䷄',
 'trad_chinese': '需',
 'pinyin': 'xū',
 'english': 'Needing',
 'binary': '010111',
 'od': 35,
 'wilhelm_above': {'chinese': "K'AN",
  'symbolic': 'THE ABYSMAL,',
  'alchemical': 'WATER'},
 'wilhelm_below': {'chinese': "CH'IEN",
  'symbolic': 'THE CREATIVE,',
  'alchemical': 'HEAVEN'},
 'wilhelm_symbolic': 'All beings have need of nourishment from above. But the gift of food comes in its own time, and for this one must wait. This hexagram shows the clouds in the heavens, giving rain to refresh all that grows and to provide mankind with food and drink. The rain will come in its own time. We cannot make it come; we have to wait for it. The idea of waiting is further suggested by the attributes of the two trigrams--strength within, danger in from. Strength in the face of danger does not plunge ahead but bides its time, whereas weakness in the face of danger grows agitated and has not the patience to wait.',
 'wilhelm_judgment': {'text': 'WAITING. If you are sin

## Write to I-Ching Wilhelm Translation

In [357]:
with open('./data/iching_wilhelm_translation.json', 'w') as outfile:
    json.dump(iching_extented_start, outfile)

A few broad checks for JSONN integrity

In [359]:
for kk, hexagram in iching_extented_start.items():

    for _l_number in range(1,7):
        if hexagram['wilhelm_lines'][_l_number]['text'] == "":
            print ("Hex {} Line {}, {} empty!".format(kk, _l_number, "text"))

        if hexagram['wilhelm_lines'][_l_number]['comments'] == "":
            print ("Hex {} Linne {}, {} empty!".format(kk, _l_number, "comments"))

    # print ( hexagram['wilhelm_lines'][6] )

In [364]:
# write to CSV
df = pd.read_json('./data/iching_wilhelm_translation.json', orient='index')

In [368]:
# re-arrange columns
df = df[['hex', 'hex_font', 'binary', 'english', 'od', 'pinyin', 'trad_chinese',
       'wilhelm_above', 'wilhelm_below', 'wilhelm_symbolic', 'wilhelm_image', 'wilhelm_judgment', 'wilhelm_lines']]

In [370]:
df.to_csv('./data/iching_wilhelm_translation.csv', index=False, sep=';')