# Create OGSL table


In [1]:
import pandas as pd
import zipfile
import json
import tqdm
import requests
import errno
import os
import pickle

## 0 Create Directories, if Necessary
This notebook needs two directories, `jsonzip` and `output`. Each one is created if it does not exist yet; an existing directory is left untouched.

For the code, see [Stack Overflow](http://stackoverflow.com/questions/18973418/os-mkdirpath-returns-oserror-when-directory-does-not-exist).

In [2]:
# Working directories used by this notebook.
directories = ['jsonzip', 'output']
for d in directories:
    # exist_ok=True keeps the cell re-runnable: an existing directory is left
    # alone, but a non-directory path of the same name still raises instead of
    # being silently accepted.
    os.makedirs(d, exist_ok=True)

## 1 Download the ZIP file

In [3]:
# Download the project's JSON archive and store it under jsonzip/.
project = "ogsl"
CHUNK = 16 * 1024  # number of bytes written to disk per iteration
TIMEOUT = 30       # seconds to wait for the server before giving up
url = "http://build-oracc.museum.upenn.edu/json/" + project + ".zip"
file = 'jsonzip/' + project + '.zip'
# stream=True defers fetching the body, so iter_content() really pulls the
# archive CHUNK bytes at a time instead of buffering all of it in memory first.
# The with-block releases the connection once the download is finished.
with requests.get(url, stream=True, timeout=TIMEOUT) as r:
    if r.status_code == 200:
        print("Downloading " + url + " saving as " + file)
        with open(file, 'wb') as f:
            for c in r.iter_content(chunk_size=CHUNK):
                f.write(c)
    else:
        # Any non-200 status is reported the same way; nothing is written.
        print(url + " does not exist.")

Downloading http://build-oracc.museum.upenn.edu/json/ogsl.zip saving as jsonzip/ogsl.zip


## <a name="head21"></a>2 The `parsejson()` function

In [4]:
def parsejson(data_json):
    """Flatten a sign-list JSON object into one row per (sign, value) pair.

    Parameters
    ----------
    data_json : dict
        Parsed JSON object with a "signs" key that maps each sign name to a
        dict. A sign dict may carry "values" (an iterable of readings),
        "utf8" and "hex"; signs without "values" produce no rows.

    Returns
    -------
    None
        Rows are appended to the module-level list ``s_l``, which must exist
        before the function is called. Each row is a new dict with the keys
        "value", "name", "utf8" and "hex".

    Raises
    ------
    KeyError
        If ``data_json`` has no "signs" key.
    """
    for name, sign in data_json["signs"].items():
        if "values" not in sign:
            continue
        if "utf8" in sign:
            utf8 = sign["utf8"]
            # "hex" normally accompanies "utf8"; fall back to an empty string
            # instead of raising KeyError when it is absent.
            hex_code = sign.get("hex", "")
        else:
            utf8 = ""
            hex_code = ""
        for reading in sign["values"]:
            # A fresh dict per row, so no shared scratch dict is needed and
            # rows never alias each other.
            s_l.append({"value": reading, "name": name,
                        "utf8": utf8, "hex": hex_code})
    return

## 3 Main Process

In [5]:
# Module-level state used together with parsejson(): `s_l` collects the rows it
# appends, `s` is the scratch dict kept alongside it. Re-creating both here
# means re-running this cell does not duplicate rows.
s = {}
s_l = []
file = "jsonzip/ogsl.zip"
filename = "ogsl/ogsl-sl.json"
# Open the archive in a with-block so the file handle is closed as soon as the
# sign list has been read.
with zipfile.ZipFile(file) as z:
    signlist = z.read(filename).decode('utf-8')
data_json = json.loads(signlist)                # make it into a json object (essentially a dictionary)
parsejson(data_json)

## 4 Make Dataframe

In [6]:
# Turn the list of row dicts collected in `s_l` into a DataFrame; the dict
# keys become the columns.
df = pd.DataFrame(s_l)
# A bare last expression makes the notebook render the frame as a table.
df

Unnamed: 0,hex,name,utf8,value
0,x120F0.x122FA,|GAD.TAK₄|,𒃰𒋺,akkil₃
1,x120F0.x122FA,|GAD.TAK₄|,𒃰𒋺,ašša₂
2,x120F0.x122FA,|GAD.TAK₄|,𒃰𒋺,ašta
3,x120F0.x122FA,|GAD.TAK₄|,𒃰𒋺,kil₄
4,x122D9.x1202F,|ŠU₂.3×AN|,𒋙𒀯,iši₂
5,x122D9.x1202F,|ŠU₂.3×AN|,𒋙𒀯,kunga
6,x122D9.x1202F,|ŠU₂.3×AN|,𒋙𒀯,kungi
7,x122D9.x1202F,|ŠU₂.3×AN|,𒋙𒀯,suhub
8,x122D9.x1202F,|ŠU₂.3×AN|,𒋙𒀯,suhul
9,x122D9.x1202F,|ŠU₂.3×AN|,𒋙𒀯,sulgar


In [8]:
# Serialize the DataFrame to "ogsl.p" in the current working directory,
# using binary mode and the default pickle protocol.
with open("ogsl.p", mode="wb") as pickle_file:
    pickle.dump(df, file=pickle_file)