# Viewing JSON data
Select **Cell > Run All** from the menu to run every cell and start analyzing the data.

In [None]:
import genson
import hashlib
import json  
import pandas as pd  
import requests
import sys

from requests.auth import HTTPBasicAuth
from os import path
from genson import SchemaBuilder

In [None]:
# Upload and save a file to this notebook/host
from ipywidgets import FileUpload
# File-upload widget; on_value_change (defined below) is registered on it as an
# observer and reads the uploaded data back through uploader.value.
uploader = FileUpload()

def on_value_change(change):
    """Write the newly uploaded file to disk under its uploaded name.

    Intended to be used as an observer callback for the ``value`` trait of
    the module-level ``uploader`` widget.

    Args:
        change: Trait-change mapping handed to the observer. ``change['new']``
            must contain exactly one entry, which is used as the key into
            ``uploader.value``.

    Raises:
        ValueError: If ``change['new']`` does not hold exactly one entry.
    """
    # Single-element unpacking: fails loudly unless exactly one upload is present.
    [first_upload] = change['new']
    upload = uploader.value[first_upload]
    filename = upload['metadata']['name']
    content = upload['content']

    # NOTE(review): the name comes from the upload metadata and is used as the
    # target path unchanged - confirm that is acceptable for this host.
    print("Writing content to: " + filename)
    # The context manager closes the handle even if the write raises.
    with open(filename, "wb+") as f:
        f.write(content)
    
# Call on_value_change whenever the widget's 'value' trait changes.
uploader.observe(on_value_change, names='value')
# Show the widget in the cell output.
display(uploader)

In [None]:
# Build the cache file name for a key and file extension
def get_cache_file_name(key, ext):
    """Return ``./<key>.<ext>``, a path relative to the current directory."""
    template = "./{0}.{1}"
    return template.format(key, ext)

# Look up a previously pickled DataFrame by key
def get_cached_df(key):
    """Return the DataFrame pickled under ``key``, or None if no cache file exists.

    The file name is derived with ``get_cache_file_name(key, 'pkl')``. A
    message is printed when a cache file is found.
    """
    pickle_path = get_cache_file_name(key, 'pkl')
    # Guard clause: nothing cached for this key.
    if not path.exists(pickle_path):
        return None

    print("Cache found: {file_name}".format(file_name=pickle_path))
    return pd.read_pickle(pickle_path)
    
# Pickle a DataFrame under the given key
def cache_df(df, key):
    """Write ``df`` as a pickle to the file named by ``get_cache_file_name(key, 'pkl')``."""
    df.to_pickle(get_cache_file_name(key, 'pkl'))
    
# Acquire text from URL or file
def read_json_text(path, timeout=30):
    """Return the raw text found at ``path``.

    Args:
        path: Either an ``http://`` / ``https://`` URL or a local file path.
        timeout: Value passed to ``requests.get`` as its ``timeout``; only
            used for URLs.

    Returns:
        The response body (for URLs) or the file contents (for files) as a
        string.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the local file cannot be opened.
    """
    # Match the full scheme so a local file whose name merely begins with
    # "http" (e.g. "http_dump.json") is still read from disk.
    if path.lower().startswith(('http://', 'https://')):
        response = requests.get(path, timeout=timeout)
        # Fail here rather than handing an error page to the JSON parser.
        response.raise_for_status()
        return response.text

    # JSON is exchanged as UTF-8, so do not depend on the locale's default
    # encoding; the context manager also guarantees the handle is closed.
    with open(path, encoding='utf-8') as data_file:
        return data_file.read()
        
# Load JSON into a DataFrame given a URL or file path to the JSON data
def load_json(path):
    """Return the JSON at ``path`` as a normalized DataFrame, using a pickle cache.

    The cache key is the MD5 hex digest of the UTF-8 encoded ``path`` string.
    If ``get_cached_df`` finds an entry for that key it is returned as-is;
    otherwise the text is read, parsed, flattened with ``pd.json_normalize``,
    stored with ``cache_df`` and returned.
    """
    cache_key = hashlib.md5(path.encode('utf-8')).hexdigest()
    frame = get_cached_df(cache_key)
    if frame is None:
        # Cache miss: build the frame from the source and remember it.
        frame = pd.json_normalize(json.loads(read_json_text(path)))
        cache_df(frame, cache_key)
    return frame

# Given a URL or path to JSON data, print the JSON schema inferred from it
def display_schema(path):
    """Read and parse the JSON at ``path`` and print a schema generated for it.

    The schema is built with genson's ``SchemaBuilder``, seeded with an empty
    object schema, and printed as JSON with an indent of 1.
    """
    document = json.loads(read_json_text(path))

    schema_builder = SchemaBuilder()
    # Seed schema added before the parsed data itself.
    schema_builder.add_schema({"type": "object", "properties": {}})
    schema_builder.add_object(document)

    print(json.dumps(schema_builder.to_schema(), indent=1))
    

In [None]:
# Print the schema inferred from the movie dataset fetched from GitHub.
display_schema('https://raw.githubusercontent.com/prust/wikipedia-movie-data/master/movies.json')

In [None]:
# Load the movie data from a local file. To read it straight from GitHub
# instead, use:
# df = load_json('https://raw.githubusercontent.com/prust/wikipedia-movie-data/master/movies.json')
df = load_json('movies.json')
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would only add a stray "None" line.
df.info()
df