In [1]:
import json
import pandas as pd

In [2]:
# Scraped JSON input used by the conversion cells of this notebook
# (the name looks like a scrape timestamp -- TODO confirm).
datfilename = '2016-07-21_15-43.json'

# Keep the parsed content in `dat`; the json_to_*_csv functions take the
# file name and re-read the file themselves.
with open(datfilename) as datfile:
    dat = json.loads(datfile.read())

# JSON to table of media files

In [3]:
def json_to_media_csv(filename, outfilename='media_files.csv'):
    """Reads in scraped json file and writes CSV table of media files.

    One row is written per media entry found under each record's 'video',
    'audio' and 'documents' lists, holding the record key, the media type
    label and the entry's 'description' and 'url'. Records are visited in
    sorted key order; within a record, video entries come first, then audio,
    then documents.

    Parameters
    ----------
    filename : path of the JSON file; it must contain a top-level 'records'
        mapping.
    outfilename : path of the CSV file to write (no index column).
    """
    header = ['COMPLAINT_NUMBER', 'MEDIA_FILE_TYPE', 'IPRA_PORTAL_LABEL', 'IPRA_URL']
    # (JSON key, CSV label) pairs, listed in the order rows are emitted per record.
    media_kinds = [('video', 'VIDEO'), ('audio', 'AUDIO'), ('documents', 'DOCUMENT')]

    with open(filename) as handle:
        records = json.load(handle)['records']

    rows = [
        [complaint_no, label, item['description'], item['url']]
        for complaint_no in sorted(records)
        for key, label in media_kinds
        for item in records[complaint_no][key]
    ]

    pd.DataFrame(rows, columns=header).to_csv(outfilename, index=False)

In [4]:
# Export the media table; the output name falls back to the function default.
json_to_media_csv(filename=datfilename)

# JSON to table of subjects

In [5]:
def json_to_subject_csv(filename, outfilename='subjects.csv'):
    """Reads in scraped json file and writes CSV table of subjects in cases.

    One row is written per name in each record's 'subjects' list, carrying
    the record key plus that record's 'incident_type' and
    'incident_date_time'. Records are visited in sorted key order; a record
    with an empty 'subjects' list contributes no rows.

    Parameters
    ----------
    filename : path of the JSON file; it must contain a top-level 'records'
        mapping.
    outfilename : path of the CSV file to write (no index column).
    """
    header = ['COMPLAINT_NUMBER', 'INCIDENT_TYPE', 'INCIDENT_DATETIME', 'SUBJECT_NAME']

    # Load the scraped records
    with open(filename) as handle:
        records = json.load(handle)['records']

    # Flatten to one row per (record, subject); record fields are looked up
    # per subject row, so they are only touched when a subject exists.
    rows = [
        [complaint_no, record['incident_type'], record['incident_date_time'], subject]
        for complaint_no, record in sorted(records.items(), key=lambda pair: pair[0])
        for subject in record['subjects']
    ]

    # Let pandas handle CSV quoting and writing
    pd.DataFrame(rows, columns=header).to_csv(outfilename, index=False)

In [6]:
# Export the subjects table; the output name falls back to the function default.
json_to_subject_csv(filename=datfilename)

# JSON to incidents summary

In [9]:
def json_to_incidents_csv(filename,outfilename='incidents_summary.csv'):
    """Reads in scraped json file and writes CSV table summarizing incidents.

    One row is written per record, in sorted record-key order, with the
    record's date/type fields, the number of entries in its 'subjects',
    'video', 'audio' and 'documents' lists, and its notes.

    Parameters
    ----------
    filename : path of the JSON file; it must contain a top-level 'records'
        mapping.
    outfilename : path of the CSV file to write (no index column).

    Raises
    ------
    KeyError
        If 'records' or any per-record field other than 'notes' is missing.
    """

    # Read file
    with open(filename) as datafile:
        data = json.load(datafile)

    columns = ['COMPLAINT_NUMBER',
               'POST_DATETIME',
               'INCIDENT_TYPE',
               'INCIDENT_DATETIME',
               'IPRA_NOTIF_DATE',
               'SUBJECTS_COUNT',
               'VIDEO_COUNT',
               'AUDIO_COUNT',
               'DOCUMENT_COUNT',
               'NOTES'
              ]

    # Create list of lists of data we want in csv
    rowlist = []
    for recordno in sorted(data['records'].keys()):
        entry = data['records'][recordno]
        rowlist.append([
                recordno,
                entry['post_date_time'],
                entry['incident_type'],
                entry['incident_date_time'],
                entry['ipra_notification_date'],
                len(entry['subjects']),
                len(entry['video']),
                len(entry['audio']),
                len(entry['documents']),
                # Empty, null and absent notes all become None (an empty CSV
                # cell); calling len() on a null value would raise TypeError.
                entry.get('notes') or None
            ])

    # Use pandas to write csv file
    df = pd.DataFrame(rowlist,columns=columns)
    df.to_csv(outfilename,index=False)

In [10]:
# Export the incidents summary; the output name falls back to the function default.
json_to_incidents_csv(filename=datfilename)