In [4]:
import pandas as pd
import re
import json

In [5]:
# IBRA tables: `ibra` carries a `total_ce` figure per region code, and
# `ibra_bioregions` carries the per-region area / protection statistics.
ibra, ibra_bioregions = map(
    pd.read_csv,
    ('data/ibra.csv', 'data/ibra_bioregions.csv'),
)

# IUCN tables: protected area per region broken down by IUCN category, and
# the national summary per IUCN category.
bioregion_by_IUCN, IUCN_category = map(
    pd.read_csv,
    ('data/bioregion_by_IUCN.csv', 'data/IUCN_category.csv'),
)

### IBRA

In [6]:
# Preview the first five rows of the per-region `total_ce` table.
ibra.head(n=5)

Unnamed: 0,IBRA Region Code,total_ce
0,ARC,1
1,ARP,2
2,AUA,11
3,AVW,16
4,BBN,5


In [7]:
# Preview the first five rows of the bioregion area / protection table.
ibra_bioregions.head(n=5)

Unnamed: 0,IBRA Region Name,IBRA Region Code,Jurisdiction(s),IBRA Area (ha),1 Area Protected (ha),% IBRA Region Protected,2 Contribution to NRS (%)
0,Arnhem Coast,ARC,NT,3335669,1779616,53.35,1.17
1,Arnhem Plateau,ARP,NT,2306023,1728677,74.96,1.14
2,Australian Alps,AUA,"ACT, NSW, VIC",1232981,788727,63.97,0.52
3,Avon Wheatbelt,AVW,WA,9517104,375948,3.95,0.25
4,Ben Lomond,BEL,TAS,657500,106341,16.17,0.07


In [8]:
# Attach each region's `total_ce` to its bioregion statistics. This is an
# inner join on the region code, so a region missing from either table is
# dropped from the result.
ibra_bioregions_w_total_ce = ibra_bioregions.merge(
    ibra,
    how="inner",
    on="IBRA Region Code",
)
ibra_bioregions_w_total_ce.head(n=5)

Unnamed: 0,IBRA Region Name,IBRA Region Code,Jurisdiction(s),IBRA Area (ha),1 Area Protected (ha),% IBRA Region Protected,2 Contribution to NRS (%),total_ce
0,Arnhem Coast,ARC,NT,3335669,1779616,53.35,1.17,1
1,Arnhem Plateau,ARP,NT,2306023,1728677,74.96,1.14,2
2,Australian Alps,AUA,"ACT, NSW, VIC",1232981,788727,63.97,0.52,11
3,Avon Wheatbelt,AVW,WA,9517104,375948,3.95,0.25,16
4,Ben Lomond,BEL,TAS,657500,106341,16.17,0.07,7


### IUCN

In [9]:
# Preview the first five rows of the per-region IUCN category breakdown.
bioregion_by_IUCN.head(n=5)

Unnamed: 0,IBRA Region Name,IBRA Region Code,IA,IB,II,III,IV,I-IV Total,% IBRA Region,V,VI,V-VI Total,% IBRA Region.1,Not Applicable,Not Assigned,Total Area Protected (ha),IBRA Region Area (ha),% IBRA Region protected
0,Arnhem Coast,ARC,,,,,,-,0.0,396725.0,1382891,1779616,53.35,,,1779616,3335669,53.35
1,Arnhem Plateau,ARP,,,486415.0,,,486415,21.09,,1242262,1242262,53.87,,,1728677,2306023,74.96
2,Australian Alps,AUA,11079.0,27674.0,738549.0,10296.0,244.0,787842,63.9,0.0,886,886,0.07,,,788727,1232981,63.97
3,Avon Wheatbelt,AVW,163602.0,,200779.0,,165.0,364547,3.83,,4850,4850,0.05,,6552.0,375948,9517104,3.95
4,Ben Lomond,BEL,28.0,,19950.0,5820.0,60610.0,86408,13.14,601.0,19333,19933,3.03,,,106341,657500,16.17


In [10]:
# Preview the first five rows of the per-category IUCN summary.
IUCN_category.head(n=5)

Unnamed: 0,IUCN Category,1 Number,2 Area (ha),3 Average size (ha),4 % of Australia,Contribution to NRS (%)
0,IA,2541,15966348,6283,2.07,10.52
1,IB,65,3846201,59172,0.5,2.53
2,II,1084,38096535,35144,4.96,25.1
3,III,2375,1865843,786,0.24,1.23
4,IV,4195,2274848,542,0.3,1.5


In [11]:
# Persist the merged table. The frame's index is just the default row
# counter produced by the merge, so it is not written out: otherwise the
# file gains an unnamed leading column that reappears as "Unnamed: 0" the
# next time it is loaded with pd.read_csv.
ibra_bioregions_w_total_ce.to_csv(
    'data/ibra_bioregions_w_total_ce.csv',
    index=False,
)

### Reformat the IBRA bioregion data for the sunburst graph

In [12]:
# Flatten data/ibra_bioregions.csv into one record per (region, jurisdiction)
# pair so the regions can be regrouped by state afterwards.
#
# The file is handled as plain text rather than with a CSV parser:
#   1. inside quoted numeric fields of the form `" 1,234,567"` the thousands
#      separators are removed,
#   2. every double quote is dropped,
#   3. the line is split on the remaining commas.
# After step 3 a quoted multi-jurisdiction field such as "ACT, NSW, VIC" has
# become several fields sitting between the two leading columns (region name,
# region code) and the four trailing numeric columns.

# Only the read needs the file handle; all parsing happens after it is closed.
with open('data/ibra_bioregions.csv') as opener:
    raw_lines = opener.readlines()

# Column names from the first line (empty list for an empty file).
header = raw_lines[0].strip().split(',') if raw_lines else []

# A quote, a space, a digit, then any run of digits and commas: the opening
# part of a quoted number. Removing the commas from the whole match in one
# pass replaces three repeated single-comma substitutions and is not limited
# to three separators per number.
quoted_number = re.compile(r'" \d[\d,]*')

data = [
    quoted_number.sub(lambda m: m.group(0).replace(',', ''), line)
    .strip()
    .replace('"', '')
    for line in raw_lines[1:]
]

data_ungroup = []
for line in data:
    if not line:
        # Blank line (e.g. a trailing newline at end of file): nothing to parse.
        continue
    values = line.split(',')
    if values[2] == '-':
        # Rows whose jurisdiction column is '-' are left out.
        continue
    name_and_code = values[:2]
    # float() tolerates the leading space left over from `" 123"` fields.
    numeric_columns = [float(field) for field in values[-4:]]
    # Everything between the two leading and four trailing columns is a
    # jurisdiction; emit one record per jurisdiction.
    for state in (field.strip() for field in values[2:-4]):
        data_ungroup.append(name_and_code + [state] + numeric_columns)

In [13]:
# Bucket the flattened records by jurisdiction (element 2 of each record).
# The jurisdiction itself is removed from the stored record, leaving
# [region name, region code, <four numeric columns>].
data_groupby_state = {}
for record in data_ungroup:
    jurisdiction = record[2]
    without_jurisdiction = record[:2] + record[3:]
    data_groupby_state.setdefault(jurisdiction, []).append(without_jurisdiction)

In [14]:
# Build the nested name/children/value hierarchy for the sunburst graph:
# root -> state -> region -> two leaves, then write it out as JSON.
#
# Each `v` is [region name, region code, <four numeric columns>], so v[-1] is
# the last numeric column (contribution to NRS, %) and v[-2] the one before
# it (% of the IBRA region protected).
data_json = {'name': 'flare', 'children': []}

for state, values in data_groupby_state.items():
    data_json['children'].append({
        'name': state,
        'children': [
            {
                'name': v[0],
                # Both leaves get half of the region's NRS contribution, so
                # the region's total size equals that contribution and the
                # two labels share it evenly. True division is required:
                # floor division (//) turned every contribution below 2 %
                # into 0.0, which gives the region no area in the graph.
                # NOTE(review): the second leaf is labelled with v[-2] but
                # sized with v[-1]; this looks like a deliberate even split,
                # confirm it is not meant to be sized by v[-2].
                'children': [
                    {
                        'name': f'Contribution to NRS(%): {v[-1]}',
                        'value': v[-1] / 2
                    },
                    {
                        'name': f'IBRA Region Protected(%): {v[-2]}',
                        'value': v[-1] / 2
                    }
                ]
            } for v in values]
    })

with open('data/sunburst_graph_data.json', 'w') as outfile:
    json.dump(data_json, outfile, indent=2)