In [1]:
import pandas as pd
import numpy as np
import json, urllib, ast
from ipysankeywidget import SankeyWidget
from ipywidgets import Layout

In [2]:
# Utility data dictionary, parsed from JSON.
with open('data/data_dict_utility.json') as dict_file:
    data_dict = json.load(dict_file)

# Link table (one row per source -> target link and year).
data = pd.read_csv('data/links_erl.csv')

In [3]:
# Keep only the rows whose `year` is 2015, then preview the first five.
data = data.loc[data['year'].eq(2015)]
data.head()

Unnamed: 0,year,data_year,source,target,cumulative_volume_af,transmission_kwh/af,treatment_kwh/af,used_vol_af
1,2015,2010,1801001PD,1801001E,1693.0,163.1,0.0,1693.0
7,2015,2010,1801007NPD,1801007E,4.0,195.72,0.0,4.0
12,2015,2010,1801007PD,1801007E,2235.04,163.1,0.0,2235.04
18,2015,2010,1801007REC,1801007NPD,4.0,0.0,236.0,4.0
22,2015,2010,1801009PD,1801017PD,337.003098,25.0,0.0,333.189604


In [4]:
data[data['target'] == 'SW1805SWP08']

Unnamed: 0,year,data_year,source,target,cumulative_volume_af,transmission_kwh/af,treatment_kwh/af,used_vol_af
9762,2015,2010,SW1804SWP07,SW1805SWP08,59963.05606,965.235,0.0,0.0


In [5]:
# oxnard 1807250
# fresno 1803033

# Sankey of the upstream graph without compensated volume.
from sub_network import subWESTnet

gi = subWESTnet('data/links_final021919.csv', 2015)
gi.upstream('1805085E')
gi.graph.edges.values()  # value is discarded; only the attribute access happens

# One widget link per edge. Zero-volume edges get a tiny placeholder value
# and a different colour.
sank_dicts = []
for edge in gi.edges.values():
    is_unused = edge['used_vol_af'] == 0
    sank_dicts.append({
        'source': edge['source'],
        'target': edge['target'],
        'value': .00001 if is_unused else edge['used_vol_af'],
        'color': 'goldenrod' if is_unused else 'steelblue',
    })

SankeyWidget(
    layout=Layout(width="1500", height="1000"),
    margins=dict(top=10, bottom=0, left=100, right=100),
    links=sank_dicts,
)

# Code for dictionary to use in sankey widget

In [6]:
from sub_network import subWESTnet

def _balancing_links(links, network):
    """Return synthetic "Other" links that close each source node's volume gap.

    Parameters
    ----------
    links : pandas.DataFrame
        Links with ``source``, ``target`` and ``value`` columns.
    network : pandas.DataFrame
        Raw link table; a node that never appears in its ``target`` column
        is treated as a resource.

    Returns
    -------
    list of dict
        One ``{'source', 'target', 'value'}`` record per unbalanced node.
    """
    # Built once instead of re-filtering the raw table for every node.
    nodes_with_inflow = set(network['target'])

    extras = []
    for node in links['source'].unique():
        out_vol = links.loc[links['source'] == node, 'value'].sum()
        in_vol = links.loc[links['target'] == node, 'value'].sum()
        is_end = node[-1:] == 'E'
        is_resource = node not in nodes_with_inflow

        if out_vol < in_vol and not is_end:
            # More arrives than leaves: send the surplus to "<node> to Other".
            extras.append({'source': node,
                           'target': node + " to Other",
                           'value': in_vol - out_vol})
        elif out_vol > in_vol and not is_resource:
            # More leaves than arrives: feed the gap from "Other to <node>".
            extras.append({'source': "Other to " + node,
                           'target': node,
                           'value': out_vol - in_vol})
    return extras


def _prune_dead_ends(links):
    """Repeatedly drop links whose target has no outgoing link.

    Targets ending in ``'Other'`` or ``'E'`` are kept. Pruning runs until a
    pass removes nothing, so chains of any depth are removed. The surviving
    rows keep their original index labels.
    """
    while True:
        sources = set(links['source'])
        dead = {
            target for target in links['target']
            if target not in sources
            and target[-5:] != 'Other'
            and target[-1:] != 'E'
        }
        if not dead:
            return links
        links = links[~links['target'].isin(dead)]


def get_sankey_file(data_path, year, utility, upstream=True):
    """Build the Sankey link table for the network upstream of ``utility``.

    Despite the name, nothing is written to disk; a DataFrame is returned.

    Parameters
    ----------
    data_path : str
        Path of the links CSV. It is passed to ``subWESTnet`` and also read
        directly (the ``target`` column is used to detect resource nodes).
    year : int
        Year passed to ``subWESTnet``.
    utility : str
        Node id whose upstream graph is traced with ``gi.upstream``.
    upstream : bool, optional
        Currently unused; accepted for backward compatibility. The upstream
        graph is always built.

    Returns
    -------
    pandas.DataFrame
        Columns ``source``, ``target``, ``value``. Edges whose
        ``used_vol_af`` is 0 are left out, "Other" links are added for
        unbalanced nodes, and dead-end branches are pruned. Index labels of
        pruned rows are left as gaps.
    """
    gi = subWESTnet(data_path, year)
    gi.upstream(utility)

    columns = ['source', 'target', 'value']
    # Explicit columns keep the frame well-formed even when no edge survives
    # (the original raised KeyError on an empty edge set).
    links = pd.DataFrame(
        [
            {'source': edge['source'],
             'target': edge['target'],
             'value': edge['used_vol_af']}
            for edge in gi.edges.values()
            if edge['used_vol_af'] != 0
        ],
        columns=columns,
    )

    network = pd.read_csv(data_path)
    extras = _balancing_links(links, network)
    if extras:
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        links = pd.concat(
            [links, pd.DataFrame(extras, columns=columns)],
            ignore_index=True,
        )

    return _prune_dead_ends(links)

SankeyWidget(layout=Layout(width="1500", height="1000"), 
             margins=dict(top=10, bottom=0, left=100, right=100),
             links=get_sankey_file('data/links_erl.csv', 2010, '1805085E', upstream=True).to_dict('records'))

Unweighted upstream graph built.


SankeyWidget(layout=Layout(height='1000', width='1500'), links=[{'source': '1805089PD', 'target': '1805085PD',…

------

# Drop links that start or end at nodes excluded from the diagram.
excluded_sources = ['SW1802SWP02', 'SW1802SWP01', 'R_Feather',
                    'SW_CVP-ONL', 'to SW_CVP-Coyot']
excluded_targets = ['SW1802SWP02', 'SW1802SWP01', 'SW_CVP-ONL']

keep_source = ~sjdf['source'].isin(excluded_sources)
keep_target = ~sjdf['target'].isin(excluded_targets)
sjdf = sjdf[keep_source & keep_target]
sjdf

In [8]:
# Synthetic "Other" links that balance inflow and outflow of each source node.
nodes = []
for node in sjdf['source'].unique():
    # A node with no incoming row in `data` is a resource;
    # an id ending in 'E' is an end node.
    is_resource = data[data['target'] == node].shape[0] == 0
    is_end = node[-1:] == 'E'

    # Total volume leaving and entering this node within sjdf.
    outv = sjdf.loc[sjdf['source'] == node, 'value'].sum()
    inv = sjdf.loc[sjdf['target'] == node, 'value'].sum()

    if outv < inv and not is_end:
        # Surplus inflow is routed to "<node> to Other".
        nodes.append({'value': inv - outv,
                      'source': node,
                      'target': node + " to Other"})
    elif outv > inv and not is_resource:
        # Missing inflow is supplied from "Other to <node>".
        nodes.append({'value': outv - inv,
                      'source': "Other to " + node,
                      'target': node})

In [10]:
# Combine the real links with the balancing links.
# pd.concat replaces DataFrame.append, which pandas 2.0 removed.
extra_ = pd.concat(
    [sjdf.drop('color', axis=1), pd.DataFrame(nodes)],
    ignore_index=True,
)

# Prune dead ends: drop every link whose target has no outgoing link, unless
# the target ends in 'Other' or 'E'. Repeat until a pass removes nothing.
# The original always ran exactly 50 passes, which is wasteful for shallow
# graphs and too few for chains deeper than 50.
a = []  # every pruned target name, in pruning order
while True:
    sources = set(extra_['source'])
    drop = [
        target for target in extra_['target']
        if target not in sources
        and target[-5:] != 'Other'
        and target[-1:] != 'E'
    ]
    if not drop:
        break
    # Boolean masking keeps the original index labels of surviving rows.
    extra_ = extra_[~extra_['target'].isin(drop)]
    a.extend(drop)

In [11]:
extra_

Unnamed: 0,source,target,value
0,1805089PD,1805085PD,58952.53
1,SW1805SWP08,1805089PD,29000.0
2,SW1805081EB,1805089PD,28884.0
6,SW_CVP-Coyot,1805089PD,65192.0
7,RES1805089,1805089PD,41006.0
8,GW1806-3-03,GW1806-3-03.01,40183.46
9,GW1806-3-03.01,1805089PD,26216.0
10,GW1805-2-09,GW1805-2-09.02,31878.92
11,GW1805-2-09.02,1805089PD,15892.0
12,GW1805-2-09,1805085PD,46507.37


In [None]:
# Render the links stored in 'sanjose.csv'.
# NOTE: that file is written by the `to_csv('sanjose.csv', ...)` cell further
# down this notebook, so on a fresh kernel that cell has to run first.
SankeyWidget(layout=Layout(width="1500", height="1000"), 
             margins=dict(top=10, bottom=0, left=100, right=100),
             links=pd.read_csv('sanjose.csv').to_dict('records'))


In [246]:
sjdf[sjdf['target'] == 'Lk_OrovilleSWP']['value'].sum()

0.0

In [260]:
len("R_Feather")

9

In [None]:
sjdf.groupby('source').sum()

In [202]:
#sjdf.loc[54, 'value'] = 165184.0
#sjdf = pd.DataFrame(sank_dicts)
#sjdfdict = sjdf.to_dict('records')
sjdf.to_csv('sj.csv', index=False)

In [220]:
import json
sjdi = pd.read_csv('webtest/sj-edit.csv').to_dict('records')

with open('sj.json', 'w') as fp:
    json.dump(sjdi, fp, indent=4)

In [232]:
import json
def saveSankeyJSON(data, filename, verbose=True):
    """Convert a CSV of Sankey links into a ``{"links", "nodes"}`` JSON file.

    Parameters
    ----------
    data : str
        Path of a CSV file with at least ``source`` and ``target`` columns.
        Every row becomes one entry of ``"links"``.
    filename : str
        Path of the JSON file to write.
    verbose : bool, optional
        When True (default, the original behaviour) the full JSON document
        is also printed after the "saved!" message.

    Returns
    -------
    None
    """
    rec = pd.read_csv(data).to_dict('records')

    # List nodes in first-seen order (source before target). The original
    # iterated a set, so the node order could change from run to run.
    seen = set()
    node_list = []
    for link in rec:
        for name in (link['source'], link['target']):
            if name not in seen:
                seen.add(name)
                node_list.append({'name': name})

    # Build the document once and reuse it for the file and the echo.
    payload = {"links": rec, "nodes": node_list}

    with open(filename, 'w') as fp:
        json.dump(payload, fp, indent=4)
    print(filename + " saved!")
    if verbose:
        print(json.dumps(payload, indent=4))

In [233]:
saveSankeyJSON('webtest/sankey.csv', 'fresno.json')

fresno.json saved!
{
    "links": [
        {
            "source": "SW_CVPFKC",
            "target": "1803033E",
            "value": 53121.0
        },
        {
            "source": "1803033GW",
            "target": "1803033PD",
            "value": 29572.94
        },
        {
            "source": "FRES_ID",
            "target": "1803033PD",
            "value": 18266.6
        },
        {
            "source": "SW_CVPFKC",
            "target": "1803033PD",
            "value": 14116.25
        },
        {
            "source": "RES_MLRTN",
            "target": "SW_CVPFKC",
            "value": 67371.23
        },
        {
            "source": "1803033PD",
            "target": "1803033E",
            "value": 61955.79
        },
        {
            "source": "1803033REC",
            "target": "1803033NPD",
            "value": 88.0
        },
        {
            "source": "1803033NPD",
            "target": "1803033E",
            "value": 88.0
        },
        

In [219]:
SankeyWidget(layout=Layout(width="1500", height="1000"), 
             margins=dict(top=10, bottom=0, left=100, right=100),
             links=pd.read_csv('webtest/sj-edit.csv').to_dict('records'))

SankeyWidget(layout=Layout(height='1000', width='1500'), links=[{'color': 'steelblue', 'source': '1805089PD', …

In [None]:
# Graph without compensated volume.
gi = subWESTnet('data/links_erl.csv', 2010)
gi.upstream('')
gi.graph.edges.values()  # value is discarded; only the attribute access happens

# One widget link per edge. Zero-volume edges get a tiny placeholder value
# and a different colour.
sank_dicts = []
for edge in gi.edges.values():
    is_unused = edge['used_vol_af'] == 0
    sank_dicts.append({
        'source': edge['source'],
        'target': edge['target'],
        'value': .00001 if is_unused else edge['used_vol_af'],
        'color': 'goldenrod' if is_unused else 'steelblue',
    })

In [None]:
# Get greyed-out volumes: build the upstream links, then total the volume
# entering and leaving every node.
gi = subWESTnet('data/consolidated_links_2015.csv', 2015)
gi.upstream('1805085E')
gi.graph.edges.values()  # value is discarded; only the attribute access happens

sank_dicts = []
for edge in gi.edges.values():
    is_unused = edge['used_vol_af'] == 0
    sank_dicts.append({
        'source': edge['source'],
        'target': edge['target'],
        'value': .00001 if is_unused else edge['used_vol_af'],
        'color': 'goldenrod' if is_unused else 'steelblue',
    })

flows = pd.DataFrame(sank_dicts)
in_val, out_val = {}, {}

# Outflow per source; a source that is never a target has zero inflow.
for link in sank_dicts:
    source = link['source']
    out_val[source] = sum(flows.loc[flows.source == source, 'value'])
    if not (flows.target == source).any():
        in_val[source] = 0

# Inflow per target; a target that is never a source has zero outflow.
for link in sank_dicts:
    target = link['target']
    in_val[target] = sum(flows.loc[flows.target == target, 'value'])
    if not (flows.source == target).any():
        out_val[target] = 0

# Add grey "balancing" links where a node's inflow and outflow totals disagree.
# For every link, `in_source` is the total flowing INTO the link's source and
# `out_target` is the total flowing OUT OF the link's target (from in_val / out_val).
# new_nodes is keyed by (source, target) tuples so each synthetic link exists once.
# NOTE(review): the thresholds .00003 / .0005 / .00005 look like they detect totals
# made up only of the .00001 placeholder given to zero-volume links -- confirm, and
# confirm whether the three different cut-offs are intentional.
new_nodes = {}
for i in sank_dicts:
    src = i['source']
    trgt = i['target']

    node_value = i['value']
    in_source = in_val[src]
    out_target = out_val[trgt]
    
    # Case 1: both totals are positive but at most .00003, and this link itself is not
    # a placeholder. Add a feeder into the source and a drain out of the target, each
    # carrying this link's value (first link to claim a key wins).
    if in_source <= .00003 and in_source > 0 and out_target <= .00003 and out_target > 0 and i['color'] != 'goldenrod':
        to_src, from_trgt = ('other to ' + src, src), (trgt, trgt + ' to other')
        if to_src not in new_nodes:
            new_nodes[to_src] = {'source': to_src[0], 'target': to_src[1], 'value': node_value, 'color': 'grey'}
        if from_trgt not in new_nodes:
            new_nodes[from_trgt] = {'source': from_trgt[0], 'target': from_trgt[1], 'value': node_value, 'color': 'grey'}
    
    # Case 2: the source has (almost) no inflow.
    elif in_source < .0005:
        if out_target > node_value:
            # Target sends out more than this link delivers: feed the difference in,
            # unless the target's rounded inflow already matches its rounded outflow.
            # This assignment overwrites any earlier entry with the same key.
            new_val = out_target - node_value
            if round(in_val[trgt]) != round(out_target):# round(in_source) != round(node_value) or :
                tup = ('to ' + trgt, trgt)
                new_nodes[tup] = {'source': tup[0], 'target': trgt, 'value': new_val, 'color':'grey'}
                
        elif out_target < node_value:
            # Target sends out less than this link delivers: drain the remainder
            # (first link to claim the key wins).
            new_val = node_value - out_target
            tup = (trgt , trgt + ' to other')
            if tup not in new_nodes:
                new_nodes[tup] = {'source': tup[0], 'target': tup[1], 'value': new_val, 'color':'grey'}
                
    # Case 3: the target has (almost) no outflow.
    elif out_target < .00005:
        if in_source > node_value:
            # Source receives more than this link carries: drain the difference, but
            # only when the source's outflow exactly equals the target's inflow
            # (exact float comparison).
            new_val = in_source - node_value
            if out_val[src] == in_val[trgt]:
                tup = (src, src + ' to other')
                if tup not in new_nodes:
                    new_nodes[tup] = {'source': tup[0], 'target': tup[1], 'value': new_val, 'color':'grey'}
                    
        elif in_source < node_value:
            # Source receives less than this link carries: feed the shortfall in,
            # unless the two values are equal after rounding.
            new_val = node_value - in_source  
            tup = ('to ' + src, src)
            if round(in_source) != round(node_value):
                new_nodes[tup] = {'source': tup[0], 'target': tup[1], 'value': new_val, 'color': 'grey'}
                
    # Case 4: this link carries exactly the source's whole inflow (exact float
    # comparison); balance the target against that value.
    elif in_source == node_value:
        if in_source > out_target:
            new_val = node_value - out_target
            # NOTE(review): the drain is named 'from <trgt>' here but '<trgt> to other'
            # in the cases above -- confirm the naming difference is intended.
            tup = (trgt, 'from ' + trgt)
            new_nodes[tup] = {'source': tup[0], 'target': tup[1],'value': new_val, 'color': 'grey'}
        elif in_source < out_target:
            new_val = out_target - node_value
            tup = ('to ' + trgt, trgt)
            new_nodes[tup] = {'source': tup[0], 'target': tup[1], 'value': new_val, 'color': 'grey'}


# Append the synthetic grey links to the real ones.
sank_dicts.extend([v for v in new_nodes.values()])

In [21]:
### ignore cell ###
# Re-encode the links with integer node ids and write them to 'test.json';
# also write the links, without the colour column, to 'sk.csv'.
json_for_r = {"nodes": [], "links": []}
node_nums = {}

# Number nodes in first-seen order (source before target within a link) and
# record each name once.
for link in sank_dicts:
    for name in (link['source'], link['target']):
        if name not in node_nums:
            node_nums[name] = len(node_nums)
            json_for_r['nodes'].append({"name": name})
num = len(node_nums)

# Links refer to nodes by the numbers assigned above.
json_for_r['links'].extend(
    {
        "source": node_nums[link['source']],
        "target": node_nums[link['target']],
        "value": link['value'],
    }
    for link in sank_dicts
)

pd.DataFrame(sank_dicts).drop('color', axis=1).to_csv('sk.csv', index=False)

with open('test.json', 'w') as f:
    json.dump(json_for_r, f, indent=4)

In [None]:
pd.DataFrame(sank_dicts).drop('color', axis=1).to_csv('sanjose.csv', index=False)

In [42]:
SankeyWidget(layout=Layout(width="1200", height="900"), 
             margins=dict(top=0, bottom=0, left=100, right=100),
             links=sank_dicts)#.auto_save_svg('1803033Eupstream.svg')

SankeyWidget(layout=Layout(height='900', width='1200'), links=[{'source': '1805089PD', 'target': '1805083PD', …

----

-----