# Edge transparency
This note books experiments with edge transparecy in  order to make the visualisation of large graph less crowded.
Thie idea is to increase the transparency (down to zero) of edges that are not 'focused'.

### Edge color
To denote the amount of currency transferred we use colors from blue to red (the warmer the color the higher the amount)

### Node Sizes
Nodes with higher degree are bigger but the size should be capped

In [15]:
import cugraph
import cudf
import pandas as pd
import numpy as np
import time, random, math

from graph_tool.all import *

from pprint import pprint

# from blue to red
colors = [(0, 0, 255, 1), (255, 255, 0, 1), (255, 0, 0, 1)]

def make_gpu_frame(data):
    '''
    given data it makes a cuGraph gpu frame which is similar to a pandas frame
    '''
    # create mapping from address to vertex id
    column_values = data[["source", "target"]].values.ravel()
    unique_values = pd.unique(column_values)
     # index to vertex
    mapping = pd.DataFrame(unique_values).reset_index().rename(columns = {"index":"vertex",0:"address"} )
    print(mapping.head())
    
    # source target   amount  blockNumber  source_id 
    
    # associate source id to the source address
    data_ids = data.merge(mapping.rename(columns={"address":"source"})).rename(columns = {"vertex":"source_id"})
    
    # associate target_id with target address
    data_ids = data_ids.merge(mapping.rename(columns={"address":"target"})).rename(columns = {"vertex":"target_id"})
    
    data_ids = data_ids[["source_id", "target_id"]]
    
    data_ids = cudf.DataFrame.from_pandas(data_ids)
    graph = cugraph.Graph()
    graph.from_cudf_edgelist(data_ids, source='source_id', destination='target_id')
    return graph

def make_cuGraph_layout(graph):
    '''
   A layout is a pd frame like below:
   
                  x         y  vertex
        0  0.463815 -0.143211      29
        1 -0.549100 -5.686268     139
        2 -6.020377 -1.174636     186
        3 -1.020328  3.608905      73
        4 -5.511849  2.750963     123
    '''
    
    layout = cugraph.layout.force_atlas2(graph, max_iter=500,
                                     strong_gravity_mode=True,
                                     barnes_hut_theta=1.2,
                                     outbound_attraction_distribution=False,
                                     gravity=1,
                                     scaling_ratio=1)
    layout = layout.to_pandas()
    return layout

def get_interest_points(layout):
    '''
    center, top-left etc..
    '''
    mean_x = layout['x'].mean()
    mean_y = layout['y'].mean()
    
    max_x = layout['x'].max()
    min_x = layout['x'].min()
    max_y = layout['y'].max()
    min_y = layout['y'].min()

    std_x = layout['x'].std()
    std_y = layout['y'].std()

    return {'center':(mean_x, mean_y), 
            'tl': (min_x, max_y), 
            'tr': (max_x, max_y), 
            'bl': (min_x, min_y), 
            'br': (max_x, min_y)
           }

def convert_to_graph_tool_graph(layout, data=pd.DataFrame({'A' : []})):
    '''
    Takes a layout and gives the info needed to render the graph with graph tool
    '''
    g = Graph(directed=True)

    # take the layout from cugraph and convert it to a graph-tool vertex property
    vprop_double = g.new_vertex_property("vector<double>")
    
    max_x = -99999
    min_x = 99999
    max_y = -99999
    min_y = 99999
    last_i = 0
    for i, row in enumerate(layout.sort_values("vertex")[["x", "y"]].values):
        print(i)
        vprop_double[i] = row
        
        if row[0] > max_x:
            max_x = row[0]
            
        if row[0] < min_x:
            min_x = row[0]
            
        if row[1] > max_y:
            max_y = row[1]
            
        if row[1] < min_y:
            min_y = row[1]
            
        last_i = i
    max_max = max(max_x, max_y)
    min_min = min(min_x, min_y)
    vprop_double[last_i + 1] = (max_max ,max_max)
    vprop_double[last_i + 2] = (min_min ,min_min)
      
    if not data.empty:
        print(data['amount'])
        eweight = g.new_edge_property("string")
        data = data.append({'source':str(last_i + 1) + "fakemax",
                     'target':str(last_i + 1) + "fakemax",
                     'amount':'9999999'}, ignore_index=True)
        
        data = data.append({'source':str(last_i + 1) + "fakemin",
                     'target':str(last_i + 1) + "fakemin",
                     'amount':'9999999'}, ignore_index=True)
        print(data.tail())
        print()
        g.add_edge_list(data[["source", "target", "amount"]].values, hashed=True, eprops=[eweight])
        print(data[["source", "target", "amount"]])
        print("------")
        print(vprop_double.get_2d_array([0,1]))
        
        # create an edge property representing edge width
        esize = g.new_edge_property("float")
        esize.a = list(map(lambda x: float(x), list(eweight)))
        
        deg = g.degree_property_map("in")
        deg.a = 4 * (np.sqrt(deg.a) * 0.1 + 0.4)
        print(deg.a)
        print("deg.a[last_i + 1] " + str(deg.a[last_i + 1]))
        
        

        
        thirdy_third_q = np.quantile(esize.a, 0.33)
        sixty_six_q = np.quantile(esize.a, 0.66)
        edge_colors = g.new_edge_property("vector<double>")
        
        edge_length_property = g.new_edge_property("double")
        
        for e in g.edges():
            s = esize[e]
            edge_colors[e] = colors[0] if s < thirdy_third_q else colors[1] if s < sixty_six_q else colors[2]
            
            s = e.source()
            t = e.target()
            distance = math.dist(vprop_double[s], vprop_double[t])
            edge_length_property[e] = distance
            
        
        #edge_to_colors = list(map(lambda index : colors[index], edge_to_index))
        
        esize.a = esize.a / (esize.a.max() / 1000.0)
        esize.a = np.sqrt(esize.a)
    else:
        esize = None

    # create a degree map for different vertex sizes based on vertex degree
    
        
    return {'graph': g, 
            'deg': deg,
            'pos':vprop_double,
            'esize':esize,
           'colors': edge_colors,
           'max_min_length':[max(edge_length_property.a), min(edge_length_property.a)],
           'edge_lengths': edge_length_property}

def draw(graph_data, zoom_level):
    '''
    Graph tool rendering
    '''
    
    number_of_images = 4 ** zoom_level
    divide_by = int(math.sqrt(number_of_images))
    tuples = []
    for x in range(0, divide_by):
        for y in range(0, divide_by):
            tuples.append((x, y))
            
            
    xs = graph_data['pos'].get_2d_array([0])
    ys = graph_data['pos'].get_2d_array([1])
    width = xs.max() - xs.min()
    height = ys.max() - ys.min()
    min_x = xs.min()
    max_x = xs.max()
    min_y = ys.min()
    max_y = ys.max()

    tc = TransparencyCalculator(graph_data['max_min_length'][1], graph_data['max_min_length'][0],3)
    for e in graph_data['graph'].edges():
        l = graph_data['edge_lengths'][e]
        c = graph_data["colors"][e]
        graph_data["colors"][e] = (c[0], c[1], c[2], tc.get_transparency(l, zoom_level))
            
    for t in tuples:
        fit =  (
        round(min_x + ((width / divide_by) * t[0]), 2),
        round(min_y + ((height / divide_by) * t[1]), 2),
        round(width / divide_by, 2),
        round(height / divide_by, 2))
        
        print(fit)
                
        file_name = "./zooms/" + "z_" + str(zoom_level) + "x_" + str(t[0]) + "y_" + str(t[1]) + ".png"
        
       
        
        
        
            
            
        print(graph_data['graph'])
        
        
        
        
        graph_draw(graph_data['graph'],
            pos=graph_data['pos'],
            bg_color='grey',
            vertex_size=graph_data['deg'],
            vertex_fill_color=[1,1,1,1],
            edge_color=graph_data["colors"],
            output=file_name,
            output_size=[1000, 1000],
            fit_view=fit,
            adjust_aspect=False,
                  fit_view_ink=True)
        
   
    #### old
    xs = graph_data['pos'].get_2d_array([0])
    ys = graph_data['pos'].get_2d_array([1])
    width = xs.max() - xs.min()
    height = ys.max() - ys.min()
    center_x = xs.max() + xs.min()
    center_y = ys.max() + ys.min()
    print("xs.max() " + str(xs.max()))
    print("xs.min() " + str(xs.min()))
    print("ys.min() " + str(ys.min()))
    print("ys.max() " + str(ys.max()))
    print("w : " + str(width))
    print("h : " + str(height))
    print((np.mean(xs), np.mean(ys)))    

In [11]:
import numpy as np


# production
class TransparencyCalculator:

    def __init__(self, min_length, max_length, zoom_levels):
        if min_length < 0:
            raise Exception("min_length has to be positive")
        self.min_length = min_length
        self.max_length = max_length
        self.zoom_levels = zoom_levels
        self.intervals = []
        self._calculate_intervals()

    def _calculate_intervals(self):

        step = (self.max_length - self.min_length) / self.zoom_levels
        for i in range(0, self.zoom_levels):
            interval = (self.min_length + (i * step),
                        self.min_length + ((i + 1) * step))
            self.intervals.append(interval)
        self.intervals.reverse()

    def get_transparency(self, edge_length, zoom_level):
        #print("self.min_length: " + str(self.min_length))
        #print("self.max_length: " + str(self.max_length))
        
        interval = self.intervals[zoom_level]
        #print("interval: ", interval)
        length_equal_min = self.I(interval[0], self.min_length)
        result_left_equation = (edge_length - self.min_length + length_equal_min) / \
                               (interval[0] - self.min_length + length_equal_min)

        length_equal_max = self.I(interval[1], self.max_length)
        result_right_equation = (self.max_length - edge_length + length_equal_max) / \
                                (self.max_length - interval[1] + length_equal_max) 

        return min(1, result_left_equation, result_right_equation)

    def I(self, a, b):
        return 1 if a == b else 0

In [3]:
# Tests for transparency class

for min_length, max_length, zoom_levels in [(1, 10, 3), (10, 100, 10), (0, 10, 10)]:
    tc = TransparencyCalculator(min_length, max_length, zoom_levels)
    intervals = tc.intervals
    assert len(intervals) > 0
    assert len(intervals) == zoom_levels

    lengths = []
    for interval in intervals:
        length = interval[1] - interval[0]
        assert length > 0
        lengths.append(length)

    assert np.std(lengths) < 0.1

try:
    exception_raised = False
    tc = TransparencyCalculator(-1, 10, 2)
except Exception:
    exception_raised = True
assert exception_raised

###
tc = TransparencyCalculator(1, 10, 3)
long_edge = 10
totally_zoomed_out = 0
transparency = tc.get_transparency(long_edge, totally_zoomed_out)
assert transparency == 1
short_edge = 1
transparency = tc.get_transparency(short_edge, totally_zoomed_out)
assert transparency == 0
medium_edge = 4
transparency = tc.get_transparency(medium_edge, totally_zoomed_out)
assert 0.4 < transparency < 0.6

t = tc.get_transparency(10, 2)
assert t == 0
t = tc.get_transparency(1, 2)
assert t == 1
t = tc.get_transparency(7, 2)
assert 0.4 < t < 0.6

In [4]:
# Loading the data
small_data = pd.read_csv("./networks/small.csv", dtype={'amount': object})
medium_data = pd.read_csv("./networks/medium.csv", dtype={'amount': object})
big_data = pd.read_csv("./networks/large.csv", dtype={'amount': object})

In [5]:
# Converting raw data to cuGraph frames
small_gpu_frame = make_gpu_frame(small_data)
medium_gpu_frame = make_gpu_frame(medium_data)
big_gpu_frame = make_gpu_frame(big_data)

   vertex                                     address
0       0  0x5719e1bc888efa00dc5b2d992ca364889129a869
1       1  0xb343677875f5de619dbdff475378716101dcb3ad
2       2  0x997c48ce1af0ce2658d3e4c0bea30a0eb9c98382
3       3  0x8533a0bd9310eb63e7cc8e1116c18a3d67b1976a
4       4  0xe9c1a41b0ba27e80b138c0e17e7cc681b26099cf
   vertex                                     address
0       0  0x0000000000000000000000000000000000000000
1       1  0xdd2a5b646bb936cbc279cbe462e31eab2c309452
2       2  0xa631ec94edce1fe78cd7344a029b6c37c0df7dca
3       3  0x275c79f37be0561c8e6525233ba5b6a75a15a5da
4       4  0x7be8a700c46b68cdcf11112bdc64bce592976085
   vertex                                     address
0       0  0x88e2efac3d2ef957fcd82ec201a506871ad06204
1       1  0x67fa2c06c9c6d4332f330e14a66bdf1873ef3d2b
2       2  0x00954e1c8fcf1c5c274aa10b1260a94564f47b58
3       3  0x023656f850bbf662e71006b3891e797653503286
4       4  0x8188a619f72f9a889d6079848fb34cf48371890c


In [6]:
# Running cuGraph force atlas 2 to generate (x,y) pairs for each graph
small_layout = make_cuGraph_layout(small_gpu_frame)
medium_layout = make_cuGraph_layout(medium_gpu_frame)
big_layout = make_cuGraph_layout(big_gpu_frame)
print("The graphs have: " + str(len(small_layout)) + " " + str(len(medium_layout)) + " " + str(len(big_layout)) + " nodes")

The graphs have: 197 11308 508083 nodes


In [16]:
# Rendering the small graph
small = convert_to_graph_tool_graph(small_layout, small_data)
draw(small, zoom_level=0)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
0           3e+08
1         3.6e+07
2         1500000
3          750000
4            4050
          ...    
273    178771.426
274     22923.242
275       2606110
276     15081.987
277    804848.205
Name: amount, Length: 278, dtype: object
                                         source  \
275  0x274f3c32c90517975e29dfc20

In [11]:
# Rendering hte medium graph
medium = convert_to_graph_tool_graph(medium_layout, medium_data)
draw(medium, zoom_level=1)

0        100000000000000000
1             2500000000000
2             2500000000000
3             2500000000000
4             2500000000000
                ...        
20907        37425500000000
20908         2500000000000
20909        44707545000000
20910        37425500000000
20911        37425500000000
Name: amount, Length: 20912, dtype: object
                                          address  blockNumber eventName  \
20909  0xef51c9377feb29856e61625caf9390bd0b67ea18    8471887.0  Transfer   
20910  0xef51c9377feb29856e61625caf9390bd0b67ea18    8496927.0  Transfer   
20911  0xef51c9377feb29856e61625caf9390bd0b67ea18    8497982.0  Transfer   
20912                                         NaN          NaN       NaN   
20913                                         NaN          NaN       NaN   

                                           source  \
20909  0x274f3c32c90517975e29dfc209a23f315c1e5fc7   
20910  0xdfba4b7cf78dfa133575fd1da44aebf4955ccbb4   
20911  0xf6a1161e7856b0cf1e4a5495

In [160]:
# Rendering hte medium graph
medium = convert_to_graph_tool_graph(medium_layout, medium_data)
draw(medium, zoom_level=1)

0        100000000000000000
1             2500000000000
2             2500000000000
3             2500000000000
4             2500000000000
                ...        
20907        37425500000000
20908         2500000000000
20909        44707545000000
20910        37425500000000
20911        37425500000000
Name: amount, Length: 20912, dtype: object
                                          address  blockNumber eventName  \
20909  0xef51c9377feb29856e61625caf9390bd0b67ea18    8471887.0  Transfer   
20910  0xef51c9377feb29856e61625caf9390bd0b67ea18    8496927.0  Transfer   
20911  0xef51c9377feb29856e61625caf9390bd0b67ea18    8497982.0  Transfer   
20912                                         NaN          NaN       NaN   
20913                                         NaN          NaN       NaN   

                                           source  \
20909  0x274f3c32c90517975e29dfc209a23f315c1e5fc7   
20910  0xdfba4b7cf78dfa133575fd1da44aebf4955ccbb4   
20911  0xf6a1161e7856b0cf1e4a5495

In [158]:
# Rendering hte medium graph
medium = convert_to_graph_tool_graph(medium_layout, medium_data)
draw(medium, zoom_level=2)

0        100000000000000000
1             2500000000000
2             2500000000000
3             2500000000000
4             2500000000000
                ...        
20907        37425500000000
20908         2500000000000
20909        44707545000000
20910        37425500000000
20911        37425500000000
Name: amount, Length: 20912, dtype: object
                                          address  blockNumber eventName  \
20909  0xef51c9377feb29856e61625caf9390bd0b67ea18    8471887.0  Transfer   
20910  0xef51c9377feb29856e61625caf9390bd0b67ea18    8496927.0  Transfer   
20911  0xef51c9377feb29856e61625caf9390bd0b67ea18    8497982.0  Transfer   
20912                                         NaN          NaN       NaN   
20913                                         NaN          NaN       NaN   

                                           source  \
20909  0x274f3c32c90517975e29dfc209a23f315c1e5fc7   
20910  0xdfba4b7cf78dfa133575fd1da44aebf4955ccbb4   
20911  0xf6a1161e7856b0cf1e4a5495