In [None]:
# Third-party data / graph libraries
import pandas as pd
import polars as pl
import numpy as np
import networkx as nx
# Standard-library helpers (heap for the merge queue, deepcopy for graph snapshots)
import heapq
import copy
import sys
# Put the sibling framework directory on the import path so `racetrack` resolves
sys.path.insert(1, '../framework')
from racetrack import *
# Single RACETrack instance shared by every cell in this notebook
rt = RACETrack()

In [None]:
# Sample weighted edge list: one (fm, to, ct) record per row.
# It contains repeated pairs, both directions of a pair, and self-loops.
df = pd.DataFrame(
    [
        ('a', 'b', 10),
        ('a', 'c', 20),
        ('a', 'd', 5),
        ('a', 'b', 1),
        ('b', 'a', 20),
        ('b', 'b', 3),
        ('b', 'c', 5),
        ('c', 'a', 10),
        ('c', 'b', 15),
        ('d', 'c', 5),
        ('d', 'a', 10),
        ('d', 'b', 50),
        ('d', 'd', 20),
    ],
    columns=['fm', 'to', 'ct'],
)
def test(df, num, params):
    print(f'{num}.old_pd', rt.dendrogramOrdering(df,                       *params))
    #print(f'{num}.old_pl', rt.dendrogramOrdering(pl.DataFrame(df),         *params))
    print(f'{num}.hdb_pd', rt.dendrogramOrdering_HDBSCAN(df,               *params))
    #print(f'{num}.hdb_pl', rt.dendrogramOrdering_HDBSCAN(pl.DataFrame(df), *params))
    print(f'{num}.tup_pd', rt.dendrogramOrderingTuples(df,               *params))
    #print(f'{num}.tup_pl', rt.dendrogramOrderingTuples(pl.DataFrame(df), *params))
def testAll(df):
    params = ['fm', 'to', None, False]
    test(df,1,params)
    print()
    params = ['fm', 'to', 'ct', False]
    test(df,2,params)
    print()
    params = ['fm', 'to', 'ct', True] 
    test(df,3,params)
# Print every ordering variant for all three parameter combinations on the sample edge list
testAll(df)

In [None]:
params = ['fm', 'to', 'ct', False]
# test(df,4,params)
# Two chord diagrams of the same edge list, one per dendrogram setting
# (use_hdbscan_for_dendrogram=True first, then False), tiled together.
rt.tile([
    rt.chordDiagram(df,
                    relationships=[('fm','to')],
                    count_by='ct',
                    draw_labels=True,
                    txt_h=16,
                    use_hdbscan_for_dendrogram=use_hdbscan)
    for use_hdbscan in (True, False)
])

In [None]:
# Second sample: three heavy pairs (20 each) and three light edges (1 each),
# written as one (fm, to, ct) record per row.
df = pd.DataFrame(
    [
        ('a', 'r', 20),
        ('x', 'y', 20),
        ('n', 'm', 20),
        ('r', 'g', 1),
        ('h', 'a', 1),
        ('h', 'i', 1),
    ],
    columns=['fm', 'to', 'ct'],
)
params = ['fm', 'to', 'ct', False]
# test(df,4,params)
# Two chord diagrams of the same edge list, one per dendrogram setting
# (use_hdbscan_for_dendrogram=True first, then False), tiled together.
rt.tile([
    rt.chordDiagram(df,
                    relationships=[('fm','to')],
                    count_by='ct',
                    draw_labels=True,
                    txt_h=16,
                    use_hdbscan_for_dendrogram=use_hdbscan)
    for use_hdbscan in (True, False)
])

In [None]:
# Sample weighted edge list, one (fm, to, ct) record per row
df = pd.DataFrame(
    [
        ('a', 'b', 10),
        ('a', 'c', 20),
        ('a', 'd', 5),
        ('a', 'b', 1),
        ('b', 'a', 20),
        ('b', 'b', 3),
        ('b', 'c', 5),
        ('c', 'a', 10),
        ('c', 'b', 15),
        ('d', 'c', 5),
        ('d', 'a', 10),
        ('d', 'b', 50),
        ('d', 'd', 20),
    ],
    columns=['fm', 'to', 'ct'],
)

# Inputs that a dendrogram-ordering routine would receive as arguments
fm = 'fm'
to = 'to'
count_by = 'ct'
count_by_set = False

# Work on a copy and tag every row with a key for its endpoint pair.
# Per the original note, the helper joins the two labels in alphabetical order
# (so a->b and b->a would share a key) -- TODO confirm against RACETrack.
df = rt.copyDataFrame(df)
df['__fmto__'] = df.apply(lambda row: rt.__den_fromToString__(row, fm, to), axis=1)

# Collapse to one weight per pair: distinct values, sum, or plain row count
if count_by is not None and count_by_set:
    df_den = df.groupby('__fmto__')[count_by].nunique().reset_index()
elif count_by is not None:
    df_den = df.groupby('__fmto__')[count_by].sum().reset_index()
else:
    df_den   = df.groupby('__fmto__').size().reset_index().rename({0:'__countby__'},axis=1)
    count_by = '__countby__'

# Seed the merge queue and the adjacency dict.
# Weights are stored negated so the min-heap pops the heaviest pair first.
_heap_ = []
_graph_ = {}
for _, pair_row in df_den.iterrows():
    label_a, label_b = rt.__den_fromToStringParts__(pair_row['__fmto__'])
    node_a, node_b = (label_a,), (label_b,)
    neg_weight = -pair_row[count_by]
    heapq.heappush(_heap_, (neg_weight, (node_a, node_b)))
    if label_a == label_b:
        continue  # self-loops are queued but never become graph edges
    _graph_.setdefault(node_a, {})[node_b] = neg_weight
    _graph_.setdefault(node_b, {})[node_a] = neg_weight

# Untouched copy of the node-level graph; the merge loop mutates _graph_
_graph_orig_ = copy.deepcopy(_graph_)

def optimalArrangement(t0,t1):
    """
    Choose the order in which two clusters of nodes are concatenated.

    ``t0`` and ``t1`` are tuples of node labels. Each candidate layout is
    scored by summing ``weight / distance`` over the edges of the module-level
    ``_graph_orig_`` that join a member of one cluster to a member of the
    other, where ``distance`` is the number of positions separating the two
    endpoints in that layout. ``_graph_orig_`` stores negated counts, so the
    layout with the smaller (more negative) score keeps the heaviest edges
    closest together and is the one returned.

    Only ``t0 + t1`` and ``t1 + t0`` are compared; the order inside each
    cluster is never changed.

    Tie-breaking (kept from the original singleton-only logic):
      * two singletons          -> ``t0 + t1`` (no scoring is done)
      * ``t1`` is a singleton   -> ``t1 + t0``
      * every other case        -> ``t0 + t1``

    Returns the concatenated tuple.
    """
    if len(t0) == 1 and len(t1) == 1:
        return t0 + t1

    def _edge_weight(a, b):
        # Weight of the original edge a-b, or None when the nodes are not linked.
        # Falls back to the reverse direction in case the graph is not symmetric.
        w = _graph_orig_.get((a,), {}).get((b,))
        if w is None:
            w = _graph_orig_.get((b,), {}).get((a,))
        return w

    def _layout_score(left, right):
        # Score of laying the clusters out as ``left + right``.
        # left[i] sits at position i and right[j] at len(left) + j, hence
        # they are (len(left) - i) + j positions apart.
        total = 0
        n_left = len(left)
        for i, a in enumerate(left):
            for j, b in enumerate(right):
                w = _edge_weight(a, b)
                if w is not None:
                    total += w / ((n_left - i) + j)
        return total

    t0_first = _layout_score(t0, t1)
    t1_first = _layout_score(t1, t0)

    if t0_first < t1_first:
        return t0 + t1
    if t1_first < t0_first:
        return t1 + t0
    # Exact tie: reproduce the historical preference of each singleton branch
    if len(t1) == 1:
        return t1 + t0
    return t0 + t1

_graph_dfs_ = []          # debug: snapshot of the graph taken right before each merge
_merged_already_ = set()  # clusters that have been absorbed into a larger one
while _heap_:
    _strength_, (_fm_, _to_) = heapq.heappop(_heap_)
    # Make sure both endpoints are tuple keys
    if type(_fm_) is not tuple:
        _fm_ = (_fm_,)
    if type(_to_) is not tuple:
        _to_ = (_to_,)
    # Ignore self-loops and stale entries that mention an already-absorbed cluster
    if _fm_ == _to_ or _fm_ in _merged_already_ or _to_ in _merged_already_:
        continue

    _graph_dfs_.append(rt.graphDictToDataFrame(_graph_)) # debug
    _merged_already_.update((_fm_, _to_))

    _new_ = optimalArrangement(_fm_, _to_)

    # The merged cluster inherits the summed weights of both parents' neighbours
    _combined_ = {}
    for _old_ in (_fm_, _to_):
        for _nbor_, _weight_ in _graph_[_old_].items():
            _combined_[_nbor_] = _combined_.get(_nbor_, 0) + _weight_
    _graph_[_new_] = _combined_

    # Point every neighbour back at the merged cluster and queue the new edges
    for _nbor_, _weight_ in _combined_.items():
        _graph_[_nbor_][_new_] = _weight_
        heapq.heappush(_heap_, (_weight_, (_new_, _nbor_)))

    # Drop both parents together with every reference to them
    # (this also removes the parent entries that were copied into the merged cluster)
    for _old_ in (_fm_, _to_):
        for _nbor_ in _graph_[_old_]:
            _graph_[_nbor_].pop(_old_)
        _graph_.pop(_old_)

print(_graph_)
# Flatten the surviving cluster keys, in dict order, into one node ordering
_tuple_ = tuple(_label_ for _cluster_ in _graph_ for _label_ in _cluster_)
list(_tuple_)

In [None]:
# Render one link-node diagram per graph snapshot recorded in _graph_dfs_.
# spring_layout starts from random positions; a fixed seed makes the figures
# identical on every Restart & Run All.
LAYOUT_SEED = 0
svgs = []
for _df_ in _graph_dfs_:
    gnx = rt.createNetworkXGraph(_df_, [('fm','to')])
    pos = nx.spring_layout(gnx, seed=LAYOUT_SEED)
    svgs.append(rt.linkNode(_df_, [('fm','to')], pos, link_shape='curve', bounds_percent=0.4))
rt.table(svgs)

In [None]:
# Display the deep-copied adjacency dict captured before the merge loop ran
_graph_orig_

In [None]:
# Display the adjacency dict as left behind by the merge loop
_graph_