In [2]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

### 最大的弱连通分量拿出来

In [3]:
# 读取weakly connected component.json
with open('weakly connected component.json', "r", encoding='utf-8') as r:
    wcc = json.loads(r.read())
wcc=np.array(wcc)
wcc.squeeze()
wcc.shape

(83,)

In [4]:
biggest_wcc=wcc[0]
biggest_wcc_com=biggest_wcc['component']
biggest_wcc_com

['812137',
 '2ade0908-d1c9-45de-9dbb-3dfec75d9317',
 '33560',
 '56126935',
 'Timothy Hampton',
 'Denise Vance',
 '5337',
 'Chad Nichols',
 '9ce492a2-de6e-4231-a018-318fc929908b',
 'Stacy Castillo',
 '63',
 'Blake Smith',
 '672',
 'Rhonda Johnson',
 '80774',
 'Leominster Provincetownâ',
 'Dawn Robinson',
 '2983',
 '1334578',
 'Andrea White',
 'Wendy Miller',
 '649009867',
 '26676370',
 'Brandon Berger',
 'Fisheries Animal Industriesâ',
 '76609426',
 'e2816018-e01c-4eac-8dc5-e84aebc22cff',
 '245658',
 '1f8c20da-5f20-4116-907a-a653089bb8a0',
 'Alyssa Gonzalez',
 'Jacob Caldwell',
 'David Mcfarland',
 'Ocean Packers',
 'c0ce6d15-622b-42a3-b892-f5efe5b4d00c',
 'Lindsey Conway',
 'Seaside Sojourns A/S Shipping',
 '262157542',
 '28890617',
 '72092',
 'â\x80\x9cillegalâ',
 '176',
 'ab195b55-453e-4436-ab0c-9f5660b4ec8c',
 'Jason Hughes',
 'Joshua Park',
 '67a59fab-710a-4992-b6d0-d581fd9dceba',
 '90be60b1-61d8-4115-b133-e4c8e09383eb',
 'Michelle Aguilar',
 'Patricia Byrd',
 '567217',
 '218',
 'J

In [5]:
v1=json.load(open('../../vast_2023_mc1/mc1_vis_system/public/MC1.json'))
merged_info=pd.read_csv('../../vast_2023_mc1/MC1/V1/merged_info.csv')
nodes_df=pd.DataFrame(v1['nodes'])
links_df=pd.DataFrame(v1['links'])

In [6]:
big_wcc_nodes=nodes_df[nodes_df['id'].isin(biggest_wcc_com)]
big_wcc_nodes

Unnamed: 0,type,dataset,country,id
0,company,MC1,Nalakond,Spanish Shrimp Carriers
1,organization,MC1,,12744
2,organization,MC1,,143129355
3,organization,MC1,,7775
4,organization,MC1,,1017141
...,...,...,...,...
3422,,MC1,,Felbab-Brown
3423,,MC1,,OzonActionâs
3424,,MC1,,âEntirely
3425,,MC1,,Ocano del Este SE


In [7]:
big_wcc_links=links_df[(links_df['source'].isin(biggest_wcc_com)) & (links_df['target'].isin(biggest_wcc_com))]
big_wcc_links.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10115 entries, 0 to 11068
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   type         10115 non-null  object 
 1   weight       10114 non-null  float64
 2   dataset      10115 non-null  object 
 3   source       10115 non-null  object 
 4   target       10115 non-null  object 
 5   key          10115 non-null  int64  
 6   weightlcoat  1 non-null      float64
dtypes: float64(2), int64(1), object(4)
memory usage: 632.2+ KB


Dijkstra

In [8]:
import itertools
import sys
from heapq import *

In [9]:
class Vertex:
    def __init__(self, name):
        self.key = name
        self.neighbor = {}
        self.cost = float("inf")
        self.predecessor = None
        self.pre_relationship = None
        self.pre_key = -1

    def add_neighbor(self, weight, vertex_name,key):
        # 有重边但是weight不一样，用key指示
        self.neighbor[(vertex_name,key)] = weight

    def add_pre(self, pre, relationship, key):
        self.predecessor = pre
        self.pre_relationship = relationship
        self.pre_key = key


In [10]:
def get_path(dst):
    result_path = []
    while dst.predecessor is not None:
        result_path.append(dst.key)
        result_path.append(dst.pre_relationship.values[0])
        result_path.append("key="+str(dst.pre_key))
        dst = dst.predecessor
    result_path.append(dst.key)
    return result_path

def clear_pre(all_ver):
    for ver in all_ver:
        all_ver[ver].predecessor = None
        all_ver[ver].cost = float("inf")

def dijkstra(source, destination, all_vertices):
    # path = {}
    counter = itertools.count()
    count_or = next(counter)
    heap = [(0, count_or, source)]
    heapify(heap)
    source.cost = 0
    visit = []

    while heap is not None:
        # get the top vertex of the heap update its cost to neighbors
        tmp_distance, tmp_order, tmp_vertex = heappop(heap)

        if tmp_vertex == destination:
            break

        visit.append(tmp_vertex.key)
        for neighbors in tmp_vertex.neighbor:
            # neighbors:(name,key)
            neighbor_name,neighbor_key=neighbors
            if neighbor_name not in visit:
                tmp_cost = tmp_distance + tmp_vertex.neighbor[neighbors]
                if tmp_cost < all_vertices[neighbor_name].cost:
                    relationship=big_wcc_links[(big_wcc_links['source']==tmp_vertex.key) & 
                                               (big_wcc_links['target']==neighbor_name)&
                                               (big_wcc_links['key']==neighbor_key)]['type']
                    # print(relationship)
                    all_vertices[neighbor_name].add_pre(tmp_vertex,relationship,neighbor_key)
                    all_vertices[neighbor_name].cost = tmp_cost
                    heappush(heap, (int(all_vertices[neighbor_name].cost), next(counter), all_vertices[neighbor_name]))



In [11]:
all_vertices = {}

for nodes in big_wcc_nodes['id']:
    all_vertices[nodes] = Vertex(nodes)
    
for links in big_wcc_links.iterrows():
    # print(links[1]['source'],links[1]['weight'],links[1]['target'])
    # dijkstra找小的走->权重大置信度大->更应该选择，用1减
    key=links[1]['key']
    all_vertices[links[1]['source']].add_neighbor(1-links[1]['weight'], links[1]['target'],key)
    


In [12]:
all_vertices
print("Kristin Jones" in all_vertices.keys())

True


In [13]:
# 试一下
clear_pre(all_vertices)
dijkstra(all_vertices["979893388"],all_vertices["8327"],all_vertices)
res=get_path(all_vertices["8327"])
for i in range(len(res)):
    print(res[-i-1])
# res

979893388
key=0
ownership
Erica Hamilton
key=0
partnership
160
key=0
membership
8327


In [14]:
clear_pre(all_vertices)
dijkstra(all_vertices["979893388"],all_vertices["Mar de la Vida OJSC"],all_vertices)
res=get_path(all_vertices["Mar de la Vida OJSC"])
for i in range(len(res)):
    print(res[-i-1])

979893388
key=0
partnership
Dana Goodwin
key=0
membership
Turkish Sword Buoy Incorporated Forwading
key=0
membership
Spanish Shrimp  Carriers
key=0
membership
Ancla Azul Company Solutions
key=0
membership
Mar de la Vida OJSC


In [15]:
# Ocean Fisheries Llc
clear_pre(all_vertices)
dijkstra(all_vertices["979893388"],all_vertices["Ocean Fisheries Llc"],all_vertices)
res=get_path(all_vertices["Ocean Fisheries Llc"])
for i in range(len(res)):
    print(res[-i-1])

979893388
key=0
partnership
Dana Goodwin
key=0
membership
Faroe Islands Shrimp Shark
key=1
family_relationship
png xi  Line
key=2
family_relationship
Ocean Fisheries Llc


In [16]:
# Ocean Fisheries Llc
clear_pre(all_vertices)
dijkstra(all_vertices["979893388"],all_vertices["Oceanfront Oasis Inc Carriers"],all_vertices)
res=get_path(all_vertices["Oceanfront Oasis Inc Carriers"])
for i in range(len(res)):
    print(res[-i-1])

979893388
key=0
ownership
Oceanfront Oasis Inc Carriers


In [17]:
clear_pre(all_vertices)
dijkstra(all_vertices["Mar de la Vida OJSC"],all_vertices["979893388"],all_vertices)
res=get_path(all_vertices["979893388"])
for i in range(len(res)):
    print(res[-i-1])

Mar de la Vida OJSC
key=0
family_relationship
b8567859-bf54-49fd-8332-5775e19c65af
key=1
partnership
Faroe Islands Shrimp Shark
key=0
partnership
979893388


In [18]:
clear_pre(all_vertices)
dijkstra(all_vertices["8327"],all_vertices["979893388"],all_vertices)
res=get_path(all_vertices["979893388"])
for i in range(len(res)):
    print(res[-i-1])

8327
key=0
membership
435054320
key=0
family_relationship
Mary Horton
key=0
family_relationship
979893388


In [20]:
clear_pre(all_vertices)
dijkstra(all_vertices["FishEye International"],all_vertices["979893388"],all_vertices)
res=get_path(all_vertices["979893388"])
for i in range(len(res)):
    print(res[-i-1])

FishEye International
key=0
membership
Federated
key=0
ownership
23990411
key=0
membership
979893388


In [28]:
clear_pre(all_vertices)
dijkstra(all_vertices["79"],all_vertices["979893388"],all_vertices)
res=get_path(all_vertices["979893388"])
for i in range(len(res)):
    print(res[-i-1])

79
key=0
ownership
png xi  Line
key=0
ownership
19
key=0
ownership
SeaSpray Wave SRL Solutions
key=3
family_relationship
979893388


In [30]:
clear_pre(all_vertices)
dijkstra(all_vertices["79"],all_vertices["78"],all_vertices)
res=get_path(all_vertices["78"])
for i in range(len(res)):
    print(res[-i-1])

79
key=0
ownership
138782226
key=0
membership
Spanish Shrimp  Carriers
key=0
ownership
3506021
key=0
partnership
348079
key=0
partnership
78


In [34]:
clear_pre(all_vertices)
dijkstra(all_vertices["348079"],all_vertices["78"],all_vertices)
res=get_path(all_vertices["78"])
for i in range(len(res)):
    print(res[-i-1])

348079
key=0
partnership
78
