In [4]:

import pandas as pd
import re

In [17]:
df = pd.read_excel("network.xlsx",
									 header=None,
									 names = ["first", "middle", "last"]).fillna("")

df['title'] = df.apply(lambda row: ' '.join(row) if row["middle"] != "" else row["first"] + " " + row["last"], axis=1)

In [6]:
with open("rob-top2000-labels.txt", "w") as f:
	f.write("\n".join(df['title']))

labelled_edges = []

for i, src in df.iterrows():
	linking_word = src['last']
	targets = df[df['first'] == linking_word]
	for j, tar in targets.iterrows():
		labelled_edges.append(((i, src['title']), (j, tar['title'])))

with open("rob-top2000-graph.txt", "w") as f:
	f.write(str(len(df['title'])) + "\n")

	for (i, _), (j, _) in labelled_edges:
		f.write(f'{i} {j}\n')

with open("rob-top2000-labelled-edges.txt", "w") as f:
	for (i, src_title), (j, tar_title) in labelled_edges:
		f.write(f"{src_title} -> {tar_title} ({i} -> {j})\n")

In [7]:
undirected_edges = {frozenset([i, j]) for (i, _), (j, _) in labelled_edges[:1000]}
undirected_edges_list = [tuple(e) for e in undirected_edges if len(e) > 1]

directed_undirected_edges_list = undirected_edges_list + [(t, s) for (s, t) in undirected_edges_list]

len(labelled_edges), len(undirected_edges), len(undirected_edges_list), len(directed_undirected_edges_list)

(3438, 1000, 991, 1982)

In [8]:
with open("rob-top2000-KALP.dimacs", "w") as f:
    f.write(f"p sp {len(df["title"])} {len(directed_undirected_edges_list)}\n")

    for s, t in directed_undirected_edges_list:
        f.write(f"a {s + 1} {t + 1} 1\n")

SyntaxError: f-string: unmatched '[' (3990666502.py, line 2)

In [10]:
import subprocess

with open("rob-top2000-graph.txt", "r") as f:
	result = subprocess.run(['../../lpath','SMART_FORCE'], stdin=f, stdout=subprocess.PIPE, text=True)

path = list(map(int, re.search(r'Longest path: (.+)\s', result.stdout).group(1).split()))
titles = [df.iloc[i]['title'] for i in path]

print(result.stdout)
print('\n'.join(titles))

Read graph. vertices: 1573, edges: 3438
Search mode: SMART_FORCE
Longest path length: 42
Longest path: 939 827 549 331 1366 1149 513 880 858 1507 684 832 855 935 1220 1002 1054 1376 1569 1382 1438 1345 240 152 485 931 876 1570 988 298 780 1454 900 1126 564 1073 398 857 1271 610 851 1014 735 
Time: 2.940488s

Sign Of The Times
Times Were When
When Doves Cry
Cry  Baby
Baby Can I Hold You
You Can't Hurry Love
Love You More
More Than This
This Charming Man
Man I Feel Like A Woman
Woman In Love
Love Is All
All For Nothing
Nothing Compares 2 You
You 've Got The Love
Love  Story
Story Of My Life
Life 's What You Make It
It 's The End Of The World As We Know It
It Must Have Been Love
Love Really Hurts Without You
You Need To Calm Down
Down  Down
Down  Under
Under The Bridge
Bridge Over Troubled Water
Water Of Love
Love Me Just A Little Bit More
More Than A Feeling
Feeling  Good
Good 4 You
You Shook Me All Night Long
Long Train Running
Running To Stand Still
Still Loving You
You Are So Beautifu

In [None]:
import subprocess

with open("top2000-graph.txt", "r") as f:
	result = subprocess.run(['../../lpath','DFBNB'], stdin=f, stdout=subprocess.PIPE, text=True)

print(result.stdout)


# Depth first Branch and Bound takes longer

In [7]:
# Enable imports form top-level of project (edit top_level_path accordingly)
import os
import sys
top_level_path = os.path.abspath(os.path.join('..', '..'))
if top_level_path not in sys.path:
	sys.path.append(top_level_path)

<module 'brute' from '/Users/billy/LongestPath/LongestPath/brute/__init__.py'>


In [23]:
import json


with open("rob-top2000-graph.txt", "r") as f:
	vertices, edges = from_string(f.read())

path_edges = list(zip(path, path[1:]))
print(path_edges)

data = {
	"nodes": [{"id": df.iloc[i]['title'], "group": 2 if i in path else 1} for i in range(vertices)],
	"links": [{"source": df.iloc[i]['title'], "target": df.iloc[j]['title'], "value": 1 if (i,j) in path_edges else 0} for (i,j) in edges],
}

with open('graph.json', 'w', encoding='utf-8') as f:
	json.dump(data, f, ensure_ascii=False)

[(939, 827), (827, 549), (549, 331), (331, 1366), (1366, 1149), (1149, 513), (513, 880), (880, 858), (858, 1507), (1507, 684), (684, 832), (832, 855), (855, 935), (935, 1220), (1220, 1002), (1002, 1054), (1054, 1376), (1376, 1569), (1569, 1382), (1382, 1438), (1438, 1345), (1345, 240), (240, 152), (152, 485), (485, 931), (931, 876), (876, 1570), (1570, 988), (988, 298), (298, 780), (780, 1454), (1454, 900), (900, 1126), (1126, 564), (564, 1073), (1073, 398), (398, 857), (857, 1271), (1271, 610), (610, 851), (851, 1014), (1014, 735)]


In [27]:
words = [df.iloc[i]['first'] for i in range(vertices)] + [df.iloc[i]['last'] for i in range(vertices)]
words = list(set(words))
path_words = set([df.iloc[i]['first'] for i in path] + [df.iloc[i]['last'] for i in path])

title_links = [
	{
		"source": row["first"],
		"target": row["last"],
		"title": row["title"],
		"value": 1 if row["title"] in titles else 0,
	} for _, row in df.iterrows()]

dual_data = {
	"nodes": [{"id": word, "group": 1 if word in path_words else 0} for word in words],
	"links": title_links,
}

with open('dual_graph.json', 'w', encoding='utf-8') as f:
	json.dump(dual_data, f, ensure_ascii=False)

