In [36]:
"""
This was an extra side-project notebook counts how many different lines each line connects to.
This also attempts to find a lines "best friend", being the line they share the most stations
with the criteria being sharing at least 4 stations & not having multiple best friends.
The main result is the table printed at the very bottom.
"""

import os
from TransitGraph import TransitGraph

tg = TransitGraph()

In [37]:
class Line:
	"""
	Class to hold information about each line, mainly the number of shared stations they have
	with each other line.
	"""
	def __init__(self, input_name: str):
		self.name = input_name  # name of line
		self.stations: list[str] = []  # list of its own stations
		# each other line & the number of shared stations with this line
		self.counts: dict[str, int] = {}
		# total number of other lines that this line connects with,
		# being equivalent to `len(self.counts)`
		self.friends = 0
		self.best_friend: str = 'None'  # name of line with most shared stations
		# number of stations this line shares with the best friend
		self.best_friend_stations: int = 0
		# whether the best friend has different best friend themselves
		# (thus not mutual best friends)
		self.best_friend_fake = False

	def add_count(self, new_line_name: str):
		"""
		Adds a specific connection to the respective line. This will automatically create a new
		entry or increment the number of connections.
		"""
		#print(f'{self.name} is now getting {new_line_name}')
		if new_line_name in self.counts.keys():

			self.counts[new_line_name] += 1
			#print(f'\tis now {self.counts[new_line_name]}')
		else:
			self.counts[new_line_name] = 1
			self.friends += 1
			#print('\tnew friend!')

	def set_best_friend(self):
		"""
		Sets the variables `self.best_friend` & `self.best_friend_stations`.
		"""
		maxi = 0  # value for number of stations
		maxi_name = None  # name of line with max number of stations
		# To indicate whether there are multiple maximums. If there are, then there are
		# technically no single best friends
		overlap = False

		# probably a better way to find the max value in a dictionary but i already wrote this
		for line_name, count in self.counts.items():
			#if self.name == '11': print(f'checking {name} w {count}')
			if count > maxi:
				#if self.name == '11': print(f'\t{name} has more than {maxi}')
				overlap = False
				maxi_name = line_name
				maxi = count
			elif count == maxi:
				#if self.name == '11': print(f'\t{name} has equal {maxi}')
				overlap = True

		# only set if they share more than 4 stations
		if not overlap and maxi >= 4:
			self.best_friend = maxi_name
			self.best_friend_stations = maxi


def get_counts(only: list[str] = None, line_folder_name: str = 'lines') -> dict[str, Line]:
	"""
	Primary wrapper to reads the station data in from the `lines` folder & returns the
	statistics for every line.

	:param only: Filter specific lines to read instead of all of them.
	:param line_folder_name: Name of the folder containing the station data.
	:return: Dictionary with the key being the name of the line & the value being object `Line`
		holding the statistics for the line. The `Line` object can be verified by asserting
		`line_name == dict['line_name'].name`.
	"""


	line_counts_builder: dict[str, Line] = {}  # return variable

	for line_index, file_name in enumerate(os.listdir(line_folder_name)):

		# get name & create object
		line_name = file_name.split('.')[0]

		# filter line if needed
		if only is not None:
			if line_name not in only:
				continue

		line_counts_builder[line_name] = Line(line_name)

		# read in stations for that line
		with open(f'{line_folder_name}/{file_name}', 'r') as station_list:
			for station in station_list:
				line_counts_builder[line_name].stations.append(station.strip())

		# append friends
		assert line_counts_builder[line_name].counts == {}, \
			f'{line_name} has already {line_counts_builder[line_name].counts}'

		# acquire connecting lines for respective line
		for station in line_counts_builder[line_name].stations:
			connecting_lines = set()
			for edge in tg.edges(station):
				for connecting_line in tg.get_edge_data(edge[0], edge[1])['lines']:
					if line_name == connecting_line: continue  # skip itself
					connecting_lines.add(connecting_line)
			for connecting_line in connecting_lines:
				line_counts_builder[line_name].add_count(connecting_line)

	return line_counts_builder



# add friends
# for index, node in enumerate(tg.nodes):
# 	for edge in tg.edges(node):
# 		print(f'{edge}: w/ {tg.get_edge_data(edge[0], edge[1])['lines']}')
	# lines={line_name},

In [38]:
#line_counts_list: list[Line] = []
line_counts = get_counts()

# for lines lacking best friends, keep an empty dictionary entry
# line_counts['None'] = Line('None')
# line_counts['None'].best_friend = 'Invalid'

for _, line in line_counts.items():
		line.set_best_friend()


In [39]:
# find fake best friends
verbose_lines = ['U4', '71']

for name, line in line_counts.items():
	if line.name in verbose_lines:
		print(f'\nchecking {line.name}')
		if line.best_friend != 'None':
			print(
				f'\tits best friend is {line.best_friend}, '
				f'& their best friend is {line_counts[line.best_friend].best_friend}'
			)
		else:
			print('\tno best friend')

	if line.best_friend != 'None' and line.name != line_counts[line.best_friend].best_friend:
		if line.name in verbose_lines:
			print(
				f'\t{line.best_friend} '
				f'after: {line_counts[line.best_friend].best_friend_fake}'
			)
		line.best_friend_fake = True



checking 71
	its best friend is 11, & their best friend is 71

checking U4
	no best friend


In [40]:
# sort
line_counts_list = sorted(line_counts.values(), key=lambda l: l.friends, reverse=True)
#line_counts_list.sort(key=lambda l: l.friends, reverse=True)

In [43]:
# print extracted information

header = [
	'Name',  # name of line in focus
	'# Friends',  # amount of lines they have a connection with
	'Best friend',  # the line they have the most connections with
	'# Shared',  # the number of stations shared with said line
	'Fake best friend',  # if this best friend has a different best friend of their own
	'Their best friend'  # said best friends actual best friend
]

header = [
	header_element +
	':' +
	' ' * 3  # spacing between columns
	for header_element in header]

print(*header, sep='', end='')

for index, line in enumerate(line_counts_list):
	print(
		f'\n{line.name:<{len(header[0])}}'
		f'{line.friends:<{len(header[1])}}'
		f'{line.best_friend:<{len(header[2])}}',
		end=''
	)
	if line.best_friend != 'None':
		print(f'{line.best_friend_stations:<{len(header[3])}}', end='')
		if line_counts[line.name].best_friend_fake:
			print(
				f'{'yes':<{len(header[4])}}'
				f'{line_counts[line.best_friend].best_friend}',
				end=''
			)
		else:
			print(f'{'no':<{len(header[4])}}', end='')
	if (index+1) % 10 == 0:  # for scrolling readability
		print('\n', *header, sep='', end='')



Name:   # Friends:   Best friend:   # Shared:   Fake best friend:   Their best friend:   
U6      33           None           
D       32           71             8           yes                 11
5       30           33             11          no                  
71      27           11             14          no                  
U3      27           None           
1       27           None           
U4      27           None           
S4      25           S3             16          no                  
S2      25           None           
S3      25           S4             16          no                  
Name:   # Friends:   Best friend:   # Shared:   Fake best friend:   Their best friend:   
S1      25           S2             12          yes                 None
2       24           1              6           yes                 None
U2      24           None           
18      23           6              9           no                  
6       22           18             