# Player Navigation Analysis

1. Hub Navigation Analysis

2. Path Efficiency Analysis

3. Link Position Impact

4. Navigation Strategies

In [None]:
%load_ext autoreload
%autoreload 2

### Understanding the data

Load the raw data and transform it into structures that are easy to work with.

In [None]:
from src.utils.data_utils import load_graph_data

# Load every dataset used by this notebook into a single dict; the log printed
# below shows the loader reading raw TSV files, formatting each table, and
# building the graph.
graph_data = load_graph_data()

loading raw data from tsv files...
formatting articles...
formatting categories...
formatting links...
formatting paths...
formatting distance matrix...
building graph...


In [None]:
from src.utils.general_utils import describe_dict

# Print one row per key of `graph_data` with the value's type and shape,
# as a quick sanity check of what was loaded.
describe_dict(graph_data)

Keyword                       | Type (shape)          
------------------------------------------------------
shortest-path-distance-matrix   Array (4604, 4604)    
paths_finished                  DataFrame (51318, 8)  
articles                        DataFrame (4604, 1)   
paths_unfinished                DataFrame (24875, 8)  
links                           DataFrame (119882, 2) 
categories                      DataFrame (5204, 2)   
graph                           DiGraph (4604, 119882)


Represent user paths as a graph

In [None]:
from src.data.graph import extract_players_graph

# Build one graph per path outcome: finished games and unfinished games.
# The helper also prints the edges that appear in the recorded paths but are
# missing from 'links' (see the notes below).
# NOTE(review): presumably nodes are articles and edges are the clicks players
# made -- confirm in src.data.graph.
finished_paths_graph = extract_players_graph(graph_data, finished=True)
unfinished_paths_graph = extract_players_graph(graph_data, finished=False)

Note that the following edges are present in 'paths_finished' but not in 'links'
{('Republic_of_Ireland', 'Éire'), ('Bird', 'Wikipedia_Text_of_the_GNU_Free_Documentation_License'), ('Finland', 'Åland'), ('Claude_Monet', 'Édouard_Manet')}
Note that the following edges are present in 'paths_unfinished' but not in 'links'
{('Bronze_Age', 'Wikipedia_Text_of_the_GNU_Free_Documentation_License'), ('Microsoft_Windows', 'Wikipedia_Text_of_the_GNU_Free_Documentation_License'), ('Culture', 'Wikipedia_Text_of_the_GNU_Free_Documentation_License'), ('Blackbird', 'Wikipedia_Text_of_the_GNU_Free_Documentation_License'), ('Tony_Blair', 'Wikipedia_Text_of_the_GNU_Free_Documentation_License'), ('Ireland', 'Éire'), ('James_Bond', 'Wikipedia_Text_of_the_GNU_Free_Documentation_License'), ('Accountancy', 'Wikipedia_Text_of_the_GNU_Free_Documentation_License'), ('Fungus', 'Wikipedia_Text_of_the_GNU_Free_Documentation_License'), ('Miguel_de_Cervantes', 'Wikipedia_Text_of_the_GNU_Free_Documentation_License'), 

## Hub Navigation Analysis

In [None]:
import networkx as nx
import numpy as np


def compute_hubs(graph, n_std=8):
	"""Return the nodes whose HITS hub score is a strong positive outlier.

	Hub scores are computed with ``nx.hits(graph, normalized=True)``. A node is
	kept when its score exceeds the mean of all hub scores by more than
	``n_std`` standard deviations (population standard deviation, the
	``np.std`` default).

	Args:
		graph: Graph object passed unchanged to ``nx.hits``.
		n_std: Number of standard deviations above the mean a hub score must
			exceed to count as significant. The default of 8 reproduces the
			threshold that was previously hard-coded.

	Returns:
		A list of ``(node, hub_score)`` tuples sorted by descending hub score.
		The list is empty when no node clears the threshold or when the graph
		yields no hub scores at all.
	"""
	hubs = nx.hits(graph, normalized=True)[0]

	# No scores (e.g. an empty graph): return early instead of feeding an empty
	# array to numpy, which would yield NaN statistics and RuntimeWarnings.
	if not hubs:
		return []

	scores = np.fromiter(hubs.values(), dtype=float, count=len(hubs))
	mean = scores.mean()
	std = scores.std()

	# Keep only positive outliers; low scores are never reported.
	significant_hubs = [
		(node, score) for node, score in hubs.items() if score - mean > n_std * std
	]
	significant_hubs.sort(key=lambda item: item[1], reverse=True)

	return significant_hubs

In [None]:
# Significant hubs in the graph built from finished paths.
compute_hubs(finished_paths_graph)

[('United_States', 0.032523284079864975),
 ('Europe', 0.019288972404555498),
 ('United_Kingdom', 0.01558426710929422),
 ('England', 0.01451206117797305),
 ('North_America', 0.013763742733676467),
 ('Earth', 0.012658182958466236),
 ('World_War_II', 0.009468624468380468),
 ('English_language', 0.008884412977769705),
 ('Great_Britain', 0.00714491866845842),
 ('France', 0.007120542760335704)]

In [None]:
# Significant hubs in the graph built from unfinished paths.
compute_hubs(unfinished_paths_graph)

[('United_States', 0.022458430058652494),
 ('United_Kingdom', 0.011189493299372714),
 ('England', 0.010328107713851743),
 ('Europe', 0.009365067575018654),
 ('Animal', 0.008076413290830507),
 ('North_America', 0.008043926762033964),
 ('Mammal', 0.007004155538156378),
 ('World_War_II', 0.00667763623055239),
 ('English_language', 0.006385391170843086),
 ('Earth', 0.005854865711796785)]

In [None]:
# Baseline: significant hubs in the full graph stored under graph_data["graph"],
# for comparison with the two player-path graphs above.
compute_hubs(graph_data["graph"])

[('Driving_on_the_left_or_right', 0.0022739309867502878)]

## Path Efficiency Analysis


**TODO** (planned analyses):

- Compare the paths players actually took with the computed shortest paths.
- Develop metrics for path "efficiency" that consider both path length and completion time.
- Create visualization tools for comparing and analyzing paths.
- Analyze the distribution of successful vs. abandoned paths.

## Link Position Impact

## Navigation Strategies