In [22]:
import pandas as pd
import networkx as nx
from pyvis.network import Network

# Read the CSV with fallback encoding
df = pd.read_csv("ISCO-08 EN.csv", encoding='latin1')  # or 'utf-8', 'cp1252' if needed
df


Unnamed: 0,ISCO_version,major,major_label,sub_major,sub_major_label,minor,minor_label,unit,description
0,ISCO-08,1,Managers,11.0,"Chief Executives, Senior Officials and Legisla...",111,Legislators and Senior Officials,1111,Legislators
1,ISCO-08,1,Managers,11.0,"Chief Executives, Senior Officials and Legisla...",111,Legislators and Senior Officials,1112,Senior Government Officials
2,ISCO-08,1,Managers,11.0,"Chief Executives, Senior Officials and Legisla...",111,Legislators and Senior Officials,1113,Traditional Chiefs and Heads of Villages
3,ISCO-08,1,Managers,11.0,"Chief Executives, Senior Officials and Legisla...",111,Legislators and Senior Officials,1114,Senior Officials of Special-interest Organizat...
4,ISCO-08,1,Managers,11.0,"Chief Executives, Senior Officials and Legisla...",112,Managing Directors and Chief Executives,1120,Managing Directors and Chief Executives
...,...,...,...,...,...,...,...,...,...
1308,ISCO-58,9,"Service, Sport and Recreation Workers",,,99,"Service, Sport and Recreation Workers Not Else...",999,"Service, Sport and Recreation Workers Not Else..."
1309,ISCO-58,X,Workers Not Classifiable by Occupation,,,X1,New Workers Seeking Employment,X19,New Workers Seeking Employment
1310,ISCO-58,X,Workers Not Classifiable by Occupation,,,X2,Workers Reporting Occupations Unidentifiable o...,X29,Workers Reporting Occupations Unidentifiable o...
1311,ISCO-58,X,Workers Not Classifiable by Occupation,,,X3,Workers Not Reporting Any Occupation,X39,Workers Not Reporting Any Occupation


In [23]:
import pandas as pd
import networkx as nx
from pyvis.network import Network

# Initialize directed graph
G = nx.DiGraph()
G.add_node("All Jobs")

# Keep track of all nodes that have been added (to check for missing incoming edges)
all_nodes = set()

# Loop through and add edges (ensure all labels are strings and not NaN)
for _, row in df.iterrows():
    major_label = str(row["major_label"]) if pd.notna(row["major_label"]) else None
    sub_major_label = (
        str(row["sub_major_label"]) if pd.notna(row["sub_major_label"]) else None
    )
    minor_label = str(row["minor_label"]) if pd.notna(row["minor_label"]) else None
    unit_label = str(row["description"]) if pd.notna(row["description"]) else None

    # Add edges with handling for missing hierarchical layers
    if major_label:
        G.add_edge("All Jobs", major_label)
        all_nodes.add(major_label)

    if major_label and sub_major_label:
        G.add_edge(major_label, sub_major_label)
        all_nodes.add(sub_major_label)

    if sub_major_label and minor_label:
        G.add_edge(sub_major_label, minor_label)
        all_nodes.add(minor_label)
    if minor_label and unit_label:
        G.add_edge(minor_label, unit_label)
        all_nodes.add(unit_label)

# Ensure all nodes have an incoming edge, otherwise add one from "All Jobs"
for node in all_nodes:
    if G.in_degree(node) == 0:  # If no incoming edge
        G.add_edge("All Jobs", node)  # Add edge from "All Jobs"

# Print node and edge counts
print("Nodes in graph:", len(G.nodes))
print("Edges in graph:", len(G.edges))

# Use pyvis to visualize the graph
net = Network(notebook=True)
net.from_nx(G)

# Set options for visualization (optional customization)
net.force_atlas_2based()  # For layout (force-directed)
net.show("go_graph.html")

Nodes in graph: 1683
Edges in graph: 1666
go_graph.html


In [24]:
def print_tree_breadth_first_with_repeated_edges(graph, node):
    """ Print the tree structure in breadth-first order and detect repeated nodes and edges """
    current_level = [node]  # Start with the root node
    next_level = []  # List to hold the nodes at the next level
    visited = set()  # Set to keep track of visited nodes
    repeated_edges = set()  # Set to keep track of repeated edges
    
    # Add the root node to the visited set
    visited.add(node)
    
    # Traverse through each level until no more nodes are found
    while current_level:
        # Pop nodes from the current level and print the count and the nodes
        print(f"Count: {len(current_level)}: ", end="")
        
        for _ in range(len(current_level)):
            current_node = current_level.pop(0)  # Pop the first node
            print(current_node, end=", " if _ < len(current_level) - 1 else "")
            
            # Add children to the next level if not already visited
            for neighbor in graph.neighbors(current_node):
                edge = (current_node, neighbor)
                
                # Check if the edge has been traversed already (repeated edge)
                if edge in repeated_edges:
                    print(f"\nRepeated edge: {edge}")
                else:
                    repeated_edges.add(edge)
                
                # If the neighbor has not been visited, add it to the next level
                if neighbor not in visited:
                    next_level.append(neighbor)
                    visited.add(neighbor)
        
        print()  # New line after printing the current level's nodes
        
        # Move to the next level
        current_level = next_level
        next_level = []  # Reset next_level for the next iteration

# Print the tree with repeated edge detection starting from "All Jobs"
print("Tree Structure (Breadth First) with Repeated Nodes and Edges:")
print_tree_breadth_first_with_repeated_edges(G, "All Jobs")


Tree Structure (Breadth First) with Repeated Nodes and Edges:
Count: 1: All Jobs
Count: 40: Managers, Professionals, Technicians and Associate Professionals, Clerical Support Workers, Services And Sales Workers, Skilled Agricultural, Forestry and Fishery Workers, Craft and Related Trades Workers, Plant and Machine Operators and Assemblers, Elementary Occupations, Armed Forces Occupations, Legislators, Senior Officials and Managers , Professionals , Technicians and Associate Professionals , Clerks , Service Workers and Shop and Market Sales Workers , Skilled Agricultural and Fishery Workers , Craft and Related Trades Workers , Plant and Machine Operators and Assemblers , Elementary Occupations , Professional, Technical and Related Workers Professional, Technical and Related WorkersAdministrative and Managerial Workers Clerical and Related Workers Sales Workers Service Workers Agricultural, Animal Husbandry and Forestry Workers, Fishermen and Hunters Production and Related Workers, Trans

In [25]:

# Create Pyvis network
nt = Network(height="800px", width="100%", directed=True)
nt.from_nx(G)
nt.force_atlas_2based()
nt.show_buttons(filter_=['physics'])
nt.write_html("isco_pyvis_tree.html")

print("✅ Done! Open 'isco_pyvis_tree.html' to view the interactive job tree.")

✅ Done! Open 'isco_pyvis_tree.html' to view the interactive job tree.
