In [10]:
import pandas as pd
import os
from typing import List, Set, Optional
import json
from typing import Union

def read_nodes_file(nodes_path: str) -> pd.DataFrame:
    """
    Read nodes data from either CSV or JSON file.
    
    Args:
        nodes_path: Path to the nodes file (either .csv or .json)
        
    Returns:
        pandas DataFrame containing the nodes data
    """
    file_extension = nodes_path.split('.')[-1].lower()
    
    if file_extension == 'csv':
        nodes_df = pd.read_csv(nodes_path)
    elif file_extension == 'json':
        nodes_df = pd.read_json(nodes_path)
    else:
        raise ValueError(f"Unsupported file format: {file_extension}. Please use CSV or JSON.")
        
    return nodes_df

def process_network(
    nodes: Union[str, pd.DataFrame],
    edges: Union[str, pd.DataFrame],
    output_path: Optional[str] = None,
    min_cases: int = 50,
    specific_articles: Optional[List[str]] = None,
    split_by_article: bool = True,
    save_files: bool = False,
    merge_subarticles: bool = False
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Process and optionally split the citation network based on ECHR articles.
    
    Args:
        nodes: Path to nodes CSV file or nodes DataFrame
        edges: Path to edges CSV file or edges DataFrame
        output_path: Directory to save the network files (required if save_files=True)
        min_cases: Minimum number of cases required for an article network
        specific_articles: Optional list of specific articles to split by
        split_by_article: Whether to split the network by article
        save_files: Whether to save the processed network(s) to files
    
    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: The processed (nodes_df, edges_df)
    """
    # Validate arguments
    if save_files and not output_path:
        raise ValueError("output_path is required when save_files=True")

    # Handle input data
    if isinstance(nodes, str):
        print(f"Reading nodes from {nodes}...")
        nodes_df = read_nodes_file(nodes)
    else:
        nodes_df = nodes.copy()
        
    if isinstance(edges, str):
        print(f"Reading edges from {edges}...")
        edges_df = read_nodes_file(edges)
    else:
        edges_df = edges.copy()
    
    # Convert string representation of list to actual list in edges_df
    edges_df['references'] = edges_df['references'].apply(
        lambda x: eval(x) if isinstance(x, str) else x
    )
    
    if not split_by_article:
        if save_files:
            os.makedirs(output_path, exist_ok=True)
            full_dir = os.path.join(output_path, 'full')
            os.makedirs(full_dir, exist_ok=True)
            save_as_json(nodes_df, os.path.join(full_dir, 'nodes.json'))
            save_as_json(edges_df, os.path.join(full_dir, 'edges.json'))
            print(f"Saved processed network to {output_path}")
        return nodes_df, edges_df
    
    # Get unique articles
    if merge_subarticles:
        unique_articles = get_unique_articles_updated(nodes_df)
    else:
        unique_articles = get_unique_articles(nodes_df)

    if specific_articles:
        unique_articles = {art for art in unique_articles if art in specific_articles}
    print(f"Found {len(unique_articles)} unique articles")
    
    if save_files:
        # Create output directory
        os.makedirs(output_path, exist_ok=True)
        
        # Process each article
        for article in unique_articles:
            # Filter data for this article
            article_nodes, article_edges = filter_by_article(nodes_df, edges_df, article)
            
            # Check if network meets minimum size requirement
            if len(article_nodes) >= min_cases:
                # Create article-specific directory
                article_dir = os.path.join(output_path, f"article_{article.replace('/', '_')}")
                os.makedirs(article_dir, exist_ok=True)
                
                # Save filtered data as JSON
                save_as_json(article_nodes, os.path.join(article_dir, 'nodes.json'))
                save_as_json(article_edges, os.path.join(article_dir, 'edges.json'))
                
                print(f"Created network for Article {article} with {len(article_nodes)} nodes")
            else:
                print(f"Skipping Article {article} - only {len(article_nodes)} cases (minimum: {min_cases})")
    
    return nodes_df, edges_df

def get_unique_articles(df: pd.DataFrame) -> Set[str]:
    """
    Extract unique articles from the article column, splitting combined articles.
    
    Args:
        df: DataFrame containing an 'article' column
    Returns:
        Set of unique article numbers
    """
    all_articles = set()
    
    # Filter out rows where article is NaN
    df = df[df['article'].notna()]
    
    # Split articles and add to set
    for articles_str in df['article']:
        # Split on semicolon if multiple articles
        articles = articles_str.split(';')
        # Filter out empty strings and strip whitespace
        articles = {art.strip() for art in articles if art.strip()}
        all_articles.update(articles)
    
    return all_articles

def get_unique_articles_updated(df: pd.DataFrame) -> Set[str]:
    """
    Extract unique articles from the article column, treating sub-articles as their main article.
    Also handles '+' separated articles.
    
    Args:
        df: DataFrame containing an 'article' column
    Returns:
        Set of unique article numbers
    """
    all_articles = set()
    
    # Filter out rows where article is NaN
    df = df[df['article'].notna()]
    
    # Split articles and add to set
    for articles_str in df['article']:
        # Split on semicolon if multiple articles
        articles = articles_str.split(';')
        # Split on plus if articles are combined
        articles = [art.split('+') for art in articles]
        # Flatten the list
        articles = [item.strip() for sublist in articles for item in sublist]
        # Filter out empty strings and strip whitespace
        articles = {art.strip() for art in articles if art.strip()}
        # Extract main article numbers (before any dash)
        main_articles = {art.split('-')[0] for art in articles}
        all_articles.update(main_articles)
    
    return all_articles

def filter_by_article(nodes_df: pd.DataFrame, edges_df: pd.DataFrame, article) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Filter nodes and edges for a specific article.
    Handles cases where articles are combined with '+'.
    
    Args:
        nodes_df: DataFrame containing node information
        edges_df: DataFrame containing edge information
        article: Article number to filter by
    Returns:
        Tuple of filtered (nodes_df, edges_df)
    """
    # Create pattern to match article as main or sub-article, including when combined with '+'
    pattern = f"(^|;|\+){article}(-|$|;|\+)"
    
    # Reference the filter_article function from load.py
    filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
    
    # Get list of valid ECLIs for this article
    valid_eclis = set(filtered_nodes['ecli'].values)
    
    # Filter edges to only include connections between nodes in this article
    filtered_edges = edges_df[edges_df['ecli'].isin(valid_eclis)].copy()
    
    # Filter references to only include valid ECLIs
    filtered_edges['references'] = filtered_edges['references'].apply(
        lambda refs: [ref for ref in refs if ref in valid_eclis]
    )
    
    return filtered_nodes, filtered_edges

def save_as_json(df: pd.DataFrame, filepath: str):
    """
    Save DataFrame as JSON in a format compatible with rankings.ipynb.
    
    Args:
        df: DataFrame to save
        filepath: Path where to save the JSON file
    """
    # Convert DataFrame to list of records
    records = df.to_dict(orient='records')
    
    # Handle special case for references column in edges
    if 'references' in df.columns:
        for record in records:
            # Ensure references is a list
            if isinstance(record['references'], str):
                record['references'] = eval(record['references'])
    
    # Save with proper formatting
    # Create directory if it doesn't exist
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    
    with open(filepath, 'w') as f:
        json.dump(records, f, indent=2)


  pattern = f"(^|;|\+){article}(-|$|;|\+)"
  pattern = f"(^|;|\+){article}(-|$|;|\+)"


# Parse and load unbalanced network

# Balance network

In [9]:
def balance_network_by_parameter(
    nodes_df: pd.DataFrame, 
    edges_df: pd.DataFrame, 
    balance_parameter: str
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Balance network by reducing all classes of a parameter to match the size of the smallest class.
    
    Args:
        nodes_df: DataFrame containing node information
        edges_df: DataFrame containing edge information
        balance_parameter: Column name in nodes_df to balance by
        
    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: Balanced (nodes_df, edges_df)
    """
    # Get class sizes
    class_sizes = nodes_df[balance_parameter].value_counts()
    min_size = class_sizes.min()
    
    print(f"Balancing network by {balance_parameter}")
    print(f"Original class distribution: {class_sizes.to_dict()}")
    print(f"Target size per class: {min_size}")
    
    # Create balanced nodes dataframe
    balanced_nodes = pd.DataFrame()
    for class_value in class_sizes.index:
        # Get all nodes of this class
        class_nodes = nodes_df[nodes_df[balance_parameter] == class_value]
        
        # Randomly sample to match minimum size
        sampled_nodes = class_nodes.sample(n=min_size, random_state=42)
        balanced_nodes = pd.concat([balanced_nodes, sampled_nodes])
    
    # Get valid ECLIs after balancing
    valid_eclis = set(balanced_nodes['ecli'].values)
    
    # Filter edges to only include connections between remaining nodes
    balanced_edges = edges_df[edges_df['ecli'].isin(valid_eclis)].copy()
    
    # Filter references to only include valid ECLIs
    balanced_edges['references'] = balanced_edges['references'].apply(
        lambda refs: [ref for ref in refs if ref in valid_eclis]
    )
    
    print(f"Final network size: {len(balanced_nodes)} nodes and {len(balanced_edges)} edges")
    
    return balanced_nodes, balanced_edges

In [11]:
def count_total_edges(edges_df: pd.DataFrame) -> int:
    """
    Count total number of edges in the network, considering multiple targets per entry.
    
    Args:
        edges_df: DataFrame containing edges with 'target' column that may contain multiple targets
        
    Returns:
        int: Total number of edges in the network
    """
    total_edges = 0
    
    # Iterate through each row
    for _, row in edges_df.iterrows():
        # Handle references column
        if isinstance(row['references'], str):
            # Clean the string and convert to list
            refs_str = row['references'].strip('[]').replace("'", "").replace('"', "")
            # Split by comma and clean each ECLI
            references = [ref.strip() for ref in refs_str.split(',') if ref.strip()]
            # Count valid ECLI references
            edge_count = len([ref for ref in references if ref.startswith('ECLI:')])
            total_edges += edge_count
        elif isinstance(row['references'], list):
            # If references is already a list
            edge_count = len([ref for ref in row['references'] if isinstance(ref, str) and ref.startswith('ECLI:')])
            total_edges += edge_count
            
    return total_edges

In [12]:
# Parse full network
nodes_path = '../data/METADATA/nodes.csv'
edges_path = '../data/METADATA/edges.csv'
output_root = '../networks/merged-article-edges'


all_nodes_df, all_edges_df = process_network(
    nodes=nodes_path,
    edges=edges_path,
    min_cases=50,
    split_by_article=False,
    save_files=False,
)

# Merge importance levels of 1 and 2 together (assign value 1), transpose 3 to 2, and 4 to 3
all_nodes_df['importance'] = all_nodes_df['importance'].replace({1: 1, 2: 1, 3: 2, 4: 3})

print(f"Number of nodes: {len(all_nodes_df)}")
print(f"Number of edges: {count_total_edges(all_edges_df)}")

# Balance network by importance
balanced_nodes_importance, balanced_edges_importance = balance_network_by_parameter(
    nodes_df=all_nodes_df,
    edges_df=all_edges_df,
    balance_parameter='importance'
)

# Balance network by doctypebranch
balanced_nodes_doctypebranch, balanced_edges_doctypebranch = balance_network_by_parameter(
    nodes_df=all_nodes_df,
    edges_df=all_edges_df,
    balance_parameter='doctypebranch'
)

# Save balanced networks (full)
save_as_json(all_nodes_df, os.path.join(output_root, 'full-unbalanced/nodes.json'))
save_as_json(all_edges_df, os.path.join(output_root, 'full-unbalanced/edges.json'))

save_as_json(balanced_nodes_importance, os.path.join(output_root, 'full-balanced-importance/nodes.json'))
save_as_json(balanced_edges_importance, os.path.join(output_root, 'full-balanced-importance/edges.json'))

save_as_json(balanced_nodes_doctypebranch, os.path.join(output_root, 'full-balanced-doctypebranch/nodes.json'))
save_as_json(balanced_edges_doctypebranch, os.path.join(output_root, 'full-balanced-doctypebranch/edges.json'))

# Split the balanced networks
_, _ = process_network(
    nodes=balanced_nodes_importance,
    edges=balanced_edges_importance,
    min_cases=50,
    split_by_article=True,
    save_files=True,
    merge_subarticles=True,
    output_path=os.path.join(output_root, 'split-balanced-importance')
)

_, _ = process_network(
    nodes=balanced_nodes_doctypebranch,
    edges=balanced_edges_doctypebranch,
    min_cases=50,
    split_by_article=True,
    save_files=True,
    merge_subarticles=True,
    output_path=os.path.join(output_root, 'split-balanced-doctypebranch')
)

# Split the unbalanced networks
_, _ = process_network(
    nodes=all_nodes_df,
    edges=all_edges_df,
    min_cases=50,
    split_by_article=True,
    merge_subarticles=True,
    save_files=True,
    output_path=os.path.join(output_root, 'split-unbalanced')
)

Reading nodes from ../data/METADATA/nodes.csv...
Reading edges from ../data/METADATA/edges.csv...
Number of nodes: 27801
Number of edges: 232002
Balancing network by importance
Original class distribution: {3.0: 19895, 2.0: 5756, 1.0: 2150}
Target size per class: 2150
Final network size: 6450 nodes and 6450 edges
Balancing network by doctypebranch
Original class distribution: {'CHAMBER': 20717, 'COMMITTEE': 6570, 'GRANDCHAMBER': 514}
Target size per class: 514
Final network size: 1542 nodes and 1542 edges
Found 48 unique articles
Skipping Article 33 - only 8 cases (minimum: 50)
Created network for Article 39 with 106 nodes
Skipping Article 27 - only 3 cases (minimum: 50)
Skipping Article 4 - only 34 cases (minimum: 50)
Skipping Article 32 - only 6 cases (minimum: 50)
Created network for Article P7 with 85 nodes
Skipping Article 30 - only 11 cases (minimum: 50)
Created network for Article P1 with 1021 nodes
Skipping Article 43 - only 11 cases (minimum: 50)
Skipping Article P13 - only 1 

  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]


Created network for Article 41 with 3595 nodes
Created network for Article 37 with 229 nodes
Created network for Article 13 with 948 nodes
Created network for Article 1 with 66 nodes
Created network for Article 2 with 427 nodes
Created network for Article 9 with 125 nodes
Created network for Article 38 with 114 nodes
Created network for Article 11 with 213 nodes
Skipping Article 44 - only 1 cases (minimum: 50)
Skipping Article 26 - only 14 cases (minimum: 50)
Skipping Article 56 - only 8 cases (minimum: 50)
Skipping Article P6 - only 2 cases (minimum: 50)
Skipping Article 17 - only 32 cases (minimum: 50)
Skipping Article 19 - only 10 cases (minimum: 50)
Skipping Article 57 - only 19 cases (minimum: 50)
Skipping Article 25 - only 18 cases (minimum: 50)
Created network for Article 35 with 1585 nodes
Skipping Article 36 - only 39 cases (minimum: 50)
Skipping Article P12 - only 10 cases (minimum: 50)


  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]


Created network for Article 5 with 981 nodes
Created network for Article 29 with 557 nodes
Created network for Article P4 with 80 nodes
Skipping Article 52 - only 2 cases (minimum: 50)
Created network for Article 18 with 62 nodes
Created network for Article 6 with 2987 nodes
Created network for Article 46 with 204 nodes
Created network for Article 14 with 528 nodes
Skipping Article 28 - only 2 cases (minimum: 50)
Skipping Article 53 - only 4 cases (minimum: 50)
Created network for Article 34 with 531 nodes
Skipping Article 12 - only 26 cases (minimum: 50)
Skipping Article 16 - only 2 cases (minimum: 50)
Created network for Article 7 with 92 nodes
Skipping Article 58 - only 14 cases (minimum: 50)
Skipping Article 15 - only 29 cases (minimum: 50)


  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]


Created network for Article 3 with 1082 nodes
Found 44 unique articles
Skipping Article 33 - only 5 cases (minimum: 50)
Skipping Article 39 - only 25 cases (minimum: 50)
Skipping Article 4 - only 5 cases (minimum: 50)
Skipping Article 30 - only 11 cases (minimum: 50)
Skipping Article P7 - only 31 cases (minimum: 50)
Skipping Article 32 - only 1 cases (minimum: 50)
Created network for Article P1 with 286 nodes
Skipping Article 43 - only 11 cases (minimum: 50)
Created network for Article 10 with 142 nodes
Created network for Article 8 with 230 nodes
Created network for Article 41 with 544 nodes
Created network for Article 13 with 229 nodes
Created network for Article 37 with 78 nodes
Skipping Article 1 - only 31 cases (minimum: 50)
Created network for Article 2 with 87 nodes
Skipping Article 9 - only 33 cases (minimum: 50)
Skipping Article 38 - only 19 cases (minimum: 50)
Created network for Article 11 with 66 nodes
Skipping Article 26 - only 2 cases (minimum: 50)
Skipping Article 56 - o

  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, 

Created network for Article 39 with 852 nodes
Skipping Article 27 - only 4 cases (minimum: 50)
Created network for Article 4 with 56 nodes
Skipping Article 32 - only 7 cases (minimum: 50)
Created network for Article P7 with 380 nodes
Skipping Article 30 - only 12 cases (minimum: 50)
Created network for Article P1 with 4746 nodes
Skipping Article 43 - only 12 cases (minimum: 50)
Skipping Article P13 - only 1 cases (minimum: 50)


  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]


Created network for Article 10 with 1691 nodes
Created network for Article 8 with 3046 nodes


  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]


Created network for Article 41 with 10100 nodes
Created network for Article 13 with 3990 nodes
Created network for Article 1 with 102 nodes
Created network for Article 37 with 1254 nodes
Created network for Article 2 with 1388 nodes


  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]


Created network for Article 9 with 243 nodes
Created network for Article 38 with 594 nodes
Created network for Article 11 with 854 nodes
Skipping Article 44 - only 1 cases (minimum: 50)
Skipping Article 26 - only 25 cases (minimum: 50)
Skipping Article 56 - only 8 cases (minimum: 50)
Skipping Article P6 - only 10 cases (minimum: 50)
Created network for Article 17 with 67 nodes
Skipping Article 19 - only 13 cases (minimum: 50)
Skipping Article 57 - only 30 cases (minimum: 50)
Skipping Article 25 - only 21 cases (minimum: 50)


  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]


Created network for Article 35 with 3632 nodes
Skipping Article 36 - only 47 cases (minimum: 50)
Skipping Article P12 - only 20 cases (minimum: 50)
Created network for Article 5 with 4302 nodes
Created network for Article P4 with 199 nodes


  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]


Created network for Article 29 with 2521 nodes
Skipping Article 52 - only 2 cases (minimum: 50)
Created network for Article 18 with 118 nodes


  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]


Created network for Article 6 with 14811 nodes
Created network for Article 46 with 368 nodes
Created network for Article 14 with 1030 nodes
Skipping Article 28 - only 3 cases (minimum: 50)
Skipping Article 53 - only 6 cases (minimum: 50)
Created network for Article 34 with 1181 nodes
Skipping Article 12 - only 35 cases (minimum: 50)
Skipping Article 16 - only 2 cases (minimum: 50)
Created network for Article 7 with 170 nodes


  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]
  filtered_nodes = nodes_df[nodes_df['article'].str.contains(pattern, na=False, regex=True)]


Skipping Article 58 - only 32 cases (minimum: 50)
Skipping Article 15 - only 45 cases (minimum: 50)
Created network for Article 3 with 4606 nodes
