# Communities

In [1]:
import networkx as nx
import numpy as np
G = nx.read_graphml("world_graph_attributes.graphml")


First, we will inspect the network

In [2]:
for node, data in G.nodes(data=True):
    print(node, data)

Afghanistan {'sentiment_score': 3.0, 'gdp': 19101353833}
China {'sentiment_score': 4.4, 'gdp': 19910000000000}
Egypt {'sentiment_score': 4.1, 'gdp': 303175127598}
France {'sentiment_score': 5.7, 'gdp': 2715518274227}
India {'sentiment_score': 6.0, 'gdp': 2611000000000}
Iran {'sentiment_score': 4.8, 'gdp': 445345282123}
Italy {'sentiment_score': 5.7, 'gdp': 2001244392042}
Pakistan {'sentiment_score': 4.7, 'gdp': 304400000000}
Qatar {'sentiment_score': 6.3, 'gdp': 183466208791}
Russia {'sentiment_score': 4.1, 'gdp': 1699876578871}
United_Kingdom {'sentiment_score': 6.1, 'gdp': 2827113184696}
United_States {'sentiment_score': 5.8, 'gdp': 21427700000000}
Uzbekistan {'sentiment_score': 4.1, 'gdp': 57921286440}
Albania {'sentiment_score': 5.9, 'gdp': 15278077447}
Bosnia_and_Herzegovina {'sentiment_score': 4.1, 'gdp': 20047848435}
Bulgaria {'sentiment_score': 5.2, 'gdp': 86000000000}
Germany {'sentiment_score': 5.5, 'gdp': 3845630030824}
Greece {'sentiment_score': 6.2, 'gdp': 209852761469}
Ir

In [3]:
print(len(G.nodes()))

192


In [4]:
G = G.to_undirected()

In [5]:
# Building the Louvain partition using NetworkX's built-in function (define a seed for reproducibility)
louvain_communities = nx.community.louvain_communities(G, seed = 10)
print(f"Total communities formed using Louvain method: {len(louvain_communities)}\n")


for i, community in enumerate(louvain_communities):
    print(f"Louvain Community {i+1} (n = {len(community)}): {list(community)[:5]}")
print("---")

Total communities formed using Louvain method: 6

Louvain Community 1 (n = 31): ['Bahrain', 'Syria', 'United_Arab_Emirates', 'Oman', 'Algeria']
Louvain Community 2 (n = 42): ['Sweden', 'Denmark', 'North_Macedonia', 'Slovakia', 'Azerbaijan']
Louvain Community 3 (n = 51): ['Kenya', 'Mozambique', 'Somalia', 'Burkina_Faso', 'Zimbabwe']
Louvain Community 4 (n = 32): ['Argentina', 'Guyana', 'Ecuador', 'Dominica', 'Mexico']
Louvain Community 5 (n = 35): ['Micronesia', 'Solomon_Islands', 'Philippines', 'Timor-Leste', 'Thailand']
Louvain Community 6 (n = 1): ['Saint_Vincent_and_the_Grenadines']
---


Now we will try to form communities based on language. Does not work now with the new network (does not have language). 

In [6]:
# Build communities by language
language_communities = {}

for node, data in G.nodes(data=True):
    language = data.get("official_language") 

    # skip nodes with missing values
    if language is None:
        print("Node with no language")
        print(node)
        continue

    language_communities[language] = node

Node with no language
Afghanistan
Node with no language
China
Node with no language
Egypt
Node with no language
France
Node with no language
India
Node with no language
Iran
Node with no language
Italy
Node with no language
Pakistan
Node with no language
Qatar
Node with no language
Russia
Node with no language
United_Kingdom
Node with no language
United_States
Node with no language
Uzbekistan
Node with no language
Albania
Node with no language
Bosnia_and_Herzegovina
Node with no language
Bulgaria
Node with no language
Germany
Node with no language
Greece
Node with no language
Ireland
Node with no language
Montenegro
Node with no language
Serbia
Node with no language
Spain
Node with no language
Algeria
Node with no language
Iceland
Node with no language
Libya
Node with no language
Mali
Node with no language
Malta
Node with no language
Mauritania
Node with no language
Morocco
Node with no language
Netherlands
Node with no language
Portugal
Node with no language
Tunisia
Node with no langu

In [None]:
for lang, members in language_communities.items():
    print(f"Language: {lang} ({len(members)} nodes)")
    print(members[:10], "...\n")

Language: Nepali (5 nodes)
Nepal ...

Language: Standard Chinese (5 nodes)
China ...

Language: Hindi (5 nodes)
India ...

Language: Lao (4 nodes)
Laos ...

Language: Burmese (7 nodes)
Myanmar ...

Language: Turkish (6 nodes)
Turkey ...

Language: Armenian (7 nodes)
Armenia ...

Language: Georgian (7 nodes)
Georgia ...

Language: German (13 nodes)
Liechtenst ...

Language: Greek (6 nodes)
Cyprus ...

Language: Persian (10 nodes)
Tajikistan ...

Language: nan (5 nodes)
Japan ...

Language: Arabic (10 nodes)
Mauritania ...

Language: Spanish (18 nodes)
Dominican_ ...

Language: French (7 nodes)
Vanuatu ...

Language: Italian (10 nodes)
San_Marino ...

Language: nan (9 nodes)
Australia ...

Language: Pashto (11 nodes)
Afghanista ...

Language: English (21 nodes)
Saint_Kitt ...

Language: Malay (6 nodes)
Brunei ...

Language: nan (13 nodes)
United_Sta ...

Language: Vietnamese (7 nodes)
Vietnam ...

Language: Portuguese (13 nodes)
Guinea-Bis ...

Language: Indonesian (9 nodes)
Indonesia ..

I will now create the divisions of GDP and the sentiment score. We will consider 3 levels: 1, 2 and 3. A function will be developed that does that to use for the various attributes more easily. 

In [15]:
def classify_nodes_by_attribute(G, attribute, k=3):
    """
    Returns a dictionary mapping class_label -> list_of_nodes.
    The attribute has a higher value in the the higher levels.
    """

    # Collect values only from nodes that have the attribute (can be erased once the network is perfect)
    #values = []
    #nodes_with_attr = []
#
    #for n, data in G.nodes(data=True):
    #    if attribute not in data:
    #        print(f"Node '{n}' is missing attribute '{attribute}' — skipping.")
    #        continue
#
    #    # Extract attribute values
    #    values.append(data[attribute])
    #    nodes_with_attr.append(n)

    # Store the values
    #vals = np.array(values)
    vals = np.array([G.nodes[n][attribute] for n in G.nodes])

    # Compute partition boundaries
    bins = np.linspace(vals.min(), vals.max(), k + 1)

    # Create labels: Level_1, Level_2, ..., Level_k
    labels = [f"Level_{i+1}" for i in range(k)]

    # Prepare output dictionary
    classes = {label: [] for label in labels}

    # Assign nodes to classes
    for node, data in G.nodes(data=True):
        value = data.get(attribute)

        # Skip if missing
        if value is None:
            print(f"Node {node} has no attribute '{attribute}'")
            continue

        # Determine the partition index
        idx = np.digitize(value, bins) - 1 
        # clamp to valid range 
        idx = min(max(idx, 0), k-1)        

        classes[labels[idx]].append(node)

    return classes


In [11]:
sentiment_partitions = classify_nodes_by_attribute(G, "sentiment_score", k=3)

In [12]:
sentiment_partitions["Level_1"]

['Afghanistan',
 'Libya',
 'Mali',
 'Democratic_Republic_of_the_Congo',
 'Niger',
 'Zimbabwe',
 'Guinea',
 'Lebanon',
 'Belarus',
 'Mozambique',
 'Burkina_Faso',
 'Burundi',
 'Chad',
 'Central_African_Republic',
 'Nicaragua',
 'Haiti',
 'South_Sudan',
 'Yemen',
 'Equatorial_Guinea',
 'Liberia',
 'Guinea-Bissau',
 'Turkmenistan',
 'Tajikistan',
 'Myanmar',
 'Suriname']

In [13]:
print(len(sentiment_partitions["Level_2"]))

116


In [14]:
print(sentiment_partitions["Level_3"])

['France', 'India', 'Italy', 'Qatar', 'United_Kingdom', 'United_States', 'Albania', 'Greece', 'Spain', 'Iceland', 'Malta', 'Netherlands', 'Portugal', 'Andorra', 'Australia', 'Belgium', 'Canada', 'Chile', 'Denmark', 'Japan', 'New_Zealand', 'Switzerland', 'Singapore', 'Austria', 'Liechtenstein', 'Luxembourg', 'Slovenia', 'Norway', 'Belize', 'Bhutan', 'Sweden', 'Botswana', 'Brunei', 'Senegal', 'Tanzania', 'Ecuador', 'Costa_Rica', 'Czech_Republic', 'Finland', 'Tonga', 'Jordan', 'Nauru', 'Tuvalu', 'United_Arab_Emirates', 'Maldives', 'Palau', 'Seychelles', 'Monaco', 'South_Korea', 'San_Marino', 'The_Bahamas']


In [16]:
gdp_partitions = classify_nodes_by_attribute(G,'gdp', k=3)

In [17]:
print(gdp_partitions["Level_3"])

['China', 'United_States']
