# APAs Network Graph Creation

This code contains all that it needed to recreate the network graph of apas and meps in both 9th and 10th terms. 

# 1. Set-Up

## 1.1 Import Libraries

In [1]:
import pandas as pd
import json
import os
from pyvis.network import Network

## 1.2 Import Data

In [2]:
# Load JSON files for each term
def load_mep_data(json_path):
    """Load MEP data from a JSON file."""
    print(f"Loading MEP data from: {json_path}")
    if not os.path.exists(json_path):
        print(f"File not found: {json_path}")
        exit(1)
    
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            file_content = f.read().strip()  # Strip any extra whitespace
            if not file_content:
                raise ValueError("File is empty")
            
            # Validate JSON by loading
            meps_data = json.loads(file_content)  
            print(f"Loaded {len(meps_data)} MEPs successfully.")
            return meps_data
    
    except json.JSONDecodeError as e:
        print(f"JSON decode error at character {e.pos}: {e}")
    except ValueError as e:
        print(f"ValueError: {e}")
    except Exception as e:
        print(f"Failed to load MEP data: {e}")
    exit(1)

# File paths (replace with your paths)
data_9th_term = load_mep_data('C:/Users/Emilia/Documents/Uni Helsinki/Year Three/AMO Freelance/assistant task/9 term/raw data/national party included/9term_apas_w_nationalParty_noMANUAL.json')
data_10th_term = load_mep_data('C:/Users/Emilia/Documents/Uni Helsinki/Year Three/AMO Freelance/assistant task/10 term/raw data/national party, 10-11-2024/mep_assistants_national_party.json')

Loading MEP data from: C:/Users/Emilia/Documents/Uni Helsinki/Year Three/AMO Freelance/assistant task/9 term/raw data/national party included/9term_apas_w_nationalParty_noMANUAL.json
Loaded 706 MEPs successfully.
Loading MEP data from: C:/Users/Emilia/Documents/Uni Helsinki/Year Three/AMO Freelance/assistant task/10 term/raw data/national party, 10-11-2024/mep_assistants_national_party.json
Loaded 719 MEPs successfully.


## 1.3 Convert to DF & Remove Columns Unesscary for Analysis

In [3]:
# Convert data to DataFrame
df_9th = pd.json_normalize(data_9th_term)
df_9th['term'] = 9  # Add term info
df_10th = pd.json_normalize(data_10th_term)
df_10th['term'] = 10

# Concatenate dataframes
all_meps = pd.concat([df_9th, df_10th], ignore_index=True)
#print(all_meps.columns)

In [4]:
# Merge 'mep_group' and 'group'
all_meps['group'] = all_meps['mep_group'].combine_first(all_meps['group'])

# Merge 'mep_national_party' and 'national_party'
all_meps['national_party'] = all_meps['mep_national_party'].combine_first(all_meps['national_party'])

# Drop the redundant columns after merging
all_meps.drop(columns=['mep_group', 'mep_national_party'], inplace=True)

# Check the updated DataFrame
#print(all_meps.tail())

In [5]:
# keep only columns that we will use in analysis
columns_to_keep = [
    'name',
    'national_party', 
    'group', 
    'country', 
    'term', 
    'assistants.Accredited assistants', 
    'assistants.Accredited assistants (grouping)'
]

# filter the DataFrame
meps_apas = all_meps[columns_to_keep]

# rename the columns by removing the 'assistants.' prefix
meps_apas.columns = [col.replace('assistants.', '') for col in meps_apas.columns]

#print(meps_apas.columns)

# 2. CLEAN AND STANDARDIZE

## 2.1 Add political group abbreviations

In [6]:
# Create a mapping dictionary for party names to abbreviations
group_abbreviations = {
    'Renew Europe Group': 'Renew',
    'European Conservatives and Reformists Group': 'ECR',
    "Group of the European People's Party (Christian Democrats)": 'EPP',
    'Group of the Progressive Alliance of Socialists and Democrats in the European Parliament': 'S&D',
    'Identity and Democracy Group': 'ID',
    'Group of the Greens/European Free Alliance': 'G/EFA',
    'Confederal Group of the European United Left - Nordic Green Left': 'GUE/NGL',
    'Non-attached Members': 'NI',
    'The Left group in the European Parliament - GUE/NGL': 'GUE/NGL',
    'Group of the European United Left - Nordic Green Left': 'GUE/NGL',
    'Patriots for Europe Group': 'Patriots',
    'Europe of Sovereign Nations Group': 'ESN'
}

meps_apas.loc[:, 'group_abbr'] = meps_apas['group'].map(group_abbreviations)
#print(meps_apas['national_party'].unique())
print(meps_apas['group_abbr'].unique())
# #print(meps_apas.columns)

# Iterate through the rows of the DataFrame
for index, row in meps_apas.iterrows():
    if pd.isna(row['group_abbr']):
        print(row['name'])

['Renew' 'ECR' 'EPP' 'S&D' 'ID' 'G/EFA' 'GUE/NGL' 'NI' nan 'ESN'
 'Patriots']
Johan DANIELSSON
Linea SØGAARD-LIDELL
Manuel BOMPARD
Miroslav ČÍŽ
Nicola DANTI
Sira REGO


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meps_apas.loc[:, 'group_abbr'] = meps_apas['group'].map(group_abbreviations)


## 2.2 Flatten & pivot data

This will make sure that the the assistants are row level, instead of the MEPs as the data is currently. 

In [7]:
# Function to extract relevant assistants with type information
def extract_assistants(row):
    assistants = []
    # Define relevant groups and their corresponding types
    assistant_groups = {
        'Accredited assistants': 'accredited',
        'Accredited assistants (grouping)': 'accredited_grouping'
    }
    
    for group, assistant_type in assistant_groups.items():
        # Check if the group exists in the row and is a list
        if group in row and isinstance(row[group], list):
            for name in row[group]:
                assistants.append({
                    'assistant_name': name,
                    'mep_name': row['name'],
                    'group_full': row['group'],
                    'group_abbr': row['group_abbr'],
                    'mep_party': row['national_party'],
                    'country': row['country'],
                    'term': row['term'],
                    'assistant_type': assistant_type  # Add assistant type
                })
    return assistants

# Flatten assistants for easier comparison
assistant_data = pd.DataFrame(
    [item for sublist in meps_apas.apply(extract_assistants, axis=1) for item in sublist]
)
print(assistant_data)

                         assistant_name         mep_name  \
0                 Anna Sophia BENGTSSON  Abir AL-SAHLANI   
1               John August HULTENGAARD  Abir AL-SAHLANI   
2                  Tyra Louise LUNDBERG  Abir AL-SAHLANI   
3     Linn Christina Brunhilde OETTERLI  Abir AL-SAHLANI   
4               Sylwia Joanna BETKOWSKA     Adam JARUBAS   
...                                 ...              ...   
6711          Andréa Laure Marie MOULIN    Željana ZOVKO   
6712                         Polona KEK       Milan ZVER   
6713                     Petra SKRINJAR       Milan ZVER   
6714                        Peter SUHEL       Milan ZVER   
6715                     Dominik STRAKL       Milan ZVER   

                                             group_full group_abbr  \
0                                    Renew Europe Group      Renew   
1                                    Renew Europe Group      Renew   
2                                    Renew Europe Group      Renew   

## 2.3 Remove duplicates 

This section of code will remove the duplicated assistant names (only if they are from the same term). This is done to remove errors from the manual additions of apas. 

In [8]:
# FIRST REMAP THE NAMES
from fuzzywuzzy import fuzz

def find_similar_names(df, same_term=True):
    similar_pairs = []

    # Iterate through each MEP
    for mep in df['mep_name'].unique():
        # Filter for the current MEP
        if same_term:
            mep_data = df[df['mep_name'] == mep]
        else:
            mep_data = df[df['mep_name'] == mep]  # No term filter for this case

        # Check if the DataFrame contains 'term' column
        term = mep_data['term'].iloc[0] if 'term' in mep_data.columns else "Unknown term"

        names = mep_data['assistant_name'].tolist()

        # Check for similarity between each pair of assistant names
        for i in range(len(names)):
            for j in range(i + 1, len(names)):
                # Calculate similarity score
                score = fuzz.ratio(names[i].lower(), names[j].lower())
                
                # Adjusting the threshold between 90 and 99
                if 75 <= score < 100:  # Use the desired range here
                    similar_pairs.append((names[i], names[j], mep, term))

    return similar_pairs

# Find similar names considering only the same term
#similar_names_same_term = find_similar_names(assistant_data, same_term=True)
#print("Similar assistant names for the same MEP in the same term:")
#for name1, name2, mep in similar_names_same_term:
#    print(f"{name1} and {name2} for {mep} are similar.")

# create remapping based on similar names
name_mapping = {
    "Magdalena NOWACKA": "Magdalena HILLS-NOWACKA", 
    "PAULA SENDIN RODRIGUEZ": "Paula SENDÍN RODRIGUEZ",
    "Eleonora Nikolaycheva GUIGOVA": "Eleonora Nikolaycheva GUIGOVA-NOSKER",
    "Eleonora Nikolaycheva GUIGOVA-NOSKER": "Eleonora Nikolaycheva GUIGOVA-NOSKER",
    "Stefanie SIFFT": "Stefanie SIFFT",
    "Stefanie Gabi SIFFT": "Stefanie SIFFT",
    "Anne-Cecile Juliette GAULT": "Anne-Cecile Juliette GAULT",
    "Anne-Cecile Juliette Rachel GAULT": "Anne-Cecile Juliette GAULT",
    "Sophie Anne Geraldine Marie GUIL": "Sophie Anne Geraldine Marie GUIL",
    "Sophie Anne Geraldine Marie Genevieve GUIL": "Sophie Anne Geraldine Marie GUIL",
    "Ana LOPEZ GONZALEZ": "ANA LÓPEZ GONZÁLEZ",
    "ANA LÓPEZ GONZÁLEZ": "ANA LÓPEZ GONZÁLEZ",
    "Claudia MARTINEZ MUNOZ": "Claudia MARTÍNEZ MUÑOZ",
    "CLAUDIA MARTÍNEZ MUÑOZ": "Claudia MARTÍNEZ MUÑOZ",
    "Maria Mercedes GARCIA MUNOZ": "MARIA MERCEDES GARCIA MUÑOZ",
    "MARIA MERCEDES GARCIA MUÑOZ": "MARIA MERCEDES GARCIA MUÑOZ",
    "Gilles Willy B SEGERS": "GILLES WILLY SEGERS",
    "GILLES WILLY SEGERS": "GILLES WILLY SEGERS",
    "Magdalena GONZALEZ GOZALBO": "Maria Magdalena GONZALEZ GOZALBO",
    "Maria Magdalena GONZALEZ GOZALBO": "Maria Magdalena GONZALEZ GOZALBO",
    "Arturo VILLARROYA GONZALEZ": "Arturo VILLARROYA GONZÁLEZ",
    "Fernando Jose NUNEZ ROBRES PATINO": "Fernando Jose NUNEZ-ROBRES PATINO",
    "Fernando Jose NUNEZ-ROBRES PATINO": "Fernando Jose NUNEZ-ROBRES PATINO",
    "Bibiana CARRETO PEREZ BARBADILLO": "BIBIANA CARRETO PÉREZ BARBADILLO",
    "BIBIANA CARRETO PÉREZ BARBADILLO": "BIBIANA CARRETO PÉREZ BARBADILLO",
    "Magdalena GONZALEZ GOZALBO": "Maria Magdalena GONZALEZ GOZALBO",
    "PAULA SENDIN RODRIGUEZ": "Paula SENDÍN RODRIGUEZ",
    "Paula SENDIN RODRIGUEZ": "Paula SENDÍN RODRIGUEZ",
    "MARIA MERCEDES GARCIA MUNOZ": "MARIA MERCEDES GARCIA MUÑOZ", 
    "ARTURO VILLARROYA GONZALEZ": "Arturo VILLARROYA GONZÁLEZ",
    "ARTURO VILLARROYA GONZALEZ":"Arturo VILLARROYA GONZÁLEZ", 
    "Marlene Elisabeth SEELMAIER": "Marlene Elisabeth STROEHM",          # NAMED CHANGED, CURRENT REMAPPING FOUND IN 10th term 
    "Olivia Jeanna Beatrice ANDERSSON": "Olivia Jeanna Beatrice STÅNGE", # NAMED CHANGED, CURRENT REMAPPING FOUND IN 10th term
    "Sirle ROSENFELDT": "Sirle ROSENFELDT - KOOP",                       # NAMED CHANGED, CURRENT REMAPPING FOUND IN 10th term
    "Saara ANTTINEN": "Saara ANTTINEN-KHUMALO",                          # NAMED CHANGED, CURRENT REMAPPING FOUND IN 10th term
    "Nikolina BRKOVIC": "Nikolina BRKOVIC STANCHEVA", 
    "Josep/Pepe MERCADAL BAQUERO": "Pepe MERCADAL BAQUERO",               # CHANGED HIS NAME TO PEPE
    "Josep MERCADAL BAQUERO": "Pepe MERCADAL BAQUERO" 
}

# remap 
assistant_data['assistant_name'] = assistant_data['assistant_name'].replace(name_mapping)

# check that name remapping worked
# Find similar names considering only the same term
similar_names_same_term = find_similar_names(assistant_data, same_term=True)
print("Similar assistant names for the same MEP in the same term:")
for name1, name2, mep, term in similar_names_same_term:
    print(f"{name1} and {name2} for {mep} in term {term} are similar.")

Similar assistant names for the same MEP in the same term:
Silvia RIGHI and SILVIA DRAGONI for Elisabetta GUALMINI in term 9 are similar.
Silvia RIGHI and SILVIA DRAGONI for Elisabetta GUALMINI in term 9 are similar.
Petra PAULIK and Petra PASZTOR-PAULIK for Enikő GYŐRI in term 9 are similar.
Petra PAULIK and Petra PASZTOR-PAULIK for György HÖLVÉNYI in term 9 are similar.
CARLOS CAMPILLO PEREZ and IGNACIO CAMPILLO PEREZ for Hermann TERTSCH in term 9 are similar.
Dirk FRIEDRICH and Pierre FRIEDRICH for Jörg MEUTHEN in term 9 are similar.
Petra PAULIK and Petra PASZTOR-PAULIK for Kinga GÁL in term 9 are similar.
Jakub PUSZKARSKI and Jakub KARWANSKI for Magdalena ADAMOWICZ in term 9 are similar.
Michal MOJTO and Michaela MOJTOVÁ for Monika BEŇOVÁ in term 9 are similar.
Michal MOJTO and Michaela MOJTOVÁ for Monika BEŇOVÁ in term 9 are similar.
Petra PAULIK and Petra PASZTOR-PAULIK for Tamás DEUTSCH in term 9 are similar.
CARLOS CAMPILLO PEREZ and IGNACIO CAMPILLO PEREZ for Mireia BORRÁS PA

In [9]:
# THEN REMOVE ANY EXACT MATCHES 
# Find exact matches within each MEP and term
def find_exact_matches(df):
    exact_matches = {}

    # Group by 'mep_name' and 'term' and find duplicates in 'assistant_name'
    for (mep, term), group in df.groupby(['mep_name', 'term']):
        # Normalize names by stripping whitespace and converting to lowercase
        normalized_names = group['assistant_name'].str.strip().str.lower()
        
        # Count occurrences of each normalized name
        duplicates = normalized_names.value_counts()
        
        # Only keep names that occur more than once
        duplicate_names = duplicates[duplicates > 1].index.tolist()
        
        if duplicate_names:
            exact_matches[(mep, term)] = duplicate_names

    return exact_matches

# Get exact matches
exact_matches = find_exact_matches(assistant_data)

# Print out exact matches
print("Exact matches in MEP categories (for the same term):")
for (mep, term), names in exact_matches.items():
    print(f"\nMEP: {mep} | Term: {term}")
    for name in names:
        print(f" - {name}")

# remove duplicates 
unique_apas = assistant_data.drop_duplicates(subset=['mep_name', 'term', 'assistant_name'])
#print(unique_meps_apas)

Exact matches in MEP categories (for the same term):


## 2.3 Clean country column

This section of the code replaces instances where MEP's countries have been scraped incorrectly. These errors were found manually. 

In [10]:
# Replace values in the 'country' column
unique_apas['country'].replace({'PSC-PSOE': 'Spain', 'PNCR': 'Romania'}, inplace=True)

# Check the updated values in the 'country' column
print(unique_apas['country'].nunique())

28


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  unique_apas['country'].replace({'PSC-PSOE': 'Spain', 'PNCR': 'Romania'}, inplace=True)


# 3. Network Graph

In [12]:
print(unique_apas)

                         assistant_name         mep_name  \
0                 Anna Sophia BENGTSSON  Abir AL-SAHLANI   
1               John August HULTENGAARD  Abir AL-SAHLANI   
2                  Tyra Louise LUNDBERG  Abir AL-SAHLANI   
3     Linn Christina Brunhilde OETTERLI  Abir AL-SAHLANI   
4               Sylwia Joanna BETKOWSKA     Adam JARUBAS   
...                                 ...              ...   
6711          Andréa Laure Marie MOULIN    Željana ZOVKO   
6712                         Polona KEK       Milan ZVER   
6713                     Petra SKRINJAR       Milan ZVER   
6714                        Peter SUHEL       Milan ZVER   
6715                     Dominik STRAKL       Milan ZVER   

                                             group_full group_abbr  \
0                                    Renew Europe Group      Renew   
1                                    Renew Europe Group      Renew   
2                                    Renew Europe Group      Renew   

In [17]:
import pandas as pd
from pyvis.network import Network

# Define constants
color_map = {
    'Renew': '#FFD700', 'EPP': '#3399FF', 'S&D': '#FF0000',
    'G/EFA': '#009900', 'ID': '#2B3856', 'ECR': '#0054A5',
    'GUE/NGL': '#990000', 'NI': '#999999', 'PfE': '#301C5C',
    'ESN': '#6495ED'
}

dual_term_assistants = set()
dual_term_meps = set()

# Preprocess data
def preprocess_data(data):
    """Preprocess data and identify dual-term entities."""
    # Map political groups to abbreviations
    group_abbreviations = {
        'Renew Europe Group': 'Renew',
        'European Conservatives and Reformists Group': 'ECR',
        "Group of the European People's Party (Christian Democrats)": 'EPP',
        'Group of the Progressive Alliance of Socialists and Democrats in the European Parliament': 'S&D',
        'Identity and Democracy Group': 'ID',
        'Group of the Greens/European Free Alliance': 'G/EFA',
        'Confederal Group of the European United Left - Nordic Green Left': 'GUE/NGL',
        'Non-attached Members': 'NI',
        'The Left group in the European Parliament - GUE/NGL': 'GUE/NGL',
        'Group of the European United Left - Nordic Green Left': 'GUE/NGL',
        'Patriots for Europe Group': 'PfE',
        'Europe of Sovereign Nations Group': 'ESN'
    }

    data['group_full'] = data['group_full'].map(group_abbreviations).fillna(data['group_full'])

    # Filter data for terms 9 and 10
    dual_term_data = data[data['term'].isin([9, 10])]

    # Compute dual terms for assistants
    dual_term_assistants.update(
        dual_term_data.groupby('assistant_name')
        .filter(lambda x: len(x['term'].unique()) == 2)  # Check for both terms 9 and 10
        .assistant_name.unique()  # Extract assistant names
    )

    # Compute dual terms for MEPs
    dual_term_meps.update(
        dual_term_data.groupby('mep_name')
        .filter(lambda x: len(x['term'].unique()) == 2)  # Check for both terms 9 and 10
        .mep_name.unique()  # Extract MEP names
    )

    return data

accredited_assistants = preprocess_data(unique_apas)
print(f"Dual-term assistants: {dual_term_assistants}")
print(f"Dual-term MEPs: {dual_term_meps}")

# Create the Pyvis network
net = Network(height='900px', width='100%', notebook=False, directed=False)

# Precompute node titles and shapes
node_titles = {}
node_shapes = {}

def compute_hover_titles(data):
    """Compute hover titles and node shapes."""
    for _, row in data.iterrows():
        mep_name = row['mep_name']
        assistant_name = row['assistant_name']
        term = row['term']
        group_full = row['group_full']

        # MEP hover title
        if mep_name not in node_titles:
            node_titles[mep_name] = (
                f"{mep_name}\nPolitical Group: {group_full}\nNational Party: {row['mep_party']}"
            )
            node_shapes[mep_name] = {
                'shape': 'star' if mep_name in dual_term_meps else ('square' if term == 10 else 'dot'),
                'size': 40 if mep_name in dual_term_meps else 30,
                'color': color_map.get(group_full, '#000000')
            }

        # Assistant hover title
        if assistant_name not in node_titles:
            assistants_term_9 = data[(data['assistant_name'] == assistant_name) & (data['term'] == 9)]['mep_name'].tolist()
            assistants_term_10 = data[(data['assistant_name'] == assistant_name) & (data['term'] == 10)]['mep_name'].tolist()

            node_titles[assistant_name] = (
                f"{assistant_name}\nTerm 9: {'\n '.join(assistants_term_9)}\n"
                f"\nTerm 10: {'\n '.join(assistants_term_10)}"
            )
            node_shapes[assistant_name] = {
                'shape': 'star' if assistant_name in dual_term_assistants else ('square' if assistants_term_10 else 'dot'),
                'size': 20,
                'color': 'lightgray'
            }

compute_hover_titles(accredited_assistants)

# Add nodes
for node_name, props in node_shapes.items():
    net.add_node(node_name, title=node_titles[node_name], **props)

# Add edges
edges = [(row['mep_name'], row['assistant_name']) for _, row in accredited_assistants.iterrows()]
net.add_edges(edges)

# Apply force atlas physics for initial layout
net.force_atlas_2based(gravity=-50, spring_length=50, spring_strength=0.1, damping=0.9)

# Save and toggle physics off after rendering
net.save_graph('final_mep_network.html')
net.toggle_physics(False)

# Customize HTML
with open("final_mep_network.html", "r+") as file:
    content = file.read()

    # Title and Legend
    title_html = "<h2 style='text-align: center;'>Accredited Assistants Network (Terms 9 & 10)</h2>"
    legend_html = """
    <div style="position: absolute; top: 10px; right: 10px; background-color: white; padding: 10px; border: 1px solid black; z-index: 999;">
    <h4 style="margin: 0;">Legend</h4>
    <ul style="list-style-type: none; padding-left: 0; margin: 5px 0;">
        <li style="display: flex; align-items: center; margin: 5px 0;">
            <img src="https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fwebstockreview.net%2Fimages%2Fcircle-vector-png-1.png&f=1&nofb=1&ipt=a697617b87e7bd1b3dc81c4617f8e4b398e729a6ced767aa5d5a0b3a1eadce5e&ipo=images" alt="Circle" width="20" height="20" style="margin-right: 10px;">
            Term 9 only
        </li>
        <li style="display: flex; align-items: center; margin: 5px 0;">
            <img src="https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Ftoppng.com%2Fuploads%2Fpreview%2Ffile-svg-transparent-star-icon-11563055585lnti7ukeko.png&f=1&nofb=1&ipt=cc5290ab7e9f37adc70ef426e23641c12f904adda403946c8d9afcbc5b412460&ipo=images" alt="Star" width="20" height="20" style="margin-right: 10px;">
            Both Terms
        </li>
        <li style="display: flex; align-items: center; margin: 5px 0;">
            <img src="https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fwebstockreview.net%2Fimages%2Fsquare-clipart-border-1.jpg&f=1&nofb=1&ipt=18537daaf7169b4e6797329da0b3a4633874b99e5b05e1983821ae263c9542c6&ipo=images" alt="Square" width="20" height="20" style="margin-right: 10px;">
            Term 10 only
        </li>
    </ul>
    <h4 style="margin: 0;">Political Groups</h4>
    <ul style="list-style-type: none; padding-left: 0; margin: 5px 0;">
        <li style="margin: 5px 0;"><span style="color: #FFD700; font-size: 1.5em;">&#9679;</span> Renew</li>
        <li style="margin: 5px 0;"><span style="color: #3399FF; font-size: 1.5em;">&#9679;</span> EPP</li>
        <li style="margin: 5px 0;"><span style="color: #FF0000; font-size: 1.5em;">&#9679;</span> S&D</li>
        <li style="margin: 5px 0;"><span style="color: #009900; font-size: 1.5em;">&#9679;</span> G/EFA</li>
        <li style="margin: 5px 0;"><span style="color: #2B3856; font-size: 1.5em;">&#9679;</span> ID</li>
        <li style="margin: 5px 0;"><span style="color: #0054A5; font-size: 1.5em;">&#9679;</span> ECR</li>
        <li style="margin: 5px 0;"><span style="color: #990000; font-size: 1.5em;">&#9679;</span> GUE/NGL</li>
        <li style="margin: 5px 0;"><span style="color: #999999; font-size: 1.5em;">&#9679;</span> NA</li>
        <li style="margin: 5px 0;"><span style="color: #301C5C; font-size: 1.5em;">&#9679;</span> PfE</li>
        <li style="margin: 5px 0;"><span style="color: #6495ED; font-size: 1.5em;">&#9679;</span> ESN</li>
    </ul>
    </div>
    """

    # Filter
    filter_html = """
    <div style='position: absolute; bottom: 10px; left: 10px; background-color: white; padding: 10px; border: 1px solid #ccc; z-index: 1000;'>
        <label for='shapeFilter'>Filter by Term:</label>
        <select id='shapeFilter'>
            <option value='all'>All Terms</option>
            <option value='star'>Both Terms</option>
            <option value='square'>10th Term</option>
            <option value='dot'>9th Term</option>
        </select>
    </div>
    <script>
        const shapeFilter = document.getElementById('shapeFilter');
        shapeFilter.addEventListener('change', function() {
            const selectedShape = shapeFilter.value;
            network.body.data.nodes.update(network.body.data.nodes.get().map(node => {
                node.hidden = selectedShape !== 'all' && node.shape !== selectedShape;
                return node;
            }));
        });
    </script>
    """

    # Inject into HTML
    content = content.replace("</body>", title_html + legend_html + filter_html + "</body>")
    file.seek(0)
    file.write(content)
    file.truncate()

print("Graph saved as 'final_mep_network.html'.")

Dual-term assistants: {'Sirle ROSENFELDT - KOOP', 'Christian BECK', 'Maciej Pawel JELENSKI', 'Mai JOGIMAA', 'Mara LESINA', 'Louk Louis Catharina FAESEN', 'Almudena MOLINA DELGADO', 'Lidiya Rumenova SIMOVA', 'Piotr SZYMANOWICZ', 'Minna Anette MANNINEN', 'Francesco CARBONI', 'Borys Maria BRZEZINSKI', 'Misachi Josef OGAWA', 'Xavier Victor E EVERAERT', 'Ota JAKSCH', 'Orsolya ZARA', 'Arrigo VERTUA', 'MARIA INÊS MARQUES DA COSTA PIRRÉ', 'Charlotte Julie Isabelle IZARD', 'Silvia RIGHI', 'Athanasios ARGYROPOULOS', 'Carmen DINU', 'Nora KARTELOVA', 'Giancarlo ALTIERI', 'Jose Eduardo BAPTISTA VIEIRA', 'Raffaele PADOVANO', 'Christian Ernesto DE MATTIA', 'Miklos-Csaba GYOERGYJAKAB', 'Christoph ABT', 'Anne PLOEGER', 'Ioanna STEFATOU', 'Rok LESAR', 'Claude-Edouard CROCHET', 'Antonio MATTIELLO', 'Eimear Rose DEERY', 'Ignacio Santiago DEL OLMO TEJERINA', 'Paul Jonas ESCHENBURG', 'Simone SIMARI', 'Alexandra MEHDI', 'Ann Braarup CUYKENS', 'Stefanie SIFFT', 'Michaela RIAPOSOVA', 'Roxane Julie Sarah CHAPLA

In [26]:
import pandas as pd
from pyvis.network import Network

# Define constants
color_map = {
    'Renew': '#FFD700', 'EPP': '#3399FF', 'S&D': '#FF0000',
    'G/EFA': '#009900', 'ID': '#2B3856', 'ECR': '#0054A5',
    'GUE/NGL': '#990000', 'NI': '#999999', 'PfE': '#301C5C',
    'ESN': '#6495ED'
}

dual_term_assistants = set()
dual_term_meps = set()

# Preprocess data
def preprocess_data(data):
    """Preprocess data and identify dual-term entities."""
    # Map political groups to abbreviations
    group_abbreviations = {
        'Renew Europe Group': 'Renew',
        'European Conservatives and Reformists Group': 'ECR',
        "Group of the European People's Party (Christian Democrats)": 'EPP',
        'Group of the Progressive Alliance of Socialists and Democrats in the European Parliament': 'S&D',
        'Identity and Democracy Group': 'ID',
        'Group of the Greens/European Free Alliance': 'G/EFA',
        'Confederal Group of the European United Left - Nordic Green Left': 'GUE/NGL',
        'Non-attached Members': 'NI',
        'The Left group in the European Parliament - GUE/NGL': 'GUE/NGL',
        'Group of the European United Left - Nordic Green Left': 'GUE/NGL',
        'Patriots for Europe Group': 'Patriots',
        'Europe of Sovereign Nations Group': 'ESN'
    }

    data['group_full'] = data['group_full'].map(group_abbreviations).fillna(data['group_full'])

    # Filter data for terms 9 and 10
    dual_term_data = data[data['term'].isin([9, 10])]

    # Compute dual terms for assistants
    dual_term_assistants.update(
        dual_term_data.groupby('assistant_name')
        .filter(lambda x: len(x['term'].unique()) == 2)  # Check for both terms 9 and 10
        .assistant_name.unique()  # Extract assistant names
    )

    # Compute dual terms for MEPs
    dual_term_meps.update(
        dual_term_data.groupby('mep_name')
        .filter(lambda x: len(x['term'].unique()) == 2)  # Check for both terms 9 and 10
        .mep_name.unique()  # Extract MEP names
    )

    return data

accredited_assistants = preprocess_data(unique_apas)
print(f"Dual-term assistants: {dual_term_assistants}")
print(f"Dual-term MEPs: {dual_term_meps}")

# Create the Pyvis network
net = Network(height='900px', width='100%', notebook=False, directed=False)

# Precompute node titles and shapes
node_titles = {}
node_shapes = {}

def compute_hover_titles(data):
    """Compute hover titles and node shapes."""
    for _, row in data.iterrows():
        mep_name = row['mep_name']
        assistant_name = row['assistant_name']
        term = row['term']
        group_full = row['group_full']

        # MEP hover title
        if mep_name not in node_titles:
            node_titles[mep_name] = (
                f"{mep_name}\nPolitical Group: {group_full}\nNational Party: {row['mep_party']}"
            )
            node_shapes[mep_name] = {
                'shape': 'star' if mep_name in dual_term_meps else ('square' if term == 10 else 'dot'),
                'size': 40 if mep_name in dual_term_meps else 30,
                'color': color_map.get(group_full, '#000000')
            }

        # Assistant hover title
        if assistant_name not in node_titles:
            assistants_term_9 = data[(data['assistant_name'] == assistant_name) & (data['term'] == 9)]['mep_name'].tolist()
            assistants_term_10 = data[(data['assistant_name'] == assistant_name) & (data['term'] == 10)]['mep_name'].tolist()

            node_titles[assistant_name] = (
                f"{assistant_name}\n"
                f"Term 9: {'\n '.join(assistants_term_9)}\n"
                f"\nTerm 10: {'\n '.join(assistants_term_10)}"
            )
            node_shapes[assistant_name] = {
                'shape': 'star' if assistant_name in dual_term_assistants else ('square' if assistants_term_10 else 'dot'),
                'size': 20,
                'color': 'lightgray'
            }

compute_hover_titles(accredited_assistants)

# Add nodes
for node_name, props in node_shapes.items():
    net.add_node(node_name, title=node_titles[node_name], **props)

# Add edges
edges = [(row['mep_name'], row['assistant_name']) for _, row in accredited_assistants.iterrows()]
net.add_edges(edges)

# Apply force atlas physics for initial layout
net.force_atlas_2based(gravity=-50, spring_length=50, spring_strength=0.1, damping=0.9)

# Save and toggle physics off after rendering
net.save_graph('final_mep_network.html')
net.toggle_physics(False)

# Customize HTML
with open("final_mep_network.html", "r+") as file:
    content = file.read()

    # Title and Legend
    title_html = "<h2 style='text-align: center;'>Accredited Assistants Network (Terms 9 & 10)</h2>"
    legend_html = """
    <div style="position: absolute; top: 10px; right: 10px; background-color: white; padding: 10px; border: 1px solid black; z-index: 999;">
    <h4 style="margin: 0;">Legend</h4>
    <ul style="list-style-type: none; padding-left: 0; margin: 5px 0;">
        <li style="display: flex; align-items: center; margin: 5px 0;">
            <img src="https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fwebstockreview.net%2Fimages%2Fcircle-vector-png-1.png&f=1&nofb=1&ipt=a697617b87e7bd1b3dc81c4617f8e4b398e729a6ced767aa5d5a0b3a1eadce5e&ipo=images" alt="Circle" width="20" height="20" style="margin-right: 10px;">
            Term 9 only
        </li>
        <li style="display: flex; align-items: center; margin: 5px 0;">
            <img src="https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Ftoppng.com%2Fuploads%2Fpreview%2Ffile-svg-transparent-star-icon-11563055585lnti7ukeko.png&f=1&nofb=1&ipt=cc5290ab7e9f37adc70ef426e23641c12f904adda403946c8d9afcbc5b412460&ipo=images" alt="Star" width="20" height="20" style="margin-right: 10px;">
            Both Terms
        </li>
        <li style="display: flex; align-items: center; margin: 5px 0;">
            <img src="https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fwebstockreview.net%2Fimages%2Fsquare-clipart-border-1.jpg&f=1&nofb=1&ipt=18537daaf7169b4e6797329da0b3a4633874b99e5b05e1983821ae263c9542c6&ipo=images" alt="Square" width="20" height="20" style="margin-right: 10px;">
            Term 10 only
        </li>
    </ul>
    <h4 style="margin: 0;">Political Groups</h4>
    <ul style="list-style-type: none; padding-left: 0; margin: 5px 0;">
        <li style="margin: 5px 0;"><span style="color: #FFD700; font-size: 1.5em;">&#9679;</span> Renew</li>
        <li style="margin: 5px 0;"><span style="color: #3399FF; font-size: 1.5em;">&#9679;</span> EPP</li>
        <li style="margin: 5px 0;"><span style="color: #FF0000; font-size: 1.5em;">&#9679;</span> S&D</li>
        <li style="margin: 5px 0;"><span style="color: #009900; font-size: 1.5em;">&#9679;</span> G/EFA</li>
        <li style="margin: 5px 0;"><span style="color: #2B3856; font-size: 1.5em;">&#9679;</span> ID</li>
        <li style="margin: 5px 0;"><span style="color: #0054A5; font-size: 1.5em;">&#9679;</span> ECR</li>
        <li style="margin: 5px 0;"><span style="color: #990000; font-size: 1.5em;">&#9679;</span> GUE/NGL</li>
        <li style="margin: 5px 0;"><span style="color: #999999; font-size: 1.5em;">&#9679;</span> NA</li>
        <li style="margin: 5px 0;"><span style="color: #301C5C; font-size: 1.5em;">&#9679;</span> PfE</li>
        <li style="margin: 5px 0;"><span style="color: #6495ED; font-size: 1.5em;">&#9679;</span> ESN</li>
    </ul>
    </div>
    """

    # Search functionality
    search_html = """
    <div style='position: absolute; top: 10px; left: 10px; background-color: white; padding: 10px; border: 1px solid #ccc; z-index: 1000;'>
        <label for='searchNode'>Search Node:</label>
        <input type='text' id='searchNode' placeholder='Enter MEP/Assistant Name' />
        <button id='searchBtn'>Search</button>
    </div>
    <script>
        const searchNodeInput = document.getElementById('searchNode');
        const searchBtn = document.getElementById('searchBtn');

        searchBtn.addEventListener('click', function() {
            const name = searchNodeInput.value.trim().toLowerCase();
            if (name) {
                // Hide all nodes except the one that matches
                network.body.data.nodes.update(network.body.data.nodes.get().map(node => {
                    const isMatch = node.id.toLowerCase().includes(name);
                    if (isMatch) {
                        node.size = 400;  // Temporarily enlarge the node
                    } else {
                        node.size = node.originalSize || 30;  // Reset to default size
                    }
                    return node;
                }))
            }
        });

        searchNodeInput.addEventListener('input', function() {
            if (!searchNodeInput.value) {
                // Reset nodes and edges when search bar is cleared
                network.body.data.nodes.update(network.body.data.nodes.get().map(node => {
                    node.size = node.originalSize || 30;
                    return node;
                }))
            }
        });
    </script>
    """
     # Filter
    filter_html = """
    <div style='position: absolute; bottom: 10px; left: 10px; background-color: white; padding: 10px; border: 1px solid #ccc; z-index: 1000;'>
        <label for='shapeFilter'>Filter:</label>
        <select id='shapeFilter'>
            <option value='all'>All</option>
            <option value='star'>Both Terms</option>
            <option value='square'>10th Term Only</option>
            <option value='dot'>9th Term Only</option>
        </select>
    </div>
    <script>
        const shapeFilter = document.getElementById('shapeFilter');
        shapeFilter.addEventListener('change', function() {
            const selectedShape = shapeFilter.value;
            network.body.data.nodes.update(network.body.data.nodes.get().map(node => {
                node.hidden = selectedShape !== 'all' && node.shape !== selectedShape;
                return node;
            }));
        });
    </script>
    """
    # Inject into HTML content
    content = content.replace("</body>", title_html + legend_html + filter_html + search_html + "</body>")
    file.seek(0)
    file.write(content)
    file.truncate()

print("Graph saved as 'final_mep_network.html' with enhanced search functionality.")

Dual-term assistants: {'Sirle ROSENFELDT - KOOP', 'Christian BECK', 'Maciej Pawel JELENSKI', 'Mai JOGIMAA', 'Mara LESINA', 'Louk Louis Catharina FAESEN', 'Almudena MOLINA DELGADO', 'Lidiya Rumenova SIMOVA', 'Piotr SZYMANOWICZ', 'Minna Anette MANNINEN', 'Francesco CARBONI', 'Borys Maria BRZEZINSKI', 'Misachi Josef OGAWA', 'Xavier Victor E EVERAERT', 'Ota JAKSCH', 'Orsolya ZARA', 'Arrigo VERTUA', 'MARIA INÊS MARQUES DA COSTA PIRRÉ', 'Charlotte Julie Isabelle IZARD', 'Silvia RIGHI', 'Athanasios ARGYROPOULOS', 'Carmen DINU', 'Nora KARTELOVA', 'Giancarlo ALTIERI', 'Jose Eduardo BAPTISTA VIEIRA', 'Raffaele PADOVANO', 'Christian Ernesto DE MATTIA', 'Miklos-Csaba GYOERGYJAKAB', 'Christoph ABT', 'Anne PLOEGER', 'Ioanna STEFATOU', 'Rok LESAR', 'Claude-Edouard CROCHET', 'Antonio MATTIELLO', 'Eimear Rose DEERY', 'Ignacio Santiago DEL OLMO TEJERINA', 'Paul Jonas ESCHENBURG', 'Simone SIMARI', 'Alexandra MEHDI', 'Ann Braarup CUYKENS', 'Stefanie SIFFT', 'Michaela RIAPOSOVA', 'Roxane Julie Sarah CHAPLA