In [None]:
# Run the shared project preamble unless the PREAMBLE_RUN environment variable
# is already set to a non-empty value.
# NOTE(review): PREAMBLE_RUN is presumably set by preamble.py itself — confirm.
import os
if not os.environ.get("PREAMBLE_RUN", False): 
    # IPython magic: -i executes the script inside this notebook's namespace.
    # NOTE(review): the trailing "2" is handed to preamble.py as an argument;
    # its meaning is defined there — confirm.
    %run -i "../preamble.py" 2

In [None]:
import pandas as pd
import requests
import numpy as np

from src.constants import PATH_INTERMED_CHES_51_MIG_W_META, PATH_INTERMED_CHES_53_ENRICHED_MEMBER, PATH_INTERMED_CHES_54_MIG_W_CHES_META
from src.normalize_strings import normalize_list, normalize_str

# Notebook display settings: lift pandas' row and column display limits
# so displayed frames are not truncated.
for _display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(_display_option, None)

Alright, now that we have the enriched migration dataset with the EP's person IDs, as well as the membership data with a mapping between the EP's party IDs and the CHES party IDs, we can finally map the individual speeches (i.e. the speakers at the time of speech delivery) to the speakers' national party affiliations and, more importantly, to the corresponding CHES party (if one exists).

In [None]:
# Load the enriched migration speeches (the national memberships follow in the next cell)
original_migration_enriched_df = pd.read_parquet(
    PATH_INTERMED_CHES_51_MIG_W_META,
    engine="fastparquet",
)
display(original_migration_enriched_df.head(5))

Unnamed: 0.1,Unnamed: 0,speaker,text,date,agenda,speechnumber,procedure_ID,partyfacts_ID,period,chair,MEP,commission,written,multispeaker,link,translatedText,translationSource,year,block,party,migration_prob,ep_identifier,ep_label,ep_citizenship,ep_placeOfBirth,ep_bday,ep_deathDate,ep_gender,ep_hasMembership
0,492,Karen Melchior,"Mr President, thank you very much to the Commi...",2024-04-24,22. Advance passenger information: enhancing a...,8,bill_26075_ID bill_26076_ID bill_26075_ID bi...,6401.0,9,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Mr President, thank you very much to the Commi...",original_gm,2024,liberal,ELDR/ALDE/Renew,0.393282,197567,Karen MELCHIOR,DNK,Gentofte,1980-10-15,,FEMALE,"[{'id': 'membership/197567-f-161757', 'type': ..."
1,844,Tineke Strik,"Mr President, Commissioner. People who flee wa...",2024-04-23,17. EU-Egypt strategic and comprehensive partn...,9,,6403.0,9,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Mr President, Commissioner. People who flee wa...",original_gm,2024,green,Greens/EFA,0.423501,197772,Tineke STRIK,NLD,Appeltern,1961-09-28,,FEMALE,"[{'id': 'membership/197772-f-161044', 'type': ..."
2,1050,Anabela Rodrigues,"Senhora Presidente, em 2024, o tráfico de sere...",2024-04-22,20. Amending Directive 2011/36/EU on preventin...,10,bill_241_ID bill_241_ID,6402.0,9,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Madam President, in 2024, human trafficking co...",machine_gm,2024,left,NGL/The Left,0.375925,254718,Anabela RODRIGUES,PRT,Lisboa,1976-10-18,,FEMALE,"[{'id': 'membership/254718-f-169354', 'type': ..."
3,1054,Beata Kempa,Pani Przewodnicząca! Pani Komisarz! Potrzebuje...,2024-04-22,20. Amending Directive 2011/36/EU on preventin...,14,bill_241_ID bill_241_ID,6400.0,9,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Madam Commissioner! We need very specific, ver...",machine_gm,2024,(extreme)_right,ECR,0.256493,197519,Beata KEMPA,POL,Syców,1966-02-11,,FEMALE,"[{'id': 'membership/197519-f-164917', 'type': ..."
4,1056,Dorien Rookmaker,"Voorzitter, dank u wel, meneer Engerer, want u...",2024-04-22,20. Amending Directive 2011/36/EU on preventin...,16,bill_241_ID bill_241_ID,6400.0,9,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Madam President, thank you, Mr Engerer, becaus...",machine_gm,2024,(extreme)_right,ECR,0.475315,204733,Dorien ROOKMAKER,NLD,Zijpe,1964-07-30,,FEMALE,"[{'id': 'membership/204733-f-161317', 'type': ..."


In [None]:
# Load the national-party memberships, already enriched with the CHES party mapping
original_nat_memberships_df = pd.read_parquet(
    PATH_INTERMED_CHES_53_ENRICHED_MEMBER,
    engine="fastparquet",
)
display(original_nat_memberships_df.head(5))

Unnamed: 0,person_id_memb,person_label_memb,hasMembership_memb,party_id_memb,member_startDate_memb,member_endDate_memb,member_role_memb,party_class_memb,country_code_memb,party_temporal_memb,party_label_memb,party_all_labels_memb,party_all_names_memb,party_id_ches,country_code_ches,party_abbrev_ches,party_name_ches,party_name_en_ches
0,103488,Sergio GUTIÉRREZ PRIETO,"[{'id': 'membership/103488-f-148559', 'type': ...",4025,2014-07-01,2019-05-20,MEMBER,NATIONAL_POLITICAL_GROUP,ESP,"{'id': 'time-period/20140701-20190701', 'type'...",PSOE,[PSOE],[Partido Socialista Obrero Español],501,ESP,[PSOE],[Partido Socialista Obrero Español],[Spanish Socialist Workers’ Party]
1,103488,Sergio GUTIÉRREZ PRIETO,"[{'id': 'membership/103488-f-148559', 'type': ...",2714,2010-07-19,2014-06-30,MEMBER,NATIONAL_POLITICAL_GROUP,ESP,"{'id': 'time-period/20090714-20140630', 'type'...",PSOE,[PSOE],[Partido Socialista Obrero Español],501,ESP,[PSOE],[Partido Socialista Obrero Español],[Spanish Socialist Workers’ Party]
2,103845,Virginie ROZIÈRE,"[{'id': 'membership/103845-f-150436', 'type': ...",4277,2014-07-01,2018-02-07,MEMBER,NATIONAL_POLITICAL_GROUP,FRA,"{'id': 'time-period/20140701-20190701', 'type'...",PRG,[PRG],[Parti radical de gauche],603,FRA,[PRG],[Parti Radical de Gauche],[Left Radical Party]
3,103845,Virginie ROZIÈRE,"[{'id': 'membership/103845-f-150436', 'type': ...",5040,2018-02-08,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,FRA,"{'id': 'time-period/20180208-20190701', 'type'...",Les radicaux de Gauche,[Les radicaux de Gauche],[Les radicaux de Gauche],603,FRA,[PRG],[Parti Radical de Gauche],[Left Radical Party]
4,111014,Josef WEIDENHOLZER,"[{'id': 'membership/111014-f-135825', 'type': ...",2659,2011-12-01,2014-06-30,MEMBER,NATIONAL_POLITICAL_GROUP,AUT,"{'id': 'time-period/20090714-20140630', 'type'...",SPÖ,[SPÖ],[Sozialdemokratische Partei Österreichs],1301,AUT,"[SPO, SPÖ]",[Sozialdemokratische Partei Österreichs],[Social Democratic Party of Austria]


Before merging, we apply some sanity checks: are there any overlapping memberships for the same MEP? Such overlaps would make the mapping to CHES parties ambiguous, which we do not want.

In [None]:
# Independent working copies, so the frames loaded above stay untouched
left_migration, right_membership = (
    source_frame.copy(deep=True)
    for source_frame in (original_migration_enriched_df, original_nat_memberships_df)
)

# Preview the first three rows of each copy
display(left_migration.head(3), right_membership.head(3))

Unnamed: 0.1,Unnamed: 0,speaker,text,date,agenda,speechnumber,procedure_ID,partyfacts_ID,period,chair,MEP,commission,written,multispeaker,link,translatedText,translationSource,year,block,party,migration_prob,ep_identifier,ep_label,ep_citizenship,ep_placeOfBirth,ep_bday,ep_deathDate,ep_gender,ep_hasMembership
0,492,Karen Melchior,"Mr President, thank you very much to the Commi...",2024-04-24,22. Advance passenger information: enhancing a...,8,bill_26075_ID bill_26076_ID bill_26075_ID bi...,6401.0,9,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Mr President, thank you very much to the Commi...",original_gm,2024,liberal,ELDR/ALDE/Renew,0.393282,197567,Karen MELCHIOR,DNK,Gentofte,1980-10-15,,FEMALE,"[{'id': 'membership/197567-f-161757', 'type': ..."
1,844,Tineke Strik,"Mr President, Commissioner. People who flee wa...",2024-04-23,17. EU-Egypt strategic and comprehensive partn...,9,,6403.0,9,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Mr President, Commissioner. People who flee wa...",original_gm,2024,green,Greens/EFA,0.423501,197772,Tineke STRIK,NLD,Appeltern,1961-09-28,,FEMALE,"[{'id': 'membership/197772-f-161044', 'type': ..."
2,1050,Anabela Rodrigues,"Senhora Presidente, em 2024, o tráfico de sere...",2024-04-22,20. Amending Directive 2011/36/EU on preventin...,10,bill_241_ID bill_241_ID,6402.0,9,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Madam President, in 2024, human trafficking co...",machine_gm,2024,left,NGL/The Left,0.375925,254718,Anabela RODRIGUES,PRT,Lisboa,1976-10-18,,FEMALE,"[{'id': 'membership/254718-f-169354', 'type': ..."


Unnamed: 0,person_id_memb,person_label_memb,hasMembership_memb,party_id_memb,member_startDate_memb,member_endDate_memb,member_role_memb,party_class_memb,country_code_memb,party_temporal_memb,party_label_memb,party_all_labels_memb,party_all_names_memb,party_id_ches,country_code_ches,party_abbrev_ches,party_name_ches,party_name_en_ches
0,103488,Sergio GUTIÉRREZ PRIETO,"[{'id': 'membership/103488-f-148559', 'type': ...",4025,2014-07-01,2019-05-20,MEMBER,NATIONAL_POLITICAL_GROUP,ESP,"{'id': 'time-period/20140701-20190701', 'type'...",PSOE,[PSOE],[Partido Socialista Obrero Español],501,ESP,[PSOE],[Partido Socialista Obrero Español],[Spanish Socialist Workers’ Party]
1,103488,Sergio GUTIÉRREZ PRIETO,"[{'id': 'membership/103488-f-148559', 'type': ...",2714,2010-07-19,2014-06-30,MEMBER,NATIONAL_POLITICAL_GROUP,ESP,"{'id': 'time-period/20090714-20140630', 'type'...",PSOE,[PSOE],[Partido Socialista Obrero Español],501,ESP,[PSOE],[Partido Socialista Obrero Español],[Spanish Socialist Workers’ Party]
2,103845,Virginie ROZIÈRE,"[{'id': 'membership/103845-f-150436', 'type': ...",4277,2014-07-01,2018-02-07,MEMBER,NATIONAL_POLITICAL_GROUP,FRA,"{'id': 'time-period/20140701-20190701', 'type'...",PRG,[PRG],[Parti radical de gauche],603,FRA,[PRG],[Parti Radical de Gauche],[Left Radical Party]


In [None]:
# look for overlaps in membership time periods (initial sanity check)
#
# Raises ValueError when, for any person, a membership starts on or before the
# end of that person's previous membership (memberships ordered by start date).

# Work on a copy (only the membership data is needed for this check)
right_membership = original_nat_memberships_df.copy(deep=True)

# Parse the dates into a scratch frame (right_membership keeps its raw columns)
# and order every person's memberships chronologically.
membership_periods = right_membership.assign(
    member_startDate_memb=lambda d: pd.to_datetime(d["member_startDate_memb"], format="%Y-%m-%d", errors="raise"),
    member_endDate_memb=lambda d: pd.to_datetime(d["member_endDate_memb"], format="%Y-%m-%d", errors="raise"),
).sort_values(["person_id_memb", "member_startDate_memb"])

# first check start <= end (only checkable where an end date is recorded)
relation_check = membership_periods.dropna(subset=["member_endDate_memb"])
assert (relation_check["member_startDate_memb"] <= relation_check["member_endDate_memb"]).all()

# Compare every membership with the previous membership of the same person.
# Because rows are sorted by person, "previous row has the same person id"
# is exactly "this person has an earlier membership".
same_person_as_prev = membership_periods["person_id_memb"].eq(
    membership_periods["person_id_memb"].shift()
)
prev_end = membership_periods["member_endDate_memb"].shift().where(same_person_as_prev)

# A previous membership without an end date is treated as still running, so any
# later membership of the same person overlaps it. A plain `prev_end >= start`
# comparison evaluates to False for missing values and would silently skip it.
# NOTE(review): assumes a null end date means "open-ended" — confirm against the source data.
prev_is_open_ended = same_person_as_prev & prev_end.isna()
prev_covers_start = prev_end >= membership_periods["member_startDate_memb"]

overlaps = membership_periods.assign(prev_end=prev_end)[prev_is_open_ended | prev_covers_start]

if not overlaps.empty:
    raise ValueError(
        f"Overlapping intervals in 'memberships' detected:\n{overlaps}"
    )

In [None]:
# Trial run of the as-of join on fresh working copies.
# Dates are parsed strictly as YYYY-MM-DD, and each frame is sorted by its
# "on" key, because merge_asof requires both on-keys in ascending order.
left_migration = (
    original_migration_enriched_df.copy(deep=True)
    .assign(date=lambda df: pd.to_datetime(df["date"], format="%Y-%m-%d", errors="raise"))
    .sort_values(["date", "ep_identifier"])
)
right_membership = (
    original_nat_memberships_df.copy(deep=True)
    .assign(
        member_startDate_memb=lambda df: pd.to_datetime(
            df["member_startDate_memb"], format="%Y-%m-%d", errors="raise"
        ),
        member_endDate_memb=lambda df: pd.to_datetime(
            df["member_endDate_memb"], format="%Y-%m-%d", errors="raise"
        ),
    )
    .sort_values(["member_startDate_memb", "person_id_memb"])
)

# For every speech, pick the same person's latest membership that started
# on or before the speech date.
merged = pd.merge_asof(
    left_migration,
    right_membership,
    left_by="ep_identifier",
    right_by="person_id_memb",
    left_on="date",
    right_on="member_startDate_memb",
    allow_exact_matches=True,
    direction="backward",
)
print("FINISH MERGE AS OF")
print("FINISH MERGE AS OF")

FINISH MERGE AS OF


In [None]:
# Validate the as-of matches against the membership end date and report.
# All left rows are kept; the membership columns of invalid matches are nulled out (Important!)
rep_initial_size_left = left_migration.shape[0]
rep_init_amount_of_matches = merged.shape[0]
# rows for which merge_asof found no membership starting on or before the speech date
rep_init_amount_of_nans = merged["member_startDate_memb"].isna().sum()

# merge_asof is a left join, so no speech may be lost or duplicated
assert rep_init_amount_of_matches == rep_initial_size_left

# A match is valid when the speech date does not lie after the membership end.
# The backward as-of join already guarantees start <= date.
# NOTE(review): a missing end date is treated as an open-ended membership — confirm;
# a plain `date <= end` comparison would evaluate to False and discard the match.
valid = merged["member_startDate_memb"].notna() & (
    merged["member_endDate_memb"].isna()
    | (merged["date"] <= merged["member_endDate_memb"])
)
right_cols = right_membership.columns  # columns to blank when invalid
merged.loc[~valid, right_cols] = pd.NA

rep_after_amount_of_nans = merged["member_startDate_memb"].isna().sum()

print("Merge Report:")
print("Size of left dataframe:", rep_initial_size_left)
print("Size of merged df: ", rep_init_amount_of_matches)
print("Amount of not matched rows after merge_asof: ", rep_init_amount_of_nans)
print("Amount of NaNs in column 'member_startDate_memb' after invalidation: ", rep_after_amount_of_nans)

print("Result")
display(merged.sort_values(["date", "agenda", "speechnumber"]).head(10))

Merge Report:
Size of left dataframe: 9705
Size of merged df:  9705
Amount of not matched rows after merge_asof:  9705
Amount of NaNs in column 'member_startDate' after invalidation:  0
Result


Unnamed: 0.1,Unnamed: 0,speaker,text,date,agenda,speechnumber,procedure_ID,partyfacts_ID,period,chair,MEP,commission,written,multispeaker,link,translatedText,translationSource,year,block,party,migration_prob,ep_identifier,ep_label,ep_citizenship,ep_placeOfBirth,ep_bday,ep_deathDate,ep_gender,ep_hasMembership,person_id_memb,person_label_memb,hasMembership_memb,party_id_memb,member_startDate_memb,member_endDate_memb,member_role_memb,party_class_memb,country_code_memb,party_temporal_memb,party_label_memb,party_all_labels_memb,party_all_names_memb,party_id_ches,country_code_ches,party_abbrev_ches,party_name_ches,party_name_en_ches
0,292003,Amjad Bashir,"Mr Arias, we have heard about the injustice do...",2014-07-02,12. Programme of activities of the Italian Pre...,41,,6404.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Mr Arias, we have heard about the injustice do...",original_pl,2014,(extreme)_right,EDD/INDDEM/EFD,0.257913,124956,Amjad BASHIR,GBR,Jhelum (PK),1952-09-17,,MALE,"[{'id': 'membership/124956-f-143535', 'type': ...",124956,Amjad BASHIR,"[{'id': 'membership/124956-f-143535', 'type': ...",4051,2014-07-01,2015-01-28,MEMBER,NATIONAL_POLITICAL_GROUP,GBR,"{'id': 'time-period/20140701-20190701', 'type'...",UKIP,[UKIP],[United Kingdom Independence Party],1108,GBR,[UKIP],[United Kingdom Independence Party],[United Kingdom Independence Party]
1,292004,Miguel Arias Cañete,"Sí, efectivamente, para luchar contra la inmig...",2014-07-02,12. Programme of activities of the Italian Pre...,42,,6398.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Yes, indeed, to fight against immigration we m...",machine_pl,2014,christian_conservative,PPE,0.494101,1351,Miguel ARIAS CAÑETE,ESP,Madrid,1950-02-24,,MALE,"[{'id': 'membership/1351-f-105362', 'type': 'M...",1351,Miguel ARIAS CAÑETE,"[{'id': 'membership/1351-f-105362', 'type': 'M...",4024,2014-07-01,2014-10-31,MEMBER,NATIONAL_POLITICAL_GROUP,ESP,"{'id': 'time-period/20140701-20190701', 'type'...",PP,[PP],[Partido Popular],502,ESP,[PP],"[Partido Popular, Alianza-Partido Popular]","[People’s Party, People’s Alliance Party]"
2,292025,Monika Smolková,– Taliansko predstavilo veľmi ambiciózny plán ...,2014-07-02,12. Programme of activities of the Italian Pre...,63,,6399.0,8,False,True,False,True,False,https://www.europarl.europa.eu/doceo/document/...,- Italy presented a very ambitious plan for it...,machine_pl,2014,social_democratic,PSE/S&D,0.323411,96655,Monika SMOLKOVÁ,SVK,Janovík,1956-10-06,,FEMALE,"[{'id': 'membership/96655-m-15750', 'type': 'M...",96655,Monika SMOLKOVÁ,"[{'id': 'membership/96655-m-15750', 'type': 'M...",4159,2014-07-01,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,SVK,"{'id': 'time-period/20140701-20190701', 'type'...",SMER-SD,[SMER-SD],[SMER-Sociálna demokracia],2803,SVK,"[Smer, Smer-SD]","[Strana Smer–Tretia Cesta, Smer–sociálna demok...","[Direction–Third Way, Direction–Social Democracy]"
3,291861,Kristina Winberg,Fru talman! Schengenavtalet ingicks i en anda ...,2014-07-14,13. One-minute speeches on matters of politica...,24,,6404.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,Fru talman! The Schengen Agreement was conclud...,machine_pl,2014,(extreme)_right,EDD/INDDEM/EFD,0.700463,124995,Kristina WINBERG,SWE,,1965-05-27,,FEMALE,"[{'id': 'membership/124995-f-142235', 'type': ...",124995,Kristina WINBERG,"[{'id': 'membership/124995-f-142235', 'type': ...",4253,2014-07-01,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,SWE,"{'id': 'time-period/20140701-20190701', 'type'...",Sverigedemokraterna,[Sverigedemokraterna],[Sverigedemokraterna],1610,SWE,[SD],[Sverigedemokraterna],[Sweden Democrats]
4,291519,Ελισσάβετ Βόζεμπεργκ-Βρυωνίδη,"Αξιότιμε κύριε Juncker, στις προτεραιότητές σα...",2014-07-15,5. Statement by the candidate for President of...,25,,6398.0,8,False,True,False,True,False,https://www.europarl.europa.eu/doceo/document/...,"Dear Mr Juncker, in your priorities you refer ...",machine_pl,2014,christian_conservative,PPE,0.604105,125065,Elissavet VOZEMBERG-VRIONIDI,GRC,Athina,1956-09-14,,FEMALE,"[{'id': 'membership/125065-f-151010', 'type': ...",125065,Elissavet VOZEMBERG-VRIONIDI,"[{'id': 'membership/125065-f-151010', 'type': ...",4055,2014-07-01,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,GRC,"{'id': 'time-period/20140701-20190701', 'type'...",N.D.,"[Ν.Δ., N.D.]","[Νέα Δημοκρατία, Nea Demokratia]",402,GRC,[ND],[Néa Dimokratía],[New Democracy]
5,291190,Helga Stevens,Ik denk inderdaad dat het een gedeelde verantw...,2014-07-16,8. Youth employment (debate)2014-07-16,52,,6400.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,I indeed think that it is a shared responsibil...,machine_pl,2014,(extreme)_right,ECR,0.636441,125105,Helga STEVENS,BEL,Sint-Truiden,1968-08-09,,FEMALE,"[{'id': 'membership/125105-f-142627', 'type': ...",125105,Helga STEVENS,"[{'id': 'membership/125105-f-142627', 'type': ...",3986,2014-07-01,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,BEL,"{'id': 'time-period/20140701-20190701', 'type'...",N-VA,[N-VA],[Nieuw-Vlaamse Alliantie],110,BEL,"[NVA, VU, VU/NVA]","[Volksunie, Nieuw-Vlaamse Alliantie]","[People’s Union, New Flemish Alliance]"
7,290748,Νότης Μαριάς,"Κύριε Πρόεδρε, οι οικονομικές σχέσεις της Ευρω...",2014-09-15,19. Trade with Euromed countries (debate)2014-...,7,,6400.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Mr. President, the economic relations of the E...",machine_pl,2014,(extreme)_right,ECR,0.414057,125069,Notis MARIAS,GRC,Thessaloniki,1957-04-05,,MALE,"[{'id': 'membership/125069-f-146981', 'type': ...",125069,Notis MARIAS,"[{'id': 'membership/125069-f-146981', 'type': ...",4269,2014-07-01,2015-01-06,MEMBER,NATIONAL_POLITICAL_GROUP,GRC,"{'id': 'time-period/20140701-20190701', 'type'...",Independent Greeks,"[Ανεξάρτητοι Έλληνες, Independent Greeks]","[Ανεξάρτητοι Έλληνες, Independent Greeks]",412,GRC,[ANEL],[Anexartitoi Ellines],[Independent Greeks]
8,290785,Tim Aker,"Mr President, on 16 August 2014, 35 migrants c...",2014-09-15,21. One-minute speeches (Rule 163)2014-09-15,9,,6404.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Mr President, on 16 August 2014, 35 migrants c...",original_pl,2014,(extreme)_right,EDD/INDDEM/EFD,0.363552,99650,Tim AKER,GBR,Orsett,1985-05-23,,MALE,"[{'id': 'membership/99650-f-152990', 'type': '...",99650,Tim AKER,"[{'id': 'membership/99650-f-152990', 'type': '...",4051,2014-07-01,2018-12-05,MEMBER,NATIONAL_POLITICAL_GROUP,GBR,"{'id': 'time-period/20140701-20190701', 'type'...",UKIP,[UKIP],[United Kingdom Independence Party],1108,GBR,[UKIP],[United Kingdom Independence Party],[United Kingdom Independence Party]
6,290792,Laura Ferrara,"Signor Presidente, onorevoli colleghi, la cris...",2014-09-15,21. One-minute speeches (Rule 163)2014-09-15,16,,6404.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Mr President, ladies and gentlemen, the Italia...",machine_pl,2014,(extreme)_right,EDD/INDDEM/EFD,0.744003,124833,Laura FERRARA,ITA,Napoli,1983-09-11,,FEMALE,"[{'id': 'membership/124833-m-15310', 'type': '...",124833,Laura FERRARA,"[{'id': 'membership/124833-m-15310', 'type': '...",4083,2014-07-01,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,ITA,"{'id': 'time-period/20140701-20190701', 'type'...",M5S,[M5S],[Movimento 5 Stelle],845,ITA,"[M5S, MS5]",[MoVimento Cinque Stelle],[Five Star Movement]
11,289949,James Carver,Does my colleague share the opinions of her co...,2014-09-17,14. EU response to the Ebola outbreak (debate)...,15,,6404.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,Does my colleague share the opinions of her co...,original_pl,2014,(extreme)_right,EDD/INDDEM/EFD,0.335978,124971,James CARVER,GBR,,1969-08-15,,MALE,"[{'id': 'membership/124971-f-142125', 'type': ...",124971,James CARVER,"[{'id': 'membership/124971-f-142125', 'type': ...",4051,2014-07-01,2018-05-27,MEMBER,NATIONAL_POLITICAL_GROUP,GBR,"{'id': 'time-period/20140701-20190701', 'type'...",UKIP,[UKIP],[United Kingdom Independence Party],1108,GBR,[UKIP],[United Kingdom Independence Party],[United Kingdom Independence Party]


Wonderful, this looks good, so let's proceed with the actual merge.

In [None]:
# Final merge: attach to every speech the national-party membership (and its
# CHES mapping) that the speaker held on the day of the speech.

# Work on copies
speeches = original_migration_enriched_df.copy(deep=True)
memb = original_nat_memberships_df.copy(deep=True)

# quick check that no speech is missing its date
assert speeches["date"].isna().sum() == 0

# Parse dates (YYYY-MM-DD)
speeches["date"] = pd.to_datetime(speeches["date"], format="%Y-%m-%d", errors="raise")
memb["member_startDate_memb"] = pd.to_datetime(memb["member_startDate_memb"], format="%Y-%m-%d", errors="raise")
memb["member_endDate_memb"] = pd.to_datetime(memb["member_endDate_memb"], format="%Y-%m-%d", errors="raise")

# Required sorting for merge_asof (on-keys ascending)
speeches = speeches.sort_values(["date", "ep_identifier"])
memb = memb.sort_values(["member_startDate_memb", "person_id_memb"])

# As-of join: latest membership start <= speech date, per person
merged = pd.merge_asof(
    speeches,
    memb,
    left_on="date",
    right_on="member_startDate_memb",
    left_by="ep_identifier",
    right_by="person_id_memb",
    direction="backward",
    allow_exact_matches=True,
)

# merge_asof is a left join: every speech must still be present exactly once
assert len(merged) == len(speeches)

# Interval validity check: the speech date must not lie after the membership end.
# NOTE(review): a missing end date is treated as an open-ended membership — confirm;
# a plain `date <= end` comparison would evaluate to False and discard the match.
valid = (
    merged["person_id_memb"].notna() &
    (
        merged["member_endDate_memb"].isna() |
        (merged["date"] <= merged["member_endDate_memb"])
    )
)

# Invalidate memberships that don't cover the speech date (the speech rows themselves are kept)
membership_cols = memb.columns
merged.loc[~valid, membership_cols] = pd.NA

In [None]:
# Snapshot of the merge result for the checks below (DataFrame.copy is deep by default)
merged_copy = merged.copy()

In [None]:
# Diagnostics on the merge result
display(merged_copy.head())

# How many speeches have no membership?
print("Speeches without membership:",
      merged_copy["person_id_memb"].isna().sum())

# Going through one speaker's speeches in date order, the start date of the
# matched membership can never move backwards (the as-of join always picks the
# latest membership starting on or before the speech). Speeches without a
# membership are excluded, since missing values would break the monotonicity test.
starts_per_speaker = (
    merged_copy.dropna(subset=["member_startDate_memb"])
               .sort_values("date", kind="stable")
               .groupby("ep_identifier")["member_startDate_memb"]
               .is_monotonic_increasing
)
assert starts_per_speaker.all(), (
    f"Membership start dates go backwards for: {list(starts_per_speaker[~starts_per_speaker].index)}"
)

print("Left monotonic:", speeches["date"].is_monotonic_increasing) # should be true
print("Right monotonic:", memb["member_startDate_memb"].is_monotonic_increasing) # should be true

# the merge must neither drop nor duplicate speeches
print("Amount of speeches in migration in total:", merged_copy.shape[0])
print("Amount of speeches in the original input:", original_migration_enriched_df.shape[0])
assert merged_copy.shape[0] == original_migration_enriched_df.shape[0]

# how many speeches do not have a ches score (i.e. no CHES party id)?
print("Speeches without ches score:",
      merged_copy["party_id_ches"].isna().sum())


Unnamed: 0.1,Unnamed: 0,speaker,text,date,agenda,speechnumber,procedure_ID,partyfacts_ID,period,chair,MEP,commission,written,multispeaker,link,translatedText,translationSource,year,block,party,migration_prob,ep_identifier,ep_label,ep_citizenship,ep_placeOfBirth,ep_bday,ep_deathDate,ep_gender,ep_hasMembership,person_id_memb,person_label_memb,hasMembership_memb,party_id_memb,member_startDate_memb,member_endDate_memb,member_role_memb,party_class_memb,country_code_memb,party_temporal_memb,party_label_memb,party_all_labels_memb,party_all_names_memb,party_id_ches,country_code_ches,party_abbrev_ches,party_name_ches,party_name_en_ches
0,292003,Amjad Bashir,"Mr Arias, we have heard about the injustice do...",2014-07-02,12. Programme of activities of the Italian Pre...,41,,6404.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Mr Arias, we have heard about the injustice do...",original_pl,2014,(extreme)_right,EDD/INDDEM/EFD,0.257913,124956,Amjad BASHIR,GBR,Jhelum (PK),1952-09-17,,MALE,"[{'id': 'membership/124956-f-143535', 'type': ...",124956,Amjad BASHIR,"[{'id': 'membership/124956-f-143535', 'type': ...",4051,2014-07-01,2015-01-28,MEMBER,NATIONAL_POLITICAL_GROUP,GBR,"{'id': 'time-period/20140701-20190701', 'type'...",UKIP,[UKIP],[United Kingdom Independence Party],1108,GBR,[UKIP],[United Kingdom Independence Party],[United Kingdom Independence Party]
1,292004,Miguel Arias Cañete,"Sí, efectivamente, para luchar contra la inmig...",2014-07-02,12. Programme of activities of the Italian Pre...,42,,6398.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Yes, indeed, to fight against immigration we m...",machine_pl,2014,christian_conservative,PPE,0.494101,1351,Miguel ARIAS CAÑETE,ESP,Madrid,1950-02-24,,MALE,"[{'id': 'membership/1351-f-105362', 'type': 'M...",1351,Miguel ARIAS CAÑETE,"[{'id': 'membership/1351-f-105362', 'type': 'M...",4024,2014-07-01,2014-10-31,MEMBER,NATIONAL_POLITICAL_GROUP,ESP,"{'id': 'time-period/20140701-20190701', 'type'...",PP,[PP],[Partido Popular],502,ESP,[PP],"[Partido Popular, Alianza-Partido Popular]","[People’s Party, People’s Alliance Party]"
2,292025,Monika Smolková,– Taliansko predstavilo veľmi ambiciózny plán ...,2014-07-02,12. Programme of activities of the Italian Pre...,63,,6399.0,8,False,True,False,True,False,https://www.europarl.europa.eu/doceo/document/...,- Italy presented a very ambitious plan for it...,machine_pl,2014,social_democratic,PSE/S&D,0.323411,96655,Monika SMOLKOVÁ,SVK,Janovík,1956-10-06,,FEMALE,"[{'id': 'membership/96655-m-15750', 'type': 'M...",96655,Monika SMOLKOVÁ,"[{'id': 'membership/96655-m-15750', 'type': 'M...",4159,2014-07-01,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,SVK,"{'id': 'time-period/20140701-20190701', 'type'...",SMER-SD,[SMER-SD],[SMER-Sociálna demokracia],2803,SVK,"[Smer, Smer-SD]","[Strana Smer–Tretia Cesta, Smer–sociálna demok...","[Direction–Third Way, Direction–Social Democracy]"
3,291861,Kristina Winberg,Fru talman! Schengenavtalet ingicks i en anda ...,2014-07-14,13. One-minute speeches on matters of politica...,24,,6404.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,Fru talman! The Schengen Agreement was conclud...,machine_pl,2014,(extreme)_right,EDD/INDDEM/EFD,0.700463,124995,Kristina WINBERG,SWE,,1965-05-27,,FEMALE,"[{'id': 'membership/124995-f-142235', 'type': ...",124995,Kristina WINBERG,"[{'id': 'membership/124995-f-142235', 'type': ...",4253,2014-07-01,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,SWE,"{'id': 'time-period/20140701-20190701', 'type'...",Sverigedemokraterna,[Sverigedemokraterna],[Sverigedemokraterna],1610,SWE,[SD],[Sverigedemokraterna],[Sweden Democrats]
4,291519,Ελισσάβετ Βόζεμπεργκ-Βρυωνίδη,"Αξιότιμε κύριε Juncker, στις προτεραιότητές σα...",2014-07-15,5. Statement by the candidate for President of...,25,,6398.0,8,False,True,False,True,False,https://www.europarl.europa.eu/doceo/document/...,"Dear Mr Juncker, in your priorities you refer ...",machine_pl,2014,christian_conservative,PPE,0.604105,125065,Elissavet VOZEMBERG-VRIONIDI,GRC,Athina,1956-09-14,,FEMALE,"[{'id': 'membership/125065-f-151010', 'type': ...",125065,Elissavet VOZEMBERG-VRIONIDI,"[{'id': 'membership/125065-f-151010', 'type': ...",4055,2014-07-01,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,GRC,"{'id': 'time-period/20140701-20190701', 'type'...",N.D.,"[Ν.Δ., N.D.]","[Νέα Δημοκρατία, Nea Demokratia]",402,GRC,[ND],[Néa Dimokratía],[New Democracy]


Speeches without membership: 0
Left monotonic: True
Right monotonic: True
Amount of speeches in migration in total: 9705
9705
Speeches without ches score: 848


In [None]:
# Spot check: speeches whose matched membership started on or before 2014-07-01,
# the start of the parliamentary term in which the first speech of the migration dataset falls
test = merged[merged["member_startDate_memb"].le("2014-07-01")]
display(test.head(5))

Unnamed: 0.1,Unnamed: 0,speaker,text,date,agenda,speechnumber,procedure_ID,partyfacts_ID,period,chair,MEP,commission,written,multispeaker,link,translatedText,translationSource,year,block,party,migration_prob,ep_identifier,ep_label,ep_citizenship,ep_placeOfBirth,ep_bday,ep_deathDate,ep_gender,ep_hasMembership,person_id_memb,person_label_memb,hasMembership_memb,party_id_memb,member_startDate_memb,member_endDate_memb,member_role_memb,party_class_memb,country_code_memb,party_temporal_memb,party_label_memb,party_all_labels_memb,party_all_names_memb,party_id_ches,country_code_ches,party_abbrev_ches,party_name_ches,party_name_en_ches
0,292003,Amjad Bashir,"Mr Arias, we have heard about the injustice do...",2014-07-02,12. Programme of activities of the Italian Pre...,41,,6404.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Mr Arias, we have heard about the injustice do...",original_pl,2014,(extreme)_right,EDD/INDDEM/EFD,0.257913,124956,Amjad BASHIR,GBR,Jhelum (PK),1952-09-17,,MALE,"[{'id': 'membership/124956-f-143535', 'type': ...",124956,Amjad BASHIR,"[{'id': 'membership/124956-f-143535', 'type': ...",4051,2014-07-01,2015-01-28,MEMBER,NATIONAL_POLITICAL_GROUP,GBR,"{'id': 'time-period/20140701-20190701', 'type'...",UKIP,[UKIP],[United Kingdom Independence Party],1108,GBR,[UKIP],[United Kingdom Independence Party],[United Kingdom Independence Party]
1,292004,Miguel Arias Cañete,"Sí, efectivamente, para luchar contra la inmig...",2014-07-02,12. Programme of activities of the Italian Pre...,42,,6398.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,"Yes, indeed, to fight against immigration we m...",machine_pl,2014,christian_conservative,PPE,0.494101,1351,Miguel ARIAS CAÑETE,ESP,Madrid,1950-02-24,,MALE,"[{'id': 'membership/1351-f-105362', 'type': 'M...",1351,Miguel ARIAS CAÑETE,"[{'id': 'membership/1351-f-105362', 'type': 'M...",4024,2014-07-01,2014-10-31,MEMBER,NATIONAL_POLITICAL_GROUP,ESP,"{'id': 'time-period/20140701-20190701', 'type'...",PP,[PP],[Partido Popular],502,ESP,[PP],"[Partido Popular, Alianza-Partido Popular]","[People’s Party, People’s Alliance Party]"
2,292025,Monika Smolková,– Taliansko predstavilo veľmi ambiciózny plán ...,2014-07-02,12. Programme of activities of the Italian Pre...,63,,6399.0,8,False,True,False,True,False,https://www.europarl.europa.eu/doceo/document/...,- Italy presented a very ambitious plan for it...,machine_pl,2014,social_democratic,PSE/S&D,0.323411,96655,Monika SMOLKOVÁ,SVK,Janovík,1956-10-06,,FEMALE,"[{'id': 'membership/96655-m-15750', 'type': 'M...",96655,Monika SMOLKOVÁ,"[{'id': 'membership/96655-m-15750', 'type': 'M...",4159,2014-07-01,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,SVK,"{'id': 'time-period/20140701-20190701', 'type'...",SMER-SD,[SMER-SD],[SMER-Sociálna demokracia],2803,SVK,"[Smer, Smer-SD]","[Strana Smer–Tretia Cesta, Smer–sociálna demok...","[Direction–Third Way, Direction–Social Democracy]"
3,291861,Kristina Winberg,Fru talman! Schengenavtalet ingicks i en anda ...,2014-07-14,13. One-minute speeches on matters of politica...,24,,6404.0,8,False,True,False,False,False,https://www.europarl.europa.eu/doceo/document/...,Fru talman! The Schengen Agreement was conclud...,machine_pl,2014,(extreme)_right,EDD/INDDEM/EFD,0.700463,124995,Kristina WINBERG,SWE,,1965-05-27,,FEMALE,"[{'id': 'membership/124995-f-142235', 'type': ...",124995,Kristina WINBERG,"[{'id': 'membership/124995-f-142235', 'type': ...",4253,2014-07-01,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,SWE,"{'id': 'time-period/20140701-20190701', 'type'...",Sverigedemokraterna,[Sverigedemokraterna],[Sverigedemokraterna],1610,SWE,[SD],[Sverigedemokraterna],[Sweden Democrats]
4,291519,Ελισσάβετ Βόζεμπεργκ-Βρυωνίδη,"Αξιότιμε κύριε Juncker, στις προτεραιότητές σα...",2014-07-15,5. Statement by the candidate for President of...,25,,6398.0,8,False,True,False,True,False,https://www.europarl.europa.eu/doceo/document/...,"Dear Mr Juncker, in your priorities you refer ...",machine_pl,2014,christian_conservative,PPE,0.604105,125065,Elissavet VOZEMBERG-VRIONIDI,GRC,Athina,1956-09-14,,FEMALE,"[{'id': 'membership/125065-f-151010', 'type': ...",125065,Elissavet VOZEMBERG-VRIONIDI,"[{'id': 'membership/125065-f-151010', 'type': ...",4055,2014-07-01,2019-07-01,MEMBER,NATIONAL_POLITICAL_GROUP,GRC,"{'id': 'time-period/20140701-20190701', 'type'...",N.D.,"[Ν.Δ., N.D.]","[Νέα Δημοκρατία, Nea Demokratia]",402,GRC,[ND],[Néa Dimokratía],[New Democracy]


Alrighty, let's store this for the final merge/enrichment with the CHES survey scores/waves.

In [None]:
# store as parquet file
# NOTE(review): the write below is commented out, so running this notebook does
# not (re)create the file at PATH_INTERMED_CHES_54_MIG_W_CHES_META. Uncomment it
# to persist `merged` for the next step.
#merged.to_parquet(PATH_INTERMED_CHES_54_MIG_W_CHES_META, engine='fastparquet')