In [1]:
import pandas as pd

In [75]:
import requests
# Download the MEP list for parliamentary terms 0-9 and keep the raw records per term.
mep_data = {}
for term in range(10):
    response = requests.get(
        "https://data.europarl.europa.eu/api/v2/meps",
        params={"parliamentary-term": term, "format": "application/ld+json"},
        timeout=30,  # seconds; without a timeout a stalled connection blocks the cell forever
    )
    if response.status_code == 200:
        # The records are read from the 'data' key of the JSON payload.
        mep_data[term] = response.json()['data']
    else:
        # Best effort: a term without a 200 response is reported and skipped.
        print(f"Couldn't find data on LT: {term}")

In [78]:
# Flatten the per-term payloads into one row per (MEP, term); 'period' holds the term key.
mep_df = pd.DataFrame(
    [
        dict(mep, period=term)
        for term, data in mep_data.items()
        for mep in data
    ]
)

In [79]:
# Preview the first rows of the per-term MEP table.
mep_df.head()

Unnamed: 0,id,type,identifier,label,familyName,sortLabel,period,givenName,officialFamilyName,officialGivenName
0,person/452,Person,452,ARDWICK,Ardwick,ARDWICK,0,,,
1,person/454,Person,454,Barend Willem BIESHEUVEL,Biesheuvel,BIESHEUVEL,0,Barend Willem,,
2,person/261,Person,261,Aldo MASULLO,Masullo,MASULLO,0,Aldo,,
3,person/573,Person,573,Pierre BILLOTTE,Billotte,BILLOTTE,0,Pierre,,
4,person/286,Person,286,Siegfried MEISTER,Meister,MEISTER,0,Siegfried,,


In [88]:
# Persist the per-term MEP table as Parquet (path is relative to the working directory).
mep_df.to_parquet("MEP_MINIMAL_DATA.parquet")

In [107]:
# Fetch the full record of every distinct MEP, requesting identifiers in
# comma-separated batches of `batch_size`.
metadata = []
# Sorting the de-duplicated identifiers makes the batch composition reproducible;
# iterating a set of strings directly can give a different order on each kernel start.
identifiers = sorted(set(mep_df['identifier']))
batch_size = 128
for idx in range(0, len(identifiers), batch_size):
    print(f"idx: {idx}")
    batch = identifiers[idx: idx + batch_size]
    response = requests.get(
        f"https://data.europarl.europa.eu/api/v2/meps/{','.join(batch)}",
        params={"format": "application/ld+json"},
        timeout=60,  # seconds; fail instead of hanging forever on a stalled connection
    )

    if response.status_code == 200:
        metadata.extend(response.json()['data'])
    else:
        # The whole batch is skipped; the status code is printed so the gap is visible.
        print(f"An error occured: {response.status_code}")

idx: 0
idx: 128
idx: 256
idx: 384
idx: 512
idx: 640
idx: 768
idx: 896
idx: 1024
idx: 1152
idx: 1280
idx: 1408
idx: 1536
idx: 1664
idx: 1792
idx: 1920
idx: 2048
idx: 2176
idx: 2304
idx: 2432
idx: 2560
idx: 2688
idx: 2816
idx: 2944
idx: 3072
idx: 3200
idx: 3328
idx: 3456
idx: 3584
idx: 3712
idx: 3840
idx: 3968
idx: 4096
idx: 4224
idx: 4352
idx: 4480
idx: 4608
idx: 4736
idx: 4864


In [108]:
# Number of MEP records collected across all batches.
len(metadata)

4885

In [111]:
# Build a table with one row per fetched MEP record.
mep_metadata = pd.DataFrame(metadata)

In [114]:
# Preview the first rows of the detailed MEP table.
mep_metadata.head()

Unnamed: 0,id,type,identifier,label,notation_codictPersonId,bday,hasGender,hasHonorificPrefix,hasMembership,citizenship,...,upperFamilyName,upperGivenName,deathDate,officialFamilyName,officialGivenName,upperOfficialFamilyName,upperOfficialGivenName,hasEmail,account,homepage
0,person/1008,Person,1008,Arnaldo FORLANI,1008,1925-12-08,http://publications.europa.eu/resource/authori...,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1008-m-8666', 'type': 'Mem...",http://publications.europa.eu/resource/authori...,...,FORLANI,ARNALDO,,,,,,,,
1,person/1053,Person,1053,Leyla ONUR,1053,1945-01-08,http://publications.europa.eu/resource/authori...,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1053-f-79079', 'type': 'Me...",http://publications.europa.eu/resource/authori...,...,ONUR,LEYLA,,,,,,,,
2,person/1055,Person,1055,Godelieve QUISTHOUDT-ROWOHL,1055,1947-06-18,http://publications.europa.eu/resource/authori...,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1055-f-95196', 'type': 'Me...",http://publications.europa.eu/resource/authori...,...,QUISTHOUDT-ROWOHL,GODELIEVE,,,,,,,,
3,person/1091,Person,1091,Roberto SPECIALE,1091,1943-08-03,http://publications.europa.eu/resource/authori...,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1091-f-81455', 'type': 'Me...",http://publications.europa.eu/resource/authori...,...,SPECIALE,ROBERTO,,,,,,,,
4,person/112071,Person,112071,Emer COSTELLO,112071,1962-09-03,http://publications.europa.eu/resource/authori...,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/112071-f-138094', 'type': ...",http://publications.europa.eu/resource/authori...,...,COSTELLO,EMER,,,,,,,,


In [122]:
# Store the last path segment of the gender authority URI (e.g. '.../human-sex/MALE' -> 'MALE')
# in a new 'gender' column, then drop the raw URI column.
# Non-string cells (such as missing values) pass through unchanged instead of raising,
# matching the guard used for the honorific-prefix column.
mep_metadata['gender'] = mep_metadata['hasGender'].apply(
    lambda gstr: gstr.rsplit('/', 1)[-1] if isinstance(gstr, str) else gstr
)
mep_metadata = mep_metadata.drop('hasGender', axis='columns')
mep_metadata.head()

Unnamed: 0,id,type,identifier,label,notation_codictPersonId,bday,hasHonorificPrefix,hasMembership,citizenship,placeOfBirth,...,upperGivenName,deathDate,officialFamilyName,officialGivenName,upperOfficialFamilyName,upperOfficialGivenName,hasEmail,account,homepage,gender
0,person/1008,Person,1008,Arnaldo FORLANI,1008,1925-12-08,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1008-m-8666', 'type': 'Mem...",http://publications.europa.eu/resource/authori...,Pesaro,...,ARNALDO,,,,,,,,,MALE
1,person/1053,Person,1053,Leyla ONUR,1053,1945-01-08,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1053-f-79079', 'type': 'Me...",http://publications.europa.eu/resource/authori...,Braunschweig,...,LEYLA,,,,,,,,,FEMALE
2,person/1055,Person,1055,Godelieve QUISTHOUDT-ROWOHL,1055,1947-06-18,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1055-f-95196', 'type': 'Me...",http://publications.europa.eu/resource/authori...,Etterbeek (Belgien),...,GODELIEVE,,,,,,,,,FEMALE
3,person/1091,Person,1091,Roberto SPECIALE,1091,1943-08-03,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1091-f-81455', 'type': 'Me...",http://publications.europa.eu/resource/authori...,Chiavari (Genova),...,ROBERTO,,,,,,,,,MALE
4,person/112071,Person,112071,Emer COSTELLO,112071,1962-09-03,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/112071-f-138094', 'type': ...",http://publications.europa.eu/resource/authori...,Louth,...,EMER,,,,,,,,,FEMALE


In [123]:
# Reduce the citizenship authority URI to its last path segment (e.g. '.../country/ITA' -> 'ITA').
# Taking the last segment instead of slicing off a fixed number of characters keeps this
# cell idempotent: the column is overwritten in place, so re-running a fixed-length slice
# on already-shortened codes would truncate them to empty strings.
# Non-string cells (such as missing values) pass through unchanged instead of raising.
mep_metadata['citizenship'] = mep_metadata['citizenship'].apply(
    lambda gstr: gstr.rsplit('/', 1)[-1] if isinstance(gstr, str) else gstr
)
mep_metadata.head()

Unnamed: 0,id,type,identifier,label,notation_codictPersonId,bday,hasHonorificPrefix,hasMembership,citizenship,placeOfBirth,...,upperGivenName,deathDate,officialFamilyName,officialGivenName,upperOfficialFamilyName,upperOfficialGivenName,hasEmail,account,homepage,gender
0,person/1008,Person,1008,Arnaldo FORLANI,1008,1925-12-08,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1008-m-8666', 'type': 'Mem...",ITA,Pesaro,...,ARNALDO,,,,,,,,,MALE
1,person/1053,Person,1053,Leyla ONUR,1053,1945-01-08,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1053-f-79079', 'type': 'Me...",DEU,Braunschweig,...,LEYLA,,,,,,,,,FEMALE
2,person/1055,Person,1055,Godelieve QUISTHOUDT-ROWOHL,1055,1947-06-18,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1055-f-95196', 'type': 'Me...",DEU,Etterbeek (Belgien),...,GODELIEVE,,,,,,,,,FEMALE
3,person/1091,Person,1091,Roberto SPECIALE,1091,1943-08-03,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/1091-f-81455', 'type': 'Me...",ITA,Chiavari (Genova),...,ROBERTO,,,,,,,,,MALE
4,person/112071,Person,112071,Emer COSTELLO,112071,1962-09-03,http://publications.europa.eu/resource/authori...,"[{'id': 'membership/112071-f-138094', 'type': ...",IRL,Louth,...,EMER,,,,,,,,,FEMALE


In [129]:
# Strip the honorific authority prefix from string cells and keep the remainder in a new
# 'honorific_prefix' column; non-string cells are copied over untouched.
# The raw URI column is removed afterwards.
mep_metadata['honorific_prefix'] = mep_metadata['hasHonorificPrefix'].apply(
    lambda prefix: prefix
    if not isinstance(prefix, str)
    else prefix[len("http://publications.europa.eu/resource/authority/honorific/"):]
)
mep_metadata = mep_metadata.drop(columns='hasHonorificPrefix')
mep_metadata.head()

Unnamed: 0,id,type,identifier,label,notation_codictPersonId,bday,hasMembership,citizenship,placeOfBirth,familyName,...,deathDate,officialFamilyName,officialGivenName,upperOfficialFamilyName,upperOfficialGivenName,hasEmail,account,homepage,gender,honorific_prefix
0,person/1008,Person,1008,Arnaldo FORLANI,1008,1925-12-08,"[{'id': 'membership/1008-m-8666', 'type': 'Mem...",ITA,Pesaro,Forlani,...,,,,,,,,,MALE,MR
1,person/1053,Person,1053,Leyla ONUR,1053,1945-01-08,"[{'id': 'membership/1053-f-79079', 'type': 'Me...",DEU,Braunschweig,Onur,...,,,,,,,,,FEMALE,MS
2,person/1055,Person,1055,Godelieve QUISTHOUDT-ROWOHL,1055,1947-06-18,"[{'id': 'membership/1055-f-95196', 'type': 'Me...",DEU,Etterbeek (Belgien),Quisthoudt-Rowohl,...,,,,,,,,,FEMALE,MS
3,person/1091,Person,1091,Roberto SPECIALE,1091,1943-08-03,"[{'id': 'membership/1091-f-81455', 'type': 'Me...",ITA,Chiavari (Genova),Speciale,...,,,,,,,,,MALE,MR
4,person/112071,Person,112071,Emer COSTELLO,112071,1962-09-03,"[{'id': 'membership/112071-f-138094', 'type': ...",IRL,Louth,Costello,...,,,,,,,,,FEMALE,MS


In [214]:
def extract_national_political_group(memberships):
    """Return the national-political-group memberships found in ``memberships``.

    Args:
        memberships: Iterable of membership dicts. Only entries whose
            'membershipClassification' equals
            'def/ep-entities/NATIONAL_POLITICAL_GROUP' are kept. ``None`` or a
            float (e.g. the NaN pandas uses for a missing cell) is treated as
            "no memberships".

    Returns:
        A list of dicts with the keys 'id' (the 'organization' value with its
        first four characters, the "org/" prefix, removed), 'startDate' and
        'endDate' (taken from the entry's 'memberDuring' mapping; None when
        absent). The order of the input is preserved.
    """
    # A missing cell is not iterable; report it as an empty membership list.
    if memberships is None or isinstance(memberships, float):
        return []

    national_parties = []
    for membership in memberships:
        if membership.get('membershipClassification', "") != 'def/ep-entities/NATIONAL_POLITICAL_GROUP':
            continue
        # Tolerate entries without a membership period or organization reference.
        period = membership.get('memberDuring') or {}
        organization = membership.get('organization') or ''
        national_parties.append({"id": organization[len("org/"):],
                                 "startDate": period.get('startDate'),
                                 "endDate": period.get('endDate')})
    return national_parties

# Apply the extractor to every MEP's 'hasMembership' cell and store the resulting
# list of national-party memberships in a new 'national_parties' column.
mep_metadata['national_parties'] = mep_metadata['hasMembership'].apply(extract_national_political_group)

In [233]:
# Spot check: for the row at position 2, show the national-party entry with the
# greatest 'startDate' value.
max(mep_metadata['national_parties'].iloc[2], key=lambda dp: dp['startDate'])

{'id': '4008', 'startDate': '2014-07-01', 'endDate': '2019-07-01'}

In [235]:
def _latest_party_id(parties):
    """Return the 'id' of the entry in ``parties`` with the greatest 'startDate', or None if empty."""
    # `default=None` avoids the ValueError max() raises on an empty list; a missing
    # start date compares as '' so it sorts first instead of breaking the comparison.
    latest = max(parties, key=lambda dp: dp['startDate'] or '', default=None)
    return None if latest is None else latest['id']


# Record, per MEP, the id of the national party with the latest membership start date.
mep_metadata['latest_np_affiliation'] = mep_metadata['national_parties'].apply(_latest_party_id)
mep_metadata.head()

Unnamed: 0,id,type,identifier,label,notation_codictPersonId,bday,hasMembership,citizenship,placeOfBirth,familyName,...,officialGivenName,upperOfficialFamilyName,upperOfficialGivenName,hasEmail,account,homepage,gender,honorific_prefix,national_parties,latest_np_affiliation
0,person/1008,Person,1008,Arnaldo FORLANI,1008,1925-12-08,"[{'id': 'membership/1008-m-8666', 'type': 'Mem...",ITA,Pesaro,Forlani,...,,,,,,,MALE,MR,"[{'id': '625', 'startDate': '1989-07-25', 'end...",625
1,person/1053,Person,1053,Leyla ONUR,1053,1945-01-08,"[{'id': 'membership/1053-f-79079', 'type': 'Me...",DEU,Braunschweig,Onur,...,,,,,,,FEMALE,MS,"[{'id': '741', 'startDate': '1989-07-25', 'end...",741
2,person/1055,Person,1055,Godelieve QUISTHOUDT-ROWOHL,1055,1947-06-18,"[{'id': 'membership/1055-f-95196', 'type': 'Me...",DEU,Etterbeek (Belgien),Quisthoudt-Rowohl,...,,,,,,,FEMALE,MS,"[{'id': '1375', 'startDate': '2004-07-20', 'en...",4008
3,person/1091,Person,1091,Roberto SPECIALE,1091,1943-08-03,"[{'id': 'membership/1091-f-81455', 'type': 'Me...",ITA,Chiavari (Genova),Speciale,...,,,,,,,MALE,MR,"[{'id': '863', 'startDate': '1991-01-01', 'end...",913
4,person/112071,Person,112071,Emer COSTELLO,112071,1962-09-03,"[{'id': 'membership/112071-f-138094', 'type': ...",IRL,Louth,Costello,...,,,,,,,FEMALE,MS,"[{'id': '2760', 'startDate': '2012-02-15', 'en...",2760


In [237]:
# Remove the raw 'hasMembership' column and preview the result.
mep_metadata = mep_metadata.drop(columns=['hasMembership'])
mep_metadata.head()

Unnamed: 0,id,type,identifier,label,notation_codictPersonId,bday,citizenship,placeOfBirth,familyName,givenName,...,officialGivenName,upperOfficialFamilyName,upperOfficialGivenName,hasEmail,account,homepage,gender,honorific_prefix,national_parties,latest_np_affiliation
0,person/1008,Person,1008,Arnaldo FORLANI,1008,1925-12-08,ITA,Pesaro,Forlani,Arnaldo,...,,,,,,,MALE,MR,"[{'id': '625', 'startDate': '1989-07-25', 'end...",625
1,person/1053,Person,1053,Leyla ONUR,1053,1945-01-08,DEU,Braunschweig,Onur,Leyla,...,,,,,,,FEMALE,MS,"[{'id': '741', 'startDate': '1989-07-25', 'end...",741
2,person/1055,Person,1055,Godelieve QUISTHOUDT-ROWOHL,1055,1947-06-18,DEU,Etterbeek (Belgien),Quisthoudt-Rowohl,Godelieve,...,,,,,,,FEMALE,MS,"[{'id': '1375', 'startDate': '2004-07-20', 'en...",4008
3,person/1091,Person,1091,Roberto SPECIALE,1091,1943-08-03,ITA,Chiavari (Genova),Speciale,Roberto,...,,,,,,,MALE,MR,"[{'id': '863', 'startDate': '1991-01-01', 'end...",913
4,person/112071,Person,112071,Emer COSTELLO,112071,1962-09-03,IRL,Louth,Costello,Emer,...,,,,,,,FEMALE,MS,"[{'id': '2760', 'startDate': '2012-02-15', 'en...",2760


In [245]:
# Turn list-valued birthplaces into their string representation; every other value is kept as-is.
mep_metadata['placeOfBirth'] = mep_metadata['placeOfBirth'].apply(
    lambda place: place if not isinstance(place, list) else str(place)
)

In [None]:
# Remove the 'hasEmail' and 'homepage' columns in a single call.
mep_metadata = mep_metadata.drop(columns=['hasEmail', 'homepage'])


In [252]:
# Persist the cleaned MEP table as Parquet under the relative 'data/' directory.
mep_metadata.to_parquet("data/MEP_DATA.parquet")

In [172]:
# Collect the distinct ids of every national party that any MEP has belonged to.
# The ids come from the 'national_parties' column built earlier rather than from
# 'hasMembership': that raw column has already been dropped by this point when the
# notebook runs top to bottom, so reading it raises KeyError on a fresh kernel.
# Iterating the lists directly also copes with MEPs whose list is empty, and
# empty ids are skipped because they cannot be looked up.
national_party_ids = {
    party['id']
    for parties in mep_metadata['national_parties']
    for party in parties
    if party['id']
}

In [173]:
# Number of distinct national-party ids collected.
len(national_party_ids)

1660

In [None]:
# Fetch the organisation record for every national-party id, requesting the ids in
# comma-separated batches of `batch_size`.
national_parties = []
# Sorting turns the id collection into a list with a reproducible order, so the batch
# composition does not depend on set iteration order.
national_party_ids = sorted(national_party_ids)
batch_size = 128
for idx in range(0, len(national_party_ids), batch_size):
    print(f"idx: {idx}")
    batch = national_party_ids[idx: idx + batch_size]
    response = requests.get(
        f"https://data.europarl.europa.eu/api/v2/corporate-bodies/{','.join(batch)}",
        params={"format": "application/ld+json"},
        timeout=60,  # seconds; fail instead of hanging forever on a stalled connection
    )

    if response.status_code == 200:
        national_parties.extend(response.json()['data'])
    else:
        # The whole batch is skipped; the status code is printed so the gap is visible.
        print(f"An error occured: {response.status_code}")

idx: 0
idx: 128
idx: 256
idx: 384
idx: 512
idx: 640
idx: 768
idx: 896
idx: 1024
idx: 1152
idx: 1280
idx: 1408
idx: 1536
idx: 1664
idx: 1792
idx: 1920
idx: 2048
idx: 2176
idx: 2304
idx: 2432
idx: 2560
idx: 2688
idx: 2816
idx: 2944
idx: 3072
idx: 3200
idx: 3328
idx: 3456
idx: 3584
idx: 3712
idx: 3840
idx: 3968
idx: 4096
idx: 4224
idx: 4352
idx: 4480
idx: 4608
idx: 4736
idx: 4864


In [180]:
# Build a table with one row per fetched organisation record and preview it.
parties_df = pd.DataFrame(national_parties)
parties_df.head()

Unnamed: 0,id,type,identifier,source,temporal,label,altLabel,notation_providerTemporalBodyId,notation_codictBodyId,prefLabel,classification,represents,isVersionOf,hasSubOrganization,linkedTo
0,org/1008,Organization,1008,EU_PARLIAMENT,"{'id': 'time-period/19990721-20020114', 'type'...",D14,"{'it': 'Giappone', 'ro': 'Japonia', 'pl': 'Jap...",1008,1008,{'ro': 'Delegația pentru relațiile cu Japonia'...,def/ep-entities/DELEGATION_PARLIAMENTARY,,,,
1,org/1053,Organization,1053,EU_PARLIAMENT,"{'id': 'time-period/19840724-19940718', 'type'...",SGP,"{'hu': 'SGP', 'fr': 'SGP', 'it': 'SGP', 'lv': ...",1053,1053,"{'es': 'Staatkundig Gereformeerde Partij', 'et...",def/ep-entities/NATIONAL_POLITICAL_GROUP,[http://publications.europa.eu/resource/author...,,,
2,org/1055,Organization,1055,EU_PARLIAMENT,"{'id': 'time-period/19861212-19900610', 'type'...",UDF-PSD,"{'ro': 'UDF-PSD', 'da': 'UDF-PSD', 'pl': 'UDF-...",1055,1055,{'sk': 'Union pour la démocratie française - P...,def/ep-entities/NATIONAL_POLITICAL_GROUP,[http://publications.europa.eu/resource/author...,,,
3,org/1091,Organization,1091,EU_PARLIAMENT,"{'id': 'time-period/20020115-20021217', 'type'...",FIAP,"{'da': 'Mund- og Klovsyge', 'es': 'Fiebre Afto...",1091,1091,{'ga': 'Coiste sealadach um an ngalar crúibe a...,def/ep-entities/COMMITTEE_PARLIAMENTARY_TEMPORARY,,org/FIAP,,
4,org/113,Organization,113,EU_PARLIAMENT,"{'id': 'time-period/19951214-19960717', 'type'...",TRANSIT,{'hr': 'Istraga o tranzitnom sustavu Zajednice...,113,113,{'mt': 'Kumitat temporanju ta’ Inkjesta fis-Si...,def/ep-entities/COMMITTEE_PARLIAMENTARY_TEMPORARY,,org/TRANSIT,,


In [181]:
# Keep only the last path segment of the classification
# (e.g. 'def/ep-entities/NATIONAL_POLITICAL_GROUP' -> 'NATIONAL_POLITICAL_GROUP').
# Taking the last segment instead of slicing off a fixed number of characters keeps this
# cell idempotent: the column is overwritten in place, so re-running a fixed-length slice
# would cut into the already-shortened names.
# Non-string cells (such as missing values) pass through unchanged instead of raising.
parties_df['classification'] = parties_df['classification'].apply(
    lambda cls_: cls_.rsplit('/', 1)[-1] if isinstance(cls_, str) else cls_
)
parties_df.head()

Unnamed: 0,id,type,identifier,source,temporal,label,altLabel,notation_providerTemporalBodyId,notation_codictBodyId,prefLabel,classification,represents,isVersionOf,hasSubOrganization,linkedTo
0,org/1008,Organization,1008,EU_PARLIAMENT,"{'id': 'time-period/19990721-20020114', 'type'...",D14,"{'it': 'Giappone', 'ro': 'Japonia', 'pl': 'Jap...",1008,1008,{'ro': 'Delegația pentru relațiile cu Japonia'...,DELEGATION_PARLIAMENTARY,,,,
1,org/1053,Organization,1053,EU_PARLIAMENT,"{'id': 'time-period/19840724-19940718', 'type'...",SGP,"{'hu': 'SGP', 'fr': 'SGP', 'it': 'SGP', 'lv': ...",1053,1053,"{'es': 'Staatkundig Gereformeerde Partij', 'et...",NATIONAL_POLITICAL_GROUP,[http://publications.europa.eu/resource/author...,,,
2,org/1055,Organization,1055,EU_PARLIAMENT,"{'id': 'time-period/19861212-19900610', 'type'...",UDF-PSD,"{'ro': 'UDF-PSD', 'da': 'UDF-PSD', 'pl': 'UDF-...",1055,1055,{'sk': 'Union pour la démocratie française - P...,NATIONAL_POLITICAL_GROUP,[http://publications.europa.eu/resource/author...,,,
3,org/1091,Organization,1091,EU_PARLIAMENT,"{'id': 'time-period/20020115-20021217', 'type'...",FIAP,"{'da': 'Mund- og Klovsyge', 'es': 'Fiebre Afto...",1091,1091,{'ga': 'Coiste sealadach um an ngalar crúibe a...,COMMITTEE_PARLIAMENTARY_TEMPORARY,,org/FIAP,,
4,org/113,Organization,113,EU_PARLIAMENT,"{'id': 'time-period/19951214-19960717', 'type'...",TRANSIT,{'hr': 'Istraga o tranzitnom sustavu Zajednice...,113,113,{'mt': 'Kumitat temporanju ta’ Inkjesta fis-Si...,COMMITTEE_PARLIAMENTARY_TEMPORARY,,org/TRANSIT,,


In [184]:
# Replace the per-language preferred-label mapping with its English ('en') entry.
# Cells that are not dicts (missing values, or labels already reduced by an earlier run
# of this cell) are left unchanged, and a mapping without an 'en' key yields None
# instead of raising KeyError.
parties_df['prefLabel'] = parties_df['prefLabel'].apply(
    lambda label: label.get('en') if isinstance(label, dict) else label
)
parties_df.head()

Unnamed: 0,id,type,identifier,source,temporal,label,altLabel,notation_providerTemporalBodyId,notation_codictBodyId,prefLabel,classification,represents,isVersionOf,hasSubOrganization,linkedTo
0,org/1008,Organization,1008,EU_PARLIAMENT,"{'id': 'time-period/19990721-20020114', 'type'...",D14,"{'it': 'Giappone', 'ro': 'Japonia', 'pl': 'Jap...",1008,1008,Delegation for relations with Japan,DELEGATION_PARLIAMENTARY,,,,
1,org/1053,Organization,1053,EU_PARLIAMENT,"{'id': 'time-period/19840724-19940718', 'type'...",SGP,"{'hu': 'SGP', 'fr': 'SGP', 'it': 'SGP', 'lv': ...",1053,1053,Staatkundig Gereformeerde Partij,NATIONAL_POLITICAL_GROUP,[http://publications.europa.eu/resource/author...,,,
2,org/1055,Organization,1055,EU_PARLIAMENT,"{'id': 'time-period/19861212-19900610', 'type'...",UDF-PSD,"{'ro': 'UDF-PSD', 'da': 'UDF-PSD', 'pl': 'UDF-...",1055,1055,Union pour la démocratie française - Parti soc...,NATIONAL_POLITICAL_GROUP,[http://publications.europa.eu/resource/author...,,,
3,org/1091,Organization,1091,EU_PARLIAMENT,"{'id': 'time-period/20020115-20021217', 'type'...",FIAP,"{'da': 'Mund- og Klovsyge', 'es': 'Fiebre Afto...",1091,1091,Temporary committee on foot and mouth disease,COMMITTEE_PARLIAMENTARY_TEMPORARY,,org/FIAP,,
4,org/113,Organization,113,EU_PARLIAMENT,"{'id': 'time-period/19951214-19960717', 'type'...",TRANSIT,{'hr': 'Istraga o tranzitnom sustavu Zajednice...,113,113,Temporary committee of Inquiry into the Commun...,COMMITTEE_PARLIAMENTARY_TEMPORARY,,org/TRANSIT,,


In [185]:
# Replace the per-language alternative-label mapping with its English ('en') entry.
# Cells that are not dicts (missing values, or labels already reduced by an earlier run
# of this cell) are left unchanged, and a mapping without an 'en' key yields None
# instead of raising KeyError.
parties_df['altLabel'] = parties_df['altLabel'].apply(
    lambda label: label.get('en') if isinstance(label, dict) else label
)
parties_df.head()

Unnamed: 0,id,type,identifier,source,temporal,label,altLabel,notation_providerTemporalBodyId,notation_codictBodyId,prefLabel,classification,represents,isVersionOf,hasSubOrganization,linkedTo
0,org/1008,Organization,1008,EU_PARLIAMENT,"{'id': 'time-period/19990721-20020114', 'type'...",D14,Japan,1008,1008,Delegation for relations with Japan,DELEGATION_PARLIAMENTARY,,,,
1,org/1053,Organization,1053,EU_PARLIAMENT,"{'id': 'time-period/19840724-19940718', 'type'...",SGP,SGP,1053,1053,Staatkundig Gereformeerde Partij,NATIONAL_POLITICAL_GROUP,[http://publications.europa.eu/resource/author...,,,
2,org/1055,Organization,1055,EU_PARLIAMENT,"{'id': 'time-period/19861212-19900610', 'type'...",UDF-PSD,UDF-PSD,1055,1055,Union pour la démocratie française - Parti soc...,NATIONAL_POLITICAL_GROUP,[http://publications.europa.eu/resource/author...,,,
3,org/1091,Organization,1091,EU_PARLIAMENT,"{'id': 'time-period/20020115-20021217', 'type'...",FIAP,Foot and mouth disease,1091,1091,Temporary committee on foot and mouth disease,COMMITTEE_PARLIAMENTARY_TEMPORARY,,org/FIAP,,
4,org/113,Organization,113,EU_PARLIAMENT,"{'id': 'time-period/19951214-19960717', 'type'...",TRANSIT,Inquiry into the Community Transit System,113,113,Temporary committee of Inquiry into the Commun...,COMMITTEE_PARLIAMENTARY_TEMPORARY,,org/TRANSIT,,


In [192]:
# For list-valued 'represents' cells keep only the first entry, with the country
# authority prefix sliced off; any other value is left unchanged.
parties_df['represents'] = parties_df['represents'].apply(
    lambda rep: rep
    if not isinstance(rep, list)
    else rep[0][len('http://publications.europa.eu/resource/authority/country/'):]
)
parties_df.head()

Unnamed: 0,id,type,identifier,source,temporal,label,altLabel,notation_providerTemporalBodyId,notation_codictBodyId,prefLabel,classification,represents,isVersionOf,hasSubOrganization,linkedTo
0,org/1008,Organization,1008,EU_PARLIAMENT,"{'id': 'time-period/19990721-20020114', 'type'...",D14,Japan,1008,1008,Delegation for relations with Japan,DELEGATION_PARLIAMENTARY,,,,
1,org/1053,Organization,1053,EU_PARLIAMENT,"{'id': 'time-period/19840724-19940718', 'type'...",SGP,SGP,1053,1053,Staatkundig Gereformeerde Partij,NATIONAL_POLITICAL_GROUP,NLD,,,
2,org/1055,Organization,1055,EU_PARLIAMENT,"{'id': 'time-period/19861212-19900610', 'type'...",UDF-PSD,UDF-PSD,1055,1055,Union pour la démocratie française - Parti soc...,NATIONAL_POLITICAL_GROUP,FRA,,,
3,org/1091,Organization,1091,EU_PARLIAMENT,"{'id': 'time-period/20020115-20021217', 'type'...",FIAP,Foot and mouth disease,1091,1091,Temporary committee on foot and mouth disease,COMMITTEE_PARLIAMENTARY_TEMPORARY,,org/FIAP,,
4,org/113,Organization,113,EU_PARLIAMENT,"{'id': 'time-period/19951214-19960717', 'type'...",TRANSIT,Inquiry into the Community Transit System,113,113,Temporary committee of Inquiry into the Commun...,COMMITTEE_PARLIAMENTARY_TEMPORARY,,org/TRANSIT,,


In [193]:
# Show only the organisations classified as NATIONAL_POLITICAL_GROUP.
parties_df[parties_df['classification'] == 'NATIONAL_POLITICAL_GROUP']

Unnamed: 0,id,type,identifier,source,temporal,label,altLabel,notation_providerTemporalBodyId,notation_codictBodyId,prefLabel,classification,represents,isVersionOf,hasSubOrganization,linkedTo
1,org/1053,Organization,1053,EU_PARLIAMENT,"{'id': 'time-period/19840724-19940718', 'type'...",SGP,SGP,1053,1053,Staatkundig Gereformeerde Partij,NATIONAL_POLITICAL_GROUP,NLD,,,
2,org/1055,Organization,1055,EU_PARLIAMENT,"{'id': 'time-period/19861212-19900610', 'type'...",UDF-PSD,UDF-PSD,1055,1055,Union pour la démocratie française - Parti soc...,NATIONAL_POLITICAL_GROUP,FRA,,,
5,org/1205,Organization,1205,EU_PARLIAMENT,"{'id': 'time-period/20030422-20040719', 'type'...",TSP,TSP,1205,1205,Tautas Saskanas Partija,NATIONAL_POLITICAL_GROUP,LVA,,,
6,org/1216,Organization,1216,EU_PARLIAMENT,"{'id': 'time-period/20030330-20040719', 'type'...",MRE,MRE,1216,1216,Movimento Repubblicani Europei,NATIONAL_POLITICAL_GROUP,ITA,,,
9,org/1320,Organization,1320,EU_PARLIAMENT,"{'id': 'time-period/20040720-20090713', 'type'...",VLD-Vivant,VLD-Vivant,1320,1320,Vlaamse Liberalen en Democraten - Vivant,NATIONAL_POLITICAL_GROUP,BEL,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1425,org/4255,Organization,4255,EU_PARLIAMENT,"{'id': 'time-period/20140701-20190701', 'type'...",PODEMOS,PODEMOS,4255,4255,PODEMOS,NATIONAL_POLITICAL_GROUP,ESP,,,
1426,org/711,Organization,711,EU_PARLIAMENT,"{'id': 'time-period/19520905-19990719', 'type'...",PSD,PSD,711,711,Parti social démocrate,NATIONAL_POLITICAL_GROUP,FRA,,,
1427,org/836,Organization,836,EU_PARLIAMENT,"{'id': 'time-period/19840724-20040719', 'type'...",FN,FN,836,836,Front national,NATIONAL_POLITICAL_GROUP,FRA,,,
1428,org/905,Organization,905,EU_PARLIAMENT,"{'id': 'time-period/19520710-19611214', 'type'...",RGR,RGR,905,905,Rassemblement des gauches républicaines,NATIONAL_POLITICAL_GROUP,FRA,,,


In [196]:
# Persist only the NATIONAL_POLITICAL_GROUP rows as Parquet under the relative 'data/' directory.
parties_df[parties_df['classification'] == 'NATIONAL_POLITICAL_GROUP'].to_parquet("data/PARTIES.parquet")

In [2]:
# Read the saved parties table back from disk and display it.
pd.read_parquet("data/PARTIES.parquet")

Unnamed: 0,id,type,identifier,source,temporal,label,altLabel,notation_providerTemporalBodyId,notation_codictBodyId,prefLabel,classification,represents,isVersionOf,hasSubOrganization,linkedTo
1,org/1053,Organization,1053,EU_PARLIAMENT,"{'endDate': '1994-07-18', 'id': 'time-period/1...",SGP,SGP,1053,1053,Staatkundig Gereformeerde Partij,NATIONAL_POLITICAL_GROUP,NLD,,,
2,org/1055,Organization,1055,EU_PARLIAMENT,"{'endDate': '1990-06-10', 'id': 'time-period/1...",UDF-PSD,UDF-PSD,1055,1055,Union pour la démocratie française - Parti soc...,NATIONAL_POLITICAL_GROUP,FRA,,,
5,org/1205,Organization,1205,EU_PARLIAMENT,"{'endDate': '2004-07-19', 'id': 'time-period/2...",TSP,TSP,1205,1205,Tautas Saskanas Partija,NATIONAL_POLITICAL_GROUP,LVA,,,
6,org/1216,Organization,1216,EU_PARLIAMENT,"{'endDate': '2004-07-19', 'id': 'time-period/2...",MRE,MRE,1216,1216,Movimento Repubblicani Europei,NATIONAL_POLITICAL_GROUP,ITA,,,
9,org/1320,Organization,1320,EU_PARLIAMENT,"{'endDate': '2009-07-13', 'id': 'time-period/2...",VLD-Vivant,VLD-Vivant,1320,1320,Vlaamse Liberalen en Democraten - Vivant,NATIONAL_POLITICAL_GROUP,BEL,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1425,org/4255,Organization,4255,EU_PARLIAMENT,"{'endDate': '2019-07-01', 'id': 'time-period/2...",PODEMOS,PODEMOS,4255,4255,PODEMOS,NATIONAL_POLITICAL_GROUP,ESP,,,
1426,org/711,Organization,711,EU_PARLIAMENT,"{'endDate': '1999-07-19', 'id': 'time-period/1...",PSD,PSD,711,711,Parti social démocrate,NATIONAL_POLITICAL_GROUP,FRA,,,
1427,org/836,Organization,836,EU_PARLIAMENT,"{'endDate': '2004-07-19', 'id': 'time-period/1...",FN,FN,836,836,Front national,NATIONAL_POLITICAL_GROUP,FRA,,,
1428,org/905,Organization,905,EU_PARLIAMENT,"{'endDate': '1961-12-14', 'id': 'time-period/1...",RGR,RGR,905,905,Rassemblement des gauches républicaines,NATIONAL_POLITICAL_GROUP,FRA,,,


In [14]:
 # Read the saved MEP table back from disk and display it.
 pd.read_parquet("data/MEP_DATA.parquet")

Unnamed: 0,id,type,identifier,label,notation_codictPersonId,bday,citizenship,placeOfBirth,familyName,givenName,...,deathDate,officialFamilyName,officialGivenName,upperOfficialFamilyName,upperOfficialGivenName,account,gender,honorific_prefix,national_parties,latest_np_affiliation
0,person/1008,Person,1008,Arnaldo FORLANI,1008,1925-12-08,ITA,Pesaro,Forlani,Arnaldo,...,,,,,,,MALE,MR,"[{'endDate': '1994-07-18', 'id': '625', 'start...",625
1,person/1053,Person,1053,Leyla ONUR,1053,1945-01-08,DEU,Braunschweig,Onur,Leyla,...,,,,,,,FEMALE,MS,"[{'endDate': '1994-07-18', 'id': '741', 'start...",741
2,person/1055,Person,1055,Godelieve QUISTHOUDT-ROWOHL,1055,1947-06-18,DEU,Etterbeek (Belgien),Quisthoudt-Rowohl,Godelieve,...,,,,,,,FEMALE,MS,"[{'endDate': '2009-07-13', 'id': '1375', 'star...",4008
3,person/1091,Person,1091,Roberto SPECIALE,1091,1943-08-03,ITA,Chiavari (Genova),Speciale,Roberto,...,,,,,,,MALE,MR,"[{'endDate': '1994-07-18', 'id': '863', 'start...",913
4,person/112071,Person,112071,Emer COSTELLO,112071,1962-09-03,IRL,Louth,Costello,Emer,...,,,,,,,FEMALE,MS,"[{'endDate': '2014-06-30', 'id': '2760', 'star...",2760
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4880,person/895,Person,895,Ian M. DALZIEL,895,1947-06-21,GBR,Edinburgh,Dalziel,Ian M.,...,,,,,,,MALE,MR,"[{'endDate': '1984-07-23', 'id': '812', 'start...",812
4881,person/905,Person,905,Francisque COLLOMB,905,1910-12-19,FRA,Rambert en Bugey (Ain),Collomb,Francisque,...,,,,,,,MALE,MR,"[{'endDate': '1984-07-23', 'id': '771', 'start...",771
4882,person/916,Person,916,Jacques CHIRAC,916,1932-11-29,FRA,Paris,Chirac,Jacques,...,,,,,,,MALE,MR,"[{'endDate': '1980-04-28', 'id': '627', 'start...",627
4883,person/96704,Person,96704,Edvard KOŽUŠNÍK,96704,1971-01-30,CZE,Olomouc,Kožušník,Edvard,...,,,,,,,MALE,MR,"[{'endDate': '2014-06-30', 'id': '2691', 'star...",2691


In [15]:
# Reload the saved MEP table and expand 'national_parties' so that each party entry
# gets its own row (the other columns are repeated).
exploded_party_affiliation = (
    pd.read_parquet("data/MEP_DATA.parquet")
    .explode(['national_parties'])
)

In [16]:
# Preview the first rows of the exploded MEP/party table.
exploded_party_affiliation.head()

Unnamed: 0,id,type,identifier,label,notation_codictPersonId,bday,citizenship,placeOfBirth,familyName,givenName,...,deathDate,officialFamilyName,officialGivenName,upperOfficialFamilyName,upperOfficialGivenName,account,gender,honorific_prefix,national_parties,latest_np_affiliation
0,person/1008,Person,1008,Arnaldo FORLANI,1008,1925-12-08,ITA,Pesaro,Forlani,Arnaldo,...,,,,,,,MALE,MR,"{'endDate': '1994-07-18', 'id': '625', 'startD...",625
1,person/1053,Person,1053,Leyla ONUR,1053,1945-01-08,DEU,Braunschweig,Onur,Leyla,...,,,,,,,FEMALE,MS,"{'endDate': '1994-07-18', 'id': '741', 'startD...",741
2,person/1055,Person,1055,Godelieve QUISTHOUDT-ROWOHL,1055,1947-06-18,DEU,Etterbeek (Belgien),Quisthoudt-Rowohl,Godelieve,...,,,,,,,FEMALE,MS,"{'endDate': '2009-07-13', 'id': '1375', 'start...",4008
2,person/1055,Person,1055,Godelieve QUISTHOUDT-ROWOHL,1055,1947-06-18,DEU,Etterbeek (Belgien),Quisthoudt-Rowohl,Godelieve,...,,,,,,,FEMALE,MS,"{'endDate': '1994-07-18', 'id': '605', 'startD...",4008
2,person/1055,Person,1055,Godelieve QUISTHOUDT-ROWOHL,1055,1947-06-18,DEU,Etterbeek (Belgien),Quisthoudt-Rowohl,Godelieve,...,,,,,,,FEMALE,MS,"{'endDate': '2019-07-01', 'id': '4008', 'start...",4008


In [18]:
# Pull the fields of each party entry out into flat columns.
exploded_party_affiliation = exploded_party_affiliation.assign(
    party_start_date=lambda frame: frame['national_parties'].str['startDate'],
    party_end_date=lambda frame: frame['national_parties'].str['endDate'],
    party_id=lambda frame: frame['national_parties'].str['id'],
)

In [20]:
# Persist the exploded MEP/party table as Parquet (path is relative to the working directory).
exploded_party_affiliation.to_parquet("EXPANDED_MEP_PARTY_MAPPINGS.parquet")