# Getting Speaker Names by Speaker ID (via API call)

In this notebook, we read "unique_speaker_ids.csv" and retrieve the speaker's name for each unique speaker ID.


API calls go to the Bundestag-Mine endpoint "/api/DashboardController/GetSpeakerById/{speakerId}".

In [1]:
# Setup - necessary packages
import requests
import pandas as pd
from tqdm import tqdm 
import numpy as np

# Load the CSV with the unique speaker IDs (path is relative to this notebook)
speaker_ids_path = "../data/unique_speaker_ids.csv"
speaker_ids = pd.read_csv(speaker_ids_path)

# Show the loaded table
speaker_ids

Unnamed: 0,speaker_id
0,11001235
1,11001938
2,11002190
3,11003124
4,11003206
...,...
1043,11005179
1044,11005243
1045,999990130
1046,999990132


In [None]:
# Fetch the speaker record for every speaker ID from the Bundestag-Mine API.
# A full run takes roughly 1 min 50 s.
all_speakers = []  # one "result" record per successfully resolved speaker ID
failed_ids = []    # IDs that could not be resolved, kept so they can be inspected or retried

# One shared session reuses the connection to the API host instead of
# opening a new one for every single ID.
with requests.Session() as session:
    for speaker_id in tqdm(speaker_ids["speaker_id"]):
        url = f"https://bundestag-mine.de/api/DashboardController/GetSpeakerById/{speaker_id}"
        try:
            response = session.get(url, timeout=5)
            if response.status_code != 200:
                print(f"Failed for {speaker_id}: {response.status_code}")
                failed_ids.append(speaker_id)
                continue
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # Network errors, timeouts and non-JSON bodies must not abort the whole run
            print(f"Exception for {speaker_id}: {e}")
            failed_ids.append(speaker_id)
            continue

        # Guard against payloads that are not a JSON object
        result = data.get("result") if isinstance(data, dict) else None
        if result:
            all_speakers.append(result)
        else:
            # An empty/missing result is reported instead of being dropped silently
            print(f"No result for {speaker_id}")
            failed_ids.append(speaker_id)

# Build the final DataFrame once from the collected records
df_all = pd.DataFrame(all_speakers)

if failed_ids:
    print(f"{len(failed_ids)} of {len(speaker_ids)} speaker IDs could not be resolved")

df_all

  0%|          | 0/1048 [00:00<?, ?it/s]

100%|██████████| 1048/1048 [01:49<00:00,  9.54it/s]


Unnamed: 0,academicTitle,historySince,birthDate,deathDate,gender,maritalStatus,religion,profession,party,fraction,lastName,firstName,speakerId,mongoId,id
0,,1990-12-20T00:00:00,1952-03-03T00:00:00,0001-01-01T00:00:00,männlich,"verheiratet, 2 Kinder",,"Rechtsanwalt, Dipl.-Volkswirt",FDP,FDP,Kubicki,Wolfgang,11001235,620ea0f8dc9668643df4787c,04e186a4-2948-40f6-84a3-08da10a1bba9
1,Dr.,1972-12-13T00:00:00,1942-09-18T00:00:00,0001-01-01T00:00:00,männlich,"verheiratet, 4 Kinder",evangelisch,Rechtsanwalt,CDU,,Schäuble,Dr. Wolfgang,11001938,620ea0f8dc9668643df4767b,b0f8e407-8d10-4846-85b3-08da10a1bba9
2,Dr.,1980-11-04T00:00:00,1940-11-24T00:00:00,0001-01-01T00:00:00,männlich,"verheiratet, 3 Kinder",evangelisch,"Vizepräsident DBT a. D., Bankkaufmann, Dipl.-A...",FDP,,Otto Solms,Alterspräsident Dr. Hermann,11002190,620ea0f8dc9668643df47693,4d72a30e-e8e7-4577-87a8-08da10a1bba9
3,Dr.,1998-10-26T00:00:00,1957-03-10T00:00:00,0001-01-01T00:00:00,männlich,"verheiratet, 5 Kinder",evangelisch-lutherisch,Rechtsanwalt,CSU,CDU/CSU,Friedrich,Hans-Peter,11003124,620ea0f8dc9668643df47930,935bd0d2-9a90-4556-8788-08da10a1bba9
4,,1998-10-26T00:00:00,1963-08-09T00:00:00,0001-01-01T00:00:00,weiblich,verheiratet,ohne Angaben,"Vizepräsidentin DBT, Lehrerin",DIE LINKE.,Die Linke,Pau,Petra,11003206,620ea0f8dc9668643df47655,a8f6907c-574e-41da-8817-08da10a1bba9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1043,Dr.,2021-10-26T00:00:00,1965-07-04T00:00:00,0001-01-01T00:00:00,männlich,"verheiratet, 2 Kinder",evangelisch,"Arzt, Facharzt für Chirurgie",SPD,SPD,Philippi,Andreas,11005179,00000000-0000-0000-0000-000000000000,b0ddd279-81b6-41c7-169e-08da85b550b8
1044,,2021-10-26T00:00:00,1958-01-24T00:00:00,0001-01-01T00:00:00,männlich,"verheiratet, 2 Kinder",konfessionslos,Dipl.-Sportlehrer (Trainer),SPD,SPD,Ullrich,Frank,11005243,00000000-0000-0000-0000-000000000000,bb6469c8-cec4-4d20-9009-08da995401b8
1045,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,SPD,Bartz,Alexander,999990130,00000000-0000-0000-0000-000000000000,2d256c53-64b6-4d72-64f2-08daea560e86
1046,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,SPD,Vontz,Emily,999990132,00000000-0000-0000-0000-000000000000,d076d89d-099c-4f08-dffa-08db1b10658c


In [3]:
# Treat empty strings as missing values (NaN); df_all is modified in place
df_all.replace(to_replace="", value=np.nan, inplace=True)

# Report the number of missing values for every column.
# NOTE(review): the date columns contain values like "0001-01-01T00:00:00",
# which look like placeholders for unknown dates and are not counted as NA here - confirm.
print("NAs per coloumn: ")
na_counts = df_all.isna().sum()
for column_name, na_count in na_counts.items():
    print(column_name, na_count)


NAs per coloumn: 
academicTitle 859
historySince 0
birthDate 0
deathDate 0
gender 14
maritalStatus 103
religion 302
profession 19
party 14
fraction 71
lastName 0
firstName 0
speakerId 0
mongoId 0
id 0


In [4]:
# Inspect the speakers without a party. Most of them still have a fraction,
# so they are kept for now; this can be revisited later if necessary.
#
# To exclude all rows where the party is missing, uncomment:
#df_all = df_all.dropna(subset=["party"]).copy()
#len(df_all)
party_is_missing = df_all["party"].isna()
missing_party_rows = df_all.loc[party_is_missing]

missing_party_rows

Unnamed: 0,academicTitle,historySince,birthDate,deathDate,gender,maritalStatus,religion,profession,party,fraction,lastName,firstName,speakerId,mongoId,id
719,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,CDU/CSU,Natterer,Christian,999990104,620ea0f8dc9668643df476ad,c5df33a8-bb34-4491-85d3-08da10a1bba9
723,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,FDP,Gohl,Christopher,999990110,620ea0f8dc9668643df478d1,b8630ddc-3279-4513-86d4-08da10a1bba9
728,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,CDU/CSU,Friemann-Jennert,Maika,999990108,620ea0f8dc9668643df47880,de2dbe6a-9da2-484c-8632-08da10a1bba9
729,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,CDU/CSU,Nordt,Kristina,999990107,620ea0f8dc9668643df4779f,f44b1c4a-0468-4a40-859e-08da10a1bba9
741,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,Bremen,Bürgermeister,"Carsten Sieling,",10000,620ea0f8dc9668643df4777e,59c22a2a-6bb9-4ba5-8856-08da10a1bba9
880,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,SPD,Rinkert,Daniel,999990129,00000000-0000-0000-0000-000000000000,456401c0-f5c6-4cb6-d719-08dafacde145
927,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,SPD,Vontz,Emily,11005302,00000000-0000-0000-0000-000000000000,37b8f119-d08e-4a6a-9336-08db1e516906
1028,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,Bündnis 90 / Die Grünen,Krumwiede-Steiner,Franziska,11005312,00000000-0000-0000-0000-000000000000,2b890731-4147-47cf-d249-08dc4b5ee815
1030,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,SPD,Hohmann,Angela,11005313,00000000-0000-0000-0000-000000000000,34d004e3-3af3-489d-6c14-08dc612d7e28
1037,,0001-01-01T00:00:00,0001-01-01T00:00:00,0001-01-01T00:00:00,,,,,,FDP,Grünke,Julian,11005317,00000000-0000-0000-0000-000000000000,1bdf63f8-8fc2-4741-75d9-08dcd6e5c303


In [5]:
# Keep only the columns needed downstream: ID, name, party and fraction
selected_columns = [
    "speakerId",
    "firstName",
    "lastName",
    "party",
    "fraction",
]
df_subset = df_all.loc[:, selected_columns]

# Write the reduced table to disk without the DataFrame index
output_path = "../data/speaker_names.csv"
df_subset.to_csv(output_path, index=False)