<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#List-of-all-parliaments-since-1935" data-toc-modified-id="List-of-all-parliaments-since-1935-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>List of all parliaments since 1935</a></span></li><li><span><a href="#Use-Wikidata-to-get-MPs" data-toc-modified-id="Use-Wikidata-to-get-MPs-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Use Wikidata to get MPs</a></span></li><li><span><a href="#Loop-through-each-parliament-and-get-all-MPs-from-Wikidata" data-toc-modified-id="Loop-through-each-parliament-and-get-all-MPs-from-Wikidata-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Loop through each parliament and get all MPs from Wikidata</a></span></li></ul></div>

### List of all parliaments since 1935

In [6]:
# Wikidata item IDs of the parliaments listed under the heading above, one
# entry per parliament. Source:
# https://www.wikidata.org/wiki/Wikidata:WikiProject_British_Politicians
all_parliaments = [
    "Q41582593",
    "Q41582597",
    "Q41582600",
    "Q41582603",
    "Q41582604",
    "Q41582606",
    "Q41582608",
    "Q41582609",
    "Q41582612",
    "Q41582615",
    "Q41582617",
    "Q41582619",
    "Q41582621",
    "Q41582624",
    "Q41582627",
    "Q36634044",
    "Q35921591",
    "Q35647955",
    "Q35494253",
    "Q30524718",
    "Q30524710",
]

### Use Wikidata to get MPs

In [36]:
import requests
import pandas as pd

def extract_optional(row, key):
    """Return ``row[key]["value"]``, or ``None`` if either lookup is missing.

    Used for SPARQL result bindings where a variable bound inside an
    ``OPTIONAL`` clause may be absent from a given row.

    Parameters
    ----------
    row : mapping
        One result binding, mapping variable names to ``{"value": ...}`` dicts.
    key : str
        Name of the variable to read.

    Returns
    -------
    The bound value, or ``None`` when a ``KeyError`` is raised by the lookup.
    """
    try:
        binding = row[key]
        result = binding["value"]
    except KeyError:
        result = None
    return result
        
def get_wiki(parl):
    """Query Wikidata for the MPs attached to one parliament.

    Parameters
    ----------
    parl : str
        Wikidata item ID (for example ``"Q41582593"``). It is spliced into the
        SPARQL query after the ``wd:`` prefix, so it must be a bare item ID.

    Returns
    -------
    pandas.DataFrame
        One row per distinct result binding, with columns ``mp`` (label),
        ``mp_id``, ``gender`` (label, ``None`` when the optional gender is
        not bound), ``party`` (label) and ``party_id``. The columns are
        present even when the query returns no rows.

    Raises
    ------
    requests.HTTPError
        If the query service answers with a 4xx/5xx status.
    requests.Timeout
        If the service does not answer within the timeout.
    """
    # Get all mps that exist in wikidata.org
    wikidata_query = """SELECT DISTINCT ?mp ?mpLabel ?party ?partyLabel ?genderLabel WHERE { ?mp (p:P39/ps:P39/wdt:P279*) wd:"""+parl + """.
      ?mp wdt:P102 ?party.
      OPTIONAL { ?mp wdt:P21 ?gender. }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }"""

    response = requests.get(
        'https://query.wikidata.org/bigdata/namespace/wdq/sparql',
        params={'query': wikidata_query, 'format': 'json'},
        # Wikimedia asks clients to identify themselves with a descriptive
        # User-Agent; extend this string with contact details when reusing.
        headers={'User-Agent': 'mp-first-names-notebook/1.0 (python-requests)'},
        # Fail instead of hanging forever if the service never answers.
        timeout=120,
    )
    # Surface throttling / query errors here rather than as a JSON decode
    # error on an HTML or plain-text error body.
    response.raise_for_status()
    bindings = response.json()["results"]["bindings"]

    # Convert the results to a pandas dataframe
    records = [
        {
            "mp": binding["mpLabel"]["value"],
            "mp_id": binding["mp"]["value"],
            "gender": extract_optional(binding, "genderLabel"),
            "party": binding["partyLabel"]["value"],
            "party_id": binding["party"]["value"],
        }
        for binding in bindings
    ]

    # Explicit columns keep the schema stable when there are no bindings, so
    # later column selection on the concatenated frame does not fail.
    return pd.DataFrame(records, columns=["mp", "mp_id", "gender", "party", "party_id"])

### Loop through each parliament and get all MPs from Wikidata

In [22]:
# Run one Wikidata query per parliament and stack the resulting frames.
# An MP who appears in several parliaments shows up once per parliament here.
all_mps = pd.concat(list(map(get_wiki, all_parliaments)))

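Group by name, gender and ID to remove duplicate entries (the same MP is returned once for every parliament and party they appear under). Rows with no recorded gender are dropped by the grouping.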

In [24]:
# Every selected column is also a grouping key, so count() has nothing left to
# aggregate: the result is just the distinct (gender, mp, mp_id) combinations,
# which reset_index() turns back into ordinary columns.
unique_mps = (
    all_mps.loc[:, ["gender", "mp", "mp_id"]]
    .groupby(by=["gender", "mp", "mp_id"])
    .count()
    .reset_index()
)

Split name into first and last names

In [None]:
def _name_tokens(label):
    """Split a display name on whitespace after removing commas."""
    return label.replace(",", "").split()


def _first_name(label):
    """Return the first token of a name, skipping a leading "Sir" if more follows.

    Returns an empty string for an empty label.
    """
    tokens = _name_tokens(label)
    if not tokens:
        return ""
    if tokens[0] == "Sir" and len(tokens) > 1:
        return tokens[1]
    return tokens[0]


def _last_name(label):
    """Return the final token of a name (the only token for one-word labels).

    Returns an empty string for an empty label.
    """
    tokens = _name_tokens(label)
    return tokens[-1] if tokens else ""


# "first" skips the "Sir" title; "last" is the final token rather than the
# second one, so middle names and titles do not end up in the "last" column.
unique_mps["first"] = unique_mps["mp"].apply(_first_name)
unique_mps["last"] = unique_mps["mp"].apply(_last_name)

Take each gender separately and count all the first names

In [30]:
# Count first names among male MPs only, so a name shared by men and women is
# not credited with the combined total. rename_axis/reset_index(name=...) pin
# the column names regardless of how value_counts labels its output.
men_counts = (
    unique_mps.loc[unique_mps["gender"] == "male", "first"]
    .value_counts()
    .rename_axis("name")
    .reset_index(name="size")
)

# Keep the 100 most common names and fold the rest into one "Other men" entry.
# The remainder is the sum of the "size" column; attribute access (`.size`)
# would return the frame's element count instead.
men = pd.concat(
    [
        men_counts.iloc[:100],
        pd.DataFrame([{"name": "Other men", "size": int(men_counts["size"].iloc[100:].sum())}]),
    ],
    ignore_index=True,
).to_json(orient="records")

In [31]:
# Count first names among female MPs only, so a name shared by men and women
# is not credited with the combined total. rename_axis/reset_index(name=...)
# pin the column names regardless of how value_counts labels its output.
women_counts = (
    unique_mps.loc[unique_mps["gender"] == "female", "first"]
    .value_counts()
    .rename_axis("name")
    .reset_index(name="size")
)

# Keep the 100 most common names and fold the rest into one "Other women"
# entry. The remainder is the sum of the "size" column; attribute access
# (`.size`) would return the frame's element count instead.
women = pd.concat(
    [
        women_counts.iloc[:100],
        pd.DataFrame([{"name": "Other women", "size": int(women_counts["size"].iloc[100:].sum())}]),
    ],
    ignore_index=True,
).to_json(orient="records")

In [35]:
# Wrap the two JSON arrays in a root "MPs" node with one child per gender.
# `men` and `women` are already JSON text, so they are spliced in verbatim.
"".join([
    '{ "name": "MPs", "children": [ {"name": "men", "children": ',
    men,
    '}, {"name": "women", "children": ',
    women,
    '}] }',
])

'{ "name": "MPs", "children": [ {"name": "men", "children": [{"name":"John","size":261},{"name":"David","size":134},{"name":"William","size":116},{"name":"George","size":90},{"name":"James","size":84},{"name":"Robert","size":73},{"name":"Michael","size":72},{"name":"Peter","size":65},{"name":"Richard","size":61},{"name":"Edward","size":45},{"name":"Charles","size":43},{"name":"Ian","size":39},{"name":"Arthur","size":38},{"name":"Tom","size":36},{"name":"Thomas","size":36},{"name":"Andrew","size":35},{"name":"Henry","size":33},{"name":"Alan","size":33},{"name":"Paul","size":33},{"name":"Frank","size":32},{"name":"Stephen","size":31},{"name":"Mark","size":30},{"name":"Geoffrey","size":26},{"name":"Chris","size":25},{"name":"Walter","size":24},{"name":"Anthony","size":24},{"name":"Joseph","size":24},{"name":"Harry","size":23},{"name":"Harold","size":22},{"name":"Frederick","size":22},{"name":"Patrick","size":22},{"name":"Christopher","size":21},{"name":"Alfred","size":20},{"name":"Norman"