In [1]:
import requests
import pandas as pd
import os

### fetch current members from the UK Parliament Members API

In [2]:
# Source: UK Parliament Members API - https://members-api.parliament.uk/index.html
# Query settings
skip = 0  # offset of the first record on the page being requested
take = 20  # page size asked of the API
all_members = []
current_member = 'true'
house = 1 #1 for commons, 2 for lords

search_url = "https://members-api.parliament.uk/api/Members/Search"

# Page through the search results until the API returns an empty page
# or a request fails.
while True:
    response = requests.get(
        search_url,
        # Let requests build and encode the query string.
        params={
            'IsCurrentMember': current_member,
            'house': house,
            'skip': skip,
            'take': take,
        },
        # Without a timeout a stalled connection would hang the notebook forever.
        timeout=30,
    )

    if response.status_code != 200:
        # NOTE(review): all_members only holds the pages fetched so far when
        # this happens - check the output before running the later cells.
        print(f"request failed: {response.status_code}")
        break

    members = response.json().get('items', [])
    if not members:
        break

    all_members.extend(members)
    # Advance by what was actually received, so a short page neither skips
    # records nor overstates the progress message.
    print(f'acquired members #{skip}-{skip + len(members)}')
    skip += len(members)

acquired members #0-20
acquired members #20-40
acquired members #40-60
acquired members #60-80
acquired members #80-100
acquired members #100-120
acquired members #120-140
acquired members #140-160
acquired members #160-180
acquired members #180-200
acquired members #200-220
acquired members #220-240
acquired members #240-260
acquired members #260-280
acquired members #280-300
acquired members #300-320
acquired members #320-340
acquired members #340-360
acquired members #360-380
acquired members #380-400
acquired members #400-420
acquired members #420-440
acquired members #440-460
acquired members #460-480
acquired members #480-500
acquired members #500-520
acquired members #520-540
acquired members #540-560
acquired members #560-580
acquired members #580-600
acquired members #600-620
acquired members #620-640
acquired members #640-660


### select and format the fields needed for the table

In [3]:
# Flatten every API record into one row holding only the fields used later on.
rows = []
for record in all_members:
    info = record['value']
    latest_party = info['latestParty']
    membership = info['latestHouseMembership']
    member_id = info['id']

    rows.append({
        'id': member_id,
        'name': info['nameDisplayAs'],
        'party': latest_party['name'],
        # the colour value is prefixed with '#' here
        'party_colour': f"#{latest_party['backgroundColour']}",
        'constituency': membership['membershipFrom'],
        'start_date': membership['membershipStartDate'],
        'contact': f"https://members.parliament.uk/member/{member_id}/contact",
        'image_url': info['thumbnailUrl'],
    })

df = pd.DataFrame(rows)

In [4]:
# Human-readable start date such as "4 July, 2024": format with a zero-padded
# day first, then strip the leading zero(s) from the resulting string.
start_dates = pd.to_datetime(df['start_date'])
df['start_date_string'] = start_dates.dt.strftime('%d %B, %Y').str.lstrip('0')

In [5]:
# Record the initial row order in an 'order' column (0, 1, 2, ...).
# NOTE(review): nothing here sorts the rows - the order is whatever the API
# returned, presumably alphabetical by last name; confirm against the API.
#
# Any existing 'order' column is dropped first so the cell can be re-run:
# a plain reset_index() + rename would otherwise add a second 'order' column
# on every re-execution.
df = df.drop(columns='order', errors='ignore').reset_index(drop=True)
df.insert(0, 'order', range(len(df)))

In [6]:
# Keep only the listed columns, in this order.
column_order = [
    'order',
    'id',
    'name',
    'party',
    'party_colour',
    'constituency',
    'start_date',
    'start_date_string',
    'contact',
    'image_url',
]
df = df.loc[:, column_order]

In [7]:
# Preview the first five rows of the table.
df.head(n=5)

Unnamed: 0,order,id,name,party,party_colour,constituency,start_date,start_date_string,contact,image_url
0,0,172,Ms Diane Abbott,Labour,#d50000,Hackney North and Stoke Newington,1987-06-11T00:00:00,"11 June, 1987",https://members.parliament.uk/member/172/contact,https://members-api.parliament.uk/api/Members/...
1,1,5131,Jack Abbott,Labour (Co-op),#d50000,Ipswich,2024-07-04T00:00:00,"4 July, 2024",https://members.parliament.uk/member/5131/contact,https://members-api.parliament.uk/api/Members/...
2,2,4212,Debbie Abrahams,Labour,#d50000,Oldham East and Saddleworth,2011-01-13T00:00:00,"13 January, 2011",https://members.parliament.uk/member/4212/contact,https://members-api.parliament.uk/api/Members/...
3,3,5120,Shockat Adam,Independent,#909090,Leicester South,2024-07-04T00:00:00,"4 July, 2024",https://members.parliament.uk/member/5120/contact,https://members-api.parliament.uk/api/Members/...
4,4,5213,Dr Zubir Ahmed,Labour,#d50000,Glasgow South West,2024-07-04T00:00:00,"4 July, 2024",https://members.parliament.uk/member/5213/contact,https://members-api.parliament.uk/api/Members/...


### download images locally

In [8]:
# Folder that receives the downloaded thumbnails; created (with any missing
# parents) if absent. exist_ok=True replaces the separate exists() check.
images_folder = 'table/images'
os.makedirs(images_folder, exist_ok=True)

In [9]:
# Download each member's thumbnail into images_folder as "<id>_thumbnail.jpg".
for member_id, image_url in zip(df['id'], df['image_url']):
    image_path = os.path.join(images_folder, f"{member_id}_thumbnail.jpg")

    # The whole body is needed at once, so no streaming; the timeout keeps a
    # stalled download from hanging the notebook.
    response = requests.get(image_url, timeout=30)

    if response.status_code != 200:
        # Don't save an error response under a .jpg name - report it and move on.
        print(f"image download failed for member {member_id}: {response.status_code}")
        continue

    with open(image_path, 'wb') as file:
        file.write(response.content)

### export dataframe

In [10]:
# Write the full table as JSON in pandas' 'table' orientation, without the
# index. The output folder is created first because nothing earlier in the
# notebook makes 'table/data', so a fresh run would otherwise fail here.
os.makedirs('table/data', exist_ok=True)
df.to_json('table/data/data.json', orient='table', index=False)

In [11]:
#export csv download
# Only the reader-facing columns go into the CSV. The output folder is created
# if missing, and index=False keeps the DataFrame's row index out of the file
# (matching the JSON export, which also omits it).
os.makedirs('table/data', exist_ok=True)
dfExport = df[['name','party','constituency','start_date_string','contact','image_url']]
dfExport.to_csv('table/data/members_of_parliament_2024.csv', index=False)