# Extract Riksdag videos from API

> "Extracts download URLs for videos of speeches from the Riksdag API"

- toc: false
- hidden: true
- branch: master
- badges: false
- categories: [riksdag, speech]

In [1]:
import requests
import json

In [2]:
# Fetch the video metadata for one sample document from the Riksdag VOD API.
# requests has no default timeout, so a stalled connection would otherwise
# block the kernel indefinitely.
sample = requests.get("https://data.riksdagen.se/api/mhs-vodapi?H210308", timeout=30)
# Fail here on an HTTP error rather than later, when an error page would be
# handed to the JSON parser.
sample.raise_for_status()

In [3]:
# Parse the response body as JSON.
data = json.loads(sample.text)

In [4]:
# Display the first entry of 'videodata' to inspect the fields of one video record.
data['videodata'][0]

{'videostatus': 2,
 'committee': None,
 'type': None,
 'debatepreamble': None,
 'debatetexthtml': None,
 'livestreamurl': None,
 'activelivespeaker': None,
 'id': '53f4230f-0ea9-40e3-ba19-e0ffff123165',
 'dokid': 'H210308',
 'title': 'Nordiska utvecklingsfonden',
 'debatename': 'Interpellationsdebatt',
 'debatedate': '6 mars 2015',
 'debatetype': 'ip',
 'debateurl': '/sv/webb-tv/video/interpellationsdebatt/nordiska-utvecklingsfonden_H210308',
 'fromchamber': True,
 'thumbnailurl': 'https://mhdownload.riksdagen.se/posterframe/2442207020017436121.jpg',
 'debateseconds': 1011,
 'streams': {'files': [{'mimetype': 'mp4',
    'url': 'VOD1/PAL169/2442207020017436121',
    'videofileurl': 'https://mamstream.riksdagen.se/VOD1/_definst_/smil:PAL169/2442207020017436121.smil/playlist.m3u8',
    'audiofileurl': 'https://mhdownload.riksdagen.se/VOD1/PAL169/2442207020017436121_aud.mp3',
    'downloadfileurl': 'https://mhdownload.riksdagen.se/VOD1/PAL169/2442207020017436121_480p.mp4',
    'bandwidth':

In [5]:
def viddata_get_single_stream(videodata, hires=True):
    """Collect the download URLs of one quality level from a single video record.

    The record is expected to look like
    ``{'streams': {'files': [{'bandwidth': [{'name': ..., 'downloadurl': ...}]}]}}``.
    Extraction is best-effort: any level that is missing, ``None`` or of an
    unexpected type is skipped and contributes no URLs instead of raising.

    Args:
        videodata: One element of the API response's ``'videodata'`` list
            (may be ``None``).
        hires: If true, select entries named ``'Hög kvalitet'`` (Swedish for
            "high quality"); otherwise entries named ``'Låg kvalitet'``
            ("low quality").

    Returns:
        A list of download URL strings, in the order encountered. Empty when
        nothing matches or the record is malformed.
    """
    wanted_name = 'Hög kvalitet' if hires else 'Låg kvalitet'

    if not isinstance(videodata, dict):
        return []
    streams = videodata.get('streams')
    if not isinstance(streams, dict):
        return []
    files = streams.get('files')
    if not isinstance(files, list):
        return []

    videos = []
    for vfile in files:
        if not isinstance(vfile, dict):
            continue
        # 'bandwidth' can be absent or null; treat both as "no variants".
        for bw in vfile.get('bandwidth') or []:
            if not isinstance(bw, dict):
                continue
            url = bw.get('downloadurl')
            # Skip entries without a usable URL: callers concatenate and
            # write these values out as text lines.
            if bw.get('name') == wanted_name and url:
                videos.append(url)
    return videos

def viddata_get_streams(videodata, hires=True):
    """Collect download URLs from every video record in an API response.

    Best-effort, like ``viddata_get_single_stream``: a response that is not a
    dict, or whose ``'videodata'`` entry is missing or null, yields an empty
    list instead of raising.

    Args:
        videodata: The parsed API response, expected to be a dict with a
            ``'videodata'`` list of video records.
        hires: Passed through to ``viddata_get_single_stream`` to choose the
            quality level.

    Returns:
        A flat list of the download URLs of all records, in record order.
    """
    if not isinstance(videodata, dict):
        return []
    records = videodata.get('videodata')
    # A null 'videodata' would otherwise fail when iterated.
    if not isinstance(records, (list, tuple)):
        return []
    output = []
    for vdata in records:
        output.extend(viddata_get_single_stream(vdata, hires))
    return output

In [11]:
def fix_speaker_name(name, party):
    """Strip a trailing " (party)" tag from a speaker's name.

    Only a tag at the very end of ``name`` is removed; names without that
    exact suffix are returned unchanged.
    """
    party_tag = f" ({party})"
    if not name.endswith(party_tag):
        return name
    # The tag is known to sit at the end, so cut exactly its length.
    return name[:len(name) - len(party_tag)]

def extract_speakers(data):
    """Collect the speaker entries of every video record in an API response.

    Each speaker's ``'text'`` value has a trailing " (party)" tag removed via
    ``fix_speaker_name``. The speaker dicts are updated in place, so ``data``
    itself is modified and the returned list holds the same dict objects.

    Missing or null levels (no ``'videodata'``, a null record, no
    ``'speakers'``) are skipped instead of raising, matching the best-effort
    behaviour of the stream helpers.

    Args:
        data: The parsed API response, expected to be a dict with a
            ``'videodata'`` list whose records may carry a ``'speakers'`` list.

    Returns:
        A flat list of speaker dicts, in the order encountered.
    """
    if not isinstance(data, dict):
        return []
    speakers = []
    for viddata in data.get('videodata') or []:
        if not isinstance(viddata, dict):
            continue
        for speaker in viddata.get('speakers') or []:
            if not isinstance(speaker, dict):
                continue
            text = speaker.get('text')
            # Only string names can carry a party suffix; leave anything else as is.
            if isinstance(text, str):
                speaker['text'] = fix_speaker_name(text, speaker.get('party'))
            speakers.append(speaker)
    return speakers

# Collect the speaker entries from the sample response, then list its
# download URLs at the default quality level (hires=True).
speakers = extract_speakers(data)
viddata_get_streams(data)

['https://mhdownload.riksdagen.se/VOD1/PAL169/2442207160019939321_480p.mp4']

In [10]:
# Same sample response, this time asking for the low-quality variants.
viddata_get_streams(data, hires=False)


[]

In [13]:
from pathlib import Path
import glob

# Gather the download URLs from every saved API response whose file name
# starts with G or H.
# NOTE(review): the directory is a hard-coded absolute local path; adjust it
# when running on another machine.
urls = []
# glob returns matches in arbitrary order; sorting keeps the URL list
# reproducible between runs.
for f in sorted(glob.glob('/Users/joregan/riksdag/riksdag-api-out/[GH]*')):
    fpath = Path(f)
    if not fpath.is_file():
        continue
    # JSON is UTF-8; do not rely on the platform's default text encoding.
    with open(f, encoding='utf-8') as inf:
        data = json.load(inf)
    urls.extend(viddata_get_streams(data))

In [14]:
# Save the collected URLs as a plain text file, one URL per line.
with open('/Users/joregan/riksdag/riksdag-api-out/video-urls.txt', 'w') as outf:
    outf.writelines(url + "\n" for url in urls)