In [1]:
import json
from pathlib import Path
# Directory whose entries matching "H*" are globbed and parsed as JSON by the cells below.
# NOTE(review): absolute, machine-specific path — point it at your own copy before re-running.
_API_DIR = Path("/Users/joregan/riksdag/riksdag-api-out")

In [2]:
def endswith_list(text, items):
    """Return True if ``text`` ends with at least one of the strings in ``items``.

    Suffixes are tried in iteration order and the search stops at the first
    hit. An empty ``items`` yields False.
    """
    return any(text.endswith(suffix) for suffix in items)

In [3]:
def viddata_get_single_stream(videodata, hires=True):
    """Collect the download URLs of one quality level from a single videodata entry.

    The entry is read as
    ``{'streams': {'files': [{'bandwidth': [{'name': ..., 'downloadurl': ...}]}]}}``.
    Extraction is best-effort at every level: a missing key, a ``None`` value
    or a value of an unexpected type makes that part of the structure
    contribute nothing instead of raising.

    Args:
        videodata: One entry of the top-level ``'videodata'`` list (may be None).
        hires: If truthy, pick entries named 'Hög kvalitet'; otherwise pick
            entries named 'Låg kvalitet'.

    Returns:
        A list of ``'downloadurl'`` values in document order; empty when
        nothing usable is found.
    """
    # Exactly one quality label is of interest per call.
    wanted = 'Hög kvalitet' if hires else 'Låg kvalitet'

    streams = videodata.get('streams') if isinstance(videodata, dict) else None
    files = streams.get('files') if isinstance(streams, dict) else None
    if not isinstance(files, list):
        return []

    videos = []
    for vfile in files:
        bandwidths = vfile.get('bandwidth') if isinstance(vfile, dict) else None
        if not isinstance(bandwidths, list):
            # Malformed or empty file entry: skip it, keep the rest.
            continue
        for bw in bandwidths:
            if not isinstance(bw, dict) or bw.get('name') != wanted:
                continue
            url = bw.get('downloadurl')
            # A missing/None URL would break callers that join the result.
            if url is not None:
                videos.append(url)
    return videos

def viddata_get_streams(videodata, hires=True):
    """Gather download URLs from every entry under the top-level 'videodata' key.

    Args:
        videodata: Parsed JSON document; its ``'videodata'`` value is
            iterated as a list of entries.
        hires: Passed through to ``viddata_get_single_stream``.

    Returns:
        The concatenation of the per-entry URL lists. An empty list when the
        document is not a dict, lacks ``'videodata'``, or holds ``None`` (or
        any other non-list value) there.
    """
    if not isinstance(videodata, dict):
        return []
    entries = videodata.get('videodata')
    # 'videodata' can be present but null in the JSON; treat it as "no entries".
    if not isinstance(entries, (list, tuple)):
        return []
    output = []
    for vdata in entries:
        output.extend(viddata_get_single_stream(vdata, hires))
    return output

def viddata_from_file(videofile, hires=True):
    """Parse a JSON file and return the download URLs found in it.

    Args:
        videofile: Path of the JSON file to read.
        hires: Passed through to ``viddata_get_streams``.

    Returns:
        Whatever ``viddata_get_streams`` returns for the parsed document.
    """
    # Read as UTF-8 explicitly: the data contains non-ASCII text and the
    # default text encoding of open() depends on the platform/locale.
    with open(videofile, encoding="utf-8") as jsonf:
        data = json.load(jsonf)
    # The file is closed before the (pure in-memory) extraction runs.
    return viddata_get_streams(data, hires)

In [4]:
def json_matches_years(filename, years):
    """Tell whether a JSON file has a videodata entry dated in one of ``years``.

    An entry matches when its ``'debatedate'`` value, stripped of surrounding
    whitespace, ends with one of the strings in ``years``.

    Args:
        filename: Path of the JSON file to inspect.
        years: Iterable of suffix strings, e.g. ``["2017", "2018"]``.

    Returns:
        True at the first matching entry. False when the file has no
        ``'videodata'`` key, when ``'videodata'`` or one of its entries is
        null (reported on stdout; scanning stops there), or when no entry
        matches.
    """
    # Read as UTF-8 explicitly: the default text encoding of open() depends
    # on the platform/locale.
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)

    if "videodata" not in data:
        return False
    videodata = data["videodata"]
    if videodata is None:
        print(f"Empty videodata: {filename}")
        return False
    for vdata in videodata:
        if vdata is None:
            print(f"Empty videodata: {filename}")
            return False
        # Missing, None and "" dates are all skipped.
        date = vdata.get("debatedate")
        if date and endswith_list(date.strip(), years):
            return True
    return False

In [35]:
# Collect the paths of all "H*" files in _API_DIR that have a 2017 or 2018 debate date.
# Path.glob yields entries in a filesystem-dependent order, so iterate in
# sorted order to make `matches` (and anything written from it) reproducible.
matches = []
for file in sorted(_API_DIR.glob("H*")):
    if json_matches_years(file, ["2017", "2018"]):
        matches.append(str(file))

Empty videodata: /Users/joregan/riksdag/riksdag-api-out/H8C120210621zz


In [37]:
# Dump the collected paths to 2017-2018.txt, one path per line.
with open("2017-2018.txt", "w") as outf:
    outf.writelines(f"{m}\n" for m in matches)

In [7]:
# Write one tab-separated row per matching file: the file stem followed by
# its download URLs. Path.glob yields entries in a filesystem-dependent
# order, so iterate in sorted order to keep the rows stable between runs.
with open("2017-2018-videos.txt", "w") as outf:
    for file in sorted(_API_DIR.glob("H*")):
        if json_matches_years(file, ["2017", "2018"]):
            videos = viddata_from_file(file)
            vidsout = "\t".join(videos)
            outf.write(f"{file.stem}\t{vidsout}\n")

Empty videodata: /Users/joregan/riksdag/riksdag-api-out/H8C120210621zz


In [6]:
videos

['https://mhdownload.riksdagen.se/VOD/161963_20000_829104.mp4',
 'https://mhdownload.riksdagen.se/VOD/162229_20000_830336.mp4',
 'https://mhdownload.riksdagen.se/VOD/176509_20000_890205.mp4',
 'https://mhdownload.riksdagen.se/VOD/163896_20000_836991.mp4',
 'https://mhdownload.riksdagen.se/riksdagen3/unrestricted/2018/06/01/825187.mp4',
 'https://mhdownload.riksdagen.se/VOD1/184384_20000_943081.mp4',
 'https://mhdownload.riksdagen.se/VOD/162175_20000_830161.mp4',
 'https://mhdownload.riksdagen.se/VOD/162052_20000_829636.mp4',
 'https://mhdownload.riksdagen.se/VOD/166120_20000_847909.mp4',
 'https://mhdownload.riksdagen.se/VOD/167149_20000_851873.mp4',
 'https://mhdownload.riksdagen.se/VOD1/182530_20000_928457.mp4',
 'https://mhdownload.riksdagen.se/VOD1/184204_20000_942177.mp4',
 'https://mhdownload.riksdagen.se/VOD/163504_20000_835235.mp4',
 'https://mhdownload.riksdagen.se/VOD/163080_20000_833777.mp4',
 'https://mhdownload.riksdagen.se/VOD/164145_20000_837954.mp4',
 'https://mhdownloa