In [6]:
import glob
import json
import os
from time import sleep

from tqdm import tqdm
import subprocess
import requests
from subprocess import CalledProcessError


In [7]:
# Directory that the downloaded `<game_id>.mp4` files are written into.
video_dir = 'E:/videos'

def get_all_files(information_dir='D:/Master Project/Detail Lists'):
    """Return the sorted paths of every ``*.json`` file directly inside a directory.

    Args:
        information_dir: Directory to scan (not recursive). Defaults to the
            location that used to be hard-coded, so existing no-argument
            calls behave exactly as before.

    Returns:
        list[str]: Matching paths in ascending lexicographic order. The list
        is empty when nothing matches or the directory does not exist.
    """
    # Escape the directory part so characters such as '[' or '*' in a folder
    # name are matched literally instead of being treated as glob patterns.
    pattern = os.path.join(glob.escape(information_dir), '*.json')
    return sorted(glob.glob(pattern))

def update_progress(prog):
    """Persist the progress record to ``progress.json`` in the working directory.

    The JSON is first written to a temporary sibling file and then moved into
    place with ``os.replace``. Opening ``progress.json`` directly in ``'w'``
    mode would truncate it before the new content is complete, so an
    interruption or a serialization error would destroy the resume state.

    Args:
        prog: JSON-serializable progress record (the notebook passes the list
            of already-processed file paths).

    Raises:
        TypeError: If ``prog`` is not JSON-serializable. The previous
            ``progress.json`` is left untouched in that case.
    """
    tmp_path = 'progress.json.tmp'
    try:
        with open(tmp_path, 'w') as file:
            json.dump(prog, file)
        os.replace(tmp_path, 'progress.json')
    finally:
        # After a successful replace the temp file is gone; this only fires
        # when writing or serializing failed part-way through.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def get_progress():
    """Load the progress record saved in ``progress.json``.

    Returns:
        The decoded JSON content of ``progress.json`` from the current
        working directory, or an empty list when that file does not exist.
    """
    progress_file = 'progress.json'
    if os.path.exists(progress_file):
        with open(progress_file, 'r') as fh:
            return json.load(fh)
    # Nothing saved yet: start from an empty record.
    return []

In [8]:
def download_video(video_url, video_path, timeout=30):
    """Stream a video from ``video_url`` into the file ``video_path``.

    The body is written to ``<video_path>.part`` and only renamed to
    ``video_path`` once the transfer has finished, so an interrupted download
    never leaves a truncated file under the final name.

    Args:
        video_url: URL to fetch with an HTTP GET.
        video_path: Destination file path. Its parent directory is created
            if it does not exist.
        timeout: Timeout in seconds passed to ``requests.get``. Without one a
            stalled server can block the call indefinitely.

    Returns:
        bool: True when the server answered 200 and the whole body was saved;
        False on any other status code or on a network-level failure.

    Raises:
        OSError: If the local file cannot be written (e.g. disk full). This is
            deliberately not turned into False, since it is not a per-video
            problem.
    """
    parent_dir = os.path.dirname(video_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    partial_path = f'{video_path}.part'
    try:
        with requests.get(video_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                return False
            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
        # Only a fully received body is promoted to the final name.
        os.replace(partial_path, video_path)
        return True
    except requests.RequestException:
        # Connection errors, timeouts and broken streams are reported as an
        # ordinary failure so the caller's True/False handling applies.
        return False
    finally:
        # Left over only when the transfer was cut short.
        if os.path.exists(partial_path):
            os.remove(partial_path)

def get_video_duration(video_url):
    """Ask ``ffprobe`` for the duration of the video at ``video_url``.

    Args:
        video_url: Location handed to ffprobe as its input argument.

    Returns:
        float: The value ffprobe prints for ``format=duration``.
        NOTE(review): ffprobe documents this value in seconds; confirm if the
        unit matters to a caller.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
        ValueError: If ffprobe's output cannot be parsed as a float.
    """
    ffprobe_args = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        # Print the bare value only: no section wrappers and no "key=" prefix.
        "-of", "default=noprint_wrappers=1:nokey=1",
        video_url,
    ]
    raw_output = subprocess.check_output(ffprobe_args)
    return float(raw_output.decode().strip())

def get_min_duration_video_url(videos):
    """Pick the URL of the shortest video among ``videos``.

    Args:
        videos: Iterable of dicts; each one is read as ``video['mp4']['max']``
            to obtain its URL.

    Returns:
        The URL whose duration, as reported by ``get_video_duration``, is the
        smallest. On a tie the URL seen first wins.

    Raises:
        ValueError: If ``videos`` is empty.
    """
    # Probe every URL up front; a repeated URL keeps its first position and
    # takes the most recently measured duration.
    duration_by_url = {
        url: get_video_duration(url)
        for url in (entry['mp4']['max'] for entry in videos)
    }
    shortest_url, _ = min(duration_by_url.items(), key=lambda pair: pair[1])
    return shortest_url

def append_fail_games(fail_game):
    """Report a failed game and add it to ``fail_games.json``.

    Prints the first key of ``fail_game``, then appends the whole dict to the
    JSON list stored in ``fail_games.json`` in the current working directory,
    creating that file when it does not exist yet.

    Args:
        fail_game: Dict describing the game; its first key is used as the
            identifier in the printed message.
    """
    failed_id = list(fail_game)[0]
    print(f"Failed to process {failed_id}")

    log_path = 'fail_games.json'
    recorded = []
    if os.path.exists(log_path):
        with open(log_path, 'r') as fh:
            recorded = json.load(fh)

    recorded.append(fail_game)

    # The whole list is rewritten each time so the file stays valid JSON.
    with open(log_path, 'w') as fh:
        json.dump(recorded, fh, indent=2)

In [9]:
# Work out which detail-list files still need processing after a restart.
all_files = get_all_files()
progress = get_progress()
# Membership is tested against a set so the filter is linear in the number of
# files; `progress` itself stays a list because it is appended to and saved
# as JSON by the download loop.
completed = set(progress)
remain_files = [file for file in all_files if file not in completed]

In [None]:
# Process every remaining detail-list file. Each file holds a list of
# single-key dicts ({game_id: {'success': ..., 'data': {...}}}); for every
# successful entry of type 'game' that has videos, one video is downloaded.
for i, file in enumerate(remain_files):
    with open(file, 'r') as f:
        data = json.load(f)

    # tqdm appends ": " after the description itself, so no colon is added
    # here (a trailing one showed up as "1/3604::" in the progress bar).
    for game_dict in tqdm(data, desc=f'{i+1}/{len(remain_files)}'):
        game_id = list(game_dict.keys())[0]
        entry = game_dict[game_id]
        if not entry['success']:
            continue

        game_data = entry['data']
        if game_data['type'] != 'game':
            continue

        # Covers both a missing 'movies' key and an empty or null value.
        game_videos = game_data.get('movies', [])
        if not game_videos:
            continue

        video_path = os.path.join(video_dir, f'{game_id}.mp4')
        try:
            if len(game_videos) == 1:
                video_url = game_videos[0]['mp4']['max']
            else:
                # Several videos: take the shortest one.
                video_url = get_min_duration_video_url(game_videos)
            downloaded = download_video(video_url, video_path)
        except (CalledProcessError, ValueError, requests.RequestException):
            # ffprobe failed, printed something that is not a number, or the
            # network request broke. Record the game and keep going instead
            # of aborting the whole run.
            downloaded = False

        if not downloaded:
            append_fail_games(game_dict)
        # Short pause between download attempts.
        sleep(0.1)

    # A file counts as done only after all of its games were attempted.
    progress.append(file)
    update_progress(progress)

1/3604::  30%|███       | 9/30 [00:16<00:36,  1.72s/it]