In [None]:
import glob
import json
import os
from time import sleep

from tqdm import tqdm
import subprocess
import requests
from subprocess import CalledProcessError

In [None]:
# Directory the downloaded videos are written to; the download loop stores
# each video here as `<game_id>.mp4`.
video_dir = 'E:/videos'

def get_all_files(information_dir='D:/Master Project/Detail Lists'):
    """Return the sorted paths of the ``*.json`` files directly inside a directory.

    Args:
        information_dir: Directory to scan. Defaults to the location this
            notebook was originally written against, so existing zero-argument
            calls behave exactly as before.

    Returns:
        list[str]: Matching file paths in ascending (lexicographic) order.
        Empty when the directory does not exist or holds no ``.json`` files.
    """
    # Escape the directory part so glob metacharacters in its name
    # (``[``, ``*``, ``?``) are matched literally; only ``*.json`` is a pattern.
    pattern = os.path.join(glob.escape(information_dir), '*.json')
    return sorted(glob.glob(pattern))

def update_progress(prog):
    """Persist the progress list to ``progress.json`` in the working directory.

    The data is first written to a temporary sibling file and then moved over
    ``progress.json`` with ``os.replace``. Opening ``progress.json`` directly
    in ``'w'`` mode would truncate it immediately, so an interrupt or a
    serialization error in the middle of the dump would destroy the previously
    saved progress.

    Args:
        prog: JSON-serializable object to store (the notebook passes the list
            of already-processed file paths).

    Raises:
        TypeError: If ``prog`` is not JSON-serializable. The existing
            ``progress.json`` is left untouched in that case.
    """
    tmp_path = 'progress.json.tmp'
    try:
        with open(tmp_path, 'w') as file:
            json.dump(prog, file)
        os.replace(tmp_path, 'progress.json')
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a half-written temp file.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

def get_progress():
    """Load the saved progress from ``progress.json`` in the working directory.

    Returns:
        The parsed JSON content of ``progress.json``, or an empty list when
        that file does not exist.
    """
    progress_path = 'progress.json'
    if os.path.exists(progress_path):
        with open(progress_path, 'r') as handle:
            return json.load(handle)
    return []

In [None]:
def download_video(video_url, video_path, timeout=(10, 60)):
    """Stream a video from ``video_url`` into the file ``video_path``.

    The body is written to ``<video_path>.part`` and only renamed to
    ``video_path`` once the transfer has completed, so an interrupted or failed
    download never leaves a truncated video behind (and never clobbers an
    existing complete one).

    Args:
        video_url: URL to fetch.
        video_path: Destination file. Its parent directory is created if needed.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.get``. Without a timeout a stalled server would block
            the whole run indefinitely.

    Returns:
        bool: ``True`` when the server answered 200 and the file was written,
        ``False`` for any other status code (nothing is written then).

    Raises:
        requests.RequestException: Network failures, including timeouts,
            propagate to the caller. Any partial file is removed first.
    """
    # Create the directory the file actually goes into, rather than relying on
    # a module-level setting that may not match ``video_path``.
    target_dir = os.path.dirname(video_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    partial_path = video_path + '.part'
    with requests.get(video_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            return False
        try:
            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
            # Publish the finished file in one step.
            os.replace(partial_path, video_path)
        except BaseException:
            # Also covers KeyboardInterrupt: drop the incomplete download.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
    return True

def get_video_duration(video_url):
    """Ask ``ffprobe`` for the container duration of ``video_url``.

    Args:
        video_url: Location handed to ``ffprobe`` as its input.

    Returns:
        float: The value ``ffprobe`` printed for ``format=duration``.

    Raises:
        subprocess.CalledProcessError: If ``ffprobe`` exits with a non-zero status.
        ValueError: If the printed output cannot be parsed as a float.
    """
    probe_command = ["ffprobe", "-v", "error"]
    probe_command += ["-show_entries", "format=duration"]
    probe_command += ["-of", "default=noprint_wrappers=1:nokey=1"]
    probe_command.append(video_url)

    raw_output = subprocess.check_output(probe_command)
    return float(raw_output.decode().strip())

def get_min_duration_video_url(videos):
    """Return the ``['mp4']['max']`` URL of the shortest video in ``videos``.

    Each entry's ``['mp4']['max']`` URL is probed with ``get_video_duration``
    and the URL with the smallest duration is returned.

    Args:
        videos: Iterable of mappings, each providing ``['mp4']['max']``.

    Returns:
        The URL whose probed duration is smallest.
    """
    # Generator keeps URL extraction and probing interleaved, entry by entry.
    urls = (entry['mp4']['max'] for entry in videos)
    duration_by_url = {url: get_video_duration(url) for url in urls}
    return min(duration_by_url, key=duration_by_url.get)

def append_fail_games(fail_game):
    """Record a failed game by appending it to ``fail_games.json``.

    Prints the first key of ``fail_game``, loads the existing list from
    ``fail_games.json`` (or starts a new one when the file is absent), appends
    ``fail_game`` and writes the list back.

    The list is written to a temporary sibling file and then moved into place
    with ``os.replace``. Rewriting ``fail_games.json`` in place would truncate
    it first, so an interrupt or serialization error during the dump would
    lose every failure recorded so far.

    Args:
        fail_game: Non-empty mapping describing the failed game; its first key
            is used in the printed message.

    Raises:
        TypeError: If ``fail_game`` is not JSON-serializable. The existing
            ``fail_games.json`` is left untouched in that case.
    """
    print(f"Failed to process {list(fail_game.keys())[0]}")

    fail_path = 'fail_games.json'
    if os.path.exists(fail_path):
        with open(fail_path, 'r') as file:
            fail_games = json.load(file)
    else:
        fail_games = []

    fail_games.append(fail_game)

    tmp_path = fail_path + '.tmp'
    try:
        with open(tmp_path, 'w') as file:
            json.dump(fail_games, file, indent=2)
        os.replace(tmp_path, fail_path)
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a half-written temp file.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

def get_item_ids():
    """Load ``../../data/item_ids.json`` (relative to the working directory).

    Returns:
        The parsed JSON content of that file.
    """
    with open('../../data/item_ids.json', 'r') as handle:
        return json.load(handle)

In [None]:
# Gather the inputs for the download loop: every detail-list file, the files
# already recorded in progress.json, and the item ids loaded from disk.
all_files = get_all_files()
progress = get_progress()
item_ids = get_item_ids()

# Only files that are not yet recorded as processed still need work.
remain_files = list(filter(lambda path: path not in progress, all_files))

In [5]:
# Process every remaining detail-list file: for each eligible game download one
# video, then record the file as done so an interrupted run can resume.
for i, file in enumerate(remain_files):
    with open(file, 'r') as f:
        data = json.load(f)

    # tqdm appends ": " to the description itself; a trailing colon here
    # rendered as "80/982::" in the progress bar.
    for game_dict in tqdm(data, desc=f'{i + 1}/{len(remain_files)}'):
        # Each entry is a single-key mapping: {game_id: {...}}.
        game_id = list(game_dict.keys())[0]
        if int(game_id) not in item_ids:
            continue

        # Skip entries whose own 'success' flag is falsy.
        if not game_dict[game_id]['success']:
            continue

        game_data = game_dict[game_id]['data']
        if game_data['type'] != 'game':
            continue

        # Truthiness check also covers a 'movies' key that is present but null.
        game_videos = game_data.get('movies', [])
        if not game_videos:
            continue

        video_path = os.path.join(video_dir, f'{game_id}.mp4')
        if len(game_videos) == 1:
            video_url = game_videos[0]['mp4']['max']
        else:
            try:
                video_url = get_min_duration_video_url(game_videos)
            except (CalledProcessError, ValueError):
                # ffprobe either exited non-zero or printed something that is
                # not a number; record the game instead of aborting the run.
                append_fail_games(game_dict)
                continue

        success = download_video(video_url, video_path)
        if not success:
            append_fail_games(game_dict)
        # Short pause between downloads.
        sleep(0.1)

    # Mark the file as done only after all of its games were handled.
    progress.append(file)
    update_progress(progress)

80/982:: 100%|██████████| 40/40 [01:39<00:00,  2.48s/it]
81/982::  48%|████▊     | 19/40 [01:46<01:28,  4.22s/it]

Failed to process 35450


81/982:: 100%|██████████| 40/40 [02:51<00:00,  4.28s/it]
82/982:: 100%|██████████| 39/39 [02:17<00:00,  3.54s/it]
83/982:: 100%|██████████| 39/39 [01:18<00:00,  2.01s/it]
84/982:: 100%|██████████| 38/38 [02:52<00:00,  4.53s/it]
85/982:: 100%|██████████| 39/39 [01:52<00:00,  2.90s/it]
86/982:: 100%|██████████| 40/40 [01:51<00:00,  2.80s/it]
87/982:: 100%|██████████| 38/38 [01:56<00:00,  3.08s/it]
88/982:: 100%|██████████| 40/40 [02:31<00:00,  3.80s/it]
89/982:: 100%|██████████| 39/39 [01:01<00:00,  1.58s/it]
90/982:: 100%|██████████| 37/37 [02:32<00:00,  4.13s/it]
91/982:: 100%|██████████| 40/40 [02:23<00:00,  3.59s/it]
92/982:: 100%|██████████| 38/38 [01:56<00:00,  3.06s/it]
93/982:: 100%|██████████| 40/40 [02:08<00:00,  3.22s/it]
94/982:: 100%|██████████| 39/39 [01:51<00:00,  2.86s/it]
95/982:: 100%|██████████| 39/39 [02:12<00:00,  3.39s/it]
96/982:: 100%|██████████| 40/40 [02:12<00:00,  3.30s/it]
97/982:: 100%|██████████| 39/39 [01:47<00:00,  2.76s/it]
98/982:: 100%|██████████| 40/40

Failed to process 218640


110/982:: 100%|██████████| 40/40 [02:10<00:00,  3.26s/it]
111/982:: 100%|██████████| 37/37 [02:16<00:00,  3.70s/it]
112/982:: 100%|██████████| 39/39 [01:16<00:00,  1.96s/it]
113/982:: 100%|██████████| 38/38 [01:10<00:00,  1.84s/it]
114/982:: 100%|██████████| 40/40 [00:59<00:00,  1.49s/it]
115/982:: 100%|██████████| 37/37 [02:46<00:00,  4.51s/it]
116/982:: 100%|██████████| 40/40 [01:25<00:00,  2.13s/it]
117/982:: 100%|██████████| 39/39 [02:41<00:00,  4.15s/it]
118/982:: 100%|██████████| 40/40 [01:39<00:00,  2.49s/it]
119/982:: 100%|██████████| 40/40 [01:50<00:00,  2.77s/it]
120/982:: 100%|██████████| 40/40 [01:43<00:00,  2.60s/it]
121/982:: 100%|██████████| 39/39 [01:40<00:00,  2.57s/it]
122/982:: 100%|██████████| 40/40 [01:13<00:00,  1.85s/it]
123/982:: 100%|██████████| 40/40 [02:30<00:00,  3.76s/it]
124/982:: 100%|██████████| 38/38 [01:58<00:00,  3.13s/it]
125/982:: 100%|██████████| 37/37 [02:03<00:00,  3.33s/it]
126/982:: 100%|██████████| 38/38 [01:36<00:00,  2.54s/it]
127/982:: 100%

Failed to process 63380


145/982:: 100%|██████████| 37/37 [01:13<00:00,  1.99s/it]
146/982::   0%|          | 0/39 [00:02<?, ?it/s]


KeyboardInterrupt: 