In [1]:
from os import path
import glob
import re
import matplotlib.pyplot as plt
import subprocess

In [2]:
folder = 'test_video_2'
files = glob.glob(f'files/{folder}/*')
files.sort()
files

['files/test_video_2/1674660535410-2428b340-9dc1-40e7-831c-0703795b3307-cam-audio-1674660538423.webm',
 'files/test_video_2/1674660535410-2428b340-9dc1-40e7-831c-0703795b3307-cam-video-1674660538745-merged.mp4',
 'files/test_video_2/1674660535410-2428b340-9dc1-40e7-831c-0703795b3307-cam-video-1674660538745.mp4',
 'files/test_video_2/1674660535410-2428b340-9dc1-40e7-831c-0703795b3307-cam-video-1674660538745.webm',
 'files/test_video_2/1674660535410-a7b0bec8-a1fd-4f9a-a922-e20bd56a539b-cam-audio-1674660537992.webm',
 'files/test_video_2/1674660535410-a7b0bec8-a1fd-4f9a-a922-e20bd56a539b-cam-video-1674660537995-merged.mp4',
 'files/test_video_2/1674660535410-a7b0bec8-a1fd-4f9a-a922-e20bd56a539b-cam-video-1674660537995.mp4',
 'files/test_video_2/1674660535410-a7b0bec8-a1fd-4f9a-a922-e20bd56a539b-cam-video-1674660537995.webm',
 'files/test_video_2/1674660535410-ae4d075b-39b6-48ec-a7c0-868d6fd06afe-cam-audio-1674660543681.webm',
 'files/test_video_2/1674660535410-ae4d075b-39b6-48ec-a7c0-868d

In [3]:
import subprocess

group_data = {}
for filepath in files:
    if ".webm" not in filepath:
        continue
    basename = path.basename(filepath)
    participant_id = "-".join(basename.split("-")[1:-3])
    track_type = basename.split("-")[-2]
    track_start = int(basename.split("-")[-1].split(".")[0])
    group_start = int(basename.split("-")[0])

    if participant_id not in group_data:
        group_data[participant_id] = {
        "participant_id": participant_id,
        "group_start": int(basename.split("-")[0]),
        "dirname": path.dirname(filepath),
    }
        
    result = subprocess.run(['ffprobe', filepath], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    probe = result.stderr.splitlines()
    # get start offset
    line = [line  for line in probe if "start: " in line][0]
    print(line)
    start = re.search(r'start: (\d+\.\d+)', line).group(1)
    group_data[participant_id][track_type+"_track_start"] = start

    if track_type == "video":
        # get resolution
        line = [line  for line in probe if "Stream " in line][0]
        resolution = re.search(r'\d+x\d+', line).group(0)
        group_data[participant_id][track_type+"_resolution"] = resolution


    group_data[participant_id][track_type] = filepath
    group_data[participant_id][track_type+"_start"] = track_start
    group_data[participant_id][track_type+"_offset"] = track_start - group_start


group_data

  Duration: N/A, start: 2.871000, bitrate: N/A
  Duration: N/A, start: 4.170000, bitrate: N/A
  Duration: N/A, start: 2.445000, bitrate: N/A
  Duration: N/A, start: 2.982000, bitrate: N/A
  Duration: N/A, start: 8.123000, bitrate: N/A
  Duration: N/A, start: 9.589000, bitrate: N/A


{'2428b340-9dc1-40e7-831c-0703795b3307': {'participant_id': '2428b340-9dc1-40e7-831c-0703795b3307',
  'group_start': 1674660535410,
  'dirname': 'files/test_video_2',
  'audio_track_start': '2.871000',
  'audio': 'files/test_video_2/1674660535410-2428b340-9dc1-40e7-831c-0703795b3307-cam-audio-1674660538423.webm',
  'audio_start': 1674660538423,
  'audio_offset': 3013,
  'video_track_start': '4.170000',
  'video_resolution': '1280x720',
  'video': 'files/test_video_2/1674660535410-2428b340-9dc1-40e7-831c-0703795b3307-cam-video-1674660538745.webm',
  'video_start': 1674660538745,
  'video_offset': 3335},
 'a7b0bec8-a1fd-4f9a-a922-e20bd56a539b': {'participant_id': 'a7b0bec8-a1fd-4f9a-a922-e20bd56a539b',
  'group_start': 1674660535410,
  'dirname': 'files/test_video_2',
  'audio_track_start': '2.445000',
  'audio': 'files/test_video_2/1674660535410-a7b0bec8-a1fd-4f9a-a922-e20bd56a539b-cam-audio-1674660537992.webm',
  'audio_start': 1674660537992,
  'audio_offset': 2582,
  'video_track_star

# FFMPEG notes:


## Commands
- commands generally list inputs with `-i filename` then have filters, then have and output.
- Flags for inputs (or outputs) go in front of the input (output) they apply to, and if you have multiple inputs you can apply the flag to then individually

## Filters
- ffmpeg filters are made of chains of different filters. Different chains are separated by a semicolon, filters within a chain by a comma
- filter consists of selectors (e.g.`[0:a][1:a]` etc.) then filter commands, then outputs []

### Audio

#### To split a stereo to mono:
```
ffmpeg -i stereo.wav -filter_complex "[0:a]channelsplit=channel_layout=stereo:channels=FR[right]" -map "[right]" front_right.wav
```

#### To merge 6 mono tracks to one multichannel output
```
ffmpeg -i front_left.wav -i front_right.wav -i front_center.wav -i lfe.wav -i back_left.wav -i back_right.wav \
-filter_complex "[0:a][1:a][2:a][3:a][4:a][5:a]join=inputs=6:channel_layout=5.1[a]" -map "[a]" output.wav
```

#### combine all audio tracks into one audio track
- use the amerge filter to take audio streams from different files and add them as separate streams to the output
```
[0:a][1:a]amerge=inputs=2,
```
the output of this can be fed to a single stereo stream:
```
pan=mono|c0<c0+c1+c2+c3|
```

### Video

#### Stack videos:
https://ffmpeg.org/ffmpeg-filters.html#xstack-1

# Reframe each video independently
The problem was that when one webm file has problems, all the rest get stalled out too. a workaround is to resample all the videos at a constant frame rate prior to stitching them together. This is annoying, but kinda works?

In [4]:
for filepath in files:
    if filepath.endswith(".mp4") or "audio" in filepath:
        continue
    outfilePath = filepath.replace(".webm", ".mp4")
    print(filepath, outfilePath)
    command = f'ffmpeg -i "{filepath}" -r 24 "{outfilePath}" -y'
    subprocess.run(command, shell=True, check=True)

files/test_video_2/1674660535410-2428b340-9dc1-40e7-831c-0703795b3307-cam-video-1674660538745.webm files/test_video_2/1674660535410-2428b340-9dc1-40e7-831c-0703795b3307-cam-video-1674660538745.mp4


ffmpeg version 4.2.2 Copyright (c) 2000-2019 the FFmpeg developers
  built with clang version 4.0.1 (tags/RELEASE_401/final)
  configuration: --prefix=/opt/concourse/worker/volumes/live/d5b9ea1c-8223-4ff6-7416-83e6b4cd6874/volume/ffmpeg_1587154914508/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehol --cc=x86_64-apple-darwin13.4.0-clang --disable-doc --enable-avresample --enable-gmp --enable-hardcoded-tables --enable-libfreetype --enable-libvpx --enable-pthreads --enable-libopus --enable-postproc --enable-pic --enable-pthreads --enable-shared --enable-static --enable-version3 --enable-zlib --enable-libmp3lame --disable-nonfree --enable-gpl --enable-gnutls --disable-openssl --enable-libopenh264 --enable-libx264
  libavutil      56. 31.100 / 56. 31.100
  libavcodec     58. 54.100 / 58. 54.100
  libavformat    58. 29.100 / 58. 29.100
  libavdevice    58.  8.100 / 58.  8.100
  libavfilt

files/test_video_2/1674660535410-a7b0bec8-a1fd-4f9a-a922-e20bd56a539b-cam-video-1674660537995.webm files/test_video_2/1674660535410-a7b0bec8-a1fd-4f9a-a922-e20bd56a539b-cam-video-1674660537995.mp4


More than 1000 frames duplicated    6144kB time=00:00:27.25 bitrate=1847.0kbits/s dup=87 drop=86 speed=2.04x    
frame=12600 fps=108 q=-1.0 Lsize=   54140kB time=00:08:44.87 bitrate= 845.0kbits/s dup=6913 drop=1020 speed= 4.5x    
video:53993kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.272304%
[libx264 @ 0x7f8e0501e800] frame I:55    Avg QP:15.12  size: 80335
[libx264 @ 0x7f8e0501e800] frame P:3333  Avg QP:19.07  size:  9665
[libx264 @ 0x7f8e0501e800] frame B:9212  Avg QP:26.98  size:  2025
[libx264 @ 0x7f8e0501e800] consecutive B-frames:  0.7%  3.2%  7.1% 89.0%
[libx264 @ 0x7f8e0501e800] mb I  I16..4: 14.9% 53.1% 32.1%
[libx264 @ 0x7f8e0501e800] mb P  I16..4:  2.4%  4.4%  0.8%  P16..4: 18.6%  6.8%  2.6%  0.0%  0.0%    skip:64.4%
[libx264 @ 0x7f8e0501e800] mb B  I16..4:  0.3%  0.4%  0.0%  B16..8: 17.0%  1.2%  0.2%  direct: 1.2%  skip:79.7%  L0:50.5% L1:46.2% BI: 3.4%
[libx264 @ 0x7f8e0501e800] 8x8 transform intra:57.0% inter:63.1%
[libx264 @ 0x7f8e0

files/test_video_2/1674660535410-ae4d075b-39b6-48ec-a7c0-868d6fd06afe-cam-video-1674660544002.webm files/test_video_2/1674660535410-ae4d075b-39b6-48ec-a7c0-868d6fd06afe-cam-video-1674660544002.mp4


frame=13683 fps= 88 q=-1.0 Lsize=   94220kB time=00:09:30.00 bitrate=1354.1kbits/s dup=375 drop=3106 speed=3.65x    
video:94066kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.164353%
[libx264 @ 0x7ff681019600] frame I:56    Avg QP:15.99  size: 99017
[libx264 @ 0x7ff681019600] frame P:3931  Avg QP:19.95  size: 14734
[libx264 @ 0x7ff681019600] frame B:9696  Avg QP:27.36  size:  3389
[libx264 @ 0x7ff681019600] consecutive B-frames:  0.7%  7.0% 22.8% 69.5%
[libx264 @ 0x7ff681019600] mb I  I16..4: 12.5% 59.8% 27.6%
[libx264 @ 0x7ff681019600] mb P  I16..4:  1.3%  3.7%  0.6%  P16..4: 27.9%  7.8%  3.5%  0.0%  0.0%    skip:55.0%
[libx264 @ 0x7ff681019600] mb B  I16..4:  0.2%  0.4%  0.0%  B16..8: 26.9%  1.9%  0.3%  direct: 1.0%  skip:69.3%  L0:55.5% L1:38.9% BI: 5.7%
[libx264 @ 0x7ff681019600] 8x8 transform intra:64.4% inter:54.3%
[libx264 @ 0x7ff681019600] coded y,uvDC,uvAC intra: 55.2% 50.7% 20.5% inter: 11.1% 2.8% 0.1%
[libx264 @ 0x7ff681019600] i16 v,h,dc,p

# Mary's finalized code

First, fill in silence based on the audio track's defined start time. `ffmpeg` command should look something like this

```
ffmpeg \
-i "files/set_two/1677511696683-67eedcee-98bf-4f05-90f7-9e8ed2ce58cb-cam-audio-1677511704290.webm" \
-i "files/set_two/1677511696683-fc210681-bd73-4bfc-ac8d-1c5c606a9bd0-cam-audio-1677511697662.webm" \
-filter_complex "[0:a]adelay=7659|7659[a0]; [1:a]adelay=946|946[a1]; [a0][a1]amerge=inputs=2[a]" -ac 2 -map "[a]" set_two_output_audio.webm
```

In [11]:
import subprocess
# Start building the ffmpeg command
ffmpeg_cmd = ["ffmpeg"]
filter_complex = []
input_count = 0

# Loop through each item in group_data to configure inputs and filters
for participant, data in group_data.items():
    audio_path = data['audio']
    # Convert audio_track_start to milliseconds and format it as integer
    delay_ms = int(float(data['audio_track_start']) * 1000)
    ffmpeg_cmd += ["-i", audio_path]
    filter_complex.append(f"[{input_count}:a]adelay={delay_ms}|{delay_ms}[a{input_count}]")
    input_count += 1

# Merge the audio inputs
filter_complex_str = '; '.join(filter_complex) + f"; {''.join(f'[a{i}]' for i in range(input_count))}amerge=inputs={input_count}[a]"
ffmpeg_cmd += ["-filter_complex", filter_complex_str, "-ac", "2", "-map", "[a]", "output_audio.webm"]

ffmpeg_cmd
# Execute the command
subprocess.run(ffmpeg_cmd)


['ffmpeg',
 '-i',
 'files/test_video_2/1674660535410-2428b340-9dc1-40e7-831c-0703795b3307-cam-audio-1674660538423.webm',
 '-i',
 'files/test_video_2/1674660535410-a7b0bec8-a1fd-4f9a-a922-e20bd56a539b-cam-audio-1674660537992.webm',
 '-i',
 'files/test_video_2/1674660535410-ae4d075b-39b6-48ec-a7c0-868d6fd06afe-cam-audio-1674660543681.webm',
 '-filter_complex',
 '[0:a]adelay=2871|2871[a0]; [1:a]adelay=2445|2445[a1]; [2:a]adelay=8122|8122[a2]; [a0][a1][a2]amerge=inputs=3[a]',
 '-ac',
 '2',
 '-map',
 '[a]',
 'output_audio.webm']

Next, take the merged audio and overlap it on top of the videos which are horizontally stacked. Should have an input that looks something like this

The resampled mp4 files already account for video offests.
```
ffmpeg \
-i "files/set_two/set_two_ffmpeg_resampled_person1.mp4" \
-i "files/set_two/set_two_ffmpeg_resampled_person2.mp4" \
-i "set_two_output_audio.webm" \
-filter_complex "[0:v]scale=1280:720[v0];[1:v]scale=1280:720[v1];[v0][v1]hstack=inputs=2[v]" \
-map "[v]" -map 2:a \
files/set_two/set_two_combined_sepaudio.mp4
```

In [12]:
audio_inputs = [f'-itsoffset "{ group_data[i]["audio_track_start"] }" -i {group_data[i]["audio"]}' for i in group_data]
video_inputs = [f'-itsoffset "{ group_data[i]["video_track_start"] }" -i {group_data[i]["video"].replace(".webm", ".mp4")}' for i in group_data]
file_names = [group_data[i]["video"].replace(".webm", "").split('/')[-1] for i in group_data]

In [13]:
file_names

['1674660535410-2428b340-9dc1-40e7-831c-0703795b3307-cam-video-1674660538745',
 '1674660535410-a7b0bec8-a1fd-4f9a-a922-e20bd56a539b-cam-video-1674660537995',
 '1674660535410-ae4d075b-39b6-48ec-a7c0-868d6fd06afe-cam-video-1674660544002']

In [15]:
# merges each person once
for i in range(len(video_inputs)):
    cmd = f'ffmpeg {audio_inputs[i]} {video_inputs[i]} -filter_complex "[1:v]scale=1280:720[v]" -map "0:a" -map "[v]" files/{folder}/{file_names[i]}-merged.mp4'
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)

In [4]:
audio_file = 'output_audio.webm'

# Start building the ffmpeg command
ffmpeg_cmd = ["ffmpeg"]
filter_complex = []
input_count = 0

# Add video inputs to the ffmpeg command
for participant, data in group_data.items():
    video_path = data['video'].replace(".webm", "-merged.mp4")
    ffmpeg_cmd += ["-i", video_path]
    filter_complex.append(f"[{input_count}:v]scale=1280:720[v{input_count}]")
    input_count += 1

# Add audio input to the ffmpeg command
ffmpeg_cmd += ["-i", audio_file]

# Construct the horizontal stacking part of the filter
hstack_str = f"{''.join(f'[v{i}]' for i in range(input_count))}hstack=inputs={input_count}[v]"

# Combine video and audio
filter_complex.append(hstack_str)
filter_complex_str = ';'.join(filter_complex)

# Finish building the ffmpeg command
ffmpeg_cmd += ["-filter_complex", filter_complex_str, "-map", "[v]", "-map", f"{input_count}:a", f"files/{folder}/output_video_5122024_newtry.mp4"]
ffmpeg_cmd
# subprocess.run(ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)

['ffmpeg',
 '-i',
 'files/test_video_2/1674660535410-2428b340-9dc1-40e7-831c-0703795b3307-cam-video-1674660538745-merged.mp4',
 '-i',
 'files/test_video_2/1674660535410-a7b0bec8-a1fd-4f9a-a922-e20bd56a539b-cam-video-1674660537995-merged.mp4',
 '-i',
 'files/test_video_2/1674660535410-ae4d075b-39b6-48ec-a7c0-868d6fd06afe-cam-video-1674660544002-merged.mp4',
 '-i',
 'output_audio.webm',
 '-filter_complex',
 '[0:v]scale=1280:720[v0];[1:v]scale=1280:720[v1];[2:v]scale=1280:720[v2];[v0][v1][v2]hstack=inputs=3[v]',
 '-map',
 '[v]',
 '-map',
 '3:a',
 'files/test_video_2/output_video_5122024_newtry.mp4']

In [None]:
ffmpeg \
-i "files/set_two/set_two_ffmpeg_resampled_person1.mp4" \
-i "files/set_two/set_two_ffmpeg_resampled_person2.mp4" \
-i "set_two_output_audio.webm" \
-filter_complex "[0:v]scale=1280:720[v0];[1:v]scale=1280:720[v1];[v0][v1]hstack=inputs=2[v]" \
-map "[v]" -map 2:a \
files/set_two/set_two_combined_sepaudio.mp4