In [1]:
# Imports
import ffmpeg
import os
import subprocess
import re
import pandas as pd
import numpy as np

In [2]:
# Configuration: file names
source = "TestFile.mp3"
output = "OutFile.mp3"
check = "TestFile2.mp3"

# Configuration: encoding options used by the ffmpeg command templates
spd = 1.5      # tempo factor of the output
vol = 1.0      # volume multiplier
bit = 16       # audio bitrate, in k (16 -> "16k")
frq = 24000    # audio sampling frequency

# Target length of one segment, in seconds
split_interval = 60

# Bounds used when merging short segments, relative to the target length
min_duration = split_interval * 0.8
max_duration = split_interval * 2.5

In [3]:
# Working aliases and ffmpeg command templates

# Short aliases for the input and output file names
src = source
out = output

# Silence detection: silencedetect filter with n=-30dB and d=0.25, output discarded
split_cmd = f"ffmpeg -i {src} -af silencedetect=n=-30dB:d=0.25 -f null - "
# Stage 1: apply the tempo and volume settings
stg1 = f'ffmpeg -i {src} -filter:a "atempo={spd},volume={vol}" -q:a 100 {out}'
# Stage 2: single channel with the configured bitrate and sampling frequency
stg2 = f"ffmpeg -i {src} -ac 1 -b:a {bit}k -ar {frq} -write_xing 0 {output}"

**Finding silence and intervals, from this post**
https://stackoverflow.com/questions/14590279/error-nameerror-name-subprocess-is-not-defined

In [4]:
# Run the silence-detection command and capture everything it prints.
# The command is passed as an argument list: a plain string without
# shell=True is only accepted on Windows; on POSIX it is taken as the
# program name and the call fails. str.split() is enough here because the
# command string contains no quoted arguments (the file name has no spaces).
# A dedicated name is used so that `out` keeps holding the output file name.
silence_proc = subprocess.Popen(split_cmd.split(),
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
# stderr is redirected into stdout above, so `stderr` is None here
stdout, stderr = silence_proc.communicate()
stdout

b"ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\r\n  built with gcc 10.2.1 (GCC) 20200726\r\n  configuration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-

**Finding silence values using RegeX**


In [8]:
# Decode the captured bytes instead of parsing their repr
str_out = stdout.decode("utf-8", errors="replace")

# One printed number: optional sign, optional fraction, optional exponent.
# The previous pattern r'\d+.\d+' (unescaped dot, mandatory fraction) skipped
# whole-second values such as "silence_start: 14", which shifted every later
# row produced by the zip below.
num_pattern = r'(-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)'

# Finding all values and converting them to float in one step
s_start = map(float, re.findall(r'silence_start: ' + num_pattern, str_out))
s_end = map(float, re.findall(r'silence_end: ' + num_pattern, str_out))
s_duration = map(float,
                 re.findall(r'silence_duration: ' + num_pattern, str_out))

# Zipping all together: one (start, end, duration) tuple per detected silence.
# zip stops at the shortest list, so a silence without a reported end is dropped.
results = list(zip(s_start, s_end, s_duration))
results

[(14.1607, 14.5508, 0.390113),
 (14.5509, 14.8773, 0.32644),
 (53.0442, 53.3589, 0.314671),
 (67.8339, 68.1681, 0.334218),
 (114.954, 115.233, 0.279206),
 (131.051, 131.385, 0.333991),
 (143.995, 144.25, 0.255125),
 (168.528, 168.838, 0.30966),
 (174.488, 174.809, 0.321088),
 (186.484, 186.759, 0.274921),
 (246.877, 247.2, 0.322494),
 (354.82, 355.123, 0.302766),
 (381.662, 382.15, 0.487982),
 (387.647, 387.954, 0.307256),
 (391.426, 391.968, 0.541338),
 (411.457, 411.745, 0.287506),
 (413.169, 413.475, 0.305624),
 (414.698, 415.037, 0.338889),
 (415.894, 416.155, 0.260975),
 (417.332, 417.977, 0.644603),
 (421.379, 421.786, 0.406893),
 (424.438, 424.878, 0.439841),
 (439.395, 439.699, 0.304059)]

## Splitting
The file `TestFile.mp3` duration is 7:21. (441 seconds)

Consider splitting the file every 60 seconds, 

i.e. @60, 120, 180, 240, 300, 360, 420 seconds

If the remaining portion is shorter than 30 seconds, it should be merged into the preceding portion.

***We need to import pandas to deal with the table faster***

In [9]:
# One row per detected silence, one column per measured value
silence_columns = ['silence_start', 'silence_end', 'silence_duration']
results_df = pd.DataFrame(results, columns=silence_columns)
results_df.head(3)

Unnamed: 0,silence_start,silence_end,silence_duration
0,14.1607,14.5508,0.390113
1,14.5509,14.8773,0.32644
2,53.0442,53.3589,0.314671


We need to split at the middle of the silence period,
 i.e., @ `silence_start + silence_duration / 2`

In [10]:
# Midpoint of every silence: its start plus half of its duration
results_df['silence_mid'] = (results_df['silence_start']
                             + results_df['silence_duration'] / 2)

results_df.head(3)

Unnamed: 0,silence_start,silence_end,silence_duration,silence_mid
0,14.1607,14.5508,0.390113,14.355757
1,14.5509,14.8773,0.32644,14.71412
2,53.0442,53.3589,0.314671,53.201535


Getting the file length
From this post:
    https://stackoverflow.com/a/64582429/5820024

In [11]:
# Duration of the source file, read from the 'format' section of the probe result
probe_info = ffmpeg.probe(src)
end_time = float(probe_info['format']['duration'])
end_time

440.502857

In [12]:
# Planned split times: every `split_interval`, stopping before the
# (truncated) end of the file
last_whole_second = int(end_time)
split_arr = np.arange(split_interval, last_whole_second, split_interval)
split_arr

array([ 60, 120, 180, 240, 300, 360, 420])

getting an array of `silence_mid`

In [13]:
# Silence midpoints as a plain NumPy array
silence_mid = results_df['silence_mid'].to_numpy()
silence_mid

array([ 14.3557565,  14.71412  ,  53.2015355,  68.001009 , 115.093603 ,
       131.2179955, 144.1225625, 168.68283  , 174.648544 , 186.6214605,
       247.038247 , 354.971383 , 381.905991 , 387.800628 , 391.696669 ,
       411.600753 , 413.321812 , 414.8674445, 416.0244875, 417.6543015,
       421.5824465, 424.6579205, 439.5470295])

In [14]:
# adding a function to round values of a list
def round_list_values(the_list, rounding=3):
    """Round every value of a flat sequence and return the result as a list.

    Parameters
    ----------
    the_list : sequence of numbers or 1-D array
        Values to round.
    rounding : int, default 3
        Number of decimal places passed to ``np.round``.

    Returns
    -------
    list
        The rounded values as built-in Python numbers (not NumPy scalars), so
        the list prints as ``[53.202, ...]`` regardless of the NumPy version.
    """
    return np.round(the_list, rounding).tolist()

Getting the nearest neighbor from the `silence_mid` to the `split_arr`

We call this a `success_array`

In [17]:
def get_success_array(silence_middle, split_points=None):
    """Choose, for each planned split time, a nearby silence midpoint to cut at.

    The silences are scanned in order. When the first silence at or after the
    current split time is found, it is compared with the silence just before
    it: the earlier one is chosen when it is strictly closer to the split time
    and has not been chosen already, otherwise the later one is chosen. The
    scan then moves on to the next split time (one split time per silence).

    Parameters
    ----------
    silence_middle : sequence of float
        Midpoints of the detected silences, in ascending order.
    split_points : sequence of numbers, optional
        Planned split times. Defaults to the notebook-level ``split_arr``.

    Returns
    -------
    list
        The chosen cut times, rounded with ``round_list_values``. Empty when
        there are no split times or no silence reaches the first split time.
    """
    if split_points is None:
        # Backward-compatible default: the array built earlier in the notebook
        split_points = split_arr
    if len(split_points) == 0:
        # Nothing to split at; avoids indexing an empty array below
        return []

    i = 0
    success_array = []
    for j, silence_time in enumerate(silence_middle):
        target = split_points[i]
        if silence_time < target:
            continue
        if j == 0:
            # There is no earlier silence to compare with; index -1 would
            # wrap around and pick the LAST silence of the file instead.
            chosen = silence_time
        else:
            # compare which item is closer to our suggested split time
            prev_silence = silence_middle[j - 1]
            prev_is_closer = silence_time - target > target - prev_silence
            if prev_is_closer and prev_silence not in success_array:
                chosen = prev_silence
            else:
                chosen = silence_time
        success_array.append(chosen)
        # move to the next time in the split points, or stop after the last one
        if i < len(split_points) - 1:
            i += 1
        else:
            break
    return round_list_values(success_array)


# Try it on the detected silence midpoints
success_array = get_success_array(silence_middle=silence_mid)
success_array

[53.202, 115.094, 174.649, 247.038, 354.971, 381.906, 421.582]

Getting the difference between each two intervals, to get the lengths of split files
1. add zero before the array
1. add the length of the mp3 file after the array

In [18]:
# Segment boundaries: start of file, the chosen cut times, end of file
dur_array = [0, *success_array, end_time]
dur_array

[0, 53.202, 115.094, 174.649, 247.038, 354.971, 381.906, 421.582, 440.502857]

In [20]:
# Length of each segment = gap between consecutive boundaries
boundaries = np.array(dur_array)
difference_array = round_list_values(np.diff(boundaries))
difference_array

[53.202, 61.892, 59.555, 72.389, 107.933, 26.935, 39.676, 18.921]

### Merging small parts
A part is considered small if it is shorter than `x` seconds (`min_duration` in the code).

A small part should be merged into the next part, provided that the sum does not exceed `y` seconds (`max_duration` in the code).

In [21]:
def merge_small_durations(diff_arr, min_dur, max_dur, fwd=True):
    """Merge each too-short duration into the element that follows it.

    A duration is "short" when it is positive and below ``min_dur``. A short
    duration is merged with its right-hand neighbour only if their sum stays
    below ``max_dur``; the pair is then replaced by the sum and a ``0``
    placeholder, so the result always has the same length as the input.
    Durations that cannot be merged (no neighbour, or the sum would reach
    ``max_dur``) are kept unchanged. Previously such a duration was dropped
    when a neighbour existed, which shortened the result.

    Parameters
    ----------
    diff_arr : sequence of numbers
        Durations, in order.
    min_dur : number
        Durations strictly between 0 and this value are candidates for merging.
    max_dur : number
        A merge happens only if the combined duration is below this value.
    fwd : bool, default True
        Position of the placeholder: ``True`` emits ``[0, merged]``,
        ``False`` emits ``[merged, 0]``.

    Returns
    -------
    list
        The merged durations, rounded with ``round_list_values``.
    """
    dur_arr_mod = []
    skip_next = False  # set after a merge: the neighbour is already consumed
    for i, dur in enumerate(diff_arr):
        if skip_next:
            skip_next = False
            continue
        is_short = 0 < dur < min_dur
        has_next = i + 1 < len(diff_arr)
        if is_short and has_next and dur + diff_arr[i + 1] < max_dur:
            merged = dur + diff_arr[i + 1]
            dur_arr_mod.extend([0, merged] if fwd else [merged, 0])
            skip_next = True
        else:
            # long enough, a zero placeholder, or not mergeable: keep as is
            dur_arr_mod.append(dur)
    return round_list_values(dur_arr_mod)

In [22]:
# First pass, left to right
fwd_arr = merge_small_durations(difference_array, min_duration, max_duration)

# Second pass on the reversed result, then flipped back to the original order
reversed_fwd = fwd_arr[::-1]
bwd_reversed = merge_small_durations(reversed_fwd,
                                     min_duration,
                                     max_duration,
                                     fwd=False)
bwd_arr = bwd_reversed[::-1]

# Show every intermediate array together with its length
for ary in (success_array, dur_array, difference_array, fwd_arr, bwd_arr):
    print(len(ary), ary)


7 [53.202, 115.094, 174.649, 247.038, 354.971, 381.906, 421.582]
9 [0, 53.202, 115.094, 174.649, 247.038, 354.971, 381.906, 421.582, 440.502857]
8 [53.202, 61.892, 59.555, 72.389, 107.933, 26.935, 39.676, 18.921]
8 [53.202, 61.892, 59.555, 72.389, 107.933, 0.0, 66.611, 18.921]
8 [53.202, 61.892, 59.555, 72.389, 107.933, 0.0, 0.0, 85.532]


In [23]:
# Side by side: segment end time, original length, length after merging
segment_ends = dur_array[1:]
list(zip(segment_ends, difference_array, bwd_arr))

[(53.202, 53.202, 53.202),
 (115.094, 61.892, 61.892),
 (174.649, 59.555, 59.555),
 (247.038, 72.389, 72.389),
 (354.971, 107.933, 107.933),
 (381.906, 26.935, 0.0),
 (421.582, 39.676, 0.0),
 (440.502857, 18.921, 85.532)]

**Calculating final stops after merge**
1. take the elements that correspond to non-zero durations
2. remove the last element (end of file timestamp)

In [24]:
# Step 1: keep each boundary whose merged duration is non-zero
kept_stops = [
    dur_array[k + 1]
    for k in range(len(dur_array) - 1)
    if bwd_arr[k] != 0
]
# Step 2: drop the last one, which is the end-of-file timestamp
final_stops = kept_stops[:-1]
final_stops

[53.202, 115.094, 174.649, 247.038, 354.971]

**The final durations**

In [25]:
# Segment lengths implied by the final stops, from 0 up to the end of the file
final_bounds = np.array([0, *final_stops, end_time])
final_durations = round_list_values(np.diff(final_bounds))
final_durations

[53.202, 61.892, 59.555, 72.389, 107.933, 85.532]

## Executing The splitting process
I found the **ffmpeg code** in this post:<br>
https://unix.stackexchange.com/a/545955/406518<br>
and found **how to execute ffmpeg commands via python** in this post:<br>
https://stackoverflow.com/a/42438502/5820024

In [52]:
# Segment-muxer command, following the pattern from the linked post:
#   ffmpeg -i input.mp3 -vn -c copy -f segment -segment_times 0,8,22 output%d.mp3
# First attempt: the split times are typed in by hand as whole seconds.
slice_cmd = f"ffmpeg -i {src} -vn -c copy -f segment -segment_times 53,115,174,247,354 Outputs/output%d.mp3"


In [53]:
# ffmpeg does not create a missing output folder, so make sure it exists
# before the first run; otherwise the command fails on a fresh checkout.
os.makedirs("Outputs", exist_ok=True)
os.system(slice_cmd)

0

**SUCCESS!**

Trying to put the list values automatically

In [48]:
# Same command, now built from `final_stops`; note the ", " separator
out_times = ", ".join(map(str, final_stops))
slice_cmd = (f"ffmpeg -i {src} -vn -c copy -f segment -segment_times "
             f"{out_times} Outputs/output%d.mp3")
slice_cmd

'ffmpeg -i TestFile.mp3 -vn -c copy -f segment -segment_times 53.202, 115.094, 174.649, 247.038, 354.971 Outputs/output%d.mp3'

In [51]:
# Run the variant whose times are separated by ", "; the cell shows the
# value returned by os.system (0 was the result of the successful run above)
os.system(slice_cmd)

1

In [55]:
# Cross-check: hand-typed decimal times, separated by commas without spaces
slice_cmd = f"ffmpeg -i {src} -vn -c copy -f segment -segment_times 53.2,115.1,174.6,247.0,354.9 Outputs/output%d.mp3"
os.system(slice_cmd)

0

**I realized that the times should not be separated by spaces**

In [59]:
# Final version: the stops are joined with bare commas, no spaces
out_times = ",".join(map(str, final_stops))
slice_cmd = (f"ffmpeg -i {src} -vn -c copy -f segment -segment_times "
             f"{out_times} Outputs/output%d.mp3")
os.system(slice_cmd)

0