In [None]:
import numpy as np
import moviepy.editor as mp
from moviepy.editor import VideoFileClip

import matplotlib.pyplot as plt

In [None]:
# Length, in seconds, of each audio subclip whose volume is sampled; also used
# as the step between consecutive sample start times and to convert sample
# indices back into seconds.
DELTA = 0.01

def get_clip_sample(clip, time_idx):
    '''
    Extract the raw audio samples for one DELTA-long window of `clip`.

    The window begins `time_idx` seconds from the start of the clip and ends
    DELTA seconds later. The audio of that window is rendered with
    to_soundarray() at 44100 samples per second, so the number of rows
    depends on DELTA and that rate.

    The returned array may be 2D when the audio is stereo rather than mono.
    '''
    window_start = time_idx
    window_end = time_idx + DELTA
    audio_window = clip.audio.subclip(window_start, window_end)
    return audio_window.to_soundarray(fps=44100)

def get_average_volume(sound_arr):
    '''
    Compute the root-mean-square (RMS) level of an array of audio samples.

    The mean is taken over every element of the array, so 1D and 2D inputs
    are both reduced to a single number.
    '''
    samples = 1.0 * sound_arr  # promote to float before squaring
    squared = samples ** 2
    return np.sqrt(squared.mean())

def get_sample_volume(clip, time_idx):
    '''
    Measure the average volume of the DELTA-long window of `clip` that
    starts `time_idx` seconds in.

    The window's samples are fetched with get_clip_sample() and reduced to a
    single number with get_average_volume().
    '''
    return get_average_volume(get_clip_sample(clip, time_idx))

def get_volume_array(clip):
    '''
    Build the volume profile of `clip`.

    Start times are stepped by DELTA from 0 up to (but not including) the
    duration of the clip's audio; the average volume of the window starting
    at each of those times is collected, in order, into a list.
    '''
    volumes = []
    for start_time in np.arange(0, clip.audio.duration, DELTA):
        volumes.append(get_sample_volume(clip, start_time))
    return volumes

def get_action_peaks(volume_arr):
    '''
    Find the loudest samples in a volume profile.

    Returns, in ascending order, the list of indices whose volume is at or
    above the 99th percentile of `volume_arr`.
    '''
    threshold = np.percentile(volume_arr, 99)
    peak_indices = []
    for position, volume in enumerate(volume_arr):
        if volume >= threshold:
            peak_indices.append(position)
    return peak_indices

def get_action_intervals(volume_arr):
    '''
    Return intervals (in index) of action events

    An event is grown outwards from each peak returned by get_action_peaks()
    for as long as the neighbouring volume stays above the median of
    `volume_arr`. Each interval is a `(start_idx, end_idx)` tuple whose ends
    are the first samples at or below the median on either side of the peak,
    clamped to the valid index range `[0, len(volume_arr) - 1]`.

    Peaks that already fall inside a previously found interval do not
    produce a new one. Returns an empty list when `volume_arr` is empty.
    '''
    sample_count = len(volume_arr)
    if sample_count == 0:
        # No samples, so there are no events to report
        return []

    top_indices = get_action_peaks(volume_arr)
    median = np.percentile(volume_arr, 50)

    event_slices = []
    for top_idx in top_indices:
        # Peaks arrive in ascending order and every slice starts left of and
        # ends right of its own peak, so only the most recent slice can
        # contain this peak.
        if event_slices and top_idx <= event_slices[-1][1]:
            continue

        # Walk left while the volume is above the median; stop at the start
        # of the array instead of letting a negative index wrap around.
        left_idx = top_idx - 1
        while left_idx >= 0 and volume_arr[left_idx] > median:
            left_idx -= 1
        left_idx = max(left_idx, 0)

        # Walk right while the volume is above the median; stop at the last
        # sample instead of indexing past the end of the array.
        right_idx = top_idx + 1
        while right_idx < sample_count and volume_arr[right_idx] > median:
            right_idx += 1
        right_idx = min(right_idx, sample_count - 1)

        event_slices.append((left_idx, right_idx))
    return event_slices

def get_action_events(volume_arr):
    '''
    Locate action events in a volume profile and report them in seconds.

    Each index interval from get_action_intervals() is scaled by DELTA, giving
    a list of `(start_seconds, end_seconds)` tuples.
    '''
    events = []
    for first_idx, last_idx in get_action_intervals(volume_arr):
        events.append((first_idx * DELTA, last_idx * DELTA))
    return events

In [None]:
# Load the video, measure its audio volume over time, and plot the volume
# profile with the detected action events shaded.
with VideoFileClip("./raw.MOV") as clip:
    clip = clip.subclip(0, 30) # TODO: remove this (restricts the analysis to the 0-30 span passed to subclip)
    
    # One average-volume value per DELTA-long window, then the event
    # intervals (in seconds) derived from those values.
    volumes = get_volume_array(clip)
    events = get_action_events(volumes)
    
    
    plt.figure(figsize=(15, 4))

    # Reference lines: red marks the 99th-percentile volume (the peak
    # threshold used by get_action_peaks), green marks the median (the level
    # at which get_action_intervals stops growing an event).
    top_perc = np.percentile(volumes, 99)
    median = np.percentile(volumes, 50)
    plt.axhline(y=top_perc, color='r', linestyle='-')
    plt.axhline(y=median, color='g', linestyle='-')
    
    # Shade each detected event interval in grey.
    for start_time, end_time in events:
        plt.axvspan(start_time, end_time, alpha=0.2, color='grey')
    
    # x axis: start time of each sample window in seconds (index * DELTA).
    plt.plot([idx*DELTA for idx in range(len(volumes))], volumes)
    plt.show()