# Extract audio from video

In [None]:
from moviepy.video.io.VideoFileClip import VideoFileClip

# Open the video inside a context manager so the clip (and the reader
# resources it holds) is closed as soon as the audio has been written,
# even if writing fails part-way.
with VideoFileClip("input/PXL_20250222_123802359.mp4") as video:
    # Extract the audio; moviepy leaves this as None when the file has no
    # audio track, so fail with a clear message instead of an AttributeError.
    audio = video.audio
    if audio is None:
        raise ValueError(
            "input/PXL_20250222_123802359.mp4 has no audio track to extract"
        )

    # Write the audio to a file
    audio.write_audiofile("output/output_audio.mp3")

# Find loud segments

In [93]:
from numpy import log10
from pydub import AudioSegment
from pydub.silence import detect_nonsilent

# Decode the MP3 written by the extraction step.
audio_segment = AudioSegment.from_mp3("output/output_audio.mp3")

# Value passed as min_silence_len, in milliseconds (1000 ms = 1 second).
chunk_size = 1000

# Value passed as silence_thresh, in dBFS.
loudness_threshold = -17

# Collect the non-silent ranges; later cells read each entry as a
# (start, end) pair in milliseconds.
nonsilent_ranges = detect_nonsilent(
    audio_segment,
    min_silence_len=chunk_size,
    silence_thresh=loudness_threshold,
)

# get segments by ranges
# for start, end in nonsilent_ranges:
#     segment_audio = audio_segment[start:end]
#     max_amplitude = segment_audio.max

#     start_in_seconds = start / 1000
#     end_in_seconds = end / 1000
#     duration_in_seconds = (end - start) / 1000
#     print(f"Max amplitude in segment: {max_amplitude:.2f}")
#     print(f"Loud segment: {start_in_seconds:.2f}s to {end_in_seconds:.2f}s (duration: {duration_in_seconds:.2f}s)")






# Create JSON

In [94]:
import json

# Order the (start_ms, end_ms) ranges by start time. The sort is done in
# place so the shared list stays ordered for anything that reads it later.
nonsilent_ranges.sort(key=lambda x: x[0])


def _silence_event(start_ms, end_ms):
    """Return the timeline record for a silent gap given in milliseconds."""
    return {
        "start": start_ms / 1000,
        "end": end_ms / 1000,
        "duration": (end_ms - start_ms) / 1000,
        "max_amplitude": 0,
        "event_type": "silence",
    }


# Build a gap-free timeline: every "noise" range is preceded by a "silence"
# record whenever there is unclaimed time before it.
nonsilent_ranges_json = []
cursor_ms = 0  # end of the portion of the audio already described
for start, end in nonsilent_ranges:
    if start > cursor_ms:
        nonsilent_ranges_json.append(_silence_event(cursor_ms, start))

    nonsilent_ranges_json.append({
        "start": start / 1000,
        "end": end / 1000,
        "duration": (end - start) / 1000,
        # Peak sample value inside this range.
        "max_amplitude": audio_segment[start:end].max,
        "event_type": "noise",
    })
    cursor_ms = max(cursor_ms, end)

# Cover the tail of the recording as well (this is the whole recording when
# no non-silent range was detected). len() of a pydub AudioSegment is its
# duration in milliseconds.
total_ms = len(audio_segment)
if cursor_ms < total_ms:
    nonsilent_ranges_json.append(_silence_event(cursor_ms, total_ms))

# write the json to a file
with open("output/nonsilent_ranges.json", "w", encoding="utf-8") as f:
    json.dump(nonsilent_ranges_json, f, indent=4)

# Plot wave form

In [None]:
# imports 
import matplotlib.pyplot as plt 
import numpy as np 
  
# shows the sound waves 
def visualize(path: str, ranges=None):
    """Plot the waveform of an MP3 file and shade its non-silent ranges.

    Prints the sample rate and duration, then shows a single figure with the
    waveform over time and one red span per non-silent range.

    Args:
        path: Path of the MP3 file to load.
        ranges: Sequence of (start_ms, end_ms) pairs to highlight. Defaults
            to the module-level ``nonsilent_ranges`` so existing calls keep
            working.
    """
    if ranges is None:
        ranges = nonsilent_ranges

    # reading the audio file
    audio_segment = AudioSegment.from_mp3(path)

    # pydub returns multi-channel audio interleaved (L, R, L, R, ...);
    # keep only the first channel so each plotted point is one frame.
    # For mono audio the step is 1 and nothing is dropped.
    samples = np.asarray(audio_segment.get_array_of_samples())
    samples = samples[::audio_segment.channels]

    sample_rate = audio_segment.frame_rate
    print(f"Sample rate: {sample_rate} Hz")

    duration = audio_segment.duration_seconds
    print(f"Duration: {duration:.2f} seconds")

    # Time vector in seconds: one evenly spaced point per plotted sample,
    # spanning the whole duration of the audio.
    time = np.linspace(0, duration, num=len(samples))

    # One new, high resolution figure. (Creating a sized figure and then
    # selecting figure number 1 could draw on an older figure and leave the
    # new one empty.)
    plt.figure(figsize=(15, 9))

    # title of the plot
    plt.title("Sound Wave")

    # label of axis, with a tick every 5 seconds
    plt.xticks(np.arange(0, duration, 5))
    plt.xlabel('Time (seconds)')
    plt.ylabel('Amplitude')

    # actual plotting
    plt.plot(time, samples)

    # Shade every non-silent range; only the first span carries the legend
    # label so the legend shows a single entry.
    for index, (start, end) in enumerate(ranges):
        plt.axvspan(
            start / 1000,
            end / 1000,
            color='red',
            alpha=0.6,
            label='Nonsilent Range' if index == 0 else '_nolegend_',
        )
    if ranges:
        plt.legend()

    # shows the plot
    plt.show()

    # you can also save
    # the plot using
    # plt.savefig('filename')


visualize("output/output_audio.mp3")