# Digital Audio, Transcription, and Analysis

Digital audio is a representation of sound as a sequence of discrete numeric values. It is typically stored in a file format such as WAV, MP3, or FLAC. Digital audio is made up of a series of samples, each of which represents the amplitude of the sound wave at a particular point in time. The sampling rate of the audio determines how many samples are taken per second, and the bit depth determines the resolution of each sample.

In [None]:
# Install the third-party packages imported by the cells in this notebook.
%pip install librosa matplotlib numpy openai openai-whisper python-dotenv IPython

In [None]:
from openai import OpenAI
import os
import dotenv

# Let python-dotenv populate the process environment before the key is read.
dotenv.load_dotenv()

# Build the API client from the OPENAI_API_KEY environment variable.
openai = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

In [None]:
# Audio file (MP3) that the later cells visualise and transcribe.
audio_file = 'chatgpt_audio_podcast.mp3'

In [None]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Audio

def visualise_audio(file_path: str) -> Audio:
    """
    Plot the waveform of an audio file and return a player for it.

    Parameters:
    file_path (str): Path to the audio file (e.g. an MP3)

    Returns:
    Audio: IPython audio object built from the loaded samples
    """
    # Decode the file into a sample array and its sample rate
    samples, sample_rate = librosa.load(file_path)

    # Draw the waveform on a single 16x9 axes
    _, axes = plt.subplots(figsize=(16, 9))
    librosa.display.waveshow(samples, sr=sample_rate, ax=axes)

    # Labels are applied after waveshow so they take precedence
    axes.set_title('Waveform')
    axes.set_xlabel('Time (s)')
    axes.set_ylabel('Amplitude')

    plt.show()

    # Hand back a player for the same samples
    return Audio(samples, rate=sample_rate)

# Example usage: plots the waveform; the returned Audio object is the cell's
# last expression, so the notebook can render it as a player.
visualise_audio(audio_file)

In [None]:
import whisper

# Load the "base" Whisper model.
model = whisper.load_model("base")

# Transcribe the audio file; the result is read by key in the next cell
# (result['segments']).
result = model.transcribe(audio_file)
# Bare expression on the cell's last line so the notebook shows the raw result.
result

In [None]:
# Print every transcribed segment prefixed with its start time in seconds.
for seg in result['segments']:
    print(f"{seg['start']}s {seg['text']}")