# **Installation of Libraries**
The following libraries are installed for use in the audio intervention of the project:
1. AssemblyAI - used to transcribe the audio files
2. Pydub - used to split the audio files into smaller segments

In [None]:
pip install assemblyai

Collecting assemblyai
  Downloading assemblyai-0.31.0-py3-none-any.whl.metadata (27 kB)
Collecting httpx>=0.19.0 (from assemblyai)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting websockets>=11.0 (from assemblyai)
  Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting httpcore==1.* (from httpx>=0.19.0->assemblyai)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx>=0.19.0->assemblyai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading assemblyai-0.31.0-py3-none-any.whl (71 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.9/71.9 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownlo

In [None]:
pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


# **Splitting the audio**


Splitting the audio of "Our Planet - Frozen Worlds" by David Attenborough into 15-second segments

In [None]:
import os
from pydub import AudioSegment
import math

def split_audio(audio_file, segment_length_ms=15 * 1000):
    """Split an audio file into consecutive fixed-length MP3 segments.

    The segments are exported as ``Part1.mp3``, ``Part2.mp3``, ... into a
    folder named after the audio file (without its extension), located in
    the same directory as the audio file. The last segment holds whatever
    audio remains and may therefore be shorter than the others.

    Args:
        audio_file: Path of the audio file to split.
        segment_length_ms: Length of each segment in milliseconds.
            Defaults to 15 seconds.

    Returns:
        A list with the paths of the exported segments, in playback order.

    Raises:
        ValueError: If ``segment_length_ms`` is not positive.
    """
    # Checked first: zero would divide by zero below, and a negative length
    # would silently produce no segments at all.
    if segment_length_ms <= 0:
        raise ValueError("segment_length_ms must be a positive number of milliseconds")

    audio = AudioSegment.from_file(audio_file)

    # len() of a pydub AudioSegment is its duration in milliseconds
    total_duration = len(audio)

    # Number of segments needed to cover the whole audio, rounding up so the
    # shorter remainder at the end gets a segment of its own
    num_segments = math.ceil(total_duration / segment_length_ms)

    base_name = os.path.splitext(os.path.basename(audio_file))[0]

    # Creating a folder to save the split audios
    output_dir = os.path.join(os.path.dirname(audio_file), base_name)
    os.makedirs(output_dir, exist_ok=True)

    # Splitting the audio into num_segments segments
    segment_paths = []
    for i in range(num_segments):
        start_time = i * segment_length_ms
        # Clamp the end so the final segment stops at the end of the audio
        end_time = min((i + 1) * segment_length_ms, total_duration)
        segment = audio[start_time:end_time]

        # Adding the segment to the required folder (parts are numbered from 1)
        segment_path = os.path.join(output_dir, f"Part{i+1}.mp3")
        segment.export(segment_path, format="mp3")
        segment_paths.append(segment_path)

    return segment_paths

# Runs only when this code is executed directly (not when it is imported)
if __name__ == "__main__":

    #Audio file to split; split_audio saves the segments in a folder named after the file
    audio_file = "OurPlanet-FrozenWorlds.mp3"
    split_audio(audio_file)


Splitting the audiobook of "The Magic Shop" by H. G. Wells into 15-second segments

In [None]:
import os
from pydub import AudioSegment
import math

def split_audio(audio_file, segment_length_ms=15 * 1000):
    """Split an audio file into consecutive fixed-length MP3 segments.

    The segments are exported as ``Part1.mp3``, ``Part2.mp3``, ... into a
    folder named after the audio file (without its extension), located in
    the same directory as the audio file. The last segment holds whatever
    audio remains and may therefore be shorter than the others.

    Args:
        audio_file: Path of the audio file to split.
        segment_length_ms: Length of each segment in milliseconds.
            Defaults to 15 seconds.

    Returns:
        A list with the paths of the exported segments, in playback order.

    Raises:
        ValueError: If ``segment_length_ms`` is not positive.
    """
    # Checked first: zero would divide by zero below, and a negative length
    # would silently produce no segments at all.
    if segment_length_ms <= 0:
        raise ValueError("segment_length_ms must be a positive number of milliseconds")

    audio = AudioSegment.from_file(audio_file)

    # len() of a pydub AudioSegment is its duration in milliseconds
    total_duration = len(audio)

    # Number of segments needed to cover the whole audio, rounding up so the
    # shorter remainder at the end gets a segment of its own
    num_segments = math.ceil(total_duration / segment_length_ms)

    base_name = os.path.splitext(os.path.basename(audio_file))[0]

    # Creating a folder to save the split audios
    output_dir = os.path.join(os.path.dirname(audio_file), base_name)
    os.makedirs(output_dir, exist_ok=True)

    # Splitting the audio into num_segments segments
    segment_paths = []
    for i in range(num_segments):
        start_time = i * segment_length_ms
        # Clamp the end so the final segment stops at the end of the audio
        end_time = min((i + 1) * segment_length_ms, total_duration)
        segment = audio[start_time:end_time]

        # Adding the segment to the required folder (parts are numbered from 1)
        segment_path = os.path.join(output_dir, f"Part{i+1}.mp3")
        segment.export(segment_path, format="mp3")
        segment_paths.append(segment_path)

    return segment_paths

# Runs only when this code is executed directly (not when it is imported)
if __name__ == "__main__":

    #Audio file to split; split_audio saves the segments in a folder named after the file
    audio_file = "TheMagicShop.mp3"
    split_audio(audio_file)


# **Transcribing the audio**


Transcribing the 15 second segments of audio of "Our Planet - Frozen Worlds" by David Attenborough

In [None]:
import os
import assemblyai as aai

# Read the AssemblyAI API key from the ASSEMBLYAI_API_KEY environment variable
# so the real key never has to be written into the notebook. The placeholder
# is kept as the fallback, so behaviour is unchanged when the variable is unset.
aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY", "YOUR_API_KEY")

def transcribe_and_save(folder_path):
    """Transcribe every .mp3 file in a folder and save each result as text.

    For each ``<name>.mp3`` directly inside ``folder_path`` the audio is
    sent to AssemblyAI and the transcript text is written to ``<name>.txt``
    in the same folder. A file whose transcription reports an error is
    printed and skipped; the remaining files are still processed.

    Args:
        folder_path: Folder containing the .mp3 segments.
    """
    # Sorted so every run processes the files in the same order
    # (os.listdir returns the names in arbitrary order).
    mp3_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.mp3')
    )
    if not mp3_files:
        # Nothing to transcribe, so no client needs to be created
        return

    # The request settings and the client are identical for every file, so
    # they are built once instead of once per segment.
    # NOTE(review): summarization is requested, but only transcript.text is
    # saved below; confirm whether the summary is actually needed.
    config = aai.TranscriptionConfig(
        summarization=True,
        summary_model=aai.SummarizationModel.informative,
        summary_type=aai.SummarizationType.bullets,
    )
    transcriber = aai.Transcriber()

    for filename in mp3_files:
        #Getting paths of each audio file
        file_path = os.path.join(folder_path, filename)

        #The .txt file shares the audio file's name, with the extension swapped
        output_file = os.path.splitext(file_path)[0] + ".txt"

        #Getting the transcription of the audio file
        transcript = transcriber.transcribe(file_path, config)
        if transcript.status == aai.TranscriptStatus.error:
            print(f"Error transcribing {filename}: {transcript.error}")
            continue

        #Saving the transcription received as text in the correct .txt file.
        #Explicit UTF-8 keeps the output independent of the platform default.
        with open(output_file, 'w', encoding="utf-8") as f:
            # presumably text can be None when no speech is recognised
            # (confirm against the SDK); avoid writing the literal "None"
            f.write(transcript.text or "")
        print(f"Transcription saved to {output_file}")

# Runs only when this code is executed directly (not when it is imported)
if __name__ == "__main__":
    #Folder holding the .mp3 segments; the .txt transcriptions are written into the same folder
    folder_path = "OurPlanet-FrozenWorlds"
    transcribe_and_save(folder_path)
    #Printed once the whole folder has been processed, even if some files reported an error
    print("All transcriptions saved.")

Transcription saved to OurPlanet-FrozenWorlds/Part172.txt
Transcription saved to OurPlanet-FrozenWorlds/Part184.txt
Transcription saved to OurPlanet-FrozenWorlds/Part103.txt
Transcription saved to OurPlanet-FrozenWorlds/Part140.txt
Transcription saved to OurPlanet-FrozenWorlds/Part28.txt
Transcription saved to OurPlanet-FrozenWorlds/Part42.txt
Transcription saved to OurPlanet-FrozenWorlds/Part213.txt
Transcription saved to OurPlanet-FrozenWorlds/Part88.txt
Transcription saved to OurPlanet-FrozenWorlds/Part38.txt
Transcription saved to OurPlanet-FrozenWorlds/Part89.txt
Transcription saved to OurPlanet-FrozenWorlds/Part125.txt
Transcription saved to OurPlanet-FrozenWorlds/Part166.txt
Transcription saved to OurPlanet-FrozenWorlds/Part132.txt
Transcription saved to OurPlanet-FrozenWorlds/Part81.txt
Transcription saved to OurPlanet-FrozenWorlds/Part61.txt
Transcription saved to OurPlanet-FrozenWorlds/Part143.txt
Transcription saved to OurPlanet-FrozenWorlds/Part189.txt
Transcription saved t

Transcribing the 15 second segments of audiobook of "The Magic Shop" by H. G. Wells

In [None]:
import os
import assemblyai as aai

#API key for Assembly AI that gives us 1000 minutes of transcription.
#It is read from the ASSEMBLYAI_API_KEY environment variable so the real key
#never has to be written into the notebook. The placeholder is kept as the
#fallback, so behaviour is unchanged when the variable is unset.
aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY", "YOUR_API_KEY")

def transcribe_and_save(folder_path):
    """Transcribe every .mp3 file in a folder and save each result as text.

    For each ``<name>.mp3`` directly inside ``folder_path`` the audio is
    sent to AssemblyAI and the transcript text is written to ``<name>.txt``
    in the same folder. A file whose transcription reports an error is
    printed and skipped; the remaining files are still processed.

    Args:
        folder_path: Folder containing the .mp3 segments.
    """
    # Sorted so every run processes the files in the same order
    # (os.listdir returns the names in arbitrary order).
    mp3_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.mp3')
    )
    if not mp3_files:
        # Nothing to transcribe, so no client needs to be created
        return

    # The request settings and the client are identical for every file, so
    # they are built once instead of once per segment.
    # NOTE(review): summarization is requested, but only transcript.text is
    # saved below; confirm whether the summary is actually needed.
    config = aai.TranscriptionConfig(
        summarization=True,
        summary_model=aai.SummarizationModel.informative,
        summary_type=aai.SummarizationType.bullets,
    )
    transcriber = aai.Transcriber()

    for filename in mp3_files:
        #Getting paths of each audio file
        file_path = os.path.join(folder_path, filename)

        #The .txt file shares the audio file's name, with the extension swapped
        output_file = os.path.splitext(file_path)[0] + ".txt"

        #Getting the transcription of the audio file
        transcript = transcriber.transcribe(file_path, config)
        if transcript.status == aai.TranscriptStatus.error:
            print(f"Error transcribing {filename}: {transcript.error}")
            continue

        #Saving the transcription received as text in the correct .txt file.
        #Explicit UTF-8 keeps the output independent of the platform default.
        with open(output_file, 'w', encoding="utf-8") as f:
            # presumably text can be None when no speech is recognised
            # (confirm against the SDK); avoid writing the literal "None"
            f.write(transcript.text or "")
        print(f"Transcription saved to {output_file}")

# Runs only when this code is executed directly (not when it is imported)
if __name__ == "__main__":
    #Folder holding the .mp3 segments; the .txt transcriptions are written into the same folder
    folder_path = "TheMagicShop"
    transcribe_and_save(folder_path)
    #Printed once the whole folder has been processed, even if some files reported an error
    print("All transcriptions saved.")

Transcription saved to TheMagicShop/Part103.txt
Transcription saved to TheMagicShop/Part28.txt
Transcription saved to TheMagicShop/Part42.txt
Transcription saved to TheMagicShop/Part88.txt
Transcription saved to TheMagicShop/Part38.txt
Transcription saved to TheMagicShop/Part89.txt
Transcription saved to TheMagicShop/Part81.txt
Transcription saved to TheMagicShop/Part61.txt
Transcription saved to TheMagicShop/Part17.txt
Transcription saved to TheMagicShop/Part97.txt
Transcription saved to TheMagicShop/Part95.txt
Transcription saved to TheMagicShop/Part59.txt
Transcription saved to TheMagicShop/Part12.txt
Transcription saved to TheMagicShop/Part107.txt
Transcription saved to TheMagicShop/Part47.txt
Transcription saved to TheMagicShop/Part18.txt
Transcription saved to TheMagicShop/Part46.txt
Transcription saved to TheMagicShop/Part106.txt
Transcription saved to TheMagicShop/Part22.txt
Transcription saved to TheMagicShop/Part66.txt
Transcription saved to TheMagicShop/Part56.txt
Transcript

# **Deleting the segmented audios**


Deleting the 15 second segments of audio of "Our Planet - Frozen Worlds" by David Attenborough and keeping only the transcriptions

In [None]:
import os

def delete_mp3(folder):
    """Delete every entry in ``folder`` whose name ends with ``.mp3``.

    One "Deleted ..." line is printed per removed file. Entries with any
    other name are left untouched.
    """
    # Keep only the .mp3 entries (the 15 second segments of the audio)
    mp3_entries = (entry for entry in os.listdir(folder) if entry.endswith('.mp3'))

    for entry in mp3_entries:
        target = os.path.join(folder, entry)
        os.remove(target)
        print(f"Deleted {target}")

# Runs only when this code is executed directly (not when it is imported)
if __name__ == "__main__":
    #Folder whose .mp3 segments are removed; files with other extensions are kept
    folder = "OurPlanet-FrozenWorlds"
    delete_mp3(folder)
    print("All .mp3 files deleted.")


Deleted OurPlanet-FrozenWorlds/Part172.mp3
Deleted OurPlanet-FrozenWorlds/Part184.mp3
Deleted OurPlanet-FrozenWorlds/Part103.mp3
Deleted OurPlanet-FrozenWorlds/Part140.mp3
Deleted OurPlanet-FrozenWorlds/Part28.mp3
Deleted OurPlanet-FrozenWorlds/Part42.mp3
Deleted OurPlanet-FrozenWorlds/Part213.mp3
Deleted OurPlanet-FrozenWorlds/Part88.mp3
Deleted OurPlanet-FrozenWorlds/Part38.mp3
Deleted OurPlanet-FrozenWorlds/Part89.mp3
Deleted OurPlanet-FrozenWorlds/Part125.mp3
Deleted OurPlanet-FrozenWorlds/Part166.mp3
Deleted OurPlanet-FrozenWorlds/Part132.mp3
Deleted OurPlanet-FrozenWorlds/Part81.mp3
Deleted OurPlanet-FrozenWorlds/Part61.mp3
Deleted OurPlanet-FrozenWorlds/Part143.mp3
Deleted OurPlanet-FrozenWorlds/Part189.mp3
Deleted OurPlanet-FrozenWorlds/Part17.mp3
Deleted OurPlanet-FrozenWorlds/Part206.mp3
Deleted OurPlanet-FrozenWorlds/Part97.mp3
Deleted OurPlanet-FrozenWorlds/Part147.mp3
Deleted OurPlanet-FrozenWorlds/Part153.mp3
Deleted OurPlanet-FrozenWorlds/Part120.mp3
Deleted OurPlanet-Fr

Deleting the 15 second segments of audiobook of "The Magic Shop" by H. G. Wells and keeping only the transcriptions

In [None]:
import os

def delete_mp3(folder):
    """Remove all ``.mp3`` files found directly inside ``folder``.

    Each removal is reported with a "Deleted ..." line; entries whose name
    does not end in ``.mp3`` are skipped.
    """
    for entry in os.listdir(folder):
        # Anything that is not an .mp3 segment stays where it is
        if not entry.endswith('.mp3'):
            continue

        segment_path = os.path.join(folder, entry)
        os.remove(segment_path)
        print(f"Deleted {segment_path}")

# Runs only when this code is executed directly (not when it is imported)
if __name__ == "__main__":
    #Folder whose .mp3 segments are removed; files with other extensions are kept
    folder = "TheMagicShop"
    delete_mp3(folder)
    print("All .mp3 files deleted.")


Deleted TheMagicShop/Part103.mp3
Deleted TheMagicShop/Part28.mp3
Deleted TheMagicShop/Part42.mp3
Deleted TheMagicShop/Part88.mp3
Deleted TheMagicShop/Part38.mp3
Deleted TheMagicShop/Part89.mp3
Deleted TheMagicShop/Part81.mp3
Deleted TheMagicShop/Part61.mp3
Deleted TheMagicShop/Part17.mp3
Deleted TheMagicShop/Part97.mp3
Deleted TheMagicShop/Part95.mp3
Deleted TheMagicShop/Part59.mp3
Deleted TheMagicShop/Part12.mp3
Deleted TheMagicShop/Part107.mp3
Deleted TheMagicShop/Part47.mp3
Deleted TheMagicShop/Part18.mp3
Deleted TheMagicShop/Part46.mp3
Deleted TheMagicShop/Part106.mp3
Deleted TheMagicShop/Part22.mp3
Deleted TheMagicShop/Part66.mp3
Deleted TheMagicShop/Part56.mp3
Deleted TheMagicShop/Part100.mp3
Deleted TheMagicShop/Part101.mp3
Deleted TheMagicShop/Part25.mp3
Deleted TheMagicShop/Part26.mp3
Deleted TheMagicShop/Part65.mp3
Deleted TheMagicShop/Part90.mp3
Deleted TheMagicShop/Part87.mp3
Deleted TheMagicShop/Part32.mp3
Deleted TheMagicShop/Part41.mp3
Deleted TheMagicShop/Part31.mp3
Del

# **Downloading the folder**


Creating a .zip file of the folder, which now contains only the transcriptions of the 15-second segments of "Our Planet - Frozen Worlds" by David Attenborough, and downloading it from Google Colab

In [None]:
from google.colab import files
import shutil

# Folder whose contents are archived, and the file name of the resulting archive
folder = "OurPlanet-FrozenWorlds"
output_file = "OurPlanet-FrozenWorlds.zip"

# Zipping the given folder
# make_archive appends the ".zip" extension itself, so it is stripped from the base name here
shutil.make_archive(output_file.replace('.zip', ''), 'zip', folder)

# Downloading the zipped file to the computer
files.download(output_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Creating a .zip file of the folder, which now contains only the transcriptions of the 15-second segments of "The Magic Shop" by H. G. Wells, and downloading it from Google Colab


In [None]:
from google.colab import files
import shutil

# Folder whose contents are archived, and the file name of the resulting archive
folder = "TheMagicShop"
output_file = "TheMagicShop.zip"

# Zipping the given folder
# make_archive appends the ".zip" extension itself, so it is stripped from the base name here
shutil.make_archive(output_file.replace('.zip', ''), 'zip', folder)

# Downloading the zipped file to the computer
files.download(output_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>