In [2]:
import pandas as pd
import numpy as np
import shutil
import os
from tqdm import tqdm
import glob

## Load metadata and get label list

In [3]:
# Path to the UrbanSound8K metadata CSV
metadatafile = '/Users/dilipharish/1 UPF Course Work/Music Information Retreival/UrbanSound8K/metadata/UrbanSound8K.csv'
# Parse the CSV into a DataFrame; the soundbank cells look up each file's class in it
metadata = pd.read_csv(filepath_or_buffer=metadatafile)

In [4]:
# Distinct values of the metadata 'class' column, in order of first appearance
class_names = pd.unique(metadata['class'])
# Alphabetically sorted list of labels, used to initialise the per-label counters
label_list = sorted(class_names)
print(label_list)

['air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music']


## Create soundbank for TRAIN (folds 1-6)

In [9]:
folds = [1, 2, 3, 4, 5, 6]
splitname = 'train'

# Files are copied to <soundbanks>/<splitname>/foreground/<label>/<file>.wav
soundbankfolder = '/Users/dilipharish/Master-Thesis/scaper-master/datasets/soundbanks'
soundbankfolder = os.path.join(soundbankfolder, splitname)

# Build the filename -> class lookup once instead of scanning the whole
# metadata table for every audio file. setdefault keeps the first row seen for
# a filename, which matches the previous `.values[0]` behaviour.
label_by_file = {}
for fname, cls in zip(metadata['slice_file_name'], metadata['class']):
    label_by_file.setdefault(fname, cls)

# Running totals over every fold of this split
label_count = dict.fromkeys(label_list, 0)

for fold in folds:

    print('FOLD {:d}'.format(fold))
    audiofolder =  '/Users/dilipharish/1 UPF Course Work/Music Information Retreival/UrbanSound8K/audio/fold{:d}/'.format(fold)
    audiofiles = glob.glob(os.path.join(audiofolder, '*.wav'))

    # Totals for the current fold only
    fold_label_count = dict.fromkeys(label_list, 0)

    for af in tqdm(audiofiles):
        filename = os.path.basename(af)
        if filename not in label_by_file:
            # Fail with the offending filename rather than a bare IndexError
            raise KeyError('No metadata entry for audio file {:s}'.format(filename))
        label = label_by_file[filename]
        label_count[label] += 1
        fold_label_count[label] += 1
        destfolder = os.path.join(soundbankfolder, 'foreground', label)
        # makedirs (not mkdir): the parent <split>/foreground folders may not
        # exist yet on a fresh run, and mkdir cannot create intermediate levels.
        os.makedirs(destfolder, exist_ok=True)
        destfile = os.path.join(destfolder, filename)
        shutil.copyfile(af, destfile)

    # Print fold report
    print('   Fold {:d} labels:'.format(fold))
    for label in label_list:
        print('   {:s}:\t{:d}'.format(label, fold_label_count[label]))

# Print overall report
print('\n\nOVERALL labels:')
for label in label_list:
    print('{:s}:\t{:d}'.format(label, label_count[label]))

FOLD 1


100%|██████████| 873/873 [00:02<00:00, 336.57it/s]


   Fold 1 labels:
   air_conditioner:	100
   car_horn:	36
   children_playing:	100
   dog_bark:	100
   drilling:	100
   engine_idling:	96
   gun_shot:	35
   jackhammer:	120
   siren:	86
   street_music:	100
FOLD 2


100%|██████████| 888/888 [00:02<00:00, 353.22it/s]


   Fold 2 labels:
   air_conditioner:	100
   car_horn:	42
   children_playing:	100
   dog_bark:	100
   drilling:	100
   engine_idling:	100
   gun_shot:	35
   jackhammer:	120
   siren:	91
   street_music:	100
FOLD 3


100%|██████████| 925/925 [00:02<00:00, 352.95it/s]


   Fold 3 labels:
   air_conditioner:	100
   car_horn:	43
   children_playing:	100
   dog_bark:	100
   drilling:	100
   engine_idling:	107
   gun_shot:	36
   jackhammer:	120
   siren:	119
   street_music:	100
FOLD 4


100%|██████████| 990/990 [00:02<00:00, 352.25it/s]


   Fold 4 labels:
   air_conditioner:	100
   car_horn:	59
   children_playing:	100
   dog_bark:	100
   drilling:	100
   engine_idling:	107
   gun_shot:	38
   jackhammer:	120
   siren:	166
   street_music:	100
FOLD 5


100%|██████████| 936/936 [00:02<00:00, 344.60it/s]


   Fold 5 labels:
   air_conditioner:	100
   car_horn:	98
   children_playing:	100
   dog_bark:	100
   drilling:	100
   engine_idling:	107
   gun_shot:	40
   jackhammer:	120
   siren:	71
   street_music:	100
FOLD 6


100%|██████████| 823/823 [00:02<00:00, 389.52it/s]

   Fold 6 labels:
   air_conditioner:	100
   car_horn:	28
   children_playing:	100
   dog_bark:	100
   drilling:	100
   engine_idling:	107
   gun_shot:	46
   jackhammer:	68
   siren:	74
   street_music:	100


OVERALL labels:
air_conditioner:	600
car_horn:	306
children_playing:	600
dog_bark:	600
drilling:	600
engine_idling:	624
gun_shot:	230
jackhammer:	668
siren:	607
street_music:	600





## Create soundbank for VALIDATE (folds 7-8)

In [11]:
folds = [7, 8]
splitname = 'validate'

# Files are copied to <soundbanks>/<splitname>/foreground/<label>/<file>.wav
soundbankfolder = '/Users/dilipharish/Master-Thesis/scaper-master/datasets/soundbanks'
soundbankfolder = os.path.join(soundbankfolder, splitname)

# Build the filename -> class lookup once instead of scanning the whole
# metadata table for every audio file. setdefault keeps the first row seen for
# a filename, which matches the previous `.values[0]` behaviour.
label_by_file = {}
for fname, cls in zip(metadata['slice_file_name'], metadata['class']):
    label_by_file.setdefault(fname, cls)

# Running totals over every fold of this split
label_count = dict.fromkeys(label_list, 0)

for fold in folds:

    print('FOLD {:d}'.format(fold))
    audiofolder = '/Users/dilipharish/1 UPF Course Work/Music Information Retreival/UrbanSound8K/audio/fold{:d}/'.format(fold)
    audiofiles = glob.glob(os.path.join(audiofolder, '*.wav'))

    # Totals for the current fold only
    fold_label_count = dict.fromkeys(label_list, 0)

    for af in tqdm(audiofiles):
        filename = os.path.basename(af)
        if filename not in label_by_file:
            # Fail with the offending filename rather than a bare IndexError
            raise KeyError('No metadata entry for audio file {:s}'.format(filename))
        label = label_by_file[filename]
        label_count[label] += 1
        fold_label_count[label] += 1
        destfolder = os.path.join(soundbankfolder, 'foreground', label)
        # makedirs (not mkdir): the parent <split>/foreground folders may not
        # exist yet on a fresh run, and mkdir cannot create intermediate levels.
        os.makedirs(destfolder, exist_ok=True)
        destfile = os.path.join(destfolder, filename)
        shutil.copyfile(af, destfile)

    # Print fold report
    print('   Fold {:d} labels:'.format(fold))
    for label in label_list:
        print('   {:s}:\t{:d}'.format(label, fold_label_count[label]))

# Print overall report
print('\n\nOVERALL labels:')
for label in label_list:
    print('{:s}:\t{:d}'.format(label, label_count[label]))

FOLD 7


100%|██████████| 838/838 [00:02<00:00, 355.41it/s]


   Fold 7 labels:
   air_conditioner:	100
   car_horn:	28
   children_playing:	100
   dog_bark:	100
   drilling:	100
   engine_idling:	106
   gun_shot:	51
   jackhammer:	76
   siren:	77
   street_music:	100
FOLD 8


100%|██████████| 806/806 [00:02<00:00, 362.39it/s]

   Fold 8 labels:
   air_conditioner:	100
   car_horn:	30
   children_playing:	100
   dog_bark:	100
   drilling:	100
   engine_idling:	88
   gun_shot:	30
   jackhammer:	78
   siren:	80
   street_music:	100


OVERALL labels:
air_conditioner:	200
car_horn:	58
children_playing:	200
dog_bark:	200
drilling:	200
engine_idling:	194
gun_shot:	81
jackhammer:	154
siren:	157
street_music:	200





## Create soundbank for TEST (folds 9-10)

In [13]:
folds = [9, 10]
splitname = 'test'

# Files are copied to <soundbanks>/<splitname>/foreground/<label>/<file>.wav
soundbankfolder = '/Users/dilipharish/Master-Thesis/scaper-master/datasets/soundbanks'
soundbankfolder = os.path.join(soundbankfolder, splitname)

# Build the filename -> class lookup once instead of scanning the whole
# metadata table for every audio file. setdefault keeps the first row seen for
# a filename, which matches the previous `.values[0]` behaviour.
label_by_file = {}
for fname, cls in zip(metadata['slice_file_name'], metadata['class']):
    label_by_file.setdefault(fname, cls)

# Running totals over every fold of this split
label_count = dict.fromkeys(label_list, 0)

for fold in folds:

    print('FOLD {:d}'.format(fold))
    audiofolder = '/Users/dilipharish/1 UPF Course Work/Music Information Retreival/UrbanSound8K/audio/fold{:d}/'.format(fold)
    audiofiles = glob.glob(os.path.join(audiofolder, '*.wav'))

    # Totals for the current fold only
    fold_label_count = dict.fromkeys(label_list, 0)

    for af in tqdm(audiofiles):
        filename = os.path.basename(af)
        if filename not in label_by_file:
            # Fail with the offending filename rather than a bare IndexError
            raise KeyError('No metadata entry for audio file {:s}'.format(filename))
        label = label_by_file[filename]
        label_count[label] += 1
        fold_label_count[label] += 1
        destfolder = os.path.join(soundbankfolder, 'foreground', label)
        # makedirs (not mkdir): the parent <split>/foreground folders may not
        # exist yet on a fresh run, and mkdir cannot create intermediate levels.
        os.makedirs(destfolder, exist_ok=True)
        destfile = os.path.join(destfolder, filename)
        shutil.copyfile(af, destfile)

    # Print fold report
    print('   Fold {:d} labels:'.format(fold))
    for label in label_list:
        print('   {:s}:\t{:d}'.format(label, fold_label_count[label]))

# Print overall report
print('\n\nOVERALL labels:')
for label in label_list:
    print('{:s}:\t{:d}'.format(label, label_count[label]))

FOLD 9


100%|██████████| 816/816 [00:02<00:00, 366.50it/s]


   Fold 9 labels:
   air_conditioner:	100
   car_horn:	32
   children_playing:	100
   dog_bark:	100
   drilling:	100
   engine_idling:	89
   gun_shot:	31
   jackhammer:	82
   siren:	82
   street_music:	100
FOLD 10


100%|██████████| 837/837 [00:02<00:00, 363.95it/s]

   Fold 10 labels:
   air_conditioner:	100
   car_horn:	33
   children_playing:	100
   dog_bark:	100
   drilling:	100
   engine_idling:	93
   gun_shot:	32
   jackhammer:	96
   siren:	83
   street_music:	100


OVERALL labels:
air_conditioner:	200
car_horn:	65
children_playing:	200
dog_bark:	200
drilling:	200
engine_idling:	182
gun_shot:	63
jackhammer:	178
siren:	165
street_music:	200





## Copy the background noise excerpt into every split (train, validate, test)

In [16]:
# Single noise recording that is copied into every split as background material
noisefile = '/Users/dilipharish/Master-Thesis/scaper-master/audio/noise/444245__matrixxx__brownian_noise_10_sec.wav'
soundbankfolder = '/Users/dilipharish/Master-Thesis/scaper-master/datasets/soundbanks'

for split in ['train', 'validate', 'test']:

    # Target layout: <soundbanks>/<split>/background/noise/<noise file>
    destfolder = os.path.join(soundbankfolder, split, 'background', 'noise')
    # makedirs (not mkdir): the intermediate 'background' folder is not created
    # by any earlier cell, and mkdir cannot create more than one level.
    os.makedirs(destfolder, exist_ok=True)

    destfile = os.path.join(destfolder, os.path.basename(noisefile))
    shutil.copyfile(noisefile, destfile)

In [3]:
import openai
import os

# Read the OpenAI API key from the OPENAI_API_KEY environment variable.
# openai.api_key is only overwritten when the variable is actually set, so a
# key configured earlier in the session is not replaced by None.
env_key = os.getenv("OPENAI_API_KEY")
if env_key:
    openai.api_key = env_key
# Fail fast with a clear message instead of an authentication error at request time
if not (getattr(openai, "api_key", None) or getattr(openai, "api_key_path", None)):
    raise RuntimeError("OPENAI_API_KEY is not set; export it before running this cell")

# Prompt the user to enter a set of tags; the typed text is sent verbatim as
# the completion prompt.
prompt = input("Enter a set of tags: dog bark, walking steps, typing")
if not prompt.strip():
    raise ValueError("No tags entered; nothing to send to the model")

# Set up the OpenAI API parameters

model = "text-davinci-003"
temperature = 0.7
max_tokens = 256

# Generate the text description using the OpenAI API.
# The model name is passed as `model`; `engine` is the deprecated spelling.
response = openai.Completion.create(
    model=model,
    prompt=prompt,
    temperature=temperature,
    max_tokens=max_tokens,
)

# Print the text description
print(response.choices[0].text.strip())


1. An abrupt car horn sound followed by the loud gun shot and drilling.
2. A car horn, gun shot and drilling all occurring in quick succession.
3. A series of sharp noises including a car horn, gun shot and drilling.
4. A car horn, gun shot and drilling all sounding in the same moment. 
5. An instantaneous symphony of car horn, gun shot and drilling.
