In [1]:
import os
import urllib.request as req
import pandas as pd

## Dataset Preparation

The dataset was obtained from `NoCopyRight Sound` and contains 150 audio files with the following properties:

- Sample Rate: 44.1 kHz (44,100 samples per second)
- Channel: 2 (Stereo)
- Precision: 16-bit
- Bitrate: 320k

### Folder Structure

```
├─ mfcc.ipynb
├─ dtw.ipynb
├─ evaluation.ipynb
├─ ceremony.ipynb
├─ csv
│  ├─ train.csv
│  └─ test.csv
├─ audio             # store all dataset (.mp3)
│  ├─ train          # store all train dataset (.mp3)
│  └─ test           # store all test dataset (.mp3)
│     ├─ normal      # no noise
│     │  ├─ 100      # no noise and 100% duration
│     │  └─ 50       # no noise and 50% duration
│     └─ noise       # with noise
│        ├─ 100      # with noise and 100% duration
│        └─ 50       # with noise and 50% duration
├─ npy
│  ├─ train          # store all train features (.npy)
│  │  ├─ 05          # store all train features (0.5s frame) (.npy)
│  │  ├─ 10
│  │  └─ 15
│  └─ test           # store all test features (.npy)
│     ├─ normal      # store all test features (no noise) (.npy)
│     │  ├─ 100      # store all test features (no noise, 100% duration) (.npy)     
│     │  │  ├─ 05    # store all test features (no noise, 100% duration, 0.5s frame) (.npy)
│     │  │  ├─ 10
│     │  │  └─ 15
│     │  └─ 50
│     │     ├─ 05
│     │     ├─ 10
│     │     └─ 15
│     └─ noise
│        ├─ 100
│        │  ├─ 05
│        │  ├─ 10
│        │  └─ 15
│        └─ 50
│           ├─ 05
│           ├─ 10
│           └─ 15
├─ result            # evaluation result
│  ├─ matching
│  └─ evaluation
└─ resources         # ui (streamlit) resources
```

### Create All Folders

In [2]:
def create_folder(path):
  """Create the directory `path`, including any missing parent directories.

  Calling this for a directory that already exists is a no-op, so the cell
  can be re-run safely.

  Args:
    path: Directory path to create (relative or absolute).

  Raises:
    FileExistsError: If `path` exists but is not a directory.
  """
  # exist_ok=True replaces the separate os.path.exists() check: the test and
  # the creation happen in one call, so there is no window in which the
  # directory can appear between the two steps.
  os.makedirs(path, exist_ok=True)

In [4]:
# Building blocks of the test-set layout: <condition>/<duration>/<frame>
conditions = ("normal", "noise")
durations = ("50", "100")
frames = ("05", "10", "15")

# create folder for train | test dataset (store mp3)
folders = ["audio", "audio/train", "audio/test"]
for condition in conditions:
  folders.append(f"audio/test/{condition}")
  folders.extend(f"audio/test/{condition}/{duration}" for duration in durations)

# create folder for npy (mfcc's result)
folders += ["npy", "npy/train"]
folders.extend(f"npy/train/{frame}" for frame in frames)
folders.append("npy/test")
for condition in conditions:
  folders.append(f"npy/test/{condition}")
  for duration in durations:
    folders.append(f"npy/test/{condition}/{duration}")
    folders.extend(f"npy/test/{condition}/{duration}/{frame}" for frame in frames)

# create folder for csv (to download mp3)
folders.append("csv")

# create folder for evaluation result
folders += ["result", "result/matching", "result/evaluation"]

# create folder for ui (streamlit) resources
folders.append("resources")

# Parents are listed before their children, so folders are created top-down.
for folder in folders:
  create_folder(folder)

### Download CSV

In [4]:
def download_csv(link, path):
  """Fetch the resource at `link` and save it to the local file `path`.

  Args:
    link: URL to download from.
    path: Destination file path, handed to `urlretrieve` as the output filename.
  """
  req.urlretrieve(url=link, filename=path)

In [5]:
# All export links point at the same spreadsheet and differ only in the `gid`
# query parameter, so the URL is assembled from one template.
SHEET_ID = "1-JBYn8MiLIcnJwqT2j33H7iEbVIgCUDNdG68yBDW5zc"
SHEET_EXPORT_URL = "https://docs.google.com/spreadsheets/d/{sheet}/export?format=csv&id={sheet}&gid={gid}"

# (name, audio_path, gid) -- gid is None for the splits that have no export link
_splits = [
  ("train", "audio/train", "1280949540"),
  ("test_normal_100", "audio/test/normal/100", "328562984"),
  ("test_normal_50", "audio/test/normal/50", "1328854310"),
  ("test_noise_100", "audio/test/noise/100", None),
  ("test_noise_50", "audio/test/noise/50", None),
]

# One record per split; "link" stays None when there is nothing to download.
all_csv = [
  {
    "name": name,
    "audio_path": audio_path,
    "link": None if gid is None else SHEET_EXPORT_URL.format(sheet=SHEET_ID, gid=gid),
  }
  for name, audio_path, gid in _splits
]

pd.DataFrame(all_csv)

Unnamed: 0,name,audio_path,link
0,train,audio/train,https://docs.google.com/spreadsheets/d/1-JBYn8...
1,test_normal_100,audio/test/normal/100,https://docs.google.com/spreadsheets/d/1-JBYn8...
2,test_normal_50,audio/test/normal/50,https://docs.google.com/spreadsheets/d/1-JBYn8...
3,test_noise_100,audio/test/noise/100,
4,test_noise_50,audio/test/noise/50,


In [6]:
# Fetch the CSV manifest of every split that has an export link.
for csv in all_csv:
  link, name = csv["link"], csv["name"]
  if link is None:
    continue  # nothing to fetch for this split
  print(f"Downloading {name}...")
  download_csv(link, f"csv/{name}.csv")

Downloading train...
Downloading test_normal_100...
Downloading test_normal_50...


### Download Audio (MP3)

In [7]:
# Download the audio files listed in each split's CSV manifest.
# Every file is checked individually, so an interrupted run resumes with the
# files that are still missing instead of skipping the whole folder as soon as
# it contains anything (a partial download, a stray hidden file, ...).
for csv in all_csv:
  if csv["link"] is None:
    continue  # this split has no CSV manifest to read

  audio_dir = csv["audio_path"]
  os.makedirs(audio_dir, exist_ok=True)  # tolerate a missing target folder
  dataset = pd.read_csv(f"csv/{csv['name']}.csv")

  # keep only the rows whose .mp3 is not on disk yet
  pending = [
    row for _, row in dataset.iterrows()
    if not os.path.exists(f"{audio_dir}/{row['title']}.mp3")
  ]
  if not pending:
    print(f"all song in {audio_dir} downloaded")
    continue

  print(f"downloading audio in {audio_dir}...")
  for row in pending:
    target = f"{audio_dir}/{row['title']}.mp3"
    partial = f"{target}.part"
    print(f"\tDownloading {row['title']}.mp3...")
    try:
      # Download under a temporary name and rename afterwards, so a
      # half-written file is never mistaken for a finished one on re-run.
      req.urlretrieve(row["link"], partial)
      os.replace(partial, target)
    finally:
      # After a successful os.replace the temporary file no longer exists;
      # otherwise remove the leftover from the failed/interrupted download.
      if os.path.exists(partial):
        os.remove(partial)

downloading audio in audio/train...
	Downloading 4U.mp3...
	Downloading 23.mp3...
	Downloading Watch The World Burn.mp3...
	Downloading Ark.mp3...
	Downloading Arrow.mp3...
	Downloading Awakening.mp3...
	Downloading Be Around.mp3...
	Downloading Blank VIP.mp3...
	Downloading Blank.mp3...
	Downloading Bleed.mp3...
	Downloading C U Again.mp3...
	Downloading Castle.mp3...
	Downloading Cetus.mp3...
	Downloading Circles.mp3...
	Downloading Clear My Head.mp3...
	Downloading Close.mp3...
	Downloading Coming Home.mp3...
	Downloading Control.mp3...
	Downloading Cradles.mp3...
	Downloading Crazy.mp3...
	Downloading Crest.mp3...
	Downloading Cyberpunk.mp3...
	Downloading Dancefloor.mp3...
	Downloading Defeat The Night.mp3...
	Downloading Devil.mp3...
	Downloading Dino.mp3...
	Downloading Dreams pt. II.mp3...
	Downloading Dreams.mp3...
	Downloading Earth.mp3...
	Downloading Eclipse.mp3...
	Downloading Energy.mp3...
	Downloading Enslaved.mp3...
	Downloading Entropy.mp3...
	Downloading Fall To Light