# Installing Hub

In [None]:
!pip3 install hub --quiet

# Run below cells and restart the runtime
# if you are running it in colab
# import os
# os.kill(os.getpid(), 9) 

# Download raw dataset

In [None]:
from IPython.display import clear_output

In [None]:
# Download Dataset here
!wget https://github.com/karoldvl/ESC-50/archive/master.zip
!unzip master.zip
clear_output()

In [None]:
import os
import pandas as pd
import time
from tqdm import tqdm

# Creating dataset on hub

**Activeloop API**: <https://docs.activeloop.ai/api-basics>

In [None]:
import hub

# Log in to Activeloop
# NOTE(review): BUGGER_OFF presumably opts out of Hub's usage reporting — confirm.
%env BUGGER_OFF=True
# `username` and `password` are placeholders: substitute your own credentials
# before running, and never commit real credentials with the notebook.
!activeloop login -u username -p password
# Switch reporting off through the CLI.
!activeloop reporting --off

In [None]:
# List the working directory (e.g. to check that `ESC-50-master` was extracted).
!ls

In [None]:
# Root folder of the extracted archive; the `meta` and `audio` paths below are built from it.
basepath = 'ESC-50-master'

In [None]:
# Read the ESC-50 metadata table that ships inside the archive.
meta_csv = os.path.join(basepath, 'meta', 'esc50.csv')
df = pd.read_csv(meta_csv)

In [None]:
# Show the first metadata row to see which columns are available.
df.iloc[0]

In [None]:
# Distinct category names as a plain Python list; a category's position in
# this list is used as its integer label when the dataset is built.
class_names = df['category'].unique().tolist()

In [None]:
# Upload ESC-50 to Hub: one sample per row of the metadata table.
# Replace <username> with your Activeloop username before running.
hubname = 'hub://<username>/esc50'
ds = hub.dataset(hubname)
# NOTE(review): if the dataset already holds these tensors, create_tensor
# presumably raises on a second run — confirm, and delete or rename the
# dataset before re-running.

start = time.time()

with ds:
    # The audio files are stored with wav sample compression.
    ds.create_tensor('audio', htype='audio', sample_compression='wav')
    # Integer labels; class_names gives the label -> name mapping.
    ds.create_tensor('labels', htype='class_label', class_names=class_names)
    # The remaining tensors hold columns copied as-is from the metadata CSV.
    ds.create_tensor('target')
    ds.create_tensor('fold')
    ds.create_tensor('esc10')
    ds.create_tensor('src_file', htype='text')
    ds.create_tensor('take', htype='text')

    audio_folder = os.path.join(basepath, 'audio')

    # class_names contains unique values, so this dict yields exactly the
    # index that class_names.index() would, without a list scan per row.
    label_index = {name: position for position, name in enumerate(class_names)}

    # iterrows() is a generator without a length; passing total= lets tqdm
    # display percentage and ETA instead of a bare iteration counter.
    for index, row in tqdm(df.iterrows(), total=len(df)):
        audio_path = os.path.join(audio_folder, row['filename'])

        ds.append({
            'audio' : hub.read(audio_path),
            'labels' : label_index[row['category']],
            'target' : row['target'],
            'fold' : row['fold'],
            'esc10' : row['esc10'],
            # src_file is passed through str() because the tensor is htype='text'.
            'src_file' : str(row['src_file']),
            'take' : row['take']
        })

# The timer is stopped after the `with ds:` block has exited.
stop = time.time()
print(f'Time elapsed : {stop - start}')