<a href="https://colab.research.google.com/github/atick-faisal/Crowd-Emotion/blob/main/src_v4/CE_Window_Generation_v4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import shutil
import tarfile
import requests
import numpy as np
from librosa import load
from joblib import dump
from tqdm import tqdm

In [9]:
# Identifier of the dataset archive handed to the Google Drive download helper.
DATASET_ID = '1HOe5sJe_Juf5uib4f-8pPv-Z64lmqQ4X'

# Directory layout: audio is read from BASE_DIR/AUDIO_DIR/<emotion>/<fold> and
# the generated windows are written below BASE_DIR/WINDOWS_DIR.
BASE_DIR = '/content/drive/MyDrive/Research/Crowd Emotion Val/'
AUDIO_DIR = 'Normalized/'
WINDOWS_DIR = 'Windows_AF/'

# Sub-folders scanned inside every emotion directory. '.' means the audio
# files sit directly in the emotion directory; a five-fold layout would use
# ['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5'] instead.
FOLDS = ['.']
EMOTIONS = ['Approval', 'Disapproval', 'Neutral']

# Sampling rate the audio is loaded at (passed to the loader as `sr`).
Fs = 44100
# Default window length in samples: one second of audio at Fs.
WIN_LEN = int(Fs * 1.0)
# Default fraction of a window that is shared with the following window.
OVERLAP = 0.75

In [3]:
#--------------------- Download util for Google Drive ------------------- #

def download_file_from_google_drive(id, destination):
    """Download a Google Drive file to a local path.

    A first request is made for the file. If that response carries a
    ``download_warning*`` cookie, the request is repeated with the cookie's
    value as the ``confirm`` parameter, and the second response is saved.

    Args:
        id: Google Drive file id. The name shadows the builtin ``id``; it is
            kept so existing keyword callers continue to work.
        destination: Path of the local file to write.

    Raises:
        requests.HTTPError: If the final response has a 4xx/5xx status.
    """
    URL = "https://docs.google.com/uc?export=download"

    # The context manager closes the session (and its pooled connections)
    # even when the download fails part-way.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)

        if token:
            # Release the unconsumed streamed response before re-requesting.
            response.close()
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        # Fail loudly instead of saving an HTTP error page as the dataset.
        response.raise_for_status()
        save_response_content(response, destination)

def get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie, if any.

    Args:
        response: Object whose ``cookies`` attribute exposes ``items()``
            yielding ``(name, value)`` pairs.

    Returns:
        The value of the first cookie whose name starts with
        ``download_warning``, or ``None`` when no such cookie is present.
    """
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    # next() with a default stops at the first match, like the early return
    # of an explicit loop, and falls back to None when nothing matches.
    return next(warning_values, None)

def save_response_content(response, destination):
    """Write the body of ``response`` to the file at ``destination``.

    The body is pulled through ``response.iter_content`` in 32768-byte
    requests and written in binary mode; falsy (empty) chunks are skipped.

    Args:
        response: Object providing ``iter_content(chunk_size)``.
        destination: Path of the file to create or overwrite.
    """
    chunk_bytes = 32768

    with open(destination, "wb") as out_file:
        # filter(None, ...) drops falsy chunks, same as an `if chunk` guard.
        for piece in filter(None, response.iter_content(chunk_bytes)):
            out_file.write(piece)

def download_data(fid, destination):
    """Recreate ``destination``, then download and unpack the dataset into it.

    Progress is printed step by step ('√' on success, '✕' on failure). A
    failed download or extraction is reported on stdout instead of being
    raised; after a failed download the extraction step is skipped.

    Args:
        fid: Google Drive file id of the dataset archive.
        destination: Directory that will hold ``dataset.tar.xz`` and its
            extracted contents. An existing directory is deleted first.

    Raises:
        OSError: If the old directory cannot be removed (for a reason other
            than it not existing) or the new one cannot be created.
    """
    print('cleaning already existing files ... ', end='')
    try:
        shutil.rmtree(destination)
        print('√')
    except FileNotFoundError:
        # Nothing to clean; any other removal error is a real problem and
        # is allowed to propagate instead of being hidden.
        print('✕')

    print('creating data directory ... ', end='')
    # makedirs (unlike mkdir) also creates missing parent directories.
    os.makedirs(destination)
    print('√')

    print('downloading dataset from the repository ... ', end='')
    filename = os.path.join(destination, 'dataset.tar.xz')
    try:
        download_file_from_google_drive(fid, filename)
        print('√')
    except Exception as err:
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # through while keeping the best-effort reporting.
        print('✕')
        print('download failed: {!r}'.format(err))
        # There is no usable archive, so extraction would only fail too.
        return

    print('extracting the dataset ... ', end='')
    try:
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(filename) as tar:
            # The archive comes from the network: use the 'data' extraction
            # filter where this Python provides it, so members cannot
            # escape `destination`.
            if hasattr(tarfile, 'data_filter'):
                tar.extractall(destination, filter='data')
            else:
                tar.extractall(destination)
        print('√')
    except Exception as err:
        print('✕')
        print('extraction failed: {!r}'.format(err))

In [4]:
def get_windows(signal, length=WIN_LEN, overlap=OVERLAP):
    """Split a 1-D signal into fixed-length, overlapping windows.

    Consecutive windows start ``length - round(length * overlap)`` samples
    apart. Trailing samples that do not fill a whole window are dropped.

    Args:
        signal: 1-D sequence or array of samples.
        length: Window length in samples.
        overlap: Fraction of ``length`` shared by consecutive windows; it
            must leave a positive hop between window starts.

    Returns:
        A float64 array of shape ``(num_windows, length)``. The shape is
        ``(0, length)`` when the signal is shorter than one window.

    Raises:
        ValueError: If ``length`` and ``overlap`` give a non-positive hop.
    """
    increment = length - round(length * overlap)
    if increment <= 0:
        # A zero hop would divide by zero below; a negative one is meaningless.
        raise ValueError(
            'overlap={!r} leaves no positive hop for length={!r}'.format(
                overlap, length
            )
        )

    # A signal shorter than one window used to produce a negative window
    # count or a shape mismatch; it now yields an empty result.
    if len(signal) < length:
        return np.zeros((0, length))

    num_windows = (len(signal) - length) // increment + 1
    samples = np.zeros((num_windows, length))
    for i in range(num_windows):
        start = i * increment
        samples[i] = signal[start:start + length]
    return samples

In [5]:
# ------- Uncomment to download the dataset; keep commented out if already downloaded -------- #

# destination = os.path.join(BASE_DIR, AUDIO_DIR)
# download_data(DATASET_ID, destination)

In [10]:
# Cut every audio file into windows and store them, one .joblib file per
# recording, under BASE_DIR/WINDOWS_DIR/<emotion>/.
write_dir = os.path.join(BASE_DIR, WINDOWS_DIR)

print('cleaning already existing files ... ', end='')
try:
    shutil.rmtree(write_dir)
    print('√')
except FileNotFoundError:
    # Nothing to clean yet; any other removal error is a real problem and
    # is allowed to propagate instead of being hidden.
    print('✕')

print('creating windows directory ... ', end='')
# makedirs (unlike mkdir) also creates missing parent directories.
os.makedirs(write_dir)
print('√')

for emotion in EMOTIONS:
    print('processing audio for ' + emotion, end=' ... ')
    os.mkdir(os.path.join(write_dir, emotion))
    for fold in FOLDS:
        fold_path = os.path.join(BASE_DIR, AUDIO_DIR, emotion, fold)
        for filename in os.listdir(fold_path):
            file_path = os.path.join(fold_path, filename)
            x, _ = load(
                path=file_path,
                sr=Fs,
                mono=True
            )
            # Use the configured window length (currently one second, i.e.
            # the same value as Fs) so the config cell stays the single
            # place to change it.
            windows = get_windows(x, length=WIN_LEN)
            # NOTE: outputs are not separated by fold, so identically named
            # files in different folds would overwrite each other if FOLDS
            # lists more than one folder.
            windows_name = os.path.join(
                write_dir, emotion, filename + '.joblib'
            )
            dump(windows, windows_name)

    print('√')

cleaning already existing files ... √
creating windows directory ... √
processing audio for Approval ... √
processing audio for Disapproval ... √
processing audio for Neutral ... √
