In [19]:
import librosa
import IPython.display as ipd
import glob
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import wavfile
import os
from tqdm import tqdm

In [21]:
def float2pcm(sig, dtype='int16'):
    """Scale a floating point signal to integer PCM samples.

    The input is multiplied by half of the integer range of *dtype*, shifted
    so that 0.0 lands on the midpoint of that range (0 for signed types),
    clipped to the representable range and cast to *dtype*.  Input values
    outside [-1.0, 1.0) therefore saturate.  No dithering or rounding step is
    applied before the integer cast.

    This is only one of several possible float-to-PCM scalings; see
    http://blog.bjornroche.com/2009/12/int-float-int-its-jungle-out-there.html
    for a comparison.  Adapted from
    https://gist.github.com/HudsonHuang/fbdf8e9af7993fe2a91620d3fb86a182

    Parameters
    ----------
    sig : array_like
        Signal to convert; must have a floating point dtype.
    dtype : data type, optional
        Target integer dtype (signed or unsigned).  Defaults to ``'int16'``.

    Returns
    -------
    numpy.ndarray
        The scaled and clipped samples with dtype *dtype*.

    Raises
    ------
    TypeError
        If *sig* is not a float array or *dtype* is not an integer type.
    """
    samples = np.asarray(sig)
    if samples.dtype.kind != 'f':
        raise TypeError("'sig' must be a float array")

    target = np.dtype(dtype)
    if target.kind not in 'iu':
        raise TypeError("'dtype' must be an integer type")

    limits = np.iinfo(target)
    # Half of the full integer range, e.g. 32768 for 16-bit samples.
    half_range = 2 ** (limits.bits - 1)
    # Midpoint of the target range: 0 for signed types, half_range for unsigned.
    midpoint = limits.min + half_range

    scaled = samples * half_range + midpoint
    return scaled.clip(limits.min, limits.max).astype(target)

In [5]:
# Gather every WAV path in the source dataset directory, then order the list
# in place so later cells see a stable (plain string) ordering.
data = glob.glob("/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k/*.wav")
data.sort()
len(data)

4000

In [6]:
# Peek at the first ten paths. The ordering is by plain string comparison,
# so e.g. "academia10" sorts before "academia2".
data[0:10]

['/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k/academia1.wav',
 '/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k/academia10.wav',
 '/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k/academia100.wav',
 '/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k/academia101.wav',
 '/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k/academia102.wav',
 '/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k/academia103.wav',
 '/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k/academia104.wav',
 '/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k/academia105.wav',
 '/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k/academia106.wav',
 '/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k/academia107.wav']

In [14]:
# Directory part of the first dataset path; rename_files reads its input
# files from here.
folder = os.path.split(data[0])[0]
folder

'/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k'

In [71]:
# Destination directory for the renamed copies of the dataset.
out_folder = "/home/jovyan/work/OneDrive/Documentos/Doutorado/dataset/GPS_cmd_16k_renamed"
# exist_ok=True keeps this cell safe to re-run when the directory is already there.
os.makedirs(out_folder, exist_ok=True)

In [69]:
def rename_files(intervals, user, src_dir=None, dst_dir=None):
    """Re-encode the dataset's WAV files under speaker-tagged names.

    For every place keyword and every ``(first, last)`` interval, the files
    ``{place}{first}.wav`` ... ``{place}{last}.wav`` in *src_dir* are loaded,
    converted to 16-bit PCM and written to *dst_dir* as
    ``{place}_{speaker}_{NNN}.wav``.  ``speaker`` is the entry of *user*
    paired with the interval, and ``NNN`` restarts at 001 for each interval.
    The source files are left untouched.

    Parameters
    ----------
    intervals : iterable of (int, int)
        Inclusive ranges of original file numbers, one range per speaker.
    user : sequence of str
        Speaker identifiers; ``user[k]`` labels the k-th interval.  Entries
        beyond the number of intervals are ignored.
    src_dir : str, optional
        Directory holding the original files.  Defaults to the notebook-level
        ``folder`` variable.
    dst_dir : str, optional
        Directory receiving the renamed files; created if it does not exist.
        Defaults to *src_dir* with ``"GPS_cmd_16k"`` replaced by
        ``"GPS_cmd_16k_renamed"``.

    Raises
    ------
    ValueError
        If fewer speaker identifiers than intervals are supplied.  This is
        checked before any file is written, so a bad call cannot leave a
        half-converted output directory behind.
    """
    intervals = list(intervals)
    if len(user) < len(intervals):
        raise ValueError(
            f"got {len(intervals)} intervals but only {len(user)} user ids"
        )

    if src_dir is None:
        src_dir = folder
    if dst_dir is None:
        dst_dir = src_dir.replace("GPS_cmd_16k", "GPS_cmd_16k_renamed")
    os.makedirs(dst_dir, exist_ok=True)

    sample_rate = 16000
    places = ["academia", "aeroporto", "casa", "cinema", "escola", "faculdade", "farmacia", "hospital", "mercado", "trabalho"]
    for place in tqdm(places):
        for (init, end), speaker in zip(intervals, user):
            # idx is the per-speaker running number; i is the original file number.
            for idx, i in enumerate(range(init, end + 1), start=1):
                # Build both paths from their parts instead of string-replacing
                # inside the full path, so directory names can never be rewritten
                # by accident.
                old_name = os.path.join(src_dir, f"{place}{i}.wav")
                out_name = os.path.join(dst_dir, f"{place}_{speaker}_{idx:03}.wav")

                # librosa.load returns (samples, rate); only the float samples
                # are kept, since the rate is fixed by the sr argument.
                audio = librosa.load(old_name, sr=sample_rate)[0]
                audio_int16 = float2pcm(audio, dtype='int16')
                wavfile.write(out_name, sample_rate, audio_int16)

In [66]:
# Ten consecutive, inclusive (first, last) file-number ranges of width 10,
# covering files 1-100.
start_i = 1
start_end = 10
# A comprehension keeps loop temporaries out of the kernel's global namespace.
intervals_1 = [(start_i + offset, start_end + offset) for offset in range(0, 100, 10)]

In [67]:
# Display the generated (first, last) pairs as a sanity check.
intervals_1

[(1, 10),
 (11, 20),
 (21, 30),
 (31, 40),
 (41, 50),
 (51, 60),
 (61, 70),
 (71, 80),
 (81, 90),
 (91, 100)]

In [72]:
# Files 1-100 of every place: one speaker ID (F001-F010) per block of ten files.
user = [f"F{n:03}" for n in range(1, 11)]
rename_files(intervals_1, user)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:40<00:00,  4.10s/it]


In [51]:
# Ten consecutive, inclusive (first, last) file-number ranges of width 10,
# covering files 101-200.
start_i = 1
start_end = 10
# A comprehension keeps loop temporaries out of the kernel's global namespace.
intervals_2 = [(start_i + offset, start_end + offset) for offset in range(100, 200, 10)]

In [52]:
# Display the generated (first, last) pairs as a sanity check.
intervals_2

[(101, 110),
 (111, 120),
 (121, 130),
 (131, 140),
 (141, 150),
 (151, 160),
 (161, 170),
 (171, 180),
 (181, 190),
 (191, 200)]

In [73]:
# Files 101-200 of every place: one speaker ID (M001-M010) per block of ten files.
user = [f"M{n:03}" for n in range(1, 11)]
rename_files(intervals_2, user)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:40<00:00,  4.06s/it]


In [55]:
# Ten consecutive, inclusive (first, last) file-number ranges of width 10,
# covering files 201-300.
start_i = 1
start_end = 10
# A comprehension keeps loop temporaries out of the kernel's global namespace.
intervals_3 = [(start_i + offset, start_end + offset) for offset in range(200, 300, 10)]

In [56]:
# Display the generated (first, last) pairs as a sanity check.
intervals_3

[(201, 210),
 (211, 220),
 (221, 230),
 (231, 240),
 (241, 250),
 (251, 260),
 (261, 270),
 (271, 280),
 (281, 290),
 (291, 300)]

In [74]:
# Files 201-300 of every place: one speaker ID (F011-F020) per block of ten files.
user = [f"F{n:03}" for n in range(11, 21)]
rename_files(intervals_3, user)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:42<00:00,  4.22s/it]


In [58]:
# Ten consecutive, inclusive (first, last) file-number ranges of width 10,
# covering files 301-400.
start_i = 1
start_end = 10
# A comprehension keeps loop temporaries out of the kernel's global namespace.
intervals_4 = [(start_i + offset, start_end + offset) for offset in range(300, 400, 10)]

In [59]:
# Display the generated (first, last) pairs as a sanity check.
intervals_4

[(301, 310),
 (311, 320),
 (321, 330),
 (331, 340),
 (341, 350),
 (351, 360),
 (361, 370),
 (371, 380),
 (381, 390),
 (391, 400)]

In [75]:
# Files 301-400 of every place: one speaker ID (M011-M020) per block of ten files.
user = [f"M{n:03}" for n in range(11, 21)]
rename_files(intervals_4, user)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:43<00:00,  4.36s/it]
