In [1]:
import os
import sys
# Locate the project root: everything in the current working directory path that
# precedes the first occurrence of 'notebooks'.
path_to_this_notebook = os.path.abspath('.')
if 'notebooks' in path_to_this_notebook:
    PATH_TO_PROJECT = path_to_this_notebook[: path_to_this_notebook.find('notebooks')]
else:
    # str.find would return -1 here and the slice would silently chop the last
    # character off the path. Fall back to the working directory itself,
    # with a trailing separator because later cells build paths by concatenation.
    PATH_TO_PROJECT = os.path.join(path_to_this_notebook, '')
# Make the project's `src` package importable from this notebook.
sys.path.append(PATH_TO_PROJECT)

import numpy as np
import librosa
import re
import pandas as pd
import shutil

from src.preprocessing.process_recs_metadata import *
from src.util import overwrite_recs_as_npy

In [2]:
# These functions should be adapted to the naming scheme, desired metadata columns and data storage structure.
# Specify colonies manually.

def extract_metadata_african(rec_name):
    """ Build the metadata dictionary for a recording that follows the "african" naming scheme.
        The date is read from the characters between the first '_' and the first ' ' of the
        name and re-joined with '-' as <2 chars>-<2 chars>-<remainder>.
        'colony' is always 'nmr', 'ratids' is always 'whole' and 'number' is always None.
        Customize this function when the naming scheme of the recordings changes. """
    date_start = rec_name.find('_') + 1
    date_stop = rec_name.find(' ')
    raw_date = rec_name[date_start:date_stop]
    date = '-'.join((raw_date[:2], raw_date[2:4], raw_date[4:]))
    return dict(name=rec_name,
                colony='nmr',
                date=date,
                ratids='whole',
                number=None)


def extract_metadata(rec_name):
    """ Parse a recording name of the form <colony>_<date>_<ratids>_<number>.<ext> into a
        metadata dictionary with keys 'name', 'colony', 'date', 'ratids' and 'number'.
        Raises IndexError when the name contains no dash-separated date or no
        underscore-delimited id group.
        Customize this function when the naming scheme of the recordings changes. """
    date_matches = re.findall('[0-9]+-[0-9]+-[0-9]+', rec_name)
    ratid_matches = re.findall('_[0-9, _]+_', rec_name)
    first_underscore = rec_name.find('_')
    last_underscore = rec_name.rfind('_')
    first_dot = rec_name.find('.')

    metadata = {}
    metadata['name'] = rec_name
    # colony is everything before the first underscore
    metadata['colony'] = rec_name[:first_underscore]
    metadata['date'] = date_matches[0]
    # strip the underscores that delimit the id group
    metadata['ratids'] = ratid_matches[0][1:-1]
    # number sits between the last underscore and the first dot
    metadata['number'] = rec_name[last_underscore + 1:first_dot]
    return metadata

def extract_metadata_mice(rec_name):
    """ Extract metadata from the name of a mouse recording,
        e.g. 'C57pup_08-01-00_0215_0000008.npy'.
        Output is the dictionary with keys 'name', 'date', 'ratids' and 'colony';
        'colony' is always 'mice'. Unlike the other extractors, no 'number' key is returned.
        Raises IndexError when the name contains no dash-separated date or no
        underscore-delimited id group. """
    # first dash-separated digit triple in the name
    date = re.findall('[0-9]+-[0-9]+-[0-9]+', rec_name)[0]
    # first underscore-delimited group of digits; [1 : -1] drops the delimiting underscores
    ratids = re.findall('_[0-9, _]+_', rec_name)[0][1 : -1]

    return {'name' : rec_name,
            'date' : date,
            'ratids' : ratids, 
            'colony': 'mice',}

def build_path(recs_metadata, base_path):
    """ This function should be customized in order to create desired structure
        of the folders storing recordings. Using recording metadata as an input
        it builds path where recording will be saved and extends metadata with it.

        recs_metadata : table of recordings metadata with a 'colony' column
        base_path     : string prefix of every path; it is concatenated as is,
                        so it has to end with a path separator
        Returns a new DataFrame with an extra 'path' column holding
        base_path + colony + '/recordings/'. The input is left unmodified. """
    paths = base_path + recs_metadata['colony'] + '/recordings/'
    # copy=True: without it the new frame may share its data with the input,
    # so adding the column could leak into the caller's DataFrame
    new_recs_metadata = pd.DataFrame(recs_metadata, copy=True)
    new_recs_metadata['path'] = paths
    return new_recs_metadata

### Setting the path to the recordings you want to upload and specifying metainfo parameters

In [3]:
# Location of the metadata file; it is supposed to always be the same.
DATA_FOLDER_NAME = 'data/'
path_to_recordings_metadata = PATH_TO_PROJECT + DATA_FOLDER_NAME
# exist_ok=True creates the folder only when it is missing.
os.makedirs(path_to_recordings_metadata, exist_ok=True)
recordings_metadata_name = 'recordings_metadata.csv'

# Passed to overwrite_recs_as_npy as `sr` (presumably the sampling rate in Hz - TODO confirm).
sr = 22050

# Path where the recordings you want to upload are stored.
path_to_new_recordings = PATH_TO_PROJECT + 'mouse vocalizations from mousetube/'
# Normalize upper-case '.WAV' extensions to '.wav'. Only the extension is rewritten,
# so a 'WAV' substring elsewhere in a file name is left alone.
for rec_name in os.listdir(path_to_new_recordings):
    rec_stem, rec_ext = os.path.splitext(rec_name)
    if rec_ext == '.WAV':
        os.rename(path_to_new_recordings + rec_name, path_to_new_recordings + rec_stem + '.wav')
# Converts the .wav recordings in the folder to .npy (see the message it prints).
overwrite_recs_as_npy(path_to_new_recordings, sr=sr)
# Keep files that really end in '.npy' and skip names containing 'split'.
new_rec_names = [r for r in os.listdir(path_to_new_recordings) if r.endswith('.npy') and 'split' not in r]

Found 0 .wav and 42 .npy recordings. Wav ones will be overwritten with npy format.


In [4]:
# Extra metadata columns applied to every new recording.
# If a new recording is already present in the metadata, its 'experiment' value is
# concatenated to the existing one, while all other values are overwritten.
# Experiments are separated by ';', e.g. 'experiment': 'exp1;exp2;exp3'

additional_parameters = {
    'experiment': 'single rat softchirps',
    'processing stage': 'fresh',
}

### Extracting metadata

In [8]:
# Build one metadata row per new recording, extend the table with additional_parameters
# and attach the storage path of every recording.
# Swap 'extract_metadata_mice' for 'extract_metadata_african' or 'extract_metadata'
# depending on the data; the colony info is defined by those extractor functions above.
new_recs_metadata = build_path(
    add_columns_to_recs_metadata(
        pd.DataFrame(list(map(extract_metadata_mice, new_rec_names))),
        additional_parameters,
    ),
    path_to_recordings_metadata,
)

In [10]:
# Load the existing recordings' metadata file when the data folder contains one,
# otherwise start from an empty table.
old_recs_metadata = (
    pd.read_csv(path_to_recordings_metadata + recordings_metadata_name)
    if recordings_metadata_name in os.listdir(path_to_recordings_metadata)
    else pd.DataFrame()
)

# Merge the new rows into the old table and preview the result.
updated_metadata = merge_recs_metadata(old_recs_metadata, new_recs_metadata, additional_parameters)
updated_metadata.head()


New recordings do not have values for following metadata columns:
['number']
Current metadata file does not have these columns:
[]
42/42 recordings are already in the metadata


Unnamed: 0,name,date,ratids,colony,experiment,processing stage,path,number
0,C57pup_08-01-00_0215_0000008.npy,08-01-00,215,mice,mice,split,/home/gr1/Projects/Naked-mole-rat-voices/mice_...,
1,FVBpup_01-01-00_0142_0000001.npy,01-01-00,142,mice,mice,fresh,/home/gr1/Projects/Naked-mole-rat-voices/mice_...,
2,C57pup_03-01-00_0211_0000003.npy,03-01-00,211,mice,mice,fresh,/home/gr1/Projects/Naked-mole-rat-voices/mice_...,
3,C57pup_10-01-00_0215_0000010.npy,10-01-00,215,mice,mice,fresh,/home/gr1/Projects/Naked-mole-rat-voices/mice_...,
4,C57pup_02-01-00_0214_0000002.npy,02-01-00,214,mice,mice,fresh,/home/gr1/Projects/Naked-mole-rat-voices/mice_...,


### Saving updated metadata

In [11]:
# Copy every new recording to its destination folder, creating the folder when needed.
for rec_name, rec_path in new_recs_metadata[['name', 'path']].values:
    os.makedirs(rec_path, exist_ok=True)
    shutil.copy2(src = os.path.join(path_to_new_recordings, rec_name),
                 dst = os.path.join(rec_path, rec_name))
# Save the updated metadata file only after all recordings were copied.
# index=False keeps the row index out of the csv, so it does not come back as an
# extra unnamed column when the file is read again.
updated_metadata.to_csv(os.path.join(path_to_recordings_metadata, recordings_metadata_name), index=False)