In [1]:
import os
import sys
path_to_this_notebook = os.path.abspath('.')
PATH_TO_PROJECT = path_to_this_notebook[: path_to_this_notebook.find('notebooks')]
sys.path.append(PATH_TO_PROJECT)

import numpy as np
import librosa
import re
import pandas as pd
import shutil

from src.preprocessing.process_recs_metadata import *
from src.util import overwrite_recs_as_npy

In [2]:
# These functions should be adapted to your naming scheme, desired metadata columns and data storage structure.
# Specify colonies manually.

def extract_metadata_african(rec_name):
    """ Build the metadata record for a recording named like '<prefix>_<digits> <rest>'.

        The digit run between the first '_' and the first ' ' of `rec_name` is
        split as 2 / 2 / remaining characters and re-joined with '-'
        (presumably day-month-year -- confirm against the recording names).
        Colony is always 'nmr', ratids is always 'whole' and number is None.

        Returns a dictionary with keys 'name', 'colony', 'date', 'ratids', 'number'.
        Customize this function for your own naming scheme. """
    date_start = rec_name.find('_') + 1
    date_end = rec_name.find(' ')
    digits = rec_name[date_start:date_end]
    # First two characters, next two characters, then whatever is left.
    date = '-'.join((digits[:2], digits[2:4], digits[4:]))
    return dict(name=rec_name,
                colony='nmr',
                date=date,
                ratids='whole',
                number=None)


def extract_metadata(rec_name):
    """ Parse a recording name into its metadata fields.

        Customize this function for your own naming scheme. As written it reads:
          colony -- everything before the first '_'
          date   -- first 'digits-digits-digits' match
          ratids -- first run of digits/commas/spaces/underscores enclosed in '_',
                    with the enclosing underscores stripped
          number -- text between the last '_' and the first '.'

        Returns a dictionary with keys 'name', 'colony', 'date', 'ratids', 'number'.
        Raises IndexError when the date or the rat-id pattern is not found. """
    first_underscore = rec_name.find('_')
    last_underscore = rec_name.rfind('_')
    first_dot = rec_name.find('.')

    date_matches = re.findall('[0-9]+-[0-9]+-[0-9]+', rec_name)
    id_matches = re.findall('_[0-9, _]+_', rec_name)

    metadata = {'name': rec_name}
    metadata['colony'] = rec_name[:first_underscore]
    metadata['date'] = date_matches[0]
    # Drop the leading and trailing underscore of the matched id run.
    metadata['ratids'] = id_matches[0][1:-1]
    metadata['number'] = rec_name[last_underscore + 1:first_dot]
    return metadata

def extract_metadata_mice(rec_name):
    """ Parse a mouse recording name into its metadata fields.

        Customize this function for your own naming scheme. As written it reads:
          date   -- first 'digits-digits-digits' match
          ratids -- first run of digits/commas/spaces/underscores enclosed in '_',
                    with the enclosing underscores stripped
        Colony is always 'mice'. Unlike the other extractors, no 'number' key is returned.

        Returns a dictionary with keys 'name', 'date', 'ratids', 'colony'.
        Raises IndexError when the date or the id pattern is not found.

        NOTE(review): an earlier assignment that took the text before 'pup' as the id
        was immediately overwritten by the regex result, so it had no effect and was
        dropped -- confirm the regex-based id is the intended one. """
    date = re.findall('[0-9]+-[0-9]+-[0-9]+', rec_name)[0]
    # Strip the leading and trailing underscore of the matched id run.
    ratids = re.findall('_[0-9, _]+_', rec_name)[0][1 : -1]

    return {'name' : rec_name,
            'date' : date,
            'ratids' : ratids,
            'colony': 'mice',}

def build_path(recs_metadata, base_path):
    """ Return a copy of the recordings metadata extended with a 'path' column.

        Customize this function to create the desired folder structure for the
        stored recordings. As written, each recording goes to
        `<base_path><colony>/recordings/`.

        recs_metadata -- table of recording metadata; must provide a 'colony' column
        base_path     -- string prefix of every path; it is concatenated as-is, so it
                         has to end with a path separator

        Returns a new DataFrame; the caller's `recs_metadata` is left untouched. """
    paths = base_path + recs_metadata['colony'] + '/recordings/'
    # pd.DataFrame(frame) does not copy by default, so assigning a column on it
    # could leak into the caller's frame; .assign works on a copy instead.
    return pd.DataFrame(recs_metadata).assign(path=paths)

### Setting the path to the recordings you want to upload and specifying metainfo parameters

In [3]:
# Folder holding the metadata file; it is expected to always be the same.
path_to_recordings_metadata = PATH_TO_PROJECT + 'data/'
# exist_ok=True replaces the separate isdir() check before creating the folder.
os.makedirs(path_to_recordings_metadata, exist_ok=True)
recordings_metadata_name = 'recordings_metadata.csv'

# Folder where the recordings you want to upload are stored. The trailing slash is
# required: file paths below are built by plain string concatenation.
path_to_new_recordings = PATH_TO_PROJECT + 'data/for nmr split test/'

# Normalise upper-case '.WAV' extensions to '.wav'. Only the extension is changed,
# so a 'WAV' occurring elsewhere in a recording name is kept, and files that
# already have another extension are not renamed at all.
for rec_name in os.listdir(path_to_new_recordings):
    stem, ext = os.path.splitext(rec_name)
    if ext == '.WAV':
        os.rename(path_to_new_recordings + rec_name, path_to_new_recordings + stem + '.wav')

# Project helper from src.util; presumably converts the recordings in the folder to
# .npy at the given sample rate -- confirm in src/util.
overwrite_recs_as_npy(path_to_new_recordings, sr=22050)

# Recordings to register: names containing '.npy' but not 'split'.
new_rec_names = [r for r in os.listdir(path_to_new_recordings) if '.npy' in r and 'split' not in r]

FileNotFoundError: [Errno 2] No such file or directory: '/home/gr1/Projects/naked-mole-rats/data/for nmr split test/'

In [None]:
# Scratch cell (not executed in the saved notebook): loads one example recording.
# `rec` and `sr` are not used by any other cell visible here.
example_wav_path = PATH_TO_PROJECT + 'hvdklein_02-09-19_0000001.wav'
rec, sr = librosa.load(example_wav_path)

In [4]:
# Extra metadata columns applied to every new recording.
# If some of the new recordings are already in the metadata, the value for
# 'experiment' is concatenated and all other values are overwritten.
# Experiments are separated by ';', e.g. 'experiment': 'exp1;exp2;exp3'

additional_parameters = dict([
    ('experiment', 'african'),
    ('processing stage', 'labeled and checked'),
])

### Extracting metadata

In [5]:
# Extract metadata for every new recording, extend it with additional_parameters
# and add the storage path.
# Colony info is set manually in the extractor functions above.
# Swap 'extract_metadata_african' for 'extract_metadata' or 'extract_metadata_mice'
# depending on the data.
new_recs_metadata = (
    pd.DataFrame(list(map(extract_metadata_african, new_rec_names)))
    .pipe(add_columns_to_recs_metadata, additional_parameters)
    .pipe(build_path, path_to_recordings_metadata)
)

In [6]:
# Load the existing recordings' metadata file if there is one, otherwise start
# from an empty table, then merge the new recordings into it.
old_recs_metadata = (
    pd.read_csv(path_to_recordings_metadata + recordings_metadata_name)
    if recordings_metadata_name in os.listdir(path_to_recordings_metadata)
    else pd.DataFrame()
)

updated_metadata = merge_recs_metadata(old_recs_metadata, new_recs_metadata, additional_parameters)
updated_metadata.head()


New recordings do not have values for following metadata columns:
[]
Current metadata file does not have these columns:
[]


  return pd.concat([old_recs_metadata_copy, new_recs_metadata_copy], 0).reset_index(drop = True)


Unnamed: 0,colony,date,experiment,name,number,path,processing stage,ratids
0,baratheon,21-06-19,single rat softchirps,baratheon_21-06-19_9449_0000001.npy,1.0,/home/nakedmoleratvoices/data/baratheon/record...,traced,9449
1,martell,08-11-19,single rat softchirps,martell_08-11-19_9438_0000035.npy,35.0,/home/nakedmoleratvoices/data/martell/recordings/,labeled,9438
2,dothrakia,21-08-19,single rat softchirps,dothrakia_21-08-19_3006_0000008.npy,8.0,/home/nakedmoleratvoices/data/dothrakia/record...,labeled,3006
3,targaryen,21-06-19,single rat softchirps,targaryen_21-06-19_1540_0000001.npy,1.0,/home/nakedmoleratvoices/data/targaryen/record...,traced and checked,1540
4,stark,21-06-19,single rat softchirps,stark_21-06-19_1545_0000001.npy,1.0,/home/nakedmoleratvoices/data/stark/recordings/,labeled and checked,1545


### Saving updated metadata

In [8]:
# Copy every new recording to its destination folder (created on demand),
# then write the updated metadata file.
for rec_name, rec_path in zip(new_recs_metadata['name'], new_recs_metadata['path']):
    os.makedirs(rec_path, exist_ok=True)
    source_file = path_to_new_recordings + rec_name
    target_file = rec_path + rec_name
    # Skip the copy when the recording already sits at its destination path.
    if source_file != target_file:
        shutil.copy2(src=source_file, dst=target_file)
updated_metadata.to_csv(path_to_recordings_metadata + recordings_metadata_name, index=None)

In [9]:
# Show the rows of the updated metadata that belong to the 'nmr' colony.
updated_metadata.loc[updated_metadata['colony'].eq('nmr')]

Unnamed: 0,colony,date,experiment,name,number,path,processing stage,ratids
1748,nmr,28-08-2019,african,nmr_28082019 Mic 2 Tag 6 Vid Left Part 1_ptA.npy,,/home/nakedmoleratvoices/data/nmr/recordings/,labeled,whole
1749,nmr,26-08-2019,african,nmr_26082019 Mic 2 Tag 6 Vid Left Part 3_ptE.npy,,/home/nakedmoleratvoices/data/nmr/recordings/,labeled,whole
1750,nmr,23-08-2019,african,nmr_23082019 Mic 2 Tag 6 Vid Left Part 1_ptG.npy,,/home/nakedmoleratvoices/data/nmr/recordings/,labeled,whole
1751,nmr,23-08-2019,african,nmr_23082019 Mic2 Tag 6 Vid Left Part 3_ptD.npy,,/home/nakedmoleratvoices/data/nmr/recordings/,labeled,whole
1752,nmr,28-08-2019,african,nmr_28082019 Mic 2 Tag 6 Vid Left Part 3_ptC.npy,,/home/nakedmoleratvoices/data/nmr/recordings/,labeled,whole
...,...,...,...,...,...,...,...,...
1805,nmr,23-08-2019,african,nmr_23082019 Mic2 Tag 6 Vid Left Part 3_ptA.npy,,/home/nakedmoleratvoices/data/nmr/recordings/,labeled,whole
1806,nmr,28-08-2019,african,nmr_28082019 Mic 2 Tag 6 Vid Left Part 5_ptB.npy,,/home/nakedmoleratvoices/data/nmr/recordings/,labeled,whole
1846,nmr,26-02-019,african,nmr_2602019 Mic 2 Tag 6 Vid Left Part 2_ptC_an...,,/home/nakedmoleratvoices/data/nmr/recordings/,labeled and checked,whole
1847,nmr,26-02-019,african,nmr_2602019 Mic 2 Tag 6 Vid Left Part 2_ptB_an...,,/home/nakedmoleratvoices/data/nmr/recordings/,labeled and checked,whole
