In [1]:
import os
import sys
path_to_this_notebook = os.path.abspath('.')
PATH_TO_PROJECT = path_to_this_notebook[: path_to_this_notebook.find('notebooks')]
sys.path.append(PATH_TO_PROJECT)

import numpy as np
import librosa
import re
import pandas as pd
import shutil

from src.metadata_processing.process_recs_metadata import *
from src.util import overwrite_recs_as_npy

In [2]:
# These functions should be adapted to the naming scheme, desired metadata columns and data storage structure.
# Specify colonies manually.

def extract_metadata_african(rec_name):
    """ Build the metainfo dictionary for a recording of a whole colony.

        This function should be customized for the particular naming scheme of
        the recordings. The colony is the text before the first underscore; the
        characters between that underscore and the first space are treated as
        the date and re-joined as 'xx-xx-rest'. 'ratids' is always 'whole' and
        'number' is always None.

        Returns a dictionary {'name', 'colony', 'date', 'ratids', 'number'}. """
    underscore_at = rec_name.find('_')
    space_at = rec_name.find(' ')
    # str.find returns -1 when there is no space, so the slice then drops the last character
    raw_date = rec_name[underscore_at + 1 : space_at]
    return {'name' : rec_name,
            'colony' : rec_name[:underscore_at],
            'date' : '-'.join((raw_date[:2], raw_date[2:4], raw_date[4:])),
            'ratids' : 'whole',
            'number' : None}


def extract_metadata(rec_name, ratids=None):
    """ Build the metainfo dictionary for a single recording name.

        This function should be customized for the particular naming scheme of
        the recordings.

        rec_name : recording file name; the colony is the text before the first
                   underscore, the date is the first 'digits-digits-digits' group,
                   and the number is the text between the last underscore and
                   the first dot.
        ratids   : when None, the ids are taken from the first run of digits,
                   commas, spaces and underscores enclosed by underscores;
                   otherwise the given value is stored as is.

        Returns a dictionary {'name', 'colony', 'date', 'ratids', 'number'}.
        Raises IndexError when a required pattern is not found in rec_name. """
    first_underscore = rec_name.find('_')
    found_dates = re.findall('[0-9]+-[0-9]+-[0-9]+', rec_name)
    rec_date = found_dates[0]
    if ratids is None:
        # the match includes the enclosing underscores, so cut one character off each side
        id_chunk = re.findall('_[0-9, _]+_', rec_name)[0]
        ratids = id_chunk[1 : -1]
    number_start = rec_name.rfind('_') + 1
    number_end = rec_name.find('.')

    return {'name' : rec_name,
            'colony' : rec_name[:first_underscore],
            'date' : rec_date,
            'ratids' : ratids,
            'number' : rec_name[number_start : number_end]}

def build_path(recs_metadata, base_path):
    """ Extend recordings metadata with the folder where each recording will be saved.

        This function should be customized in order to create the desired structure
        of the folders storing recordings.

        recs_metadata : table of recording metadata with a 'colony' column
        base_path     : string prefix of every path; it is concatenated as is,
                        so it should end with a path separator

        Returns a new DataFrame with an added 'path' column holding
        base_path + colony + '/recordings/'. The input is left untouched. """
    # Explicit copy: pd.DataFrame(df) on its own is not guaranteed to copy, so adding
    # a column to the result could otherwise show up in the caller's frame.
    new_recs_metadata = pd.DataFrame(recs_metadata, copy=True)
    new_recs_metadata['path'] = base_path + new_recs_metadata['colony'] + '/recordings/'
    return new_recs_metadata

### Setting the path to the recordings you want to upload and specifying metainfo parameters

In [3]:
# location of the metadata file, supposed to be always same
path_to_recordings_metadata = PATH_TO_PROJECT + 'data/'
# exist_ok keeps this cell safe to re-run when the folder is already there
os.makedirs(path_to_recordings_metadata, exist_ok=True)
recordings_metadata_name = 'recordings_metadata.csv'



# path where recordings you want to upload are stored
path_to_new_recordings = PATH_TO_PROJECT + 'data/berlinannotated/'
sr = 22050 # Sampling rate at which data will be saved. Keep it 22050 for the NMR.

# Normalize upper-case '.WAV' extensions to '.wav'. Only the extension is rewritten:
# replacing 'WAV' anywhere in the name would also alter colony or id parts containing it.
for rec_name in os.listdir(path_to_new_recordings):
    stem, ext = os.path.splitext(rec_name)
    if ext == '.WAV':
        os.rename(path_to_new_recordings + rec_name, path_to_new_recordings + stem + '.wav')
overwrite_recs_as_npy(path_to_new_recordings, sr=sr)
# Keep only files whose extension is .npy and whose name does not contain 'split'
new_rec_names = [r for r in os.listdir(path_to_new_recordings) if r.endswith('.npy') and 'split' not in r]

Found 0 .wav and 0 .npy recordings. Wav ones will be overwritten with npy format.


In [37]:
# Extra metadata columns applied to every new recording.
# If some of the new recordings are already in the metadata, the value for 'experiment'
# will be concatenated; other values will be overwritten.
# The separator for experiments is ';', e.g. 'experiment': 'exp1;exp2;exp3'

additional_parameters = {
    'experiment': 'berlin',
    'processing stage': 'labeled and checked',
}

### Extracting metadata

In [41]:
# Build the metadata table of the new recordings, extend it with additional_parameters
# and attach the storage path of every recording.
# Colony info is changed manually in the functions above.
# Switch 'extract_metadata' to 'extract_metadata_african' depending on the data.

# Automated ratids extraction fails for whole colony recordings.
# In this case, change this variable to ratids = 'whole'
ratids = None
new_recs_metadata = build_path(
    add_columns_to_recs_metadata(
        pd.DataFrame([extract_metadata(name, ratids) for name in new_rec_names]),
        additional_parameters,
    ),
    path_to_recordings_metadata,
)

In [43]:
# Load the existing recordings' metadata file when it is present in the metadata folder,
# otherwise start from an empty table; then merge the new recordings into it.
old_recs_metadata = (
    pd.read_csv(path_to_recordings_metadata + recordings_metadata_name)
    if recordings_metadata_name in os.listdir(path_to_recordings_metadata)
    else pd.DataFrame()
)

updated_metadata = merge_recs_metadata(old_recs_metadata, new_recs_metadata)
updated_metadata.tail()


New recordings do not have values for following metadata columns:
[]
Current metadata file does not have these columns:
[]


  return pd.concat([old_recs_metadata_copy, new_recs_metadata_copy], 0).reset_index(drop=True)


Unnamed: 0,colony,date,experiment,name,number,path,processing stage,ratids
1945,proudfeet,26-03-22,single rat softchirps,proudfeet_26-03-22_0113_0000004.npy,4.0,/home/gr1/Projects/naked-mole-rats//data/proud...,split,113
1946,proudfeet,26-03-22,single rat softchirps,proudfeet_26-03-22_0118_0000005.npy,5.0,/home/gr1/Projects/naked-mole-rats//data/proud...,split,118
1947,proudfeet,26-03-22,single rat softchirps,proudfeet_26-03-22_0781_0000007.npy,7.0,/home/gr1/Projects/naked-mole-rats//data/proud...,split,781
1948,proudfeet,26-03-22,single rat softchirps,proudfeet_26-03-22_0261_0000008.npy,8.0,/home/gr1/Projects/naked-mole-rats//data/proud...,split,261
1949,berlinannotated,00-00-00,berlin,berlinannotated_00-00-00_000_0000000.npy,0.0,/home/gr1/Projects/naked-mole-rats/data/berlin...,labeled and checked,0


### Saving updated metadata

In [44]:
# Copies all new recordings to their new path, then saves the updated metadata file
for rec_name, rec_path in new_recs_metadata[['name', 'path']].values:
    # exist_ok avoids a separate isdir check and keeps the cell re-runnable
    os.makedirs(rec_path, exist_ok=True)
    src_file = path_to_new_recordings + rec_name
    dst_file = rec_path + rec_name
    # Compare resolved paths rather than raw strings: paths built by concatenation can
    # differ only by redundant separators (e.g. '//') while naming the same file, and
    # shutil.copy2 raises SameFileError when source and destination are the same file.
    if os.path.realpath(src_file) != os.path.realpath(dst_file):
        shutil.copy2(src=src_file, dst=dst_file)
# index=False keeps the row index out of the csv
updated_metadata.to_csv(path_to_recordings_metadata + recordings_metadata_name, index=False)

In [10]:
# Show the rows of the merged metadata whose colony is 'bird'
updated_metadata.loc[updated_metadata['colony'] == 'bird']

Unnamed: 0,colony,date,experiment,name,number,path,processing stage,ratids
