In [1]:
import os
import sys
# Locate the project root: the part of the working-directory path that precedes the
# first occurrence of 'notebooks'. The result keeps its trailing path separator, which
# the rest of the notebook relies on when concatenating sub-paths.
path_to_this_notebook = os.path.abspath('.')
notebooks_pos = path_to_this_notebook.find('notebooks')
if notebooks_pos == -1:
    # str.find returns -1 when the marker is missing; slicing with [:-1] would silently
    # chop the last character off the path. Fall back to the working directory instead.
    # NOTE(review): this assumes the kernel was started in the project root - confirm.
    PATH_TO_PROJECT = os.path.join(path_to_this_notebook, '')
    print("'notebooks' not found in %s; using it as the project root" % path_to_this_notebook)
else:
    PATH_TO_PROJECT = path_to_this_notebook[:notebooks_pos]
# Re-running the cell must not pile up duplicate entries on sys.path.
if PATH_TO_PROJECT not in sys.path:
    sys.path.append(PATH_TO_PROJECT)

In [2]:
import os
import pandas as pd
import numpy as np
import librosa
from skimage.transform import resize
import matplotlib.pyplot as plt
from collections import defaultdict
import re
from src.preprocessing.filters import filter_recs_metadata
from src.util import make_image
from src.data_representations.process_traces import *

%matplotlib inline

In [3]:
import pandas as pd
import numpy as np
import sys
import os


def find_all(a_str, sub):
    """Yield the start index of every non-overlapping occurrence of ``sub`` in ``a_str``.

    The search runs left to right; after a match it resumes right after the
    matched text, so overlapping occurrences are not reported.

    Parameters
    ----------
    a_str : str
        String to search in.
    sub : str
        Non-empty substring to look for.

    Yields
    ------
    int
        Index in ``a_str`` where an occurrence of ``sub`` starts.

    Raises
    ------
    ValueError
        If ``sub`` is empty. Because this is a generator, the error surfaces on
        the first iteration rather than at call time.
    """
    if not sub:
        # An empty needle matches at every position and the cursor would never
        # advance, which used to make this generator loop forever.
        raise ValueError('sub must be a non-empty string')
    start = a_str.find(sub)
    while start != -1:
        yield start
        start = a_str.find(sub, start + len(sub))


def change_path(old_path):
    """Re-root a stored path under the current project's data folder.

    Keeps the tail of ``old_path`` that starts at its third-from-last '/'
    and prefixes it with ``PATH_TO_PROJECT + DATA_FOLDER_NAME``.

    Raises IndexError when ``old_path`` contains fewer than three '/' characters.
    """
    slash_positions = [pos for pos in find_all(old_path, '/')]
    tail_start = slash_positions[-3]
    tail = old_path[tail_start:]
    return PATH_TO_PROJECT + DATA_FOLDER_NAME + tail

DATA_FOLDER_NAME = 'data_redo'
print('Path to the project:\n', PATH_TO_PROJECT)

# The metadata table is read and then overwritten in place, so build its location once.
recs_metadata_file = PATH_TO_PROJECT + '%s/recordings_metadata.csv' % DATA_FOLDER_NAME
recs_metadata = pd.read_csv(recs_metadata_file)
# DataFrame.copy() keeps each column's dtype; round-tripping through np.copy turned a
# mixed-type table into a single object array, so every column came back as object.
recs_metadata_new = recs_metadata.copy()
# Point every stored recording path at this checkout of the project.
recs_metadata_new['path'] = recs_metadata_new['path'].apply(change_path)
recs_metadata_new.to_csv(recs_metadata_file, index=False)

Path to the project:
 /home/gr1/Projects/Naked-mole-rat-voices/


In [4]:
import shutil

# Gather the .npy / .txt files lying directly in each colony folder into a
# 'recordings' sub-folder of that colony.
for col in ['stark', 'lannister', 'baratheon']:
    p = PATH_TO_PROJECT  + 'data_redo/%s/' % col
    recordings_dir = p + 'recordings'
    # Match on the file extension and on regular files only: a plain substring test
    # ('txt' in f) would also pick up directories or unrelated files whose names merely
    # contain those letters.
    files = [f for f in os.listdir(p)
             if f.endswith(('.npy', '.txt')) and os.path.isfile(p + f)]
    # exist_ok makes the cell safe to re-run once the folder has been created.
    os.makedirs(recordings_dir, exist_ok=True)
    for f in files:
        shutil.move(p + f, recordings_dir + '/' + f)
    
    

### Parameters

In [7]:
# Location of the recordings metadata table
recordings_metadata_name = 'recordings_metadata.csv'
path_to_recordings_metadata = PATH_TO_PROJECT + 'data_redo/'

# Selection criteria handed to filter_recs_metadata
# ('all' presumably disables the corresponding filter - see filter_recs_metadata)
stages = ['labeled']
experiments = 'all'
dates = 'all'
colonies = ['stark']

# Read the table and keep only the rows selected by the mask
recs_metadata = pd.read_csv(path_to_recordings_metadata + recordings_metadata_name)
filters_mask = filter_recs_metadata(
    recs_metadata,
    dates=dates,
    colonies=colonies,
    stages=stages,
    experiments=experiments,
)
recs_to_check = recs_metadata[filters_mask]
print(recs_to_check.shape)
recs_to_check.head(10)

(0, 8)


  f"evaluating in Python space because the {repr(op_str)} "


Unnamed: 0,colony,date,experiment,name,number,path,processing stage,ratids


### Setting abbreviations for different classes

In [None]:
# Abbreviations the reviewer may type for each class: class name -> accepted shortcuts.
classes_shortcuts = {
    'combo': ['cm', 'cmb'],
    'combotwo': ['cm2', 'cmtwo', 'cmbtwo', 'cmb2', 'combo2'],
    'downsweep': ['dw', 'ds', 'dsw'],
    'grunt': ['gr'],
    'loudchirp': ['lc', 'ldch', 'lchp'],
    'mordent': ['mr'],
    'vtrill': ['vt'],
    'noise': ['ns', 'n'],
    'scream': ['scrm'],
    'softchirp': ['sc', 'sfch', 'sch'],
    'upsweep': ['up', 'usw', 'uw'],
    'weirdo': ['wr', 'wrd'],
    'phee': ['ph'],
    'hiccup': ['hi'],
    'pupcheveron': ['pch'],
    'tweet': ['tw'],
    'pupsquawk': ['psq'],
    'pupcombo': ['pcm'],
    'tap': ['tp'],
    'RFIDscanner': ['scan'],
    'whistle': ['wh', 'wstl', 'wst', 'whs'],
    'badsplit': ['bs', 'bad'],
}

# Reverse lookup: shortcut -> class name. Built with an explicit loop so that a shortcut
# accidentally assigned to two classes fails loudly instead of silently overwriting one.
input_decoder = {}
for class_name, shortcuts in classes_shortcuts.items():
    for shortcut in shortcuts:
        if shortcut in input_decoder:
            raise ValueError('Shortcut %r is used for both %r and %r'
                             % (shortcut, input_decoder[shortcut], class_name))
        input_decoder[shortcut] = class_name


def decode(inp, old_class):
    """Translate what the reviewer typed into a class name.

    Parameters
    ----------
    inp : str
        Raw text typed by the reviewer.
    old_class : str
        Current label of the sound; returned when nothing was typed.

    Returns
    -------
    str
        ``old_class`` for empty (or whitespace-only) input, the matching class
        name for a known shortcut or an exact full class name, and the sentinel
        string ``'error'`` for anything else.
    """
    # Surrounding whitespace is a typing slip, not part of the answer.
    token = inp.strip()
    if token == '':
        return old_class
    if token in input_decoder:
        return input_decoder[token]
    # Typing the full class name is as unambiguous as typing its shortcut.
    if token in classes_shortcuts:
        return token
    # All shortcuts are lower-case, so accept them regardless of capitalisation.
    lowered = token.lower()
    if lowered in input_decoder:
        return input_decoder[lowered]
    return 'error'
    


# Checking

In [None]:
# --- options for this checking session ---
update_stage = True              # write the new processing stage into the metadata when a recording is done
check_certain_classes = True     # when False, every sound is shown regardless of the two settings below
classes_to_check = 'all'         # 'all', or a collection of class names to restrict the review to
classes_to_ignore = ['noise', 'softchirp', 'weirdo', 'badsplit']  # skipped while filtering is on
#classes_to_ignore = [None]

# Read the metadata from disk again and select the recordings to review.
recs_metadata = pd.read_csv(path_to_recordings_metadata + recordings_metadata_name)
filters_mask = filter_recs_metadata(recs_metadata, dates = dates, colonies = colonies,
                                    stages = stages, experiments = experiments)
sr = 22050  # presumably the sampling rate (Hz) of the stored recordings - TODO confirm
recs_to_check = recs_metadata[filters_mask]
total_recs = len(recs_to_check)
print('%d recs to check' % total_recs)
checked = 0

# Iterate with the row's own index label so that the stage is read from, and later written
# to, exactly this row. Looking the row up by file name picks the first match and would hit
# the wrong recording if two recordings share a name.
for rec_label, rec_path, rec_name in zip(recs_to_check.index,
                                         recs_to_check['path'],
                                         recs_to_check['name']):
    checked += 1
    rec_stage = recs_metadata.loc[rec_label, 'processing stage']
    rec_new_stage = rec_stage + ' and checked'

    print('Type shortcut for class to make relabel into this class',
          'Type nothing if label is good as it is')

    # The label table sits next to the recording under the same name with 'npy' -> 'txt'.
    txt_path = rec_path + rec_name.replace('npy', 'txt')
    df_txt = pd.read_csv(txt_path, sep = '\t')
    rec = np.load(rec_path + rec_name)
    print('Checking %d sounds from the recording %s' % (len(df_txt[~df_txt['cl'].isin(classes_to_ignore)]),
                                                        rec_name))
    cl_col = df_txt.columns.get_loc('cl')
    for ind in range(len(df_txt)):
        s, e = df_txt[['s', 'e']].iloc[ind]
        cl = df_txt['cl'].iloc[ind]
        if check_certain_classes:
            if cl in classes_to_ignore:
                continue
            if classes_to_check != 'all' and cl not in classes_to_check:
                continue

        # s and e are multiplied by sr to get the slice bounds of this sound in the recording.
        s_ind = int(sr * s)
        e_ind = int(sr * e)
        sound = rec[s_ind : e_ind]
        im = make_image(sound)
        plt.imshow(im)
        plt.title(cl)
        plt.show()

        # Keep asking until the answer decodes to a class (or is empty, i.e. keep the label).
        inp = input()
        new_cl = decode(inp, cl)
        while new_cl == 'error':
            print('Abbreviation not recognized, repeat!')
            inp = input()
            new_cl = decode(inp, cl)
        if new_cl != cl:
            print('Relabeled into %s' % new_cl)
        else:
            print('Keeping label %s' % cl)

        # Single-step positional assignment. The chained form df['cl'].iloc[ind] = ... may
        # write to a temporary copy and is a no-op under pandas Copy-on-Write.
        df_txt.iloc[ind, cl_col] = new_cl

    print('Saving updated txt, updating recordings processing stage to', rec_new_stage)
    print('Checked %d/%d' %(checked, total_recs))
    if update_stage:
        recs_metadata.loc[rec_label, 'processing stage'] = rec_new_stage

    # Persist after every recording so an interrupted session loses at most the current one.
    recs_metadata.to_csv(path_to_recordings_metadata + recordings_metadata_name, index = False)
    df_txt.to_csv(txt_path, index = False, sep = '\t')
    