In [101]:
from pydub import AudioSegment
from pathlib import Path
import numpy as np
import IPython.display as ipd
import pandas as pd
import _pickle as pickle
import unidecode


In [163]:
class HummingDB:
    """Index of humming recordings paired with the matching excerpt of the original track.

    Every ``*.wav`` file found recursively under ``data_path`` becomes one sample;
    its metadata dictionary is built by ``make_humming_sample_dictionary``.

    Args:
        data_path: Root directory searched recursively for humming ``*.wav`` files.
        audio_path: Root directory holding the original tracks.
        df_a: Spec table used for files whose name starts with ``100``.
        df_b: Spec table used for every other file.
        sr: Sample rate (Hz) the original track is converted to before the
            second-based time stamp is turned into sample offsets.
    """

    def __init__(self, data_path, audio_path, df_a, df_b, sr=44100):
        self.data_path = Path(data_path)
        self.audio_path = Path(audio_path)
        self.sr = sr
        self.song_list = list(self.data_path.rglob('*.wav'))
        self.samples = [make_humming_sample_dictionary(path, df_a, df_b) for path in self.song_list]
        self.num_songs = len(self.song_list)

    def __len__(self):
        """Return the number of indexed humming samples."""
        return len(self.samples)

    def _find_original_audio(self, track_id):
        """Return the path of the original track for ``track_id`` (a string).

        Tries ``<audio_path>/<id[:3]>/<id[3:6]>/<id>.aac``, then the same path with
        ``.m4a``; if neither exists, falls back to ``<audio_path>/qbh/<id>.aac``
        without checking that the fallback exists.
        """
        nested = self.audio_path / track_id[:3] / track_id[3:6] / (track_id + '.aac')
        for candidate in (nested, nested.with_suffix('.m4a')):
            if candidate.exists():
                return candidate
        return self.audio_path / 'qbh' / (track_id + '.aac')

    def __getitem__(self, index):
        """Return ``(humming, original_excerpt, sample_dict)`` for one sample.

        Both audio arrays are int16 PCM scaled into [-1, 1) by dividing by 32768.
        """
        selected_sample = self.samples[index]

        # NOTE(review): the humming recording is returned at its native sample rate
        # and channel layout (it is not converted to `sr`) -- confirm callers play
        # and process it at the right rate.
        song = AudioSegment.from_file(selected_sample['path'], 'wav')._data
        decoded = np.frombuffer(song, dtype=np.int16) / 32768

        orig_audio_path = self._find_original_audio(str(selected_sample['track_id']))
        # Force a known sample rate and mono layout: the time stamp below is in
        # seconds and is converted to sample offsets with `sr`, which is only
        # correct when the decoded stream really has that rate and one channel.
        orig_song = (
            AudioSegment.from_file(orig_audio_path, 'm4a')
            .set_frame_rate(self.sr)
            .set_channels(1)
            ._data
        )
        orig_decoded = np.frombuffer(orig_song, dtype=np.int16) / 32768

        # time_stamp looks like "31-51": start and end of the hummed part in seconds.
        time_pos = selected_sample['time_stamp'].split('-')
        start_position = int(time_pos[0]) * self.sr
        end_position = int(time_pos[1]) * self.sr

        return decoded, orig_decoded[start_position:end_position], selected_sample


def make_humming_sample_dictionary(path, df_a, df_b):
    """Build the metadata dictionary for one humming recording.

    The file stem is split on ``_``. Files whose first field is ``"100"`` carry six
    fields (song group, song index, humming type, time stamp, ``(<group><gender>``
    and ``<singer id>)``) and take their ``track_id`` from ``df_a``. All other files
    carry four fields and take ``track_id`` plus the singer information from the
    ``Identification code`` column of ``df_b``.

    Rows are matched on the ``file_name`` column against ``path.name``; the first
    match is used and an ``IndexError`` is raised when there is none.
    """
    def first_row_for_file(frame):
        # First spec row whose file_name equals this recording's file name.
        return frame.loc[frame['file_name'] == path.name].iloc[0]

    fields = path.stem.split('_')
    sample = {'path': str(path)}

    if fields[0] == "100":
        group, idx, humming_type, stamp, singer_tag, singer_raw = fields
        sample.update(
            song_group=group,
            song_idx=idx,
            humming_type=humming_type,
            time_stamp=stamp,
            singer_group=singer_tag,
            singer_id=singer_raw,
        )
        # singer_tag looks like "(PF": index 1 is the group, index 2 the gender.
        sample['singer_gender'] = singer_tag[2]
        sample['singer_group'] = singer_tag[1]
        sample['track_id'] = first_row_for_file(df_a)['track_id']
        # singer_raw looks like "KRJ)": drop the closing parenthesis.
        sample['singer_id'] = singer_raw[:-1]
        return sample

    group, idx, humming_type, stamp = fields
    sample.update(
        song_group=group,
        song_idx=idx,
        humming_type=humming_type,
        time_stamp=stamp,
    )
    record = first_row_for_file(df_b)
    sample['track_id'] = record['track_id']
    # Identification code looks like "NM_PJW": group, gender, "_", singer id.
    sample['singer_gender'] = record['Identification code'][1]
    sample['singer_group'] = record['Identification code'][0]
    sample['singer_id'] = record['Identification code'][-3:]
    return sample
    
class HummingSample:
    """Handle for a single humming recording identified by its file path.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, data_path):
        # Location of the recording, normalised to a Path.
        self.data_path = Path(data_path)
        # Underscore-separated fields of the file stem. Previously this list was
        # computed into a local and discarded; it is kept so it can be used.
        self.meta = self.data_path.stem.split('_')
    
# Build the dataset index. `selected_100` and `selected_900` are the Spec.xlsx
# frames created in the spreadsheet-loading cell (In [112]), which sits further
# down in this notebook, so that cell has to be executed before this one.
humming_db = HummingDB('/home/svcapp/userdata/humming_db', '/home/svcapp/userdata/flo_data_backup/', selected_100, selected_900)
# audio = humming_db[1]
# ipd.Audio(audio, rate=44100)
# Show the metadata dictionary of the first indexed sample as a sanity check.
print(humming_db.samples[0])


{'path': '/home/svcapp/userdata/humming_db/100/0~24/01_P/100_18_D_31-51_(PF_KRJ).wav', 'song_group': '100', 'song_idx': '18', 'humming_type': 'D', 'time_stamp': '31-51', 'singer_group': 'P', 'singer_id': 'KRJ', 'singer_gender': 'F', 'track_id': 437266253}


In [167]:
# Fetch one sample: the humming audio, the matching slice of the original track,
# and the sample's metadata dictionary.
audio, orig, meta = humming_db[200]
print(meta)
# NOTE(review): playback assumes the humming file is sampled at 44100 Hz; the
# `selected_900` table displayed in this notebook lists 48000 Hz -- confirm the
# rate of the file being played.
ipd.Audio(audio, rate=44100)

{'path': '/home/svcapp/userdata/humming_db/100/50~74/02_N/100_53_D_82-99_(NM_GHW).wav', 'song_group': '100', 'song_idx': '53', 'humming_type': 'D', 'time_stamp': '82-99', 'singer_group': 'N', 'singer_id': 'GHW', 'singer_gender': 'M', 'track_id': 420497440}


In [168]:
# Play the original-track excerpt returned alongside the humming audio.
ipd.Audio(orig, rate=44100)

In [170]:
# Number of distinct track ids referenced by the indexed humming samples.
len(set([x['track_id'] for x in humming_db.samples]))

999

In [134]:
# Count samples per singer group. Despite its name, `singer_ids` holds the
# distinct `singer_group` codes, not individual singer ids.
singer_ids = set([x['singer_group'] for x in humming_db.samples])
print(singer_ids, len(singer_ids))
for ids in singer_ids:
    print(sum([1 for x in humming_db.samples if x['singer_group'] == ids]))

{'P', 'N'} 2
560
840


In [112]:
# Load every sheet of the spec workbook; header=1 takes the column names from the
# second row of each sheet.
xls_file = pd.ExcelFile("/home/svcapp/userdata/humming_db/Spec.xlsx")
sheets = pd.read_excel(xls_file, sheet_name=None, header=1)
exp_id = list(sheets.keys())
# The first four sheets are concatenated into `selected_100`; the fifth sheet is
# used on its own as `selected_900`.
selected_100 = [sheets[x] for x in exp_id[:4]]
selected_100 = pd.concat(selected_100, ignore_index=True)
selected_900 = sheets[exp_id[4]]

# NOTE(review): pickle.load can execute arbitrary code contained in the file --
# only load flo_metadata.dat from a trusted source.
with open("flo_metadata.dat", "rb") as f:
    data_dict = pickle.load(f)
# for data in data_dict:
#     data['track_name'] = str(data['track_name'])
#     while data['track_name'][-1] == ' ':
#         data['track_name'] = data['track_name'][:-1]



def get_track_id(song_name, artist_name, data_dict):
    """Look up the track id of a song by exact title and partial artist match.

    Scans ``data_dict`` in order and returns the ``track_id`` of the first entry
    whose ``track_name`` (as a string) equals ``song_name`` and whose first
    ``artist_name_basket`` element (as a string) contains ``str(artist_name)``.
    When nothing matches, prints ``"<song_name> / <artist_name>"`` and returns None.
    """
    for entry in data_dict:
        title_matches = song_name == str(entry['track_name'])
        if not title_matches:
            continue
        first_artist = str(entry['artist_name_basket'][0])
        if str(artist_name) in first_artist:
            return entry['track_id']

    # No entry matched: report the unresolved pair so it can be fixed by hand.
    print(f"{song_name} / {artist_name}")
    return None
    
# Resolve a track id for every spec row by title and artist. The row counts
# (900 and 500) are hard-coded and must equal the frame lengths for the column
# assignments below to succeed.
track_ids = [get_track_id(selected_900['track_name'][x], selected_900['artist_name'][x], data_dict) for x in range(900) ]
# NOTE(review): `meta_100` is not defined in any cell visible in this notebook --
# confirm where it comes from (a fresh kernel would raise NameError here).
track_ids100 =  [get_track_id(selected_100['track_name'][x], selected_100['artist_name'][x], meta_100) for x in range(500) ]

# Rows that could not be resolved receive None (get_track_id prints them).
selected_100['track_id'] = track_ids100
selected_900['track_id'] = track_ids

In [123]:
# Inspect the singer identification codes (e.g. "NM_PJW") of the 900 set.
selected_900['Identification code']

0      NM_PJW
1      NM_PJW
2      NM_PJW
3      NM_PJW
4      NM_PJW
        ...  
895    PM_YYS
896    PM_YYS
897    PM_YYS
898    PM_YYS
899    PM_YYS
Name: Identification code, Length: 900, dtype: object

In [72]:

# from collections import Counter
# test = Counter(track_ids)
# test.most_common(10)

In [104]:
# Display the spec table of the 900 set.
selected_900

Unnamed: 0,Column1,album_name,artist_name,track_name,Identification code,file_name,Sampling rate,Channel,Bits,Duration,Number of Samples,Size on Memory,max_dB,track_ids
0,0,항해,AKMU (악동뮤지션),"어떻게 이별까지 사랑하겠어, 널 사랑하는 거지",NM_PJW,900_0_C_19-40.wav,48000 Hz,mono,16 bits,00:24.256,1164288,2274.13 kB,-5.23 dB,427675419
1,1,Love poem,아이유 (IU),Love poem,NM_PJW,900_1_B_15-43.wav,48000 Hz,mono,16 bits,00:32.917,1580031,3086.09 kB,-4.68 dB,433090157
2,2,늦은 밤 너의 집 앞 골목길에서,노을,늦은 밤 너의 집 앞 골목길에서,NM_PJW,900_2_C_43-66.wav,48000 Hz,mono,16 bits,00:26.112,1253376,2448.12 kB,-3.58 dB,433359099
3,3,조금 취했어,임재현,조금 취했어 (Prod. 2soo),NM_PJW,900_3_C_42-68.wav,48000 Hz,mono,16 bits,00:28.693,1377279,2690.10 kB,-3.50 dB,427664823
4,4,새 사랑,송하예,새 사랑,NM_PJW,900_4_D_15-41.wav,48000 Hz,mono,16 bits,00:30.059,1442816,2818.09 kB,-8.34 dB,431421835
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,895,Honeymoon,Beach Bunny,Promises,PM_YYS,900_895_B_40-65.wav,48000 Hz,mono,16 bits,00:29.952,1437696,2808.10 kB,-7.55 dB,434093591
896,896,Sunsets & Full Moons,The Script,If You Don't Love Yourself,PM_YYS,900_896_D_24-43.wav,48000 Hz,mono,16 bits,00:21.504,1032192,2016.10 kB,-8.82 dB,431984079
897,897,Skiptracing,Mild High Club,Head Out,PM_YYS,900_897_B_25-45.wav,48000 Hz,mono,16 bits,00:22.123,1061888,2074.10 kB,-12.98 dB,30367944
898,898,Dreaming,Green Day,Dreaming,PM_YYS,900_898_B_48-67.wav,48000 Hz,mono,16 bits,00:19.819,951296,1858.10 kB,-6.71 dB,435617063


In [None]:
class DataMonitor:
    """Browse tracks and their pitch contours stored under ``data_path``.

    When the path contains ``qbh`` the song list is the stems of all ``*.aac``
    files found recursively below it; otherwise the list is loaded from
    ``song_indices_in_flo.npy`` in the working directory. Pitch contours are read
    from ``pitch_<song id>.txt`` next to each audio file. Contour frames are
    treated as 1/100 s when converting to audio samples.

    NOTE(review): ``plot_and_play`` uses ``plt``, which is not imported in the
    import cell visible in this notebook -- confirm matplotlib.pyplot is imported
    as ``plt`` before calling it.
    """

    def __init__(self, data_path):
        self.data_path = Path(data_path)
        # Test the string form so that both str and Path arguments are accepted
        # (a substring test directly on a Path raises TypeError).
        if self._is_qbh():
            self.song_list = [x.stem for x in self.data_path.rglob('*.aac')]
        else:
            self.song_list = np.load('song_indices_in_flo.npy')
        self.sr = 44100

    def _is_qbh(self):
        """Return True when the data path refers to the flat ``qbh`` layout."""
        return 'qbh' in str(self.data_path)

    def _pitch_path(self, song_idx):
        """Return the path of the pitch file stored next to the song's audio."""
        return self.song_idx_to_path(song_idx).parent / 'pitch_{}.txt'.format(song_idx)

    def get_contour(self, index):
        """Load the pitch contour of the song at position ``index`` in the song list."""
        return load_melody(self._pitch_path(self.song_list[index]))

    def get_audio(self, song_id, id1, id2):
        """Return samples ``id1:id2`` of the decoded audio of ``song_id``."""
        song_path = self.song_idx_to_path(song_id)
        return self.load_audio(song_path)[id1:id2]

    def load_audio(self, track_path):
        """Decode a track to mono at ``self.sr`` and scale int16 PCM into [-1, 1)."""
        song = AudioSegment.from_file(track_path, 'm4a').set_frame_rate(self.sr).set_channels(1)._data
        return np.frombuffer(song, dtype=np.int16) / 32768

    def song_idx_to_path(self, idx):
        """Map a song id to its audio path.

        The ``qbh`` layout is flat (``<data_path>/<id>.aac``); otherwise files are
        nested as ``<data_path>/<id[:3]>/<id[3:6]>/<id>.aac``. If the ``.aac`` file
        does not exist the ``.m4a`` path is returned instead (without checking it).
        """
        idx = str(idx)
        if self._is_qbh():
            path = self.data_path / (idx + '.aac')
        else:
            path = self.data_path / idx[:3] / idx[3:6] / (idx + '.aac')
        if not path.exists():
            path = path.with_suffix('.m4a')
        return path

    def sample_random_melody(self):
        """Return a random melody segment of the raw contour of a random song.

        Keeps drawing songs until one yields at least one melody segment, so this
        does not return if no song in the list has any.
        """
        import random  # local import: `random` is not imported in the visible import cell

        while True:
            rand_index = random.randint(0, len(self) - 1)
            contour = self.get_contour(rand_index)
            c_contour = clearing_note(quantizing_hz(contour))
            melody_indices = self.find_melody_segment(c_contour)
            if len(melody_indices) > 0:
                a, b = melody_indices[random.randint(0, len(melody_indices) - 1)]
                return contour[a:b]

    def plot_and_play(self, idx):
        """Plot a random melody segment of song ``idx`` and return audio for it.

        Plots the raw contour and the elongated quantized contour of the segment,
        then returns the track audio for that segment mixed with a sine rendering
        of the raw contour. Returns None when the song has no melody segment.
        """
        import random  # local import: `random` is not imported in the visible import cell

        contour = self.get_contour(idx)
        c_contour = clearing_note(quantizing_hz(contour))
        e_contour = elongate_note(c_contour)

        melody_indices = self.find_melody_segment(c_contour)
        if len(melody_indices) > 0:
            a, b = melody_indices[random.randint(0, len(melody_indices) - 1)]
            plt.plot(contour[a:b])
            plt.plot(e_contour[a:b])
            # Contour frames are 1/100 s, so frame index * sr // 100 is the sample offset.
            audio = self.get_audio(self.song_list[idx], a * self.sr // 100, b * self.sr // 100)
            return self.generate_sine_wav(contour[a:b], audio)

    def find_melody_segment(self, contour, threshold=50):
        """Return ``(start, end)`` frame pairs of melody segments in ``contour``.

        ``threshold`` is forwarded as the zero-run threshold (it was previously
        accepted but ignored in favour of a hard-coded 50, which is still the default).
        """
        return find_melody_seg_fast(contour, zero_threshold=threshold, max_length=2000, min_length=500)

    def get_segmented_contours(self, song_idx):
        """Return a dict with ``melody`` and ``frame_pos`` for every segment of the raw contour."""
        contour = load_melody(self._pitch_path(song_idx))
        return [{'melody': contour[a:b], 'frame_pos': (a, b)} for (a, b) in self.find_melody_segment(contour)]

    def generate_sine_wav(self, melody, audio, frame_rate=100):
        """Mix a sine rendering of ``melody`` (Hz per frame) with ``audio``.

        The sine (amplitude 0.9) is cut to the length of ``audio`` and added to
        ``audio`` scaled by 0.3.
        """
        # Work in float so the accumulated phase is not truncated when the
        # melody holds integers.
        melody_resampled = np.repeat(np.asarray(melody, dtype=float), self.sr // frame_rate)
        phi = np.zeros_like(melody_resampled)
        # Phase is the running sum of the per-sample angular increment.
        phi[1:] = np.cumsum(2 * np.pi * melody_resampled[:-1] / self.sr, axis=0)
        sin_wav = 0.9 * np.sin(phi)
        sin_wav = sin_wav[:audio.shape[0]]
        return sin_wav + (audio * 0.3)

    def __len__(self):
        """Return the number of songs in the song list."""
        return len(self.song_list)
    

def quantizing_hz(contour):
    """Snap every positive pitch (Hz) to the nearest equal-tempered semitone.

    Semitones are measured relative to 440 Hz. Non-positive values are treated as
    "no pitch" and mapped to 0.

    Args:
        contour: Iterable of pitch values in Hz.

    Returns:
        A list with one quantized value per input value.
    """
    # Imported here because `log2` is not imported in the notebook's import cell.
    from math import log2

    output = []
    for pitch in contour:
        if pitch > 0:
            # Whole number of semitones away from 440 Hz, then back to Hz.
            semitones = round(log2(pitch / 440) * 12)
            output.append(440 * (2 ** (semitones / 12)))
        else:
            output.append(0)
    return output

def elongate_note(q_contour, patience=10):
    """Hold the last positive pitch across short unvoiced gaps.

    A non-positive frame is replaced by the most recent positive pitch. Once more
    than ``patience`` consecutive non-positive frames have been seen, the held
    pitch is dropped to 0 and the gap counter restarts.

    Returns:
        A new list with the same length as ``q_contour``.
    """
    sustained = []
    held_pitch = 0
    gap_length = 0
    for value in q_contour:
        if value > 0:
            held_pitch, gap_length = value, 0
        else:
            gap_length += 1
            if gap_length > patience:
                # Gap is too long to bridge: stop holding the previous note.
                held_pitch, gap_length = 0, 0
        sustained.append(held_pitch)
    return sustained

def clearing_note(q_contour, min_pitch_len=5):
    """Zero out every run of identical values shorter than ``min_pitch_len`` frames.

    A run ends where the value changes. The final run is closed at the end of the
    contour as well, so a too-short note at the very end is cleared like any other
    (previously the last run was never checked and always survived).

    Args:
        q_contour: Sequence of (quantized) pitch values.
        min_pitch_len: Minimum number of frames a run must last to be kept.

    Returns:
        A new list; ``q_contour`` itself is not modified.
    """
    output = list(q_contour)
    total = len(output)
    run_pitch = 0
    run_start = 0
    # Index `total` is a sentinel step that closes the last run.
    for i in range(total + 1):
        at_end = i == total
        if at_end or q_contour[i] != run_pitch:
            run_length = i - run_start
            if run_length < min_pitch_len:
                output[run_start:i] = [0] * run_length
            if not at_end:
                run_pitch = q_contour[i]
                run_start = i
    return output

def load_melody(path):
    with open(path, "r") as f:
        lines = f.readlines()
    return [float(x.split(' ')[1][:-2]) for x in lines]

def find_melody_seg_fast(contour, zero_threshold, max_length, min_length):
    """Return ``(start, end)`` frame pairs of melody segments found in ``contour``.

    Long zero runs (longer than ``zero_threshold``) are located first, the spans
    between consecutive runs become candidate segments, neighbouring candidates are
    merged by ``expand_voice`` subject to ``max_length``, and finally only segments
    strictly longer than ``min_length`` are kept, with bounds converted to int.
    """
    silent_runs = get_zero_slice_from_contour(contour, threshold=zero_threshold)
    segments = zero_slice_to_segment(silent_runs)
    if segments != []:
        # Merges neighbours in place.
        expand_voice(segments, max_length=max_length)

    kept = []
    for segment in segments:
        if segment[1] - segment[0] > min_length:
            kept.append((int(segment[0]), int(segment[1])))
    return kept

def get_zero_slice_from_contour(contour, threshold=50):
    """Return ``[start, end]`` bounds of long zero runs that precede a voiced region.

    A voiced region is a stretch of non-zero frames lying between two zero frames.
    The first slice runs from index 0 to the start of the first voiced region; each
    later slice runs from the end of one voiced region to the start of the next.
    Only slices strictly longer than ``threshold`` are returned. The zero run after
    the last voiced region is not part of the result.
    """
    zero_idx = np.nonzero(np.asarray(contour) == 0)[0]
    # Positions in zero_idx after which the next zero is more than one frame away,
    # i.e. non-zero frames sit in between.
    gap_after = np.nonzero(np.diff(zero_idx) > 1)[0]
    voiced_starts = zero_idx[gap_after] + 1
    voiced_ends = zero_idx[gap_after + 1]

    if voiced_starts.shape[0] == 0:
        return []

    slice_starts = [0] + list(voiced_ends[:-1])
    slice_ends = list(voiced_starts)
    return [
        [begin, end]
        for begin, end in zip(slice_starts, slice_ends)
        if end - begin > threshold
    ]

def zero_slice_to_segment(zeros_slice, min_voice_seg=10):
    """Turn consecutive zero slices into the voiced spans lying between them.

    For each pair of neighbouring slices the span from the end of the first to the
    start of the second is kept when it is at least ``min_voice_seg`` long.

    Returns:
        A list of ``(start, end)`` tuples.
    """
    segments = []
    for current, following in zip(zeros_slice, zeros_slice[1:]):
        start, end = current[1], following[0]
        if end - start >= min_voice_seg:
            segments.append((start, end))
    return segments

def expand_voice(voice_slice, max_length=2000):
    """Repeatedly merge the closest neighbouring segments, in place.

    A pair of neighbours is mergeable when the length of the first, the gap between
    them and the length of the second add up to less than ``max_length``. In each
    round every mergeable pair whose gap equals the smallest mergeable gap is
    merged, from the highest index down; rounds repeat until no pair is mergeable.

    Returns:
        The same ``voice_slice`` list object, modified in place.
    """
    def mergeable_pairs():
        # (index, gap) for each neighbouring pair that would stay under max_length.
        stats = get_length_and_distance_of_melody(voice_slice)
        return [
            (i, stats[i][1])
            for i in range(len(stats) - 1)
            if stats[i][0] + stats[i][1] + stats[i + 1][0] < max_length
        ]

    candidates = mergeable_pairs()
    while candidates:
        smallest_gap = min(gap for _, gap in candidates)
        targets = [i for i, gap in candidates if gap == smallest_gap]
        # Highest index first so the lower indices still refer to the same pairs.
        for index in reversed(targets):
            merge_voice_slice(voice_slice, index)
        candidates = [] if voice_slice == [] else mergeable_pairs()
    return voice_slice

def merge_voice_slice(voice_slice, index):
    """Replace the segments at ``index`` and ``index + 1`` by one spanning both.

    The list is modified in place; the merged segment keeps the start of the first
    and the end of the second.
    """
    # Popping twice at the same position removes two neighbouring entries.
    leading, trailing = voice_slice.pop(index), voice_slice.pop(index)
    voice_slice.insert(index, (leading[0], trailing[1]))

def get_length_and_distance_of_melody(voice_slice):
    """Return ``(length, gap_to_next)`` for every segment in ``voice_slice``.

    The gap of the last segment is the constant 10000. Raises ``IndexError`` when
    ``voice_slice`` is empty.
    """
    stats = []
    for current, following in zip(voice_slice, voice_slice[1:]):
        stats.append((current[1] - current[0], following[0] - current[1]))
    final = voice_slice[-1]
    stats.append((final[1] - final[0], 10000))
    return stats