In [2]:
import csv
import math
import random
from pathlib import Path

import _pickle as pickle
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from pydub import AudioSegment

import humming_data_utils as utils
from sampling_utils import downsample_contour_array

In [3]:
%load_ext autoreload
%autoreload 2
import humming_data_utils as utils
selected_100, selected_900 = utils.load_meta_from_excel()
humming_db = utils.HummingDB('/home/svcapp/userdata/humming_db', '/home/svcapp/t2meta/flo_new_music/music_100k', selected_100, selected_900)

track_ids = list(set([x['track_id'] for x in humming_db.samples]))

In [None]:
def normalized_vec_to_orig(norm_contour, mean_pitch=69, std=5.5201786930065415):
    """Convert a normalized contour back to frequencies in Hz.

    Args:
        norm_contour: 2-D array; column 0 holds the normalized pitch and
            column 1 is a flag that equals 1 on frames that carry a pitch.
        mean_pitch: value added back after scaling (MIDI note number).
        std: scale factor the normalized pitch is multiplied by.

    Returns:
        1-D array with the dtype of ``norm_contour``: the frequency in Hz for
        flagged frames and 0 everywhere else.
    """
    voiced = norm_contour[:, 1] == 1
    hz = np.zeros_like(norm_contour[:, 0])
    midi = norm_contour[voiced, 0] * std + mean_pitch
    # MIDI note 69 is the 440 Hz reference; 12 semitones per octave.
    hz[voiced] = 440 * 2 ** ((midi - 69) / 12)
    return hz


def generate_sine_wav(melody, frame_rate=10, sr=44100):
    """Synthesize a sine tone that follows a frame-wise frequency contour.

    Args:
        melody: sequence of frequencies in Hz, one value per frame.
        frame_rate: number of contour frames per second.
        sr: sample rate of the generated signal.

    Returns:
        1-D float array with ``sr // frame_rate`` samples per frame and a peak
        amplitude of 0.9.
    """
    freqs = np.asarray(melody)
    # An integer contour would make the phase accumulator below an integer
    # array and silently truncate every phase value, so promote it to float.
    if not np.issubdtype(freqs.dtype, np.floating):
        freqs = freqs.astype(float)
    melody_resampled = np.repeat(freqs, sr // frame_rate)
    # Integrate the instantaneous frequency so the phase stays continuous
    # across frame boundaries.
    phi = np.zeros_like(melody_resampled)
    phi[1:] = np.cumsum(2 * np.pi * melody_resampled[:-1] / sr, axis=0)
    return 0.9 * np.sin(phi)

In [None]:
torch.__version__

In [None]:
with open('humm_db_ids.dat', 'wb') as f:
    pickle.dump(track_ids, f)

In [None]:
with open('/home/svcapp/userdata/flo_melody/overlapped.dat', 'rb') as f:
    overlap_data = pickle.load(f)

In [None]:
with open('/home/svcapp/userdata/flo_melody/humm_array.dat', 'rb') as f:
    humm_array = pickle.load(f)

In [4]:
humming_pairs = [x for x in humming_db]

In [20]:
selected = [i for i,x in enumerate(humming_pairs) if x['meta']['track_id']==2799391]
selected

[280, 281, 345, 346, 347]

In [27]:
humm_audio, orig_audio, humm_data = humming_db._get_audio(selected[4])
humm_data

{'path': '/home/svcapp/userdata/humming_db/100/50~74/02_N/100_65_D_49-68_(NF_KSA).wav',
 'pitch_path': '/home/svcapp/userdata/humming_db/100/50~74/02_N/100_65_D_49-68_(NF_KSA).f0.csv',
 'song_group': '100',
 'song_idx': '65',
 'humming_type': 'D',
 'time_stamp': '49-68',
 'singer_group': 'N',
 'singer_id': 'KSA',
 'singer_gender': 'F',
 'track_id': 2799391}

In [30]:
ipd.Audio(humm_audio, rate=44100)

In [29]:
ipd.Audio(orig_audio, rate=44100)

In [12]:
humm_data

{'path': '/home/svcapp/userdata/humming_db/100/50~74/02_N/100_67_D_82-97_(NF_LYJ).wav',
 'pitch_path': '/home/svcapp/userdata/humming_db/100/50~74/02_N/100_67_D_82-97_(NF_LYJ).f0.csv',
 'song_group': '100',
 'song_idx': '67',
 'humming_type': 'D',
 'time_stamp': '82-97',
 'singer_group': 'N',
 'singer_id': 'LYJ',
 'singer_gender': 'F',
 'track_id': 30894451}

In [None]:
humm_array[0][0].shape

In [None]:
contour, orig_contour = humming_db[21]
plt.plot(contour[:,0])
plt.plot(orig_contour[:,0])

In [None]:
# Load the pitch track of the original song and of the humming recording for
# the first humming pair, then cut the original down to the hummed excerpt.
selected_sample = humming_pairs[0]
orig_audio_path = utils.get_orig_audio_path_by_id(selected_sample['meta']['track_id'], humming_db.audio_path)
orig_melody_path = utils.audio_path_to_pitch_path(orig_audio_path)
orig_melody = utils.load_melody_txt(orig_melody_path)
humm_melody = utils.load_crepe_pitch(selected_sample['meta']['pitch_path'])
# 'time_stamp' has the form "<start>-<end>"; both parts are parsed as integers.
time_pos = [int(x) for x in selected_sample['meta']['time_stamp'].split('-')]
# NOTE(review): the factor 100 presumably means the melody file has 100 frames
# per second and the time stamp is in seconds - confirm against load_melody_txt.
sliced_melody = orig_melody[time_pos[0]*100 :time_pos[1]*100 ]

In [None]:
plt.plot(sliced_melody)

In [None]:
def pitch_array_to_formatted(pitch_array, mean=61.702336487738215, std=5.5201786930065415):
    """Turn a 1-D pitch array into a (normalized pitch, voiced flag) table.

    Args:
        pitch_array: 1-D numpy array; a value of 0 marks a frame without pitch.
        mean: value subtracted from every pitch before scaling.
        std: value the centred pitch is divided by.

    Returns:
        Float array of shape ``(len(pitch_array), 2)``. Column 0 is
        ``(pitch - mean) / std`` on non-zero frames and 0 elsewhere; column 1
        is 1 on non-zero frames and 0 elsewhere.
    """
    voiced = pitch_array != 0
    formatted = np.zeros((len(pitch_array), 2))
    formatted[:, 0] = np.where(voiced, (pitch_array - mean) / std, 0)
    formatted[:, 1] = voiced
    return formatted

formatted= pitch_array_to_formatted(humm_melody)

In [None]:
np.log2(humm_melody)

In [None]:
humm_array = [(downsample_contour_array(utils.get_normalized_contour_from_sample(sample)), sample['track_id'], sample['time_stamp'] ) for sample in humming_db.samples if sample['song_group']=="100"]


In [None]:
with open('/home/svcapp/userdata/flo_melody/humm_array.dat', 'wb') as f:
    pickle.dump(humm_array, f)

In [None]:
len(humm_array)

In [None]:
# plt.plot(humm_melody) 
def filter_error(array, min_clip=-3.5, max_clip=2.5, ):
    """Zero out rows whose first column lies outside ``[min_clip, max_clip]``.

    Args:
        array: 2-D numpy array; column 0 is the value being range-checked.
        min_clip: smallest accepted value (inclusive).
        max_clip: largest accepted value (inclusive).

    Returns:
        A copy of ``array`` in which every out-of-range row is set to 0 across
        all columns. The input is left untouched.
    """
    values = array[:, 0]
    out_of_range = (values > max_clip) | (values < min_clip)
    cleaned = np.copy(array)
    cleaned[out_of_range] = 0
    return cleaned

# id = 10
# plt.plot(delete_zero(humm_array[id][0])[:,0])
# plt.plot(humm_array[id][0][:,0])



In [None]:
from inference import load_model, load_hparams, load_checkpoint

ckpt_path = "/home/svcapp/userdata/flo_model/contour_scheduled_hidden256_lr0.0001_201210-154640/checkpoint_best.pt"
hparams = load_hparams(ckpt_path)
model = load_model(hparams)
model = load_checkpoint(ckpt_path, model)

In [None]:
model

In [None]:
model(torch.Tensor(formatted).unsqueeze(0).cuda())

In [None]:
# embeddings = torch.load("/home/svcapp/userdata/flo_melody/overlapped_embedding.pt")
embeddings = torch.load("/home/svcapp/userdata/flo_model/qbh_embedding.pt")

In [None]:
with open("/home/svcapp/userdata/flo_melody/overlapped.dat", "rb") as f:
    ballade_contours = pickle.load(f)

In [None]:
# Distinct track ids that have at least one humming recording (element 1 of
# every humm_array entry). The original line was missing its closing
# parenthesis and did not parse.
total_humm_ids = {x[1] for x in humm_array}

In [None]:
from validation import cal_ndcg_single
from tqdm import tqdm

def remove_duplicate(seq):
    """Return the items of ``seq`` without repeats, keeping first-seen order.

    Items must be hashable.
    """
    seen = set()
    unique = []
    for item in seq:
        if item in seen:
            continue
        seen.add(item)
        unique.append(item)
    return unique

def cal_ndcg_of_loader(model, data_samples, total_embs, total_song_ids):
    """Top-10 hit rate of humming queries against a bank of song embeddings.

    Despite the name, no NDCG is computed: a query scores 1 when its track id
    is among the first 10 distinct song ids of the retrieved candidates and 0
    otherwise.

    Args:
        model: encoder mapping a ``(1, frames, 2)`` contour tensor to an
            embedding; it must live on the same device as ``total_embs``.
        data_samples: iterable of ``(contour, track_id, time_stamp)`` tuples.
        total_embs: 2-D tensor of reference embeddings, one row per segment.
            # NOTE(review): rows are presumably already L2-normalised so the
            # dot product is a cosine similarity - confirm at the call site.
        total_song_ids: 1-D integer tensor with the song id of every row of
            ``total_embs``.

    Returns:
        Fraction of queries that scored a hit; 0.0 when ``data_samples`` is
        empty.
    """
    model.eval()
    device = total_embs.device
    # topk raises when k exceeds the number of candidates.
    num_candidates = min(50, total_embs.shape[0])
    num_hits = 0
    num_queries = 0
    # Evaluation only: skip autograd bookkeeping.
    with torch.no_grad():
        for contours, track_id, _time_pos in tqdm(data_samples):
            query = torch.Tensor(contours).unsqueeze(0).to(device)
            anchor = model(query)
            anchor_norm = anchor / anchor.norm(dim=1)[:, None]
            similarity = torch.mm(anchor_norm, total_embs.transpose(0, 1))
            top_idx = torch.topk(similarity, k=num_candidates, dim=-1)[1]
            # The index tensor must sit on the same device as the id tensor.
            candidate_ids = total_song_ids[top_idx.to(total_song_ids.device)]
            # Several segments of one song can be retrieved; rank songs, not
            # segments, before taking the first 10.
            top_songs = remove_duplicate(candidate_ids.reshape(-1).tolist())[:10]
            num_hits += int(track_id in top_songs)
            num_queries += 1

    if num_queries == 0:
        return 0.0
    return num_hits / num_queries

# total_embs = torch.cat((embeddings['embs'], qbh_embeddings['embs']), dim=0).cuda()
# total_song_ids = torch.LongTensor(embeddings['ids'] + qbh_embeddings['ids'])
total_embs = embeddings['embs'].cuda()
total_song_ids = torch.LongTensor(embeddings['ids'])
cal_ndcg_of_loader(model, humm_array, total_embs, total_song_ids)

In [None]:
# Debug cell: embed the first humming contour after centring it inside a
# zero-padded 299-frame window and print the start of the embedding.
# NOTE: this rebinds the notebook-level names `track_ids`, `time_pos`,
# `valid_score` and `recommended_ids`.
model.eval()
valid_score = 0
recommended_ids = []
batch = humm_array[0]
contours, track_ids, time_pos = batch
contours = torch.Tensor(contours).unsqueeze(0)
dummy = torch.zeros(1, 299, 2)
# 28 frames of padding on each side leave 243 frames for the contour.
# NOTE(review): assumes the contour is exactly 243 frames long - confirm.
dummy[:,28:-28,:] = contours
anchor = model(dummy.cuda())
anchor_norm = anchor / anchor.norm(dim=1)[:, None]
similarity = torch.mm(anchor_norm, total_embs.transpose(0,1))

print(anchor[0][:10])
# recommends = torch.topk(similarity, k=50, dim=-1)[1]
# for i in range(recommends.shape[0]):
#     recommended_ids += recommends[i].cpu().numpy().tolist()
# recommends = total_song_ids[recommends]
# ndcg = [cal_ndcg_single(recommends[i,:], track_ids) for i in range(recommends.shape[0])]

# ndcg = sum(ndcg) / len(ndcg)
# valid_score = valid_score/(j+1)

In [None]:
model.fc.weight[0,:10]


In [None]:
from collections import Counter
# The original referenced `recommend_ids`, a name that is never defined in
# this notebook (NameError). The list of retrieved embedding indices is called
# `recommended_ids`.
# NOTE(review): `recommended_ids` is only initialised to [] in the debug cell
# above; it has to be filled by a retrieval loop before this count is useful.
count_id = Counter(recommended_ids)
count_id.most_common(10)

In [None]:
# Retrieve the 10 most similar reference segments for the first humming
# contour and overlay the best match on the query contour.
# NOTE: rebinds the notebook-level name `track_ids` to a single track id.
contours, track_ids, _ = humm_array[0]
contours = torch.Tensor(contours).unsqueeze(0)
anchor = model(contours.cuda())
anchor_norm = anchor / anchor.norm(dim=1)[:, None]
similarity = torch.mm(anchor_norm, total_embs.transpose(0,1))
#         similarity[0, j] = -100
recommends_idx = torch.topk(similarity, k=10, dim=-1)[1]
recommends = total_song_ids[recommends_idx]
ndcg = [cal_ndcg_single(recommends[i,:], track_ids) for i in range(recommends.shape[0])]
# NOTE(review): assumes row i of total_embs corresponds to ballade_contours[i] - confirm.
plt.plot(downsample_contour_array(ballade_contours[recommends_idx[0,0].item()]['contour'])[:,0])
plt.plot(contours[0,:,0])
# similarity[recommends_idx]

In [None]:
# play sound
humm_contour = contours[0].numpy()
humm_audio = generate_sine_wav(normalized_vec_to_orig(humm_contour))
ipd.Audio(humm_audio, rate=44100)


In [None]:
recomm_contour = downsample_contour_array(ballade_contours[recommends_idx[0,0].item()]['contour'])
recomm_audio = generate_sine_wav(normalized_vec_to_orig(recomm_contour))
ipd.Audio(recomm_audio, rate=44100)


In [None]:
plt.plot(anchor.cpu().detach().numpy()[0][:80])
plt.plot(total_embs[recommends_idx[0,0].item()].cpu().detach().numpy()[:80])

In [None]:
similarity[0][recommends_idx]

In [None]:
plt.plot(ballade_contours[recommends_idx[0,0].item()]['contour'][:,0])
plt.plot(contours[0,:,0])

In [None]:
###### contours.shape

In [None]:
0

In [None]:
target_id = 400
similarity = torch.mm(total_embs[target_id:target_id+1], total_embs.transpose(0,1))
# similarity[target_id-5:target_id+5]
similarity[0][target_id-5:target_id+5]

In [None]:
# Embed every humming contour and keep the L2-normalised embeddings together
# with their track ids.
# NOTE: this overwrites the notebook-level `total_embs` / `total_song_ids`.
dataset = humm_array

total_embs = torch.zeros([len(dataset), model.embed_size]).to('cuda')
total_song_ids = torch.zeros(len(dataset), dtype=torch.long)
model.eval()

with torch.no_grad():
    for i in range(len(dataset)):
        batch = torch.Tensor(dataset[i][0]).cuda().unsqueeze(0)
        # Named `humm_emb`, not `embeddings`: the loaded embedding dictionary
        # `embeddings` is still indexed by later cells and must not be rebound.
        humm_emb = model(batch)
        total_embs[i] = (humm_emb / humm_emb.norm(dim=1)[:, None])[0]
        # Store the id as an integer directly. Going through a float32 tensor
        # rounds ids above 2**24 (e.g. 30894451 becomes 30894452).
        total_song_ids[i] = int(dataset[i][1])



In [None]:
len(dataset)

In [None]:
# total_song_lists = list(set(total_song_ids.numpy().tolist()))
# total_song_lists = list(set(qbh_embeddings['ids']))
total_song_lists = list(set(embeddings['ids']))
total_song_lists.sort()


In [None]:
# list(zip(total_song_lists, ids))

In [None]:
ids = list(set([x[1] for x in humm_array]))
ids.sort()

dummy_pos = []
dummy_neg = []
for id in ids:
    if id in total_song_lists:
        dummy_pos.append(id)
    else:
        dummy_neg.append(id)
    
print(len(dummy_pos), len(dummy_neg), dummy_pos, dummy_neg)

In [None]:
with open('flo_metadata.dat', 'rb') as f:
    metadata = pickle.load(f)

In [None]:
# Make Kor Ballade Melody segment with hop size of 5 sec
from melody_utils import MelodyLoader
from data_utils import get_song_ids_of_selected_genre
from sampling_utils import downsample_contour
melody_loader = MelodyLoader(is_quantized=False,in_midi_pitch=True)


    # selected_genres = [29]
selected_genres = [4]
song_ids = get_song_ids_of_selected_genre(metadata, selected_genre=selected_genres)
song_ids[0]

hop_size = 50
win_size = 200

overlapped_melodies = melody_loader.get_overlapped_contours(Path('/home/svcapp/userdata/flo_data_backup/427/675/pitch_427675419.txt'))
# melody_ds = downsample_contour(melody)
# melody_form = pitch_array_to_formatted(melody_ds)
# slice_pos = list(range(0, melody_form.shape[0] - win_size, hop_size))
# slice_pos.append(melody_form.shape[0] - win_size)
# overlapped_melodies = np.asarray([melody_form[i:i+win_size]for i in slice_pos if melody_form[i:i+win_size,1] > win_size/3])

In [None]:
overlapped_melodies[0]

In [None]:
plt.plot(sliced_melody)
plt.plot(humm_melody)
plt.plot()

In [None]:
assertion = [1 for sample in humming_db.samples if not Path(sample['pitch_path']).exists()]

In [None]:
sum(assertion)

In [None]:
from simplejson import load as json_load
with open('contour_tiny.json', 'rb') as f:
    pre_loaded_data = json_load(f)

In [None]:
pre_loaded_data[0]['frame_pos']

In [None]:
from melody_utils import MelodyLoader
melody_loader = MelodyLoader(is_quantized=False, in_midi_pitch=True)
melody = melody_loader.get_split_contour('/home/svcapp/userdata/flo_data_backup/433/840/pitch_433840535.txt')

In [None]:
melody[3]['frame_pos']

In [None]:
compare = np.asarray([pre_loaded_data[0]['melody'][:478], melody[3]['melody'][986:]]).T

In [None]:
mean = np.mean(compare[compare[:,0]!=0,0]), np.mean(compare[compare[:,1]!=0,1])
std = np.std(compare[compare[:,0]!=0,0]), np.std(compare[compare[:,1]!=0,1])

mean, std

In [None]:
compare[:2]

In [None]:
std = (compare[1, 1] - compare[0,1])/  (compare[1, 0] - compare[0,0])
mean = compare[0,1] - std * compare[0,0]
mean, std

In [None]:
def load_pitch_csv(pitch_path):
    """Read a numeric CSV file, skipping its first (header) row.

    Args:
        pitch_path: path of the CSV file.

    Returns:
        float32 numpy array with one row per CSV data row.
    """
    with open(pitch_path, newline='') as csv_file:
        rows = csv.reader(csv_file)
        next(rows, None)  # discard the header row, if there is one
        body = [row for row in rows]
    return np.asarray(body, dtype='float32')

def load_crepe_pitch(pitch_path, threshold=0.7):
    """Load column 1 of a pitch CSV, zeroing rows with low values in column 2.

    Args:
        pitch_path: path of the CSV file, read with ``load_pitch_csv``.
        threshold: rows whose column 2 is below this value get column 1 set
            to 0.
            # NOTE(review): columns are presumably CREPE's
            # time / frequency / confidence - confirm against the files.

    Returns:
        1-D float32 array holding column 1 after thresholding.
    """
    table = load_pitch_csv(pitch_path)
    unreliable = table[:, 2] < threshold
    table[unreliable, 1] = 0
    return table[:, 1]
    
pitch_path = humming_db.samples[0]['pitch_path']
pitch_data = load_crepe_pitch(pitch_path)

In [None]:
from model import CnnEncoder
import os
import torch
def load_model(hparams_path, checkpoint_path):
    """Build a ``CnnEncoder`` from pickled hyper-parameters and load its weights.

    This definition shadows the ``load_model`` imported from ``inference``
    earlier in the notebook, which is called with a different argument list.

    Args:
        hparams_path: path of the pickled hyper-parameter object.
        checkpoint_path: path of a checkpoint containing the keys
            ``'state_dict'`` and ``'iteration'``.

    Returns:
        The model with the checkpoint weights loaded (tensors mapped to CPU).

    Raises:
        AssertionError: if ``checkpoint_path`` is not an existing file.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only open
    # hyper-parameter files from a trusted source.
    with open(hparams_path, 'rb') as hparams_file:
        hparams = pickle.load(hparams_file)
    model = CnnEncoder(hparams)

    assert os.path.isfile(checkpoint_path)
    print("Loading checkpoint '{}'".format(checkpoint_path))
    state = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(state['state_dict'])
    print("Loaded checkpoint '{}' from iteration {}" .format(
        checkpoint_path, state['iteration']))
    return model

cnn_model = load_model('/home/svcapp/userdata/flo_model/contour_res/hparams.dat', '/home/svcapp/userdata/flo_model/contour_res/checkpoint_best.pt')

In [None]:
class HummingDB:
    """Pairs each humming recording with the matching excerpt of its original track.

    Every ``*.wav`` file below ``data_path`` is one humming sample; its
    metadata is derived by ``make_humming_sample_dictionary``. Indexing
    decodes the humming audio and the original audio and cuts the original to
    the time span in the sample's ``'time_stamp'`` field.
    """

    # Sample rate (Hz) every decoded signal is converted to. The time-stamp
    # slicing in __getitem__ is only correct when the signal has this rate.
    sample_rate = 44100

    def __init__(self, data_path, audio_path, df_a, df_b):
        """
        Args:
            data_path: root directory searched recursively for ``*.wav`` files.
            audio_path: root directory of the original tracks.
            df_a: metadata table used for samples of song group "100".
            df_b: metadata table used for all other samples.
        """
        self.data_path = Path(data_path)
        self.audio_path = Path(audio_path)
        self.song_list = list(self.data_path.rglob('*.wav'))
        self.samples = [make_humming_sample_dictionary(path, df_a, df_b) for path in self.song_list]
        self.num_songs = len(self.song_list)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        """Return ``(humming, original_excerpt, sample_dict)`` for one sample.

        Both signals are mono float arrays scaled to [-1, 1) at
        ``sample_rate`` Hz.
        """
        selected_sample = self.samples[index]
        decoded = self._decode_mono(selected_sample['path'], 'wav')

        orig_audio_path = self._find_original_audio(selected_sample['track_id'])
        orig_decoded = self._decode_mono(orig_audio_path, 'm4a')

        # 'time_stamp' is "<start>-<end>", parsed as integers and scaled by
        # the sample rate.
        time_pos = selected_sample['time_stamp'].split('-')
        start_position = int(time_pos[0]) * self.sample_rate
        end_position = int(time_pos[1]) * self.sample_rate

        return decoded, orig_decoded[start_position:end_position], selected_sample

    def _find_original_audio(self, track_id):
        """Locate the original track: sharded .aac, sharded .m4a, then the 'qbh' folder."""
        track_id = str(track_id)
        candidate = self.audio_path / track_id[:3] / track_id[3:6] / (track_id + '.aac')
        if not candidate.exists():
            candidate = candidate.with_suffix('.m4a')
        if not candidate.exists():
            candidate = self.audio_path / 'qbh' / (track_id + '.aac')
        return candidate

    def _decode_mono(self, path, file_format):
        """Decode an audio file to a mono float array at ``sample_rate`` Hz."""
        segment = AudioSegment.from_file(path, file_format)
        # Force the layout the int16 decoding and the 44.1 kHz slicing assume:
        # other sample rates, stereo or 24-bit files would otherwise be
        # misread (same normalisation as DataMonitor.load_audio).
        segment = segment.set_frame_rate(self.sample_rate).set_channels(1).set_sample_width(2)
        return np.frombuffer(segment._data, dtype=np.int16) / 32768


def make_humming_sample_dictionary(path, df_a, df_b):
    """Build the metadata dictionary for one humming recording.

    The file stem is split on '_'. When the first field is "100" the stem
    must have six fields (song group, song index, humming type, time stamp,
    singer tag, singer id) and the track id is looked up in ``df_a``.
    Otherwise the stem must have four fields and track id plus singer
    information come from ``df_b``.

    Args:
        path: ``pathlib.Path`` of the recording.
        df_a: DataFrame with columns 'file_name' and 'track_id'.
        df_b: DataFrame with columns 'file_name', 'track_id' and
            'Identification code'.

    Returns:
        dict with the keys 'path', 'song_group', 'song_idx', 'humming_type',
        'time_stamp', 'singer_group', 'singer_id', 'singer_gender' and
        'track_id'.

    Raises:
        ValueError: if the stem does not have the expected number of fields.
        IndexError: if no table row matches the file name.
    """
    fields = path.stem.split('_')
    sample = {'path': str(path)}

    if fields[0] == "100":
        group, idx, kind, stamp, singer_tag, singer_raw = fields
        # singer_tag looks like "(NF": index 1 is the group, index 2 the gender.
        gender, singer_group = singer_tag[2], singer_tag[1]
        matches = df_a.loc[df_a['file_name'] == path.name]
        sample.update(
            song_group=group,
            song_idx=idx,
            humming_type=kind,
            time_stamp=stamp,
            singer_group=singer_group,
            singer_id=singer_raw[:-1],  # drop the trailing ")"
            singer_gender=gender,
            track_id=matches.iloc[0]['track_id'],
        )
        return sample

    group, idx, kind, stamp = fields
    row = df_b.loc[df_b['file_name'] == path.name].iloc[0]
    code = row['Identification code']
    sample.update(
        song_group=group,
        song_idx=idx,
        humming_type=kind,
        time_stamp=stamp,
        track_id=row['track_id'],
        singer_gender=code[1],
        singer_group=code[0],
        singer_id=code[-3:],
    )
    return sample
    
class HummingSample:
    """Unfinished wrapper around one humming recording; only stores its path."""

    def __init__(self, data_path):
        self.data_path = Path(data_path)
        # The '_'-separated fields of the file stem are computed but not
        # stored or used yet.
        name_fields = self.data_path.stem.split('_')
    
humming_db = HummingDB('/home/svcapp/userdata/humming_db', '/home/svcapp/userdata/flo_data_backup/', selected_100, selected_900)
# audio = humming_db[1]
# ipd.Audio(audio, rate=44100)
print(humming_db.samples[0])


In [None]:
audio, orig, meta = humming_db[200]
print(meta)
ipd.Audio(audio, rate=44100)

In [None]:
ipd.Audio(orig, rate=44100)

In [None]:
len(set([x['track_id'] for x in humming_db.samples]))

In [None]:
singer_ids = set([x['singer_group'] for x in humming_db.samples])
print(singer_ids, len(singer_ids))
for ids in singer_ids:
    print(sum([1 for x in humming_db.samples if x['singer_group'] == ids]))

In [None]:
xls_file = pd.ExcelFile("/home/svcapp/userdata/humming_db/Spec.xlsx")
sheets = pd.read_excel(xls_file, sheet_name=None, header=1)
exp_id = list(sheets.keys())
selected_100 = [sheets[x] for x in exp_id[:4]]
selected_100 = pd.concat(selected_100, ignore_index=True)
selected_900 = sheets[exp_id[4]]

with open("flo_metadata.dat", "rb") as f:
    data_dict = pickle.load(f)
# for data in data_dict:
#     data['track_name'] = str(data['track_name'])
#     while data['track_name'][-1] == ' ':
#         data['track_name'] = data['track_name'][:-1]



def get_track_id(song_name, artist_name, data_dict):
    """Look up the track id of a song by title and artist.

    Args:
        song_name: title, compared for equality with ``str(track_name)``.
        artist_name: artist; its string form must be a substring of the first
            entry of the record's 'artist_name_basket'.
        data_dict: iterable of records with the keys 'track_name',
            'artist_name_basket' and 'track_id'.

    Returns:
        The 'track_id' of the first matching record. When nothing matches,
        the title and artist are printed and ``None`` is returned.
    """
    wanted_artist = str(artist_name)
    for entry in data_dict:
        if not (song_name == str(entry['track_name'])):
            continue
        if wanted_artist in str(entry['artist_name_basket'][0]):
            return entry['track_id']
    print(f"{song_name} / {artist_name}")
    return None
    
# Resolve the FLO track id of every spreadsheet row.
# The 900-song ids were previously taken from a notebook-level `track_ids`
# list whose computation had been commented out, so a fresh kernel assigned
# an unrelated list. They are recomputed here under their own name, and the
# hard-coded row counts (900 / 500) are replaced by the actual table lengths.
track_ids900 = [
    get_track_id(name, artist, data_dict)
    for name, artist in zip(selected_900['track_name'], selected_900['artist_name'])
]
# NOTE(review): `meta_100` is built by the `flo_test_list.csv` cell further
# down in this notebook; that cell has to run before this one.
track_ids100 = [
    get_track_id(name, artist, meta_100)
    for name, artist in zip(selected_100['track_name'], selected_100['artist_name'])
]

selected_100['track_id'] = track_ids100
selected_900['track_id'] = track_ids900

In [None]:
meta_100 = pd.read_csv("flo_test_list.csv")
meta_100 = meta_100.rename(columns={'곡명': 'track_name', '아티스트명': 'artist_name_basket', 'track id ': 'track_id'})
meta_100['artist_name_basket'] = [[x] for x in meta_100['artist_name_basket'] ]
meta_100 = meta_100.to_dict('records')

with open('meta_100.dat', 'wb') as f:
    pickle.dump(meta_100, f)

In [None]:
selected_900['Identification code']

In [None]:

# from collections import Counter
# test = Counter(track_ids)
# test.most_common(10)

In [None]:
selected_900

In [None]:
class DataMonitor:
    """Browse pitch contours and audio of a song collection.

    A collection whose path contains 'qbh' is flat (``<id>.aac`` files in one
    directory); any other collection is sharded as
    ``<id[:3]>/<id[3:6]>/<id>.aac`` and its song ids are read from
    'song_indices_in_flo.npy' in the working directory. The pitch contour of a
    song is expected next to its audio as ``pitch_<id>.txt``.
    """

    def __init__(self, data_path):
        self.data_path = Path(data_path)
        # Test the string form so that Path arguments work too; this is the
        # same check song_idx_to_path uses.
        if 'qbh' in str(self.data_path):
            self.song_list = [x.stem for x in self.data_path.rglob('*.aac')]
        else:
            self.song_list = np.load('song_indices_in_flo.npy')
        self.sr = 44100

    def _pitch_path(self, song_idx):
        """Path of the pitch text file stored next to a song's audio file."""
        return self.song_idx_to_path(song_idx).parent / 'pitch_{}.txt'.format(song_idx)

    def get_contour(self, index):
        """Load the pitch contour of the song at position ``index`` of ``song_list``."""
        return load_melody(self._pitch_path(self.song_list[index]))

    def get_audio(self, song_id, id1, id2):
        """Return samples ``id1:id2`` of the decoded audio of ``song_id``."""
        song_path = self.song_idx_to_path(song_id)
        return self.load_audio(song_path)[id1:id2]

    def load_audio(self, track_path):
        """Decode a track to a mono float array in [-1, 1) at ``self.sr`` Hz."""
        song = AudioSegment.from_file(track_path, 'm4a').set_frame_rate(self.sr).set_channels(1)._data
        return np.frombuffer(song, dtype=np.int16) / 32768

    def song_idx_to_path(self, idx):
        """Audio path of a song id; falls back to '.m4a' when the '.aac' file is missing."""
        idx = str(idx)
        if 'qbh' in str(self.data_path):
            path = self.data_path / (idx + '.aac')
        else:
            path = self.data_path / idx[:3] / idx[3:6] / (idx + '.aac')
        if not path.exists():
            path = path.with_suffix('.m4a')
        return path

    def sample_random_melody(self):
        """Return a random melody segment of the raw contour of a random song.

        Songs are drawn until one with at least one melody segment is found;
        this never returns if no song has a segment.
        """
        while True:
            rand_index = random.randint(0, len(self) - 1)
            contour = self.get_contour(rand_index)
            c_contour = clearing_note(quantizing_hz(contour))
            melody_indices = self.find_melody_segment(c_contour)
            if len(melody_indices) > 0:
                a, b = melody_indices[random.randint(0, len(melody_indices) - 1)]
                return contour[a:b]

    def plot_and_play(self, idx):
        """Plot a random melody segment of song ``idx`` and return it as audio.

        Plots the raw and the cleaned-and-elongated contour of the segment.

        Returns:
            The segment's audio mixed with a sine rendering of the raw
            contour, or ``None`` when the song has no melody segment.
        """
        contour = self.get_contour(idx)
        c_contour = clearing_note(quantizing_hz(contour))
        e_contour = elongate_note(c_contour)

        melody_indices = self.find_melody_segment(c_contour)
        if len(melody_indices) == 0:
            return None

        a, b = melody_indices[random.randint(0, len(melody_indices) - 1)]
        plt.plot(contour[a:b])
        plt.plot(e_contour[a:b])
        # Segment bounds are contour frames; the factor sr // 100 converts
        # them to audio samples (matches the default frame_rate=100 below).
        audio = self.get_audio(self.song_list[idx], a * self.sr // 100, b * self.sr // 100)
        return self.generate_sine_wav(contour[a:b], audio)

    def find_melody_segment(self, contour, threshold=50):
        """Find melody segments; ``threshold`` is the zero-run length passed on as ``zero_threshold``."""
        # The threshold argument used to be ignored in favour of a hard-coded 50.
        return find_melody_seg_fast(contour, zero_threshold=threshold, max_length=2000, min_length=500)

    def get_segmented_contours(self, song_idx):
        """Return every melody segment of a song as ``{'melody', 'frame_pos'}`` dictionaries."""
        contour = load_melody(self._pitch_path(song_idx))
        return [{'melody': contour[a:b], 'frame_pos': (a, b)} for (a, b) in self.find_melody_segment(contour)]

    def generate_sine_wav(self, melody, audio, frame_rate=100):
        """Mix a sine rendering of ``melody`` (Hz per frame) with ``audio``.

        The sine has amplitude 0.9 and the audio is attenuated to 0.3. Both
        signals are cut to the shorter of the two lengths.
        """
        # Float conversion keeps the phase accumulator from being truncated
        # when the contour holds integers.
        melody_resampled = np.repeat(np.asarray(melody, dtype=float), self.sr // frame_rate)
        phi = np.zeros_like(melody_resampled)
        phi[1:] = np.cumsum(2 * np.pi * melody_resampled[:-1] / self.sr, axis=0)
        sin_wav = 0.9 * np.sin(phi)
        # Cutting only the sine failed with a broadcasting error whenever the
        # audio was longer than the synthesized tone.
        num_samples = min(sin_wav.shape[0], audio.shape[0])
        return sin_wav[:num_samples] + (audio[:num_samples] * 0.3)

    def __len__(self):
        return len(self.song_list)
    

def quantizing_hz(contour):
    """Snap every positive frequency to the nearest equal-tempered semitone.

    Args:
        contour: iterable of frequencies in Hz.

    Returns:
        list of the same length; positive values are replaced by the nearest
        semitone frequency relative to 440 Hz, all other values by 0.
    """
    quantized = []
    for pitch in contour:
        if pitch > 0:
            # `log2` was referenced without ever being imported; use math.log2.
            semitones = round(math.log2(pitch / 440) * 12)
            quantized.append(440 * (2 ** (semitones / 12)))
        else:
            quantized.append(0)
    return quantized

def elongate_note(q_contour, patience=10):
    """Fill short gaps in a contour by holding the previous pitch.

    A non-positive frame repeats the last positive pitch. Once more than
    ``patience`` consecutive non-positive frames have been seen, the held
    pitch is reset to 0 and the gap counter starts again.

    Args:
        q_contour: iterable of pitch values.
        patience: number of gap frames that are filled with the held pitch.

    Returns:
        list with one value per input frame.
    """
    held = 0
    gap = 0
    stretched = []
    for value in q_contour:
        if value > 0:
            held, gap = value, 0
            stretched.append(value)
            continue
        gap += 1
        if gap > patience:
            held, gap = 0, 0
        stretched.append(held)
    return stretched

def clearing_note(q_contour, min_pitch_len=5):
    """Remove notes that last fewer than ``min_pitch_len`` frames.

    The contour is split into runs of equal consecutive values; every run
    shorter than ``min_pitch_len`` is replaced by zeros.

    Args:
        q_contour: indexable sequence of (quantized) pitch values.
        min_pitch_len: minimum run length, in frames, for a note to be kept.

    Returns:
        list with the same length as ``q_contour``.
    """
    output = [x for x in q_contour]
    run_value = 0
    run_start = 0
    for i in range(len(q_contour)):
        pitch = q_contour[i]
        if pitch != run_value:
            run_length = i - run_start
            if run_length < min_pitch_len:
                output[run_start:i] = [0] * run_length
            run_value = pitch
            run_start = i
    # The loop only checks a run when the next one starts, so the final run
    # has to be checked here; it used to survive even when too short.
    tail_length = len(q_contour) - run_start
    if tail_length < min_pitch_len:
        output[run_start:] = [0] * tail_length
    return output

def load_melody(path):
    """Read the second space-separated field of every line as a float.

    Args:
        path: path of the pitch text file.

    Returns:
        list of floats, one per line.
    """
    pitches = []
    with open(path, "r") as pitch_file:
        for line in pitch_file:
            token = line.split(' ')[1]
            # The last two characters of the field are dropped before parsing.
            # NOTE(review): presumably a trailing character plus the newline -
            # confirm against the file format.
            pitches.append(float(token[:-2]))
    return pitches

def find_melody_seg_fast(contour,zero_threshold, max_length, min_length):
    """Split a contour into melody segments bounded by long zero runs.

    Args:
        contour: pitch contour in which 0 marks frames without pitch.
        zero_threshold: passed to ``get_zero_slice_from_contour`` as
            ``threshold``.
        max_length: passed to ``expand_voice`` as the merge limit.
        min_length: only segments strictly longer than this are returned.

    Returns:
        list of ``(start, end)`` tuples of Python ints.
    """
    silences = get_zero_slice_from_contour(contour, threshold=zero_threshold)
    segments = zero_slice_to_segment(silences)
    if segments:
        # expand_voice merges neighbouring segments in place.
        expand_voice(segments, max_length=max_length)
    return [(int(start), int(end)) for start, end in segments if end - start > min_length]

def get_zero_slice_from_contour(contour, threshold=50):
    """Locate the zero runs that precede each non-zero stretch of a contour.

    Args:
        contour: sequence of pitch values; 0 marks a frame without pitch.
        threshold: only runs strictly longer than this many frames are kept.

    Returns:
        list of ``[start, end]`` pairs. The first candidate always starts at
        frame 0; each later one spans from the zero frame that ends one
        non-zero stretch to the first frame of the next. Zeros after the last
        non-zero stretch are not reported, and the result is empty when no
        non-zero stretch lies between two zero frames.
    """
    zero_idx = np.where(np.asarray(contour) == 0)[0]
    gaps = np.diff(zero_idx)
    # A gap larger than 1 between consecutive zero frames encloses a non-zero stretch.
    jumps = np.where(gaps > 1)[0]
    if len(jumps) == 0:
        return []

    voiced_starts = zero_idx[jumps] + 1
    voiced_ends = zero_idx[jumps] + gaps[jumps]  # index of the zero frame after the stretch

    candidates = [[0, voiced_starts[0]]]
    for k in range(1, len(jumps)):
        candidates.append([voiced_ends[k - 1], voiced_starts[k]])
    return [pair for pair in candidates if pair[1] - pair[0] > threshold]

def zero_slice_to_segment(zeros_slice, min_voice_seg=10):
    """Turn consecutive zero runs into the segments that lie between them.

    Args:
        zeros_slice: list of ``[start, end]`` zero runs in ascending order.
        min_voice_seg: minimum length of a segment to be kept.

    Returns:
        list of ``(start, end)`` tuples: from the end of one zero run to the
        start of the next.
    """
    segments = []
    for current, following in zip(zeros_slice, zeros_slice[1:]):
        start, end = current[1], following[0]
        if end - start >= min_voice_seg:
            segments.append((start, end))
    return segments

def expand_voice(voice_slice, max_length=2000):
    """Repeatedly merge neighbouring segments, smallest gap first.

    A pair of neighbours may be merged while the span from the start of the
    first to the end of the second is shorter than ``max_length``. In every
    round all mergeable pairs that share the currently smallest gap are merged.

    Args:
        voice_slice: non-empty list of ``(start, end)`` segments; it is
            modified in place.
        max_length: upper bound (exclusive) on the length of a merged pair.

    Returns:
        The same ``voice_slice`` list object.
    """
    def merged_length(alist, idx):
        # length of segment idx + gap to its successor + length of the successor
        return alist[idx][0] + alist[idx][1] + alist[idx+1][0]
    len_and_distance = get_length_and_distance_of_melody(voice_slice)
#     valid_distances = [len_and_distance[i][1] for i in range(len(len_and_distance)-1) if len_and_distance[i][0] +len_and_distance[i+1][0]<max_length]
    # Gaps of every neighbouring pair that may still be merged.
    valid_distances = [ len_and_distance[i][1] for i in range(len(len_and_distance)-1) if merged_length(len_and_distance, i) <max_length]
    while valid_distances:
        min_distance = min(valid_distances)
        min_index = [i for i in range(len(len_and_distance)-1) if len_and_distance[i][1] ==min_distance and  merged_length(len_and_distance, i) <max_length]
        # Merge from the back so earlier indices stay valid while the list shrinks.
        # NOTE(review): merged_length is evaluated before any merge of this round;
        # when consecutive indices are merged the resulting segment may exceed
        # max_length - confirm whether that is intended.
        for index in reversed(min_index):
            merge_voice_slice(voice_slice, index)
        if voice_slice == []:
            valid_distances = []
        else:
            # Recompute lengths and gaps for the shortened list.
            len_and_distance = get_length_and_distance_of_melody(voice_slice)
            valid_distances = [ len_and_distance[i][1] for i in range(len(len_and_distance)-1) if merged_length(len_and_distance, i) <max_length]
    return voice_slice

def merge_voice_slice(voice_slice, index):
    """Replace the segments at ``index`` and ``index + 1`` by their union, in place.

    The merged segment runs from the start of the first to the end of the
    second one. Nothing is returned.
    """
    start = voice_slice.pop(index)[0]
    # After the first pop the former neighbour sits at the same index.
    end = voice_slice.pop(index)[1]
    voice_slice.insert(index, (start, end))

def get_length_and_distance_of_melody(voice_slice):
    """Describe every segment by its length and the gap to the next segment.

    Args:
        voice_slice: non-empty list of ``(start, end)`` segments.

    Returns:
        list of ``(length, gap_to_next)`` tuples, one per segment. The last
        segment has no successor and gets the sentinel gap 10000.
    """
    pairs = []
    for segment, successor in zip(voice_slice, voice_slice[1:]):
        pairs.append((segment[1] - segment[0], successor[0] - segment[1]))
    last = voice_slice[-1]
    pairs.append((last[1] - last[0], 10000))
    return pairs