In [None]:

from pathlib import Path
import _pickle as pickle
import torch
from torch.utils.data import DataLoader
from collections import defaultdict, OrderedDict
import numpy as np

from train import load_hparams, load_model, load_checkpoint, make_aug_param_dictionary
from model import CnnEncoder
from data_utils import WindowedContourSet, ContourCollate, HummingPairSet, get_song_ids_of_selected_genre
from validation import get_contour_embeddings, cal_ndcg_single
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import random
import pandas as pd
import humming_data_utils as utils




def load_model(ckpt_dir):
    """Restore the best CnnEncoder checkpoint stored under ``ckpt_dir``.

    Note: this definition shadows the ``load_model`` imported from ``train``
    at the top of the notebook.

    Args:
        ckpt_dir: Directory (str or Path) holding ``hparams.dat`` and
            ``checkpoint_best.pt``.

    Returns:
        The first element returned by ``load_checkpoint`` (the model), which
        was built on the GPU via ``.cuda()``.
    """
    ckpt_root = Path(ckpt_dir)

    # The hyper-parameters are unpickled directly instead of going through
    # train.load_hparams. pickle executes arbitrary code on load, so only
    # point this at trusted checkpoint directories.
    with open(ckpt_root / 'hparams.dat', 'rb') as hparams_file:
        hparams = pickle.load(hparams_file)

    encoder = CnnEncoder(hparams).cuda()
    encoder, _, _, _ = load_checkpoint(
        ckpt_root / 'checkpoint_best.pt',
        encoder,
        None,
        train_on_humming=True,
    )
    return encoder


def prepare_dataset(data_dir='/home/svcapp/userdata/flo_data_backup/', selected_genres=[4, 12, 13, 17, 10, 7,15, 11, 9], num_workers=2, min_vocal_ratio=0.5):
    """Build the catalogue loader, the humming test loader and a metadata lookup.

    Args:
        data_dir: Root directory handed to ``WindowedContourSet``.
        selected_genres: Genre codes forwarded to
            ``get_song_ids_of_selected_genre``. NOTE(review): the default is a
            shared list object; confirm the helper does not mutate it.
        num_workers: Worker count used by both DataLoaders.
        min_vocal_ratio: Forwarded to ``WindowedContourSet``.

    Returns:
        ``(entire_loader, humm_test_loader, meta_in_song_key)`` where the last
        item maps ``track_id`` to a metadata record. Records coming from the
        Excel sheets override the pickled metadata for the same ``track_id``.
    """
    # Both pickles are resolved relative to the current working directory.
    with open('flo_metadata_220k.dat', 'rb') as meta_file:
        metadata = pickle.load(meta_file)
    with open('humm_db_ids.dat', 'rb') as ids_file:
        humm_ids = pickle.load(ids_file)

    # Catalogue = songs of the requested genres plus every humming-DB song.
    # (Earlier debugging variants restricted song_ids to humm_ids only, or to
    # a handful of fixed track IDs.)
    song_ids = get_song_ids_of_selected_genre(metadata, selected_genre=selected_genres)
    song_ids += humm_ids

    catalogue = WindowedContourSet(
        data_dir,
        aug_weights=[],
        song_ids=song_ids,
        set_type='entire',
        pre_load=False,
        num_aug_samples=0,
        num_neg_samples=0,
        min_vocal_ratio=min_vocal_ratio,
    )
    entire_loader = DataLoader(
        catalogue,
        512,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=ContourCollate(0, 0, for_cnn=True),
        pin_memory=False,
        drop_last=False,
    )

    # Hard-coded location of the humming contour pairs (hparams.humming_path
    # was the earlier source).
    with open('/home/svcapp/userdata/flo_melody/humming_db_contour_pairs.dat', 'rb') as pairs_file:
        contour_pairs = pickle.load(pairs_file)

    humm_test_set = HummingPairSet(contour_pairs, [], "test", [], num_aug_samples=0, num_neg_samples=0)
    # Batch size 1 and no shuffling: one humming query per step, in dataset order.
    humm_test_loader = DataLoader(
        humm_test_set,
        1,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=ContourCollate(0, 0, for_cnn=True),
        pin_memory=True,
        drop_last=False,
    )

    selected_100, selected_900 = utils.load_meta_from_excel("/home/svcapp/userdata/humming_db/Spec.xlsx")

    # Start from the pickled metadata, then let the Excel rows (100-set first,
    # 900-set second) overwrite entries with the same track_id.
    meta_in_song_key = {record['track_id']: record for record in metadata}
    for sheet in (selected_100, selected_900):
        for song in sheet.to_dict('records'):
            meta_in_song_key[song['track_id']] = song

    return entire_loader, humm_test_loader, meta_in_song_key


def evaluate(model, humm_test_loader, total_embs, total_song_ids, unique_ids, index_by_id):
    """Rank every catalogue song against each humming query and print top-10 accuracy.

    Args:
        model: Callable with an ``eval()`` method; maps a contour batch to
            embeddings of shape (batch, dim).
        humm_test_loader: Iterable of ``(contours, song_ids)`` batches that also
            exposes ``.dataset`` (its length is the accuracy denominator).
            The rank computation compares ``total_song_ids == song_ids`` and is
            written for one query per batch.
        total_embs: Catalogue window embeddings, shape (num_windows, dim).
            Assumed to be L2-normalised already so the dot product is a cosine
            similarity -- TODO confirm against get_contour_embeddings.
        total_song_ids: Song ID of every row of ``total_embs``.
        unique_ids: 1-D tensor of distinct song IDs (see ``get_index_by_id``).
        index_by_id: Long tensor (num_songs, max_windows); row ``i`` lists the
            rows of ``total_embs`` that belong to ``unique_ids[i]``.

    Returns:
        total_recommends: ndarray (num_queries, k) of recommended song IDs,
            best first, with ``k = min(30, num_songs)``.
        total_test_ids: ndarray of the ground-truth song ID of each query.
        total_rank: list of 0-based ranks, i.e. how many songs scored strictly
            higher than the ground-truth song. When the ground-truth song is
            not in the catalogue, every song with similarity > -1 is counted.
    """
    model.eval()
    num_correct_answer = 0
    total_recommends = []
    total_test_ids = []
    total_rank = []
    with torch.no_grad():
        for contours, song_ids in humm_test_loader:
            anchor = model(contours.cuda())
            anchor_norm = anchor / anchor.norm(dim=1)[:, None]
            similarity = torch.mm(anchor_norm, total_embs.transpose(0, 1))
            # Collapse window-level scores to one score per song (best window wins).
            max_similarity_by_song = torch.max(similarity[:, index_by_id], dim=-1)[0]

            corresp_melody_ids = torch.where(total_song_ids == song_ids)[0]
            if len(corresp_melody_ids) == 0:
                # Ground-truth song is missing from the catalogue.
                max_similarity = -1
            else:
                max_similarity = torch.max(similarity[:, corresp_melody_ids])
            max_rank = torch.sum(max_similarity_by_song > max_similarity)

            # torch.topk raises if k exceeds the number of candidates, which
            # happens when evaluating against a small (debug) catalogue.
            num_recommend = min(30, max_similarity_by_song.shape[-1])
            recommends = torch.topk(max_similarity_by_song, k=num_recommend, dim=-1)[1]
            # get_index_by_id builds unique_ids on the CPU while the scores come
            # from the CUDA model; move the indices to unique_ids' device
            # before indexing.
            recommends = unique_ids[recommends.to(unique_ids.device)]

            top10_success = [
                int(int(song_ids[i]) in recommends[i, :10].tolist())
                for i in range(recommends.shape[0])
            ]
            total_recommends.append(recommends)
            total_test_ids.append(song_ids)
            total_rank.append(max_rank.item())

            num_correct_answer += sum(top10_success)
    # Top-10 hit rate over the whole humming test set.
    print(num_correct_answer / len(humm_test_loader.dataset))
    total_recommends = torch.cat(total_recommends, dim=0).cpu().numpy()
    total_test_ids = torch.cat(total_test_ids, dim=0).cpu().numpy()
    return total_recommends, total_test_ids, total_rank

def get_index_by_id(total_song_ids):
    """Group the positions of ``total_song_ids`` by song ID into a padded index matrix.

    Args:
        total_song_ids: 1-D tensor with one song ID per entry.

    Returns:
        A pair ``(unique_ids, index_matrix)``:
        - ``unique_ids``: LongTensor of the distinct song IDs, in ``set``
          iteration order.
        - ``index_matrix``: long tensor (num_songs, max_count); row ``i`` holds
          the positions where ``unique_ids[i]`` occurs. Shorter rows are padded
          by repeating their last position, so a max over the gathered values
          is unaffected by the padding.
    """
    unique_ids = list(set(total_song_ids.tolist()))
    positions = [torch.where(total_song_ids == song_id)[0] for song_id in unique_ids]

    widest = max(len(found) for found in positions)
    index_matrix = torch.zeros((len(unique_ids), widest), dtype=torch.long)
    for row, found in enumerate(positions):
        count = len(found)
        index_matrix[row, :count] = found
        # Pad with a duplicate of the last real position rather than 0, which
        # would point at an unrelated entry.
        index_matrix[row, count:] = found[-1]

    return torch.LongTensor(unique_ids), index_matrix

def get_similarity_by_id(similarity, unique_ids, index_by_ids):
    """Unimplemented placeholder: ignores its arguments and returns ``None``."""
    return None

def convert_result_to_dict(ids, ranks, meta):
    """Collect the ranks of all queries under a ``"<artist> - <track>"`` title.

    Args:
        ids: Iterable of song IDs, one per query.
        ranks: Iterable of ranks aligned with ``ids``.
        meta: Mapping from song ID to a record with ``'artist_name'`` and
            ``'track_name'`` keys.

    Returns:
        A plain dict mapping each title to the list of its ranks, in query
        order. Titles appear in order of first occurrence.
    """
    ranks_by_title = {}
    for song_id, rank in zip(ids, ranks):
        record = meta[song_id]
        title = record['artist_name'] + ' - ' + record['track_name']
        ranks_by_title.setdefault(title, []).append(rank)
    return ranks_by_title

def save_dict_result_to_csv(adict):
    """Unimplemented placeholder: writes nothing and returns ``None``."""
    return None



In [None]:
# Make the family of the local font file 'malgun.ttf' matplotlib's default font
# (presumably so Korean song titles render in the plots -- confirm).
# The path is relative, so the file must sit in the current working directory.
font_path = 'malgun.ttf'
font_name = fm.FontProperties(fname=font_path, size=50).get_name()
plt.rc('font', family=font_name)


In [None]:
# Inspect the metadata record of one track ID.
# NOTE(review): `meta` is only assigned by the prepare_dataset(...) cell further
# down, so on a fresh kernel this cell fails unless that cell has been run first.
meta[427396913]

In [None]:
# Load the FLO test-song list and index its rows by track ID.
# The lookup key 'track id ' is written with a trailing space; presumably that is
# how the CSV header is spelled -- verify before "fixing" it.
flo_test_list = pd.read_csv('flo_test_list.csv')
flo_test_meta = {x['track id ']: x for x in flo_test_list.to_dict('records')}

In [None]:
# Display the whole track-ID -> row mapping built from flo_test_list.csv.
flo_test_meta

In [None]:
# Build the loaders for this run: catalogue restricted to genre code 4 (plus the
# humming-DB songs that prepare_dataset always adds), with min_vocal_ratio 0.3.
# `meta` is the track_id -> metadata lookup used by the cells around this one.
entire_loader, humm_test_loader, meta = prepare_dataset(data_dir='/home/svcapp/t2meta/flo_new_music/music_100k/', min_vocal_ratio=0.3, selected_genres=[4])

In [None]:
# Evaluate each trained worker checkpoint against the humming test set, then save
# a reciprocal-rank heat map and a per-song recommendation table.
# NOTE: requires convert_result_to_rec_title / id_to_name, which are defined in
# cells further down the notebook; run those definitions first.
# worker_ids = [480785, 401032, 482492, 482457, 483461]
worker_ids = [482492]
model_dir = Path('/home/svcapp/t2meta/qbh_model')

# Tick-label font, defined here so the loop does not depend on a later cell
# having been executed (`font_path` comes from the font-setup cell above).
font_prop = fm.FontProperties(fname=font_path, size=20)
# Per-query humming metadata; convert_result_to_rec_title requires it as its
# fifth positional argument.
humm_meta = [x['meta'] for x in humm_test_loader.dataset.contours]

# The loop variable is deliberately still called `id`: later cells read it to
# build their output file names.
for id in worker_ids:
    ckpt_dir = next(model_dir.glob(f"worker_{id}*"))
    model = load_model(ckpt_dir)
    total_embs, total_song_ids = get_contour_embeddings(model, entire_loader)
    unique_ids, index_by_id = get_index_by_id(total_song_ids)
    total_recommends, total_test_ids, total_rank = evaluate(model, humm_test_loader, total_embs, total_song_ids, unique_ids, index_by_id)
    out = convert_result_to_dict(total_test_ids, total_rank, meta)
    detail_out = convert_result_to_rec_title(total_test_ids, total_recommends, total_rank, meta, humm_meta)

    # One row per song title, one column per humming query; cell value is the
    # reciprocal of the 1-based rank (1.0 means the song was ranked first).
    keys = sorted(out.keys())
    rank_array = np.asarray([out[x] for x in keys])
    fig = plt.figure(figsize=(20,20))
    ax = plt.gca()
    plt.imshow(1/(rank_array+1))
    plt.colorbar()
    ax.set_yticks(list(range(len(rank_array))))
    ax.set_yticklabels(keys)
    for label in ax.get_yticklabels():
        label.set_fontproperties(font_prop)
    plt.savefig(f'worker_{id}_eval_matrix.png')

    dataframe = pd.DataFrame(detail_out).transpose()
    dataframe.to_csv(f"worker_{id}_eval_table.csv")


# Results table: include song titles, sort by genre, and distinguish Prof / Non-prof singers.

In [None]:
# Re-create the heat map and table for the most recently evaluated worker.
# Relies on total_test_ids / total_recommends / total_rank and on the loop
# variable `id` left behind by the evaluation loop cell.
font_path = 'malgun.ttf'
font_prop = fm.FontProperties(fname=font_path, size=20)

# Per-query humming metadata; convert_result_to_rec_title requires it as its
# fifth positional argument.
humm_meta = [x['meta'] for x in humm_test_loader.dataset.contours]

out = convert_result_to_dict(total_test_ids, total_rank, meta)
detail_out = convert_result_to_rec_title(total_test_ids, total_recommends, total_rank, meta, humm_meta)

# One row per song title, one column per humming query; cell value is the
# reciprocal of the 1-based rank.
keys = sorted(out.keys())
rank_array = np.asarray([out[x] for x in keys])
fig = plt.figure(figsize=(20,20))
ax = plt.gca()
plt.imshow(1/(rank_array+1))
plt.colorbar()
ax.set_yticks(list(range(len(rank_array))))
ax.set_yticklabels(keys)
for label in ax.get_yticklabels():
    label.set_fontproperties(font_prop)
plt.savefig(f'worker_{id}_eval_matrix.png')

dataframe = pd.DataFrame(detail_out).transpose()
dataframe.to_csv(f"worker_{id}_eval_table.csv")


In [None]:
# Add a 'Class' column (position 1) looked up from flo_test_meta by the track ID
# stored in column 0.
# NOTE(review): this mutates `dataframe` in place; re-running the cell should fail
# because pandas refuses to insert a column name that already exists -- confirm.
dataframe.insert(1, 'Class', [flo_test_meta[x]['해당 요건'] for x in dataframe[0].values])

In [None]:
# Reorder the table rows by the 'Class' column added in the previous cell.
dataframe = dataframe.sort_values('Class')

In [124]:
# Collect the 'meta' record of every humming query, in dataset order, and show
# the first one. The recorded output below shows keys such as 'singer_group',
# 'singer_id', 'singer_gender', 'humming_type' and 'track_id'.
humm_meta = [x['meta'] for x in humm_test_loader.dataset.contours]
humm_meta[0]

{'path': '/home/svcapp/userdata/humming_db/100/0~24/01_P/100_11_C_151-169_(PF_KRJ).wav',
 'pitch_path': '/home/svcapp/userdata/humming_db/100/0~24/01_P/100_11_C_151-169_(PF_KRJ).f0.csv',
 'song_group': '100',
 'song_idx': '11',
 'humming_type': 'C',
 'time_stamp': '151-169',
 'singer_group': 'P',
 'singer_id': 'KRJ',
 'singer_gender': 'F',
 'track_id': 31484300}

In [None]:
# FIXME: unfinished draft. The comprehension below has no iterable after `in`, so
# the original line was a SyntaxError and stopped "Run All" at this cell. It is
# kept commented out until the intended source of rows is decided; the earlier
# cell that inserts 'Class' at position 1 from flo_test_meta already does this job.
# dataframe.insert(0, 'Class', [x['해당 요건'] for x in ])

In [None]:
# Look up the metadata of the last recommendation returned for the first query.
meta[total_recommends[0][-1]]

In [None]:
# Per-query lists of recommendation titles; filled by the loop after id_to_name.
rec_songs_in_name = []
def id_to_name(idx, meta):
    """Format the song ``idx`` as ``"<artist> - <track>"``.

    Args:
        idx: Song ID used as a key into ``meta``.
        meta: Mapping from song ID to a metadata record containing
            ``'track_name'`` and either ``'artist_name'`` or
            ``'artist_name_basket'``.

    Returns:
        The display title. When the record has no ``'artist_name'`` key, the
        first entry of ``'artist_name_basket'`` is used as the artist.
    """
    record = meta[idx]
    artist = record["artist_name"] if 'artist_name' in record else record["artist_name_basket"][0]
    return f'{artist} - {record["track_name"]}'
# For every query, keep the display titles of its first three recommendations.
# NOTE(review): the list is appended to, so re-running this cell without also
# resetting rec_songs_in_name duplicates the entries.
for rec in total_recommends:
    rec_songs_in_name.append([id_to_name(idx, meta) for idx in rec[:3]])

In [129]:
def convert_result_to_rec_title(total_test_ids, total_recommends, total_rank, meta, humm_meta, k=3):
    """Build a per-song table describing each humming query's recommendations.

    Args:
        total_test_ids: Ground-truth song ID of every query.
        total_recommends: Recommended song IDs per query, best first.
        total_rank: 0-based rank of the ground-truth song per query.
        meta: Mapping from song ID to a metadata record.
        humm_meta: Per-query dicts with ``'singer_group'``, ``'singer_id'``,
            ``'singer_gender'`` and ``'humming_type'``.
        k: Number of leading recommendations listed in each cell.

    Returns:
        Dict mapping ``"<artist> - <track>"`` to a six-item list
        ``[song_id, s1, s2, s3, s4, s5]``. Slots 1-2 hold the text of queries
        whose ``singer_group`` is ``'P'``; slots 3-5 hold all other queries.
        Unused slots stay ``[]``. When a group has more queries than slots, the
        group's last slot is overwritten by the most recent query.
    """
    def title_of(song_id):
        return meta[song_id]['artist_name'] + ' - ' + meta[song_id]['track_name']

    table = {}
    for song_id in total_test_ids:
        table[title_of(song_id)] = [song_id] + [[] for _ in range(5)]

    for song_id, rec, rank, humm in zip(total_test_ids, total_recommends, total_rank, humm_meta):
        row = table[title_of(song_id)]

        # The stored rank is 0-based; the table shows it 1-based.
        lines = [f'Rec rank: {rank+1}']
        lines += [id_to_name(rec_id, meta) for rec_id in rec[:k]]
        lines += [
            f'Group: {humm["singer_group"]}',
            f'Singer ID: {humm["singer_id"]}',
            f'Gender: {humm["singer_gender"]}',
            f'Humm type: {humm["humming_type"]}',
        ]
        text = "\n".join(lines)

        slots = (1, 2) if humm['singer_group'] == 'P' else (3, 4, 5)
        # Use the first still-empty slot of the group; if every earlier slot is
        # taken, fall through to the group's last slot.
        for slot in slots[:-1]:
            if row[slot] == []:
                break
        else:
            slot = slots[-1]
        row[slot] = text

    return table

# Build the per-song recommendation table (one row per song after the transpose)
# and write it to CSV.
# NOTE(review): `id` here is the loop variable left over from the worker
# evaluation loop, so the file name depends on that cell having been run.
test_out = convert_result_to_rec_title(total_test_ids, total_recommends, total_rank, meta, humm_meta)

dataframe = pd.DataFrame(test_out).transpose()
dataframe.to_csv(f"worker_{id}_eval_table.csv")

In [130]:
# Display the recommendation table (column 0 = track ID, columns 1-5 = query cells).
dataframe

Unnamed: 0,0,1,2,3,4,5
민수 - 민수는 혼란스럽다,31484300,Rec rank: 23\n노틸러스 - 슬픈꿈\n윤건 - 자석처럼\n사준 - 호박\n...,Rec rank: 1\n민수 - 민수는 혼란스럽다\n양요섭 - 소리 없이 운다\n양...,Rec rank: 62\n노틸러스 - 슬픈꿈\nSG워너비 - 은(恩)\n그네 - 늦...,Rec rank: 1\n민수 - 민수는 혼란스럽다\n김경호 - 영원의 성(城) (슬...,Rec rank: 111\n어쿠스틱 콜라보 - 빗속에서 With String\n거미...
Bag Raiders - Shooting Stars,427396913,Rec rank: 1\nBag Raiders - Shooting Stars\n이정현...,Rec rank: 20242\n김선우 - 소홀\n어반자카파 - For a while...,Rec rank: 1\nBag Raiders - Shooting Stars\nKha...,Rec rank: 477\n엑소케이 (EXO-K) - 월광 (Moonlight)\n...,Rec rank: 3\n데비킴 - 홈 스윗 홈\nNIKI - move!\nBag R...
Betty Who - Taste,5466183,Rec rank: 5937\n김상아 - 사랑했어요\n최예근 - 까만 얘기\nMarc...,Rec rank: 1\nBetty Who - Taste\nJK 김동욱 - 상록수 (...,Rec rank: 1288\n몽니 (Monni) - 라디오를 켜봐요\n솔빈 (LAB...,Rec rank: 1372\n켄타 - 넌 예뻐\n사라플라이 - 지구별 (feat. ...,Rec rank: 7421\nChris Brown - Come Together\nI...
Jawsh 685 & Jason Derulo - Savage Love (Laxed - Siren Beat),435806411,Rec rank: 1\nJawsh 685 & Jason Derulo - Savage...,Rec rank: 1\nJawsh 685 & Jason Derulo - Savage...,Rec rank: 4\nEXO - 기억을 걷는 밤 (Walk On Memories)...,Rec rank: 1\nJawsh 685 & Jason Derulo - Savage...,Rec rank: 3\n나문기 - 나의 소중한 사람\n시내 (Sinae) - 죽을만...
James Blake - Are You Even Real?,436045232,Rec rank: 16\n스웨덴세탁소 - 여름밤\n서영은 - 중독\nSUPER JU...,Rec rank: 98\n수림 - 강아지집\n세정 - 오리발\nKEEMBO - Th...,Rec rank: 1\nJames Blake - Are You Even Real?\...,Rec rank: 6\n전태은 - 사진\n폴킴 - 커피한잔할래요\n정호민 - 한 걸...,Rec rank: 13\n동방신기 (TVXQ!) - 소원 (Wish)\n김연우 - ...
이수영 - 얼마나 좋을까 (Final Fantasy X O.S.T),317040,Rec rank: 2\n이수영 - 얼마나 좋을까 (Orchestral)\n이수영 -...,Rec rank: 2\n이수영 - 얼마나 좋을까 (Orchestral)\n이수영 -...,Rec rank: 2\n이수영 - 얼마나 좋을까 (Orchestral)\n이수영 -...,Rec rank: 2\n이수영 - 얼마나 좋을까 (Orchestral)\n이수영 -...,Rec rank: 1\n이수영 - 얼마나 좋을까 (Final Fantasy X O....
Taylor Swift - Love Story,421881404,Rec rank: 8\n거미 - Do It..\n젝스키스 - 술끊자\n심규선 (Lu...,Rec rank: 1\nTaylor Swift - Love Story\n정준일 - ...,Rec rank: 11\nSaula - 다가와 (Each Other) (Feat. ...,Rec rank: 61\n박용인 (어반자카파) - Color Feat. 김하온(HA...,Rec rank: 38\n블락비 (Block B) - Everythin’ (U-KW...
HONNE - Warm On A Cold Night,4330817,Rec rank: 1\nHONNE - Warm On A Cold Night\n동물원...,Rec rank: 1\nHONNE - Warm On A Cold Night\n노사연...,Rec rank: 111\n은지 - 너의 온기\n김경호 - 이별보다 슬픈사랑\n무감...,Rec rank: 5\n장혜진 - 애모 (김수희)\n투영 (2Young) - 세렌디...,Rec rank: 1\nHONNE - Warm On A Cold Night\n서가인...
김현철 - Drive,433467111,Rec rank: 2\n김현철 - Drive (feat. 죠지)\n김현철 - Dri...,Rec rank: 2\n이효리 - Love Sign (feat. 상추 Of 마이티마...,Rec rank: 2\n김현철 - Drive (feat. 죠지)\n김현철 - Dri...,Rec rank: 35\n정키 (Jung Key) - 거울 (feat. 선우정아)\...,Rec rank: 2\n김현철 - Drive (feat. 죠지)\n김현철 - Dri...
Maluma - Hawai,437482491,Rec rank: 12\n청하 - 너의 온도 (Remind of You)\n포맨 -...,Rec rank: 1\nMaluma - Hawai\n윤종신 - 탈진\n대성 - BA...,Rec rank: 29\nMC 스나이퍼 - 나의 옛날이야기 (feat. Mr. Ro...,Rec rank: 1\nMaluma - Hawai\nSUPER JUNIOR-D&E ...,Rec rank: 45\n100% V - 예뻐서 그래\nAudrey Mika - Y...


In [146]:
# Song titles in the current row order of `dataframe`, taken from column 0.
# NOTE(review): despite the name, nothing is sorted here; the order is whatever
# order the rows of `dataframe` currently have.
sorted_keys = dataframe.to_dict()[0].keys()

In [151]:
# Per-song rank lists from `out`, re-ordered to follow the table's row order.
# Each element is a one-entry dict {title: [rank, ...]} with 0-based ranks.
sorted_out = [{x: out[x]} for x in sorted_keys]
sorted_out

[{'민수 - 민수는 혼란스럽다': [22, 0, 61, 0, 110]},
 {'Bag Raiders - Shooting Stars': [0, 20241, 0, 476, 2]},
 {'Betty Who - Taste': [5936, 0, 1287, 1371, 7420]},
 {'Jawsh 685 & Jason Derulo - Savage Love (Laxed - Siren Beat)': [0,
   0,
   3,
   0,
   2]},
 {'James Blake - Are You Even Real?': [15, 97, 0, 5, 12]},
 {'이수영 - 얼마나 좋을까 (Final Fantasy X O.S.T)': [1, 1, 1, 1, 0]},
 {'Taylor Swift - Love Story': [7, 0, 10, 60, 37]},
 {'HONNE - Warm On A Cold Night': [0, 0, 110, 4, 0]},
 {'김현철 - Drive': [1, 1, 1, 34, 1]},
 {'Maluma - Hawai': [11, 0, 28, 0, 44]},
 {'Chantal Chamberland - La Mer': [0, 0, 155, 9, 0]},
 {'Pawl, Discrete - Type Like That': [1, 1393, 13, 91, 135]},
 {'Electric Light Orchestra - Mr. Blue Sky': [689, 2, 13339, 3525, 4168]},
 {'Ryan Gosling& Emma Stone - City Of Stars (From "La La Land" Soundtrack)': [1975,
   6,
   0,
   3,
   1]},
 {'M83 - Wait': [20777, 20777, 20777, 20777, 20777]},
 {'MAX & Quinn XCII - Love Me Less': [0, 0, 0, 1433, 0]},
 {"4 Non Blondes - What's Up": [2, 0

In [138]:
# NOTE(review): `test` is not assigned in any visible cell; this inspection only
# works with state left over from an earlier kernel session.
test[0]

{0: 31484300,
 1: 'Rec rank: 23\n노틸러스 - 슬픈꿈\n윤건 - 자석처럼\n사준 - 호박\nGroup: P\nGender: F\nHumm type: C',
 2: 'Rec rank: 1\n민수 - 민수는 혼란스럽다\n양요섭 - 소리 없이 운다\n양요섭 - 소리없이 운다\nGroup: P\nGender: F\nHumm type: D',
 3: 'Rec rank: 62\n노틸러스 - 슬픈꿈\nSG워너비 - 은(恩)\n그네 - 늦여름 (Vocal. 윤종현, 이윤진)\nGroup: N\nGender: M\nHumm type: A',
 4: 'Rec rank: 1\n민수 - 민수는 혼란스럽다\n김경호 - 영원의 성(城) (슬픈 영혼의 아리아2)\n문별 (마마무) - 어제처럼\nGroup: N\nGender: M\nHumm type: B',
 5: 'Rec rank: 111\n어쿠스틱 콜라보 - 빗속에서 With String\n거미 - Love Again (feat. 하동균 Of Wanted)\n굿나잇스탠드 - 꿈으로 와요\nGroup: N\nGender: M\nHumm type: B'}

In [143]:
# NOTE(review): depends on the undefined-in-this-notebook variable `test`.
test[0].keys()

dict_keys(['민수 - 민수는 혼란스럽다', 'Bag Raiders - Shooting Stars', 'Betty Who - Taste', 'Jawsh 685 & Jason Derulo - Savage Love (Laxed - Siren Beat)', 'James Blake - Are You Even Real?', '이수영 - 얼마나 좋을까 (Final Fantasy X O.S.T)', 'Taylor Swift - Love Story', 'HONNE - Warm On A Cold Night', '김현철 - Drive', 'Maluma - Hawai', 'Chantal Chamberland - La Mer', 'Pawl, Discrete - Type Like That', 'Electric Light Orchestra - Mr. Blue Sky', 'Ryan Gosling& Emma Stone - City Of Stars (From "La La Land" Soundtrack)', 'M83 - Wait', 'MAX & Quinn XCII - Love Me Less', "4 Non Blondes - What's Up", 'Pet Shop Boys - Go West', 'Mika - Rio', 'Bonny M - Gotta Go Home', '영탁 - 찐이야', '데이브레이크 - 꽃길만 걷게 해줄게', 'The Cardigans - Lovefool', 'Lady GaGa& Bradley Cooper - Shallow', "Ariana Grande - They Don't Know", '김사월 - 누군가에게', 'ADOY(아도이) - Wonder', '브로콜리너마저 - 사랑한다는 말로도 위로가 되지 않는', 'John Legend - Someday (From the August Rush Soundtrack)', '가을방학 - 가끔 미치도록 네가 안고 싶어질 때가 있어', 'Adam Levine - Lost Stars', "김윤아 - 봄날은 간다 - 영화 '봄날은 간

In [142]:
# NOTE(review): dumps the whole `test` object, which no visible cell defines;
# consider removing this cell or showing only a small slice.
test

{0: {'민수 - 민수는 혼란스럽다': 31484300,
  'Bag Raiders - Shooting Stars': 427396913,
  'Betty Who - Taste': 5466183,
  'Jawsh 685 & Jason Derulo - Savage Love (Laxed - Siren Beat)': 435806411,
  'James Blake - Are You Even Real?': 436045232,
  '이수영 - 얼마나 좋을까 (Final Fantasy X O.S.T)': 317040,
  'Taylor Swift - Love Story': 421881404,
  'HONNE - Warm On A Cold Night': 4330817,
  '김현철 - Drive': 433467111,
  'Maluma - Hawai': 437482491,
  'Chantal Chamberland - La Mer': 18033477,
  'Pawl, Discrete - Type Like That': 437200095,
  'Electric Light Orchestra - Mr. Blue Sky': 2799391,
  'Ryan Gosling& Emma Stone - City Of Stars (From "La La Land" Soundtrack)': 4817432,
  'M83 - Wait': 30894451,
  'MAX & Quinn XCII - Love Me Less': 420510466,
  "4 Non Blondes - What's Up": 80169765,
  'Pet Shop Boys - Go West': 2114588,
  'Mika - Rio': 421311716,
  'Bonny M - Gotta Go Home': 420497440,
  '영탁 - 찐이야': 434425125,
  '데이브레이크 - 꽃길만 걷게 해줄게': 4740685,
  'The Cardigans - Lovefool': 263908,
  'Lady GaGa& Bradley