클러스터별 태그를 로드한 후 가장 유사한 클러스터의 곡들과 비교?
- 유사도를 어떻게 판단할 것인가?
- 유사한 클러스터를 몇 개 뽑을 것인가?

감정(슬픔/분노/기쁨/중립/걱정) + 키워드

In [71]:
import sqlite3
from gensim import models

def load_cluster_tags(DB='../flask_api/db.db', *, tags_per_cluster=10):
    """Load the tag list of every cluster from the CLUSTER table.

    The number of clusters is derived as ``row_count // tags_per_cluster``
    and labels are assumed to be the consecutive integers
    ``0 .. n_clusters - 1``.

    Args:
        DB: Path to the SQLite database file.
        tags_per_cluster: Number of rows stored per cluster label. Defaults
            to 10, the value the original query hard-coded.

    Returns:
        A list where element ``i`` is the list of tags stored for label
        ``i``. Returns ``None`` if the database cannot be opened. If a tag
        query fails midway, the clusters read so far are returned.

    Raises:
        ValueError: If ``tags_per_cluster`` is not positive.
        sqlite3.Error: If the initial row-count query fails (for example the
            CLUSTER table does not exist).
    """
    if tags_per_cluster <= 0:
        raise ValueError("tags_per_cluster must be a positive integer")

    try:
        conn = sqlite3.connect(DB)
    except sqlite3.Error:
        print("DB Connection Error!")
        return None

    cluster_tags = []
    try:
        cur = conn.cursor()
        cur.execute("SELECT COUNT(*) FROM CLUSTER")
        n_clusters = cur.fetchone()[0] // tags_per_cluster

        try:
            for label in range(n_clusters):
                cur.execute("SELECT tag FROM CLUSTER WHERE label = ?", [label])
                cluster_tags.append([tag for tag, in cur.fetchall()])
        except sqlite3.Error as err:
            # Best effort: keep what was read, but report the failure
            # instead of swallowing it silently.
            print("Stopped loading cluster tags early:", err)
    finally:
        # Read-only access, so there is nothing to commit; always release
        # the connection, even when the count query raises.
        conn.close()

    return cluster_tags

def find_similar_clusters(model, cluster_tags, **kwargs):
    """Score every cluster by its similarity to an emotion and keywords.

    For each cluster the score is::

        sum(sim(emotion, tag) for tag in cluster)
        + keyword_weight * mean over keywords of
          sum(sim(keyword, tag) for tag in cluster)

    The keyword term is accumulated over *all* tags of the cluster and is
    reset per cluster, so empty clusters score 0 instead of raising or
    reusing the previous cluster's value.

    Args:
        model: Object exposing ``model.wv.similarity(word_a, word_b)``.
        cluster_tags: Iterable of tag lists, one list per cluster.
        **kwargs:
            emotion (required): Word compared against every tag, weight 1.
            keywords (optional): List of words; defaults to no keywords.
            keyword_weight (optional): Weight of the keyword term,
                default 0.1.

    Returns:
        A list with one similarity score per cluster, in input order.

    Raises:
        KeyError: If ``emotion`` is not supplied.
    """
    emotion = kwargs['emotion']
    keywords = kwargs.get('keywords') or []
    keyword_weight = kwargs.get('keyword_weight', 0.1)
    similarity_of = model.wv.similarity

    similarities = []
    for cluster_tag in cluster_tags:
        emotion_sim = sum(similarity_of(emotion, tag) for tag in cluster_tag)

        keyword_sim = 0
        if keywords:  # guard the division when no keywords are given
            keyword_total = sum(
                similarity_of(keyword, tag)
                for tag in cluster_tag
                for keyword in keywords
            )
            keyword_sim = keyword_weight * keyword_total / len(keywords)

        similarities.append(emotion_sim + keyword_sim)

    return similarities

In [61]:
# Load the FastText binary at './cc.ko.300.bin' with gensim's Facebook-format
# loader. NOTE(review): presumably the pretrained Korean 300-d vectors — confirm.
model = models.fasttext.load_facebook_model('./cc.ko.300.bin')

In [67]:
# Read the per-cluster tag lists using the function's default database path.
cluster_tags = load_cluster_tags()

In [74]:
# Score every cluster against the emotion "기쁨" with no keywords; the cell
# output that follows is the returned list, one score per cluster.
find_similar_clusters(model, cluster_tags, emotion = "기쁨", keywords = [])

[1.5963351279497147,
 2.1495985835790634,
 2.9025616496801376,
 2.7706657452508807,
 1.4133879807777703,
 2.0745216561481357,
 1.2758558308705688,
 2.2880439264699817,
 1.959825333673507,
 2.5492388186976314,
 2.159148315899074,
 2.548985364846885,
 1.4135959353297949,
 2.566300648264587,
 2.617619293741882,
 1.8002654192969203,
 1.5150525718927383,
 1.5791225624270737,
 2.4161046892404556,
 2.3358403369784355,
 1.6840440821833909,
 2.241004350595176,
 1.941720798611641,
 2.1663032844662666,
 2.528794097714126,
 2.322473131120205,
 1.9113315865397453,
 2.059613729827106,
 2.1522522941231728,
 2.95095784496516,
 1.5732458112761378,
 2.3533596843481064,
 2.590305581688881,
 1.2679904513061047,
 0.902123770210892,
 1.8324339133687317,
 2.93731180857867,
 1.5101235047914088,
 1.874179185833782,
 1.9253343371674418,
 2.4730999721214175,
 1.6810683300718665,
 1.914124257862568,
 2.909645969979465,
 2.0620850129052997,
 1.8666243059560657,
 2.6435288144275546,
 1.3718826253898442,
 2.33965046

In [32]:
# from gensim import models
# model = models.fasttext.load_facebook_model('./cc.ko.300.bin')
# model.wv.similarity('슬픔', '기쁨')

In [34]:
# Display `tags` (assigned outside this excerpt); the output that follows shows
# one list of tag strings per row.
tags

[['그루브', '드라이브', '알앤비', '기분전환', '힙합', '트렌디', '내적댄스', '매장음악', '감성', '여행'],
 ['신나는', '기분전환', '드라이브', '여행', '감성', '매장음악', '스트레스', '여름', '취향저격', '휴식'],
 ['산책', '여행', '기분전환', '힐링', '휴식', '드라이브', '설렘', '사랑', '새벽', '까페'],
 ['회상', '추억', '설렘', '사랑', '힐링', '휴식', '발라드', '기분전환', '새벽', '잔잔한'],
 ['rnb', '알앤비', '힙합', '감성', '소울', '분위기', '드라이브', 'pop', '새벽', '잔잔한'],
 ['휴식', '힐링', '잔잔한', '기분전환', '드라이브', '새벽', '발라드', '감성', '힙합', '스트레스'],
 ['아이돌', '댄스', '신나는', '발라드', '기분전환', '수록곡', '드라이브', '스트레스', '숨은명곡', '방탄소년단'],
 ['새벽감성', '새벽', '감성', '겨울', '잔잔한', '휴식', '가을', '발라드', '기분전환', '힐링'],
 ['pop', '드라이브', '기분전환', '감성', '매장음악', '휴식', '힐링', '새벽', '잔잔한', '카페'],
 ['소울', '알앤비', '발라드', '사랑', '힐링', '설렘', '휴식', '새벽', '추억', '기분전환'],
 ['인디', '감성', '잔잔한', '카페', '새벽', '발라드', '사랑', '휴식', '기분전환', '드라이브'],
 ['까페', '힐링', '휴식', '설렘', '사랑', '기분전환', '잔잔한', '매장음악', '새벽', '발라드'],
 ['kpop', '댄스', '아이돌', '신나는', '걸그룹', '드라이브', '댄스곡', '기분전환', '운동할때', '운동'],
 ['달달', '사랑', '설렘', '듀엣', '기분전환', '취향저격', '휴식', '발라드', '카페', '힐링'],
 ['명곡', '추억