# capoデータにLightFMを適用してみる

In [51]:
from pathlib import Path
import sys,os
sys.path.append(os.pardir)
from tools.preprocess.common import CommonPreprocessor
from tools.preprocess.interaction_matrix_generator import InteractionMatrixGenerator
from tools.preprocess.bow_vectorizer import BOWVectorizer
from scipy.sparse import csr_matrix

## data部分

In [52]:
# Input corpus location and the threshold handed to BOWVectorizer further down.
# NOTE: `orignal_path` (sic) is the name the following cells read, so the
# spelling is kept as-is.
bow_threshold = 10_000
orignal_path = Path("..") / "data" / "original.txt"

In [53]:
# Read the song list, run it through remove_rare_capo_song, then split it.
# rare_capo_list / test_rate / split_seed are handed to the preprocessor here;
# presumably they drive the filtering and the split -- TODO confirm in
# tools.preprocess.common.
cp = CommonPreprocessor(
    rare_capo_list=["-6", "-7"],
    test_rate=0.2,
    split_seed=0,
)
songs = cp.remove_rare_capo_song(cp.get_song_list(orignal_path, shuffle=True))
songs_train, songs_test = cp.split_dataset(songs, shuffle=False)

In [54]:
# Build the train/test interaction matrices and keep both in CSR form.
# NOTE(review): the matrices are generated from the full `songs` list (not
# `songs_train`) and the generator is given its own test_rate=0.1 -- confirm
# that this is the intended split.
generator = InteractionMatrixGenerator(test_rate=0.1)
X_train, X_test = map(csr_matrix, generator.generate_matrices(songs))

In [55]:
# Chord and capo statistics, both retrieved from the training songs.
chord_stat_train, capo_stat_train = (
    cp.retrieve_chord_stat(songs_train),
    cp.retrieve_capo_stat(songs_train),
)

In [56]:
# Bag-of-words vectorizer configured with the training chord statistics and
# the threshold chosen in the config cell.
vectorizer = BOWVectorizer(
    chord_stat=chord_stat_train,
    threshold=bow_threshold,
)

In [57]:
# Chord features for every song in `songs`: report the shape of what the
# vectorizer returns, then replace it with its CSR representation.
songs_features = vectorizer.get_chord_features(songs)
print(songs_features.shape)
songs_features = csr_matrix(songs_features)

(39995, 60)


## model部分

In [58]:
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

In [59]:
import numpy as np
def get_prediction(model, n_songs, item_ids=None):
    """Collect ``model.predict`` scores for the user ids ``0 .. n_songs - 1``.

    Args:
        model: Object exposing ``predict(user_ids=..., item_ids=...)``, e.g. a
            fitted LightFM model.
        n_songs: Number of consecutive user ids, starting at 0, to score.
        item_ids: Item ids scored for every user id. Defaults to the seven ids
            ``0`` to ``6`` that were previously hard-coded in this function.

    Returns:
        ``np.ndarray`` holding one row of scores per user id (an empty array
        when ``n_songs`` is 0).

    NOTE(review): no ``item_features`` are forwarded to ``predict`` although
    this notebook fits its model with ``item_features`` -- confirm that this
    is intended before relying on the scores.
    """
    if item_ids is None:
        # Backward-compatible default: the original fixed id list.
        item_ids = list(range(7))
    return np.array(
        [
            model.predict(user_ids=user_id, item_ids=item_ids)
            for user_id in range(n_songs)
        ]
    )

In [90]:
# Hyper-parameters read by the training and evaluation cells.
embedding_dim = 5  # passed to LightFM as no_components
lr = 0.01          # passed to LightFM as learning_rate
epoch = 100        # passed to fit() as epochs
k = 2              # cut-off used by precision_at_k

In [91]:
# Fit a WARP-loss LightFM model on the training interactions, with the chord
# features supplied as item features, then report precision@k and AUC on both
# interaction matrices.
#
# random_state fixes LightFM's random initialisation and sampling so that
# Restart & Run All reproduces the reported metrics (the model was previously
# unseeded); 0 matches the split_seed used for the preprocessor.
warp_model = LightFM(
    no_components=embedding_dim,
    learning_rate=lr,
    loss="warp",
    random_state=0,
)
warp_model.fit(X_train, item_features=songs_features, epochs=epoch)

# NOTE(review): the test metrics are computed without
# train_interactions=X_train, so known training positives are not excluded
# from the ranking -- confirm that this is intended.
train_precision = precision_at_k(warp_model, X_train, item_features=songs_features, k=k).mean()
test_precision = precision_at_k(warp_model, X_test, item_features=songs_features, k=k).mean()

train_auc = auc_score(warp_model, X_train, item_features=songs_features).mean()
test_auc = auc_score(warp_model, X_test, item_features=songs_features).mean()

print(f"Precision`@{k}: train {train_precision:.2f}, test {test_precision:.2f}.")
print(f"AUC: train {train_auc:.2f}, test {test_auc:.2f}.")

Precision`@2: train 0.50, test 0.27.
AUC: train 0.98, test 0.71.


In [92]:
from tools.model.baseline import BaselineModel

# Score the baseline with the same metrics, inputs and report format as the
# WARP model so the two sets of numbers can be compared directly.
baseline_model = BaselineModel(generator, capo_stat_train)

# Each pair is evaluated train first, then test.
train_precision, test_precision = (
    precision_at_k(baseline_model, split, item_features=songs_features, k=k).mean()
    for split in (X_train, X_test)
)
train_auc, test_auc = (
    auc_score(baseline_model, split, item_features=songs_features).mean()
    for split in (X_train, X_test)
)

print(f"Precision`@{k}: train {train_precision:.2f}, test {test_precision:.2f}.")
print(f"AUC: train {train_auc:.2f}, test {test_auc:.2f}.")

Precision`@2: train 0.06, test 0.06.
AUC: train 0.30, test 0.29.


In [93]:
def get_chord_embeddings(model, vectorizer):
    """Map every chord in ``vectorizer.chord_encoder`` to its embedding.

    The encoder's value for a chord is used as the row index into
    ``model.item_embeddings``; each selected row is converted with
    ``.tolist()``. Entries keep the encoder's iteration order.
    """
    return {
        name: model.item_embeddings[row].tolist()
        for name, row in vectorizer.chord_encoder.items()
    }

def get_capo_embeddings(model, generator):
    """Map each capo label to the matching row of ``model.user_embeddings``.

    Labels come from the feature names of ``generator.capo_encoder``: the text
    after the last ``"="`` of every name, taken in encoder order, so the i-th
    label is paired with the i-th user embedding (converted with
    ``.tolist()``).

    The names are read through ``get_feature_names_out`` when the encoder
    offers it and through ``get_feature_names`` otherwise. The encoder looks
    like a scikit-learn vectorizer (TODO confirm); scikit-learn 1.2 removed
    ``get_feature_names``, so calling only the old method breaks on current
    releases.
    """
    encoder = generator.capo_encoder
    list_feature_names = getattr(encoder, "get_feature_names_out", None)
    if list_feature_names is None:
        # Older encoders only provide the legacy accessor.
        list_feature_names = encoder.get_feature_names
    return {
        feature_name.split("=")[-1]: model.user_embeddings[row].tolist()
        for row, feature_name in enumerate(list_feature_names())
    }

def get_embeddings(model, vectorzier, generator):
    """Bundle the capo and chord embeddings of ``model`` into one dict.

    Args:
        model: Model handed to ``get_capo_embeddings`` and
            ``get_chord_embeddings``.
        vectorzier: Vectorizer forwarded to ``get_chord_embeddings``. The
            misspelled parameter name is kept so existing keyword callers
            keep working.
        generator: Generator forwarded to ``get_capo_embeddings``.

    Returns:
        ``{"capo": <capo embeddings>, "chord": <chord embeddings>}``.
    """
    # Use the argument itself: the body previously read a notebook-level
    # `vectorizer` variable, so the parameter was silently ignored and the
    # function raised NameError wherever that global did not exist.
    return {
        "capo": get_capo_embeddings(model, generator),
        "chord": get_chord_embeddings(model, vectorzier),
    }

In [94]:
# Gather the capo and chord embeddings of the fitted WARP model in one dict.
embeddings = get_embeddings(warp_model, vectorizer, generator)

In [95]:
# Show the capo embeddings (capo label -> embedding vector) as the cell output.
embeddings["capo"]

{'-1': [-0.07792773097753525,
  -0.060524653643369675,
  0.07726976275444031,
  -0.02385975979268551,
  0.04562262445688248],
 '-2': [-0.01262444257736206,
  0.02286733314394951,
  0.05109073221683502,
  0.09203898161649704,
  -0.1305168867111206],
 '-3': [-0.1581263691186905,
  0.06912362575531006,
  0.051874157041311264,
  -0.05105556175112724,
  -0.04609163850545883],
 '-4': [-0.011947999708354473,
  0.15394474565982819,
  0.036029472947120667,
  0.04824259132146835,
  -0.10671638697385788],
 '-5': [0.02861279994249344,
  -0.04811745136976242,
  -0.11284653842449188,
  -0.010652131401002407,
  -0.1007041409611702],
 '0': [0.01945549063384533,
  -0.07956021279096603,
  0.08099450170993805,
  0.07825532555580139,
  -0.10778560489416122],
 '1': [-0.0443434901535511,
  0.10406459867954254,
  -0.1490936130285263,
  -0.12279641628265381,
  0.14582769572734833]}

In [96]:
# Show the chord embeddings (chord name -> embedding vector) as the cell output.
embeddings["chord"]

{'A': [-0.7326207756996155,
  -0.0027221699710935354,
  0.8521740436553955,
  1.2636363506317139,
  0.8719007968902588],
 'A#': [0.3199099898338318,
  0.816183865070343,
  -1.1748534440994263,
  -0.64027339220047,
  0.8154101967811584],
 'A#7': [0.18209514021873474,
  -0.21348799765110016,
  -0.04985456541180611,
  0.06816589832305908,
  -0.26074424386024475],
 'A#m': [0.10115273296833038,
  -0.27527058124542236,
  0.08862750977277756,
  0.11396296322345734,
  -0.16653187572956085],
 'A#m7': [-0.720095157623291,
  0.45833858847618103,
  0.15096677839756012,
  0.09202687442302704,
  0.5935817360877991],
 'A#maj7': [-0.04906754940748215,
  -0.0015734357293695211,
  -0.05890190601348877,
  0.03877867013216019,
  0.11225204169750214],
 'A7': [-0.13762083649635315,
  0.0920998826622963,
  -0.19665668904781342,
  0.013805177994072437,
  0.06146305426955223],
 'Am': [0.4043927788734436,
  0.8669922947883606,
  0.8872343897819519,
  -1.4591082334518433,
  -1.0541073083877563],
 'Am7': [0.38820

## jsonに保存

In [97]:
import json
from pathlib import Path

# Persist the embeddings as JSON; the file name records the BOW threshold and
# the embedding size used for this run.
json_path = Path(f"../result/embeddings_{bow_threshold}_{embedding_dim}.json")
# open("w") does not create missing directories, so make sure the output
# folder exists before writing (a fresh checkout may not have ../result).
json_path.parent.mkdir(parents=True, exist_ok=True)
with json_path.open("w", encoding="utf-8") as f:
    json.dump(embeddings, f)