# capoデータにlightFMを適用してみる

In [25]:
from pathlib import Path
import sys,os
sys.path.append(os.pardir)
from tools.preprocess.common import CommonPreprocessor
from tools.preprocess.interaction_matrix_generator import InteractionMatrixGenerator
from tools.preprocess.bow_vectorizer import BOWVectorizer
from scipy.sparse import csr_matrix

## data部分

In [26]:
# Preprocessed song file that CommonPreprocessor.get_song_list reads below.
# NOTE(review): `orignal_path` is a typo for `original_path`; the name is kept
# because a later cell refers to it.
orignal_path=Path("../data/preprocessed_50k.txt")
# Passed as `threshold` to BOWVectorizer and embedded in the output JSON file name.
bow_threshold=10000

In [27]:
# Load the song list, run it through `remove_rare_capo_song`, then split the
# result into train and test song lists.
# NOTE(review): presumably songs whose capo is in `rare_capo_list` are dropped
# and `test_rate` / `split_seed` drive the split - confirm in CommonPreprocessor.
cp = CommonPreprocessor(
    rare_capo_list=["capo6", "capo7", "whole_down"],
    test_rate=0.2,
    split_seed=0,
)
songs = cp.remove_rare_capo_song(cp.get_song_list(orignal_path, shuffle=True))
songs_train, songs_test = cp.split_dataset(songs, shuffle=False)

In [28]:
# Build the train/test interaction matrices from the full song list and
# convert both to CSR before they are handed to the models below.
# NOTE(review): this uses test_rate=0.1 on all of `songs`, while the
# CommonPreprocessor split above uses test_rate=0.2 - confirm that the two
# splits are meant to be independent.
generator = InteractionMatrixGenerator(test_rate=0.1)
X_train, X_test = (csr_matrix(m) for m in generator.generate_matrices(songs))

In [29]:
# Chord and capo statistics taken from the training songs only.
# `chord_stat_train` feeds BOWVectorizer; `capo_stat_train` feeds BaselineModel.
chord_stat_train=cp.retrieve_chord_stat(songs_train)
capo_stat_train=cp.retrieve_capo_stat(songs_train)

In [30]:
# Chord vectorizer configured with the training chord statistics and `bow_threshold`.
# NOTE(review): how `threshold` selects chords is defined inside BOWVectorizer
# and is not visible here.
vectorizer=BOWVectorizer(chord_stat=chord_stat_train,threshold=bow_threshold)

In [31]:
# Chord features for every song in `songs` (the list before the train/test split).
songs_features=vectorizer.get_chord_features(songs)
print(songs_features.shape)
# Convert to CSR; this matrix is passed to the models as `item_features`.
songs_features=csr_matrix(songs_features)

(49882, 62)


## model部分

In [32]:
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

In [33]:
import numpy as np
def get_prediction(model, n_songs, item_ids=None):
    """Collect ``model.predict`` results into a single array.

    ``model.predict`` is called once for every id in ``range(n_songs)``; that
    id is passed as ``user_ids`` and ``item_ids`` is passed unchanged.

    Args:
        model: Object exposing ``predict(user_ids=..., item_ids=...)``.
        n_songs: Number of ids to query; ids ``0 .. n_songs - 1`` are used.
        item_ids: Ids forwarded as ``item_ids`` on every call. Defaults to
            ``[0, 1, 2, 3, 4, 5, 6]``, the value that was previously
            hard-coded.

    Returns:
        ``np.ndarray`` holding one entry (row) of predictions per queried id.
    """
    # NOTE(review): elsewhere in this notebook `user_embeddings` is indexed by
    # capo, yet here the index running over `n_songs` is sent as `user_ids`.
    # No `item_features` are forwarded either. Confirm both are intended.
    if item_ids is None:
        item_ids = [0, 1, 2, 3, 4, 5, 6]
    return np.array(
        [model.predict(user_ids=i, item_ids=item_ids) for i in range(n_songs)]
    )

In [34]:
def train_and_evaluate(model,X_train,X_test,songs_features,epoch,k):
    """Fit ``model`` and print its precision@k and AUC on both splits.

    Args:
        model: Object with ``fit(interactions, item_features=..., epochs=...)``
            that ``precision_at_k`` and ``auc_score`` can evaluate.
        X_train: Interactions used for fitting and for the "train" metrics.
        X_test: Interactions used for the "test" metrics.
        songs_features: Passed as ``item_features`` to fitting and evaluation.
        epoch: Passed as ``epochs`` to ``model.fit``.
        k: Cutoff passed to ``precision_at_k``.

    Returns:
        The same ``model`` object after fitting.
    """
    # NOTE(review): the test metrics are computed without `train_interactions`;
    # confirm whether training positives should be excluded from the ranking.
    model.fit(X_train, item_features=songs_features, epochs=epoch)

    splits = (X_train, X_test)
    # Evaluation order is kept: precision (train, test), then AUC (train, test).
    precision_train, precision_test = [
        precision_at_k(model, interactions, item_features=songs_features, k=k).mean()
        for interactions in splits
    ]
    auc_train, auc_test = [
        auc_score(model, interactions, item_features=songs_features).mean()
        for interactions in splits
    ]

    print(f"Precision`@{k}: train {precision_train:.2f}, test {precision_test:.2f}.")
    print(f"AUC: train {auc_train:.2f}, test {auc_test:.2f}.")
    return model

In [35]:
# Hyperparameters shared by the model cells below.
# Passed as LightFM `no_components`; also embedded in the output JSON file name.
embedding_dim=8
# Passed as LightFM `learning_rate`.
lr=0.01
# Passed as `epochs` to `model.fit` through train_and_evaluate.
epoch=100
# Cutoff passed to `precision_at_k`.
k=3

In [36]:
# LightFM model with the "warp" loss, fitted and scored in a single call.
warp_model = train_and_evaluate(
    LightFM(no_components=embedding_dim, learning_rate=lr, loss="warp"),
    X_train,
    X_test,
    songs_features,
    epoch,
    k,
)

Precision`@3: train 0.33, test 0.20.
AUC: train 0.96, test 0.66.


In [37]:
# LightFM model with the "bpr" loss, fitted and scored in a single call.
bpr_model = train_and_evaluate(
    LightFM(no_components=embedding_dim, learning_rate=lr, loss="bpr"),
    X_train,
    X_test,
    songs_features,
    epoch,
    k,
)

Precision`@3: train 0.33, test 0.15.
AUC: train 1.00, test 0.49.


In [38]:
# LightFM model with the "logistic" loss, fitted and scored in a single call.
log_model = train_and_evaluate(
    LightFM(no_components=embedding_dim, learning_rate=lr, loss="logistic"),
    X_train,
    X_test,
    songs_features,
    epoch,
    k,
)

Precision`@3: train 0.21, test 0.20.
AUC: train 0.56, test 0.56.


In [39]:
from tools.model.baseline import BaselineModel

# Project baseline, scored through the same routine as the LightFM models.
baseline_model = train_and_evaluate(
    BaselineModel(generator, capo_stat_train),
    X_train,
    X_test,
    songs_features,
    epoch,
    k,
)

Precision`@3: train 0.10, test 0.10.
AUC: train 0.42, test 0.42.


In [40]:
def get_chord_embeddings(model,vectorizer):
    """Map every chord known to ``vectorizer`` to its embedding as a list.

    Args:
        model: Object whose ``item_embeddings`` can be indexed with the values
            of ``vectorizer.chord_encoder``; each row must offer ``tolist()``.
        vectorizer: Object whose ``chord_encoder`` maps chord -> row index.

    Returns:
        Dict from chord to ``model.item_embeddings[index].tolist()``, in the
        iteration order of ``chord_encoder``.
    """
    return {
        name: model.item_embeddings[row].tolist()
        for name, row in vectorizer.chord_encoder.items()
    }

def get_capo_embeddings(model,generator):
    """Map every capo label to its embedding as a list.

    Feature names are read from ``generator.capo_encoder``; the part after the
    last ``"="`` of each name is used as the capo label, and the i-th label is
    paired with ``model.user_embeddings[i]``.

    Args:
        model: Object whose ``user_embeddings`` rows offer ``tolist()``.
        generator: Object whose ``capo_encoder`` exposes
            ``get_feature_names_out()`` or the older ``get_feature_names()``.

    Returns:
        Dict from capo label to ``model.user_embeddings[i].tolist()``.
    """
    encoder = generator.capo_encoder
    # NOTE(review): `capo_encoder` looks like a scikit-learn vectorizer. There,
    # `get_feature_names` was deprecated in 1.0 and removed in 1.2 in favour of
    # `get_feature_names_out`, so prefer the new name and fall back to the old.
    list_names = getattr(encoder, "get_feature_names_out", None)
    if list_names is None:
        list_names = encoder.get_feature_names
    capo_names = [name.split("=")[-1] for name in list_names()]
    return {
        capo: model.user_embeddings[i].tolist()
        for i, capo in enumerate(capo_names)
    }

def get_embeddings(model,vectorzier,generator):
    """Bundle the capo and chord embeddings of ``model`` into one dict.

    Args:
        model: Model passed to both embedding helpers.
        vectorzier: Vectorizer passed to ``get_chord_embeddings``. The
            parameter name is misspelled; it is kept so keyword callers keep
            working.
        generator: Generator passed to ``get_capo_embeddings``.

    Returns:
        ``{"capo": <capo embeddings>, "chord": <chord embeddings>}``.
    """
    # Use the argument: the previous body read the global `vectorizer`, so the
    # vectorizer passed by the caller was silently ignored.
    return {
        "capo": get_capo_embeddings(model, generator),
        "chord": get_chord_embeddings(model, vectorzier),
    }

In [41]:
# Capo and chord embeddings taken from the WARP-loss model.
embeddings=get_embeddings(warp_model,vectorizer,generator)

In [42]:
# Show the capo embeddings as the cell output.
embeddings["capo"]

{'capo0': [-0.17173217236995697,
  -0.28994613885879517,
  0.03371436893939972,
  0.15035808086395264,
  -0.007538539823144674,
  0.2903346121311188,
  -0.1328415423631668,
  -0.26164036989212036],
 'capo1': [0.1708698272705078,
  0.15502940118312836,
  -0.08614228665828705,
  -0.12300711125135422,
  0.18529696762561798,
  -0.2071530520915985,
  -0.021236319094896317,
  0.4008362591266632],
 'capo2': [0.055184729397296906,
  -0.044427741318941116,
  -0.057971011847257614,
  0.06903176009654999,
  -0.017439058050513268,
  0.0023666061460971832,
  -0.040703557431697845,
  0.045127056539058685],
 'capo3': [0.06850621849298477,
  -0.13224828243255615,
  0.19449417293071747,
  -0.014965650625526905,
  -0.06725962460041046,
  0.05700911208987236,
  -0.03453964740037918,
  -0.12694163620471954],
 'capo4': [0.02796442247927189,
  -0.10631749033927917,
  0.13460029661655426,
  0.009514633566141129,
  -0.05003681033849716,
  0.056787267327308655,
  -0.010950244963169098,
  -0.08171254396438599],

In [43]:
# Show the chord embeddings as the cell output.
embeddings["chord"]

{'A': [0.0599532425403595,
  0.7563846707344055,
  -0.4962482750415802,
  -0.1591922640800476,
  0.3232082724571228,
  -0.3287661075592041,
  0.550851047039032,
  0.5452899932861328],
 'A#': [0.034706491976976395,
  -0.7941849827766418,
  0.5138146281242371,
  0.30364447832107544,
  -0.4181346297264099,
  0.3437502682209015,
  -0.0647716075181961,
  -0.6854865550994873],
 'A#7': [0.049209509044885635,
  0.0845315232872963,
  0.08878011256456375,
  -0.07221011817455292,
  -0.06734534353017807,
  0.02459707483649254,
  -0.10832894593477249,
  0.11057920753955841],
 'A#m': [-0.3432713747024536,
  -0.3007313013076782,
  -0.07559099048376083,
  0.37068963050842285,
  0.02733292430639267,
  0.43236204981803894,
  -0.03097255900502205,
  -0.4598220884799957],
 'A#m7': [0.17733991146087646,
  0.3143216073513031,
  0.3047040104866028,
  0.041403040289878845,
  -0.16750459372997284,
  0.06135760247707367,
  -0.5967341065406799,
  0.11339882016181946],
 'A#maj7': [-0.016302017495036125,
  -0.0041

 ## jsonに保存

In [44]:
import json
from pathlib import Path

# Save the embeddings as JSON; the file name records `bow_threshold` and
# `embedding_dim` so that runs with different settings do not overwrite
# each other.
json_path = Path(f"../result/embeddings_{bow_threshold}_{embedding_dim}.json")
# Create the output directory if needed so a missing ../result does not abort
# the save after training has already finished.
json_path.parent.mkdir(parents=True, exist_ok=True)
with json_path.open("w", encoding="utf-8") as f:
    json.dump(embeddings, f)