# capoデータにLightFMを適用してみる

In [109]:
from pathlib import Path
import sys,os
sys.path.append(os.pardir)
from tools.preprocess.common import CommonPreprocessor
from tools.preprocess.interaction_matrix_generator import InteractionMatrixGenerator
from tools.preprocess.bow_vectorizer import BOWVectorizer
from scipy.sparse import csr_matrix

## data部分

In [110]:
# Input corpus location and the threshold later handed to BOWVectorizer.
# NOTE(review): "orignal" is a typo for "original"; the name is kept because
# a later cell reads it.
orignal_path = Path("..") / "data" / "preprocessed_50k.txt"
bow_threshold = 10_000

In [127]:
# Read the song list, run it through remove_rare_capo_song, then split it.
# NOTE(review): remove_rare_capo_song presumably filters on rare_capo_list —
# confirm in tools.preprocess.common.
cp = CommonPreprocessor(
    rare_capo_list=["capo6", "capo7", "whole_down"],
    test_rate=0.2,
    split_seed=0,
)
songs = cp.remove_rare_capo_song(cp.get_song_list(orignal_path, shuffle=True))
songs_train, songs_test = cp.split_dataset(songs, shuffle=False)

In [128]:
# Build the train/test interaction matrices from the full song list and
# store both as CSR sparse matrices.
generator = InteractionMatrixGenerator(test_rate=0.1)
X_train, X_test = map(csr_matrix, generator.generate_matrices(songs))

In [129]:
# Chord and capo statistics, both computed from the training songs only.
chord_stat_train, capo_stat_train = (
    cp.retrieve_chord_stat(songs_train),
    cp.retrieve_capo_stat(songs_train),
)

In [130]:
# Chord vectorizer configured with the training-split chord statistics.
vectorizer = BOWVectorizer(
    chord_stat=chord_stat_train,
    threshold=bow_threshold,
)

In [131]:
# Chord features for every song, stored as CSR; the shape is printed as a
# quick sanity check (rows = songs, columns = chord features).
songs_features = csr_matrix(vectorizer.get_chord_features(songs))
print(songs_features.shape)

(49882, 62)


## model部分

In [132]:
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

In [133]:
import numpy as np
def get_prediction(model,n_songs):
    """Collect ``model.predict`` scores for the ids ``0 .. n_songs - 1``.

    Each id is passed as ``user_ids`` together with the fixed item ids 0-6,
    and the per-id results are stacked into one NumPy array.

    NOTE(review): elsewhere in this notebook the capo embeddings are read
    from ``model.user_embeddings``, so passing a song index as ``user_ids``
    looks swapped — confirm before relying on this helper.
    """
    return np.array(
        [
            model.predict(user_ids=song_idx, item_ids=list(range(7)))
            for song_idx in range(n_songs)
        ]
    )

In [134]:
# Hyper-parameters for the LightFM run and its evaluation.
embedding_dim = 5  # passed to LightFM as no_components
lr = 0.01          # LightFM learning_rate
epoch = 100        # number of epochs given to fit()
k = 2              # cut-off used by precision_at_k

In [135]:
# Train LightFM with the WARP loss, using the chord features as item features.
# random_state is pinned (same value as split_seed in the preprocessing cell)
# so that re-running the notebook reproduces the model and the metrics below;
# without it every run starts from a different random initialisation.
warp_model = LightFM(
    no_components=embedding_dim,
    learning_rate=lr,
    loss="warp",
    random_state=0,
)
warp_model.fit(X_train, item_features=songs_features, epochs=epoch)

# NOTE(review): the test metrics do not pass train_interactions=X_train, so
# known training positives stay in the ranking — confirm this is intended.
train_precision = precision_at_k(warp_model, X_train, item_features=songs_features, k=k).mean()
test_precision = precision_at_k(warp_model, X_test, item_features=songs_features, k=k).mean()

train_auc = auc_score(warp_model, X_train, item_features=songs_features).mean()
test_auc = auc_score(warp_model, X_test, item_features=songs_features).mean()

print(f"Precision`@{k}: train {train_precision:.2f}, test {test_precision:.2f}.")
print(f"AUC: train {train_auc:.2f}, test {test_auc:.2f}.")

Precision`@2: train 0.50, test 0.26.
AUC: train 0.97, test 0.68.


In [136]:
from tools.model.baseline import BaselineModel
# Score the baseline with the same metrics and matrices as the LightFM model.
baseline_model = BaselineModel(generator, capo_stat_train)

# Each pair is evaluated train first, then test.
train_precision, test_precision = (
    precision_at_k(baseline_model, interactions, item_features=songs_features, k=k).mean()
    for interactions in (X_train, X_test)
)
train_auc, test_auc = (
    auc_score(baseline_model, interactions, item_features=songs_features).mean()
    for interactions in (X_train, X_test)
)

print(f"Precision`@{k}: train {train_precision:.2f}, test {test_precision:.2f}.")
print(f"AUC: train {train_auc:.2f}, test {test_auc:.2f}.")

Precision`@2: train 0.07, test 0.07.
AUC: train 0.44, test 0.44.


In [137]:
def get_chord_embeddings(model,vectorizer):
    """Map every chord name to its embedding vector as a plain list.

    ``vectorizer.chord_encoder`` supplies ``chord -> index`` pairs; each index
    selects one row of ``model.item_embeddings``.
    """
    return {
        name: model.item_embeddings[position].tolist()
        for name, position in vectorizer.chord_encoder.items()
    }

def get_capo_embeddings(model,generator):
    """Map every capo label to its embedding vector as a plain list.

    Feature names are read from ``generator.capo_encoder``; the part after the
    last ``=`` of each name is used as the label, and the position of the name
    selects the row of ``model.user_embeddings``.

    The encoder is presumably a scikit-learn vectorizer (verify in
    InteractionMatrixGenerator). scikit-learn 1.2 removed
    ``get_feature_names()`` in favour of ``get_feature_names_out()``, so the
    newer method is used when present and the legacy one otherwise.
    """
    encoder = generator.capo_encoder
    list_feature_names = getattr(encoder, "get_feature_names_out", None)
    if list_feature_names is None:
        list_feature_names = encoder.get_feature_names
    capo_names = [feature.split("=")[-1] for feature in list_feature_names()]
    return {
        capo: model.user_embeddings[row].tolist()
        for row, capo in enumerate(capo_names)
    }

def get_embeddings(model,vectorzier,generator):
    """Bundle the capo and chord embeddings of ``model`` into one dict.

    Returns ``{"capo": {...}, "chord": {...}}`` built by
    ``get_capo_embeddings`` and ``get_chord_embeddings``.

    The second parameter keeps its original (misspelled) name ``vectorzier``
    so existing callers are unaffected. The body now uses that argument;
    before, it read the notebook-global ``vectorizer`` and silently ignored
    whatever was passed in.
    """
    return {
        "capo": get_capo_embeddings(model, generator),
        "chord": get_chord_embeddings(model, vectorzier),
    }

In [138]:
# Extract the capo and chord embeddings from the trained WARP model.
embeddings=get_embeddings(warp_model,vectorizer,generator)

In [139]:
# Display the capo embeddings (one vector per capo label).
embeddings["capo"]

{'capo0': [0.09215369075536728,
  0.173165425658226,
  0.11947067081928253,
  0.0821835845708847,
  0.006101572420448065],
 'capo1': [-0.09896015375852585,
  0.14635644853115082,
  -0.05504532903432846,
  -0.03203633427619934,
  -0.15096163749694824],
 'capo2': [-0.1943535953760147,
  0.17615582048892975,
  -0.07617438584566116,
  -0.08615019172430038,
  -0.006510838400572538],
 'capo3': [0.15353095531463623,
  -0.10481426864862442,
  -0.10572098940610886,
  -0.1168871521949768,
  0.038678333163261414],
 'capo4': [0.010404076427221298,
  -0.07147523015737534,
  0.07451014220714569,
  -0.05209476500749588,
  -0.1144038587808609],
 'capo5': [0.16645745933055878,
  -0.09417729824781418,
  0.016352884471416473,
  0.02322077751159668,
  -0.01601513847708702],
 'harf_down': [-0.046057384461164474,
  -0.06557648628950119,
  -0.014531945809721947,
  -0.11726851016283035,
  -0.030125504359602928]}

In [140]:
# Display the chord embeddings (one vector per chord name).
embeddings["chord"]

{'A': [-0.430338978767395,
  0.7953723669052124,
  0.050306014716625214,
  -0.5240473747253418,
  -0.16884534060955048],
 'A#': [0.14405293762683868,
  -0.5254839062690735,
  0.5221236348152161,
  0.008805683813989162,
  -0.27712875604629517],
 'A#7': [-0.09063883870840073,
  0.08211228251457214,
  -0.030858540907502174,
  0.026309693232178688,
  0.11880624294281006],
 'A#m': [1.04828941822052,
  -0.76384437084198,
  -0.17666877806186676,
  -0.34616467356681824,
  0.4756089746952057],
 'A#m7': [0.8072599172592163,
  -0.677769660949707,
  -0.49053168296813965,
  -0.3186749219894409,
  0.5457634925842285],
 'A#maj7': [0.18278250098228455,
  -0.17779874801635742,
  -0.016454510390758514,
  0.041218776255846024,
  -0.025120310485363007],
 'A7': [-0.9153162837028503,
  0.39763638377189636,
  -0.6255059838294983,
  1.485819935798645,
  0.42250892519950867],
 'Am': [-0.7466221451759338,
  0.3501734435558319,
  -0.36429959535598755,
  1.4694708585739136,
  0.8058778643608093],
 'Am7': [0.20525

# JSONに保存

In [141]:
import json
from pathlib import Path
# Persist the embeddings as JSON; the file name records the BOW threshold and
# the embedding dimension used for this run.
json_path = Path(f"../result/embeddings_{bow_threshold}_{embedding_dim}.json")
# Create the output directory when it is missing so the write does not fail
# with FileNotFoundError on a fresh checkout.
json_path.parent.mkdir(parents=True, exist_ok=True)
with json_path.open("w", encoding="utf-8") as f:
    json.dump(embeddings, f)