# capoデータにlightFMを適用してみる

In [1]:
from pathlib import Path
import sys,os
sys.path.append(os.pardir)
from tools.preprocess.common import CommonPreprocessor
from tools.preprocess.interaction_matrix_generator import InteractionMatrixGenerator
from tools.preprocess.bow_vectorizer import BOWVectorizer
from scipy.sparse import csr_matrix

## data部分

In [2]:
orignal_path=Path("../data/original.txt")

In [3]:
cp=CommonPreprocessor(rare_capo_list=['-6', '-7'],test_rate=0.2,split_seed=0)
songs=cp.get_song_list(orignal_path,shuffle=True)
songs=cp.remove_rare_capo_song(songs)
songs_train, songs_test=cp.split_dataset(songs,shuffle=False)

In [4]:
generator=InteractionMatrixGenerator(test_rate=0.1)
X_train,X_test=generator.generate_matrices(songs)
X_train=csr_matrix(X_train)
X_test=csr_matrix(X_test)

In [5]:
chord_stat_train=cp.retrieve_chord_stat(songs_train)
capo_stat_train=cp.retrieve_capo_stat(songs_train)

In [6]:
vectorizer=BOWVectorizer(chord_stat=chord_stat_train,threshold=10000)

In [7]:
songs_features=vectorizer.get_chord_features(songs)
print(songs_features.shape)
songs_features=csr_matrix(songs_features)

(39995, 61)


## model部分

In [8]:
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score



In [30]:
import numpy as np
def get_prediction(model,n_songs):
    prediction=[]
    for i in range(n_songs):
        prediction.append(model.predict(user_ids=i,item_ids=[0,1,2,3,4,5,6]))
    return np.array(prediction)

In [36]:
embedding_dim=4
lr=0.01
epoch=100
k=2

In [37]:
warp_model = LightFM(no_components=embedding_dim,learning_rate=lr, loss="warp")
warp_model.fit(X_train,item_features=songs_features, epochs=epoch)

train_precision = precision_at_k(warp_model, X_train,item_features=songs_features, k=k).mean()
test_precision = precision_at_k(warp_model, X_test,item_features=songs_features, k=k).mean()

train_auc = auc_score(warp_model, X_train, item_features=songs_features).mean()
test_auc = auc_score(warp_model, X_test, item_features=songs_features).mean()

print(f"Precision`@{k}: train {train_precision:.2f}, test {test_precision:.2f}.")
print(f"AUC: train {train_auc:.2f}, test {test_auc:.2f}.")

Precision`@2: train 0.49, test 0.19.
AUC: train 0.97, test 0.59.


In [38]:
log_model = LightFM(no_components=embedding_dim,learning_rate=lr, loss="logistic")
log_model.fit(X_train,item_features=songs_features, epochs=epoch)

train_precision = precision_at_k(log_model, X_train,item_features=songs_features, k=k).mean()
test_precision = precision_at_k(log_model, X_test,item_features=songs_features, k=k).mean()

train_auc = auc_score(log_model, X_train, item_features=songs_features).mean()
test_auc = auc_score(log_model, X_test, item_features=songs_features).mean()

print(f"Precision`@{k}: train {train_precision:.2f}, test {test_precision:.2f}.")
print(f"AUC: train {train_auc:.2f}, test {test_auc:.2f}.")

Precision`@2: train 0.13, test 0.13.
AUC: train 0.47, test 0.47.


In [39]:
from tools.model.baseline import BaselineModel
baseline_model=BaselineModel(generator,capo_stat_train)

train_precision = precision_at_k(baseline_model, X_train,item_features=songs_features, k=k).mean()
test_precision = precision_at_k(baseline_model, X_test,item_features=songs_features, k=k).mean()

train_auc = auc_score(baseline_model, X_train, item_features=songs_features).mean()
test_auc = auc_score(baseline_model, X_test, item_features=songs_features).mean()

print(f"Precision`@{k}: train {train_precision:.2f}, test {test_precision:.2f}.")
print(f"AUC: train {train_auc:.2f}, test {test_auc:.2f}.")

Precision`@2: train 0.06, test 0.06.
AUC: train 0.34, test 0.35.
