# fastFMのclassifierを使ってみる

In [1]:
from pathlib import Path
import numpy as np
import sys,os
sys.path.append(os.pardir)
from tools.preprocess.common import CommonPreprocessor
from tools.preprocess.cls_preprocessor import CLSPreprocessor
from tools.model.fm_classifier import FMClassifier
from tools.preprocess.frequency_vectorizer import FrequencyVectorizer

In [2]:
# Location of the raw song file, relative to this notebook's directory.
# NOTE(review): the name is spelled "orignal" (sic); it is read by a later cell,
# so it is kept unchanged here.
orignal_path = Path("../data/original.txt")

In [3]:
# Shared preprocessing: read the songs, filter out the capo values listed in
# `rare_capo_list`, then split into train / validation / test with a fixed seed.
cp = CommonPreprocessor(
    rare_capo_list=['-6', '-7'],
    val_rate=0.1,
    test_rate=0.2,
    split_seed=0,
)
songs = cp.remove_rare_capo_song(cp.get_song_list(orignal_path))
songs_train, songs_valid, songs_test = cp.split_dataset(songs)

In [4]:
# Chord statistics are gathered from the training split only; the validation
# and test songs are not passed in.
chord_stat = cp.retrieve_chord_stat(songs_train)

In [5]:
# Pull the "chords" entry out of every song, one list per split.
chords_list_train, chords_list_valid, chords_list_test = (
    [song["chords"] for song in split]
    for split in (songs_train, songs_valid, songs_test)
)

In [6]:
# Vectorizer built on the training-split chord statistics.
# NOTE(review): presumably chords counted fewer than `threshold` times are
# mapped to `token` -- confirm in FrequencyVectorizer.
fv = FrequencyVectorizer(
    chord_stat,
    threshold=5,
    token="<UNK>",
)

In [7]:
# Feature matrices: the same vectorizer is applied to each split, in the
# order train -> valid -> test.
X_train, X_valid, X_test = (
    fv.get_chord_features(chords)
    for chords in (chords_list_train, chords_list_valid, chords_list_test)
)

# Targets: the "rec_capo" entry of every song, one list per split.
y_train, y_valid, y_test = (
    [song["rec_capo"] for song in split]
    for split in (songs_train, songs_valid, songs_test)
)

In [8]:
cp= CLSPreprocessor(dupulication_capo_order=np.array(['1','0','-1','-2','-3','-4','-5']))

In [9]:
model=FMClassifier(n_iter=10*10000, rank=10,preprocessor=cp)

In [10]:
# Train on the training split; the cell output is the repr of the value
# returned by `fit`.
model.fit(X_train, y_train)

FMClassification(init_stdev=0.1, l2_reg=None, l2_reg_V=0, l2_reg_w=0,
         n_iter=100000, random_state=123, rank=10, step_size=0.1)

In [11]:
# Capo rankings for the validation features; show only the first ten so the
# output stays readable.
rankings = model.predict_ranking(X_valid)
rankings[:10]

[array(['0', '-1', '-3', '1', '-4', '-5', '-2'], dtype='<U2'),
 array(['1', '0', '-2', '-3', '-4', '-5', '-1'], dtype='<U2'),
 array(['-3', '1', '0', '-1', '-2', '-5', '-4'], dtype='<U2'),
 array(['-2', '0', '-1', '-3', '1', '-5', '-4'], dtype='<U2'),
 array(['1', '0', '-3', '-5', '-4', '-2', '-1'], dtype='<U2'),
 array(['-2', '0', '-1', '1', '-4', '-5', '-3'], dtype='<U2'),
 array(['0', '-2', '1', '-1', '-4', '-5', '-3'], dtype='<U2'),
 array(['-3', '0', '1', '-1', '-2', '-5', '-4'], dtype='<U2'),
 array(['0', '1', '-2', '-5', '-4', '-3', '-1'], dtype='<U2'),
 array(['-2', '0', '-1', '1', '-4', '-5', '-3'], dtype='<U2')]

In [12]:
# Single predicted capo label per validation song; preview the first ten.
pred_top1 = model.predict(X_valid)
pred_top1[:10]

array(['0', '1', '-3', '-2', '1', '-2', '0', '-3', '0', '-2'], dtype='<U2')

In [13]:
# Top-1 score on the validation split.
# NOTE(review): presumably plain accuracy -- confirm in FMClassifier.
model.evaluate_top1(X_valid, y_valid)

0.13925

In [14]:
# The model returns two embedding arrays; unpack them in the order returned.
chord_embeddings, capo_embeddings = model.get_embeddings()

In [15]:
# Display the shapes of both embedding arrays as one tuple.
(chord_embeddings.shape, capo_embeddings.shape)

((691, 10), (7, 10))