In [1]:
import numpy as np
import pandas as pd

seed_ = 20200218
np.random.seed(seed_)

import keras.backend as K
from keras.models import Model, Sequential, load_model
from keras.layers import Dense, Input, SimpleRNN, TimeDistributed, Activation, GRU, LSTM
from keras.optimizers import *
from keras.activations import *

In [2]:
def my_model(n_a, n_x):
    X = Input(shape=(None, n_x))
    
    x = LSTM(n_a, return_sequences=True)(X)
    x = Dense(n_x)(x)
    x = Activation('softmax')(x)
    
    model = Model(inputs=X, outputs=x)
    
    return model

In [3]:
df = pd.read_csv("./data/sawano_tracklist.csv")
df = df[df['year'] >= 2008]

## 
## import data

In [4]:
tracklist = np.unique(df['track_name']).tolist()

In [5]:
chars = ['\n'] + list(set(''.join(tracklist)))
chars[:10]

['\n', 'ε', '?', 'K', 'そ', 'げ', 'F', 'だ', '度', '服']

In [6]:
sample_size = len(tracklist)
n_x = len(chars)
n_a = 64

In [7]:
sample_size, n_x, n_a

(1201, 531, 64)

In [8]:
char_to_ix = {ch:i for i, ch in enumerate(chars)}
ix_to_char = {i:ch for i, ch in enumerate(chars)}

## 
### preprocess function

In [9]:
def encode_onehot(char, char_to_ix, n_x):
    x = np.zeros((n_x, ))
    x[char_to_ix[char]] = 1
    return x

def decode_onehot(onehot, ix_to_char):
    ix = np.argmax(onehot)
    char = ix_to_char[ix]
    return char

def encode_sentence(sentence, char_to_ix, n_x):
    onehot = []
    for char in sentence:
        x = encode_onehot(char, char_to_ix, n_x)
        onehot.append(x)
    return np.asarray(onehot)

def decode_sentence(onehot_arr, ix_to_char):
    decoded = []
    for onehot in onehot_arr:
        x = decode_onehot(onehot, ix_to_char)
        decoded.append(x)
    return decoded

def decode_prob(predicted_prob, ix_to_char):
    ix = np.argmax(predicted_prob)
    return ix_to_char[ix]

def decode_prob_sentence(predicted_prob_arr, ix_to_char):
    chars = []
    for prob in predicted_prob_arr:
        char = decode_prob(prob, ix_to_char)
        chars.append(char)
    return chars

### 
#### model sampling

In [10]:
def sampling(model, ix_to_char, n_x, max_len=25):
    x = np.zeros((1, n_x))
    for i in range(max_len):
        x_input = x.reshape(-1, x.shape[0], x.shape[1])
        predicted = model.predict(x_input)
        probs = predicted[-1]
        last_char_prob = probs[-1]
        
        # sampling char
        loc = np.random.choice(range(n_x), p=last_char_prob)
        x_next = np.zeros((n_x, ))
        x_next[loc] = 1
        
        # check len
        if len(x) > 2 or decode_onehot(x_next, ix_to_char) != '\n':
            x = np.append(x, [x_next], axis=0)
        
        # check line break (stop gen)
        if len(x) > 2 and decode_onehot(x[-1], ix_to_char) == '\n':
            break
    return decode_sentence(x, ix_to_char)

## 
### load model

In [27]:
model = load_model("./models/sawano_char_15000")
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None, 531)]       0         
_________________________________________________________________
lstm (LSTM)                  (None, None, 50)          116400    
_________________________________________________________________
dense (Dense)                (None, None, 531)         27081     
_________________________________________________________________
activation (Activation)      (None, None, 531)         0         
Total params: 143,481
Trainable params: 143,481
Non-trainable params: 0
_________________________________________________________________


In [24]:
model.weights

[<tf.Variable 'lstm/lstm_cell/kernel:0' shape=(531, 200) dtype=float32, numpy=
 array([[ 0.02409226,  0.06767722,  0.03145941, ..., -0.00377653,
          0.0263088 , -0.06564011],
        [-0.534505  ,  0.9239339 ,  0.7490791 , ...,  0.6575199 ,
         -0.01110862, -0.30591467],
        [ 0.7585114 ,  0.4827532 ,  1.4850377 , ..., -0.54564804,
         -0.29762042,  0.7777611 ],
        ...,
        [ 0.30768427,  0.05288136, -1.3836535 , ...,  1.2516465 ,
          0.9099668 , -0.3339516 ],
        [ 1.1969805 ,  1.0752571 ,  0.9545464 , ...,  0.67337424,
          0.7998509 ,  0.48201376],
        [ 0.6349027 , -0.23751032,  0.61183006, ..., -0.73035777,
          0.23578557,  0.41362372]], dtype=float32)>,
 <tf.Variable 'lstm/lstm_cell/recurrent_kernel:0' shape=(50, 200) dtype=float32, numpy=
 array([[ 1.4517897 , -0.19537887, -0.06707973, ..., -0.7797878 ,
         -0.44563487, -0.5267583 ],
        [-0.78089094, -0.4393632 ,  0.3197299 , ..., -0.65691996,
          0.5396022 , 

In [28]:
n = 100

generated = []
for i in range(n):
    gen = sampling(model, ix_to_char, n_x, 25)
    generated.append(gen)



In [29]:
for gen in generated:
    print("".join(gen[:-1]), end="")


冷笑士・ʜ中中統 
中†覇様頭中覇怒朝様件†空身寝I
6ま乱け映付ʜ覇♂空 気
†覇覇とd、ʜ、
、†と統→笑
:†頭覇ʜ覇ら覇
6冷、＆けと6割
割う中 様†朝様朝と様・攻覇乱中:様・、中覇6
覇ʜ本統
笑覇ʜ中統様
正覇、ま覇ʜ朝様件6気Ⅰ・美度度劇I
従頭覇ʜ、乱、ʜ美様件sHI
6け正質空質映
ʜ中従覇ʜ中怒覇
中→頭従様リ覇ʜ乱本中従頭覇ʜ様闘陰部朝
、ʜ覇乱中う、怒

詩駆冷覇度度6
付z映日割6ⅠX
Hʜ、統X覇乱中統従覇ʜ:H 中統駆
中†覇様と笑様乱
カ怒頭覇覇美と:統覇怒ʜ
鬼・Ⅰ6rけ気:らそ
本従従、†覇r
本、従覇ʜ乱、
中怒頭怒
鎖:・Cう、.覇
上6、♂空劇割映†本ʜ空6♂空♂κ劇寝覇ʜ乱本
 団ら度様r覇6冷気ら
H美:雷度
、中従覇ʜ、従中覇乱6
本乱従覇ʜ劇I
本怒_覇ʜκ
笑覇ʜ覇統
リ:度そ度攻度
前陰し
冷覇ら♂質
ら覇鑼従様、†覇
、ʜ怒朝
覇ʜ朝
怒頭中統覇ʜ朝様、様、と様詩駆本朝様ら本中乱覇ʜ
†本†様け覇覇様モ統駆_、う様H頭覇様付中統覇
 割そ気気攻美
Ⅰら:割
割ʜ:
ま覇従覇ʜ様割ʜ初従と朝
覇ʜ覇統ま覇鬼
ʜ覇$本乱覇統ま覇、中映発う中つ
中、う覇ら鬼覇ʜ
HⅠ統覇ととと統映、ま、攻覇乱劇
と†従覇乱ʜ覇統従様気統ま
凸狂る団メ本度本
駆覇正本統覇乱
従口と質ま
 質→頭覇統覇う中ら
度CC
覇、物様攻中割ʜう覇
祭とと、従
、ʜ従朝と朝ま、統
Hʜ覇ヲ様と統統空乱従朝覇統従様覇統覇乱
（駆映r覇従従本ま覇ʜ乱乱と笑と:乱:HⅠ映X割
覇ʜ、統覇乱
統駆覇モぉ駆Cr気
とʜ覇統様サ:統乱従気ʜ覇I
_覇とと6H
-と空空
怒ʜ中怒覇正中統→覇う中覇
鎖正覇H覇、ʜ、統従と様極従
 Ⅰそ割・映割攻様気ら様6割様従頭従覇
そ攻割6
ヲ-♂統と寝従様雷中†覇♀様サ:統乱従ʜ本†覇統
 :ら度
本ʜ覇最覇乱従とʜま
寝覇ʜ乱本H覇統様冷とʜと統
Ⅰ†美
奏け♂
ノノ士?
/標闘
凸uそ幻そ付 ʜ頭美覇ʜ覇統覇様統
B楽映C映質
と†従と従覇ʜ
d♂空空空
Ⅰら映ヲ
z映C気
r本本そけ様割攻・映C覇♂サ冷Ⅰ・攻6気本Hう覇
統とʜ、♂空発
駆初度割C
気ら度ゲHつ呂r冷質
怒頭と統中う、ʜ朝中ら
空ヲ空
:◎リ度そC
ノノ映ノ士δ
映υ♂吾質Β上えサd♂†美ʜ怒Ⅰ・様H吾雷映、冷
笑頭中正、覇ʜ覇ʜ従
、怒_本度部