# 中文歌詞產生器 (LSTM)

In [1]:
import markov_speaking
import re
import random
import numpy as np
import os
import keras
from rhyme_searching import *
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers.core import Dense

### environment setting

In [2]:
# Run mode: True trains the network and saves its weights; False loads the
# saved weights and generates lyrics (see main and create_network).
train_mode = False  # set value to True when training

# Artist name; also the base name of the weights file ('<artist>.h5').
artist = 'mayday' 

# Lyrics corpus: read for training and used to build the Markov generator.
text_file = 'mayday_lyrics.txt'

# File the generated song is written to.
rap_file = 'output.txt'  # empty before training

# The two seed lines fed to the network as the first input.
initial_lines = ['微光', '微光']

# Number of hidden 8-unit LSTM layers stacked in create_network.
depth = 4

# Number of additional prediction steps performed in compose_rap.
vector_size = 100

# Dataset size: number of Markov-generated candidate lines produced in main.
dataset_size = 30

### set up the model

In [3]:
def create_network(depth):
    """Build and compile the stacked-LSTM model used for lyric prediction.

    The network takes samples of shape (2, 2) and, because every layer
    returns full sequences, also outputs shape (2, 2).

    Args:
        depth: number of hidden 8-unit LSTM layers inserted between the
            4-unit input layer and the 2-unit output layer.

    Returns:
        The compiled ``Sequential`` model. When the module-level
        ``train_mode`` is falsy, weights are loaded from ``'<artist>.h5'``
        before the model is returned.
    """
    model = Sequential()
    model.add(LSTM(4, input_shape=(2, 2), return_sequences=True))
    for _ in range(depth):
        model.add(LSTM(8, return_sequences=True))
    model.add(LSTM(2, return_sequences=True))
    model.summary()
    model.compile(optimizer='rmsprop', loss='mse')

    # NOTE: this reads the module-level train_mode, not a parameter.
    if not train_mode:
        weights_path = str(artist) + '.h5'
        # Announce the file first so a failing load is easy to attribute.
        print('loading saved network: ' + weights_path)
        model.load_weights(weights_path)
    return model

### split the text

In [4]:
def split_lyrics_file(text_file):
    """Read a lyrics file and return its non-empty lines with spaces removed.

    Args:
        text_file: path to a text file, decoded as UTF-8; bytes that cannot
            be decoded are ignored.

    Returns:
        A list of strings, one per non-empty line in file order, with every
        space character (' ') removed.
    """
    # The context manager guarantees the handle is closed after reading.
    with open(text_file, encoding='UTF-8', errors='ignore') as lyrics:
        content = lyrics.read()
    # Single pass filter instead of repeatedly scanning for and removing ''.
    return [line for line in content.replace(' ', '').split('\n') if line]

### build the dataset for training

In [5]:
def build_dataset(lines):
    """Turn lyric lines into sliding-window training pairs.

    Each line is reduced to two features: its length and the value returned
    by ``rhyme(line)``. Every window of four consecutive lines yields one
    sample: the first two lines form the input and the last two the target,
    each arranged as a 2x2 array.

    Args:
        lines: sequence of lyric strings.

    Returns:
        Tuple ``(x_data, y_data)`` of NumPy arrays built from the per-window
        2x2 arrays. Progress counters are printed while building.
    """
    print('Start building, you have to wait')

    # Per-line feature pair: [length, rhyme(line)].
    features = []
    for count, text in enumerate(lines, start=1):
        features.append([len(text), rhyme(text)])
        print(count)

    inputs, targets = [], []
    window_count = len(features) - 3
    for start in range(window_count):
        print(start)
        first, second, third, fourth = features[start:start + 4]
        inputs.append(np.array([*first, *second]).reshape(2, 2))
        targets.append(np.array([*third, *fourth]).reshape(2, 2))

    x_data = np.array(inputs)
    y_data = np.array(targets)
    print('Finished building the dataset')
    return x_data, y_data

### predict the next bar

In [6]:
def compose_rap(lyrics_file, model):
    """Generate a chain of predicted line vectors starting from the seed lines.

    The module-level ``initial_lines`` are converted to
    ``[length, rhyme(line)]`` pairs and fed to the model; each prediction is
    then fed back in as the next input, ``vector_size`` more times.

    Args:
        lyrics_file: path to the lyrics corpus; only needed by the optional
            random-seed snippet below, otherwise unused.
        model: object exposing ``predict`` that accepts a (1, 2, 2) array.

    Returns:
        List of ``vector_size + 1`` model predictions, in generation order.
    """

    ### uncomment following codes if initial lines are not set ###

    #human_lyrics = split_lyrics_file(lyrics_file)
    #initial_index = random.choice(range(len(human_lyrics) - 1))
    #initial_lines = human_lyrics[initial_index:initial_index + 2]

    ### uncomment above codes if initial lines are not set ###

    seed_features = [[len(text), rhyme(text)] for text in initial_lines]
    seed_batch = np.array([seed_features]).flatten().reshape(1, 2, 2)

    rap_vectors = [model.predict(seed_batch)]
    for _ in range(vector_size):
        # Feed the most recent prediction back in as the next input.
        previous = np.array([rap_vectors[-1]]).flatten().reshape(1, 2, 2)
        rap_vectors.append(model.predict(previous))

    return rap_vectors

### use the vectors to make songs

In [7]:
def vectors_into_song(vectors, generated_lyrics):
    """Pick, for each predicted vector half, the best-matching candidate line.

    Every prediction in ``vectors`` is split into its two rows; each row is a
    target ``[syllables, rhyme]`` pair. For each target the remaining
    candidate with the highest score is appended to the song and removed
    from the pool, so no candidate entry is used twice.

    Args:
        vectors: list of model predictions, each indexable as
            ``vector[0][0]`` and ``vector[0][1]``.
        generated_lyrics: iterable of candidate lyric strings.

    Returns:
        A new list starting with the module-level ``initial_lines`` followed
        by the chosen lines. Selection stops early once the candidate pool
        is exhausted. Each chosen line is also printed.
    """
    print('\n\n')
    print('About to write rap (this could take a moment)...')
    print('\n\n')

    def calculate_score(vector_half, syllables, rhyme_value):
        # The predicted rhyme component is scaled by the size of rhyme_list
        # before being compared with the candidate's rhyme value.
        desired_syllables = float(vector_half[0])
        desired_rhyme = float(vector_half[1] * len(rhyme_list))
        return 1.0 - (abs(desired_syllables - float(syllables)) +
                      abs(desired_rhyme - float(rhyme_value)))

    # Candidate pool: [text, length, rhyme(text)] for every generated line.
    candidates = [[line, len(line), rhyme(line)] for line in generated_lyrics]

    # Copy the seed lines: appending to the module-level list itself would
    # corrupt initial_lines for any later call that reads it.
    rap = list(initial_lines)
    for line in rap:
        print(line)

    vector_halves = []
    for vector in vectors:
        vector_halves.append(list(vector[0][0]))
        vector_halves.append(list(vector[0][1]))

    for target in vector_halves:
        if not candidates:
            return rap
        scores = [calculate_score(target, entry[1], entry[2])
                  for entry in candidates]
        # index() returns the first maximum, matching first-best-wins order.
        best_index = scores.index(max(scores))
        chosen = candidates.pop(best_index)
        rap.append(chosen[0])
        print(str(chosen[0]))
    return rap

### start training

In [8]:
def train(x_data, y_data, model):
    """Fit the model on the dataset and save its weights.

    Args:
        x_data: input samples, converted to a NumPy array before fitting.
        y_data: target samples, converted to a NumPy array before fitting.
        model: model exposing ``fit`` and ``save_weights``.

    Trains for 5 epochs with a batch size of 2, then writes the weights to
    ``'<artist>.h5'`` using the module-level ``artist``.
    """
    inputs = np.array(x_data)
    targets = np.array(y_data)
    model.fit(inputs, targets, batch_size=2, epochs=5, verbose=1)

    weights_path = artist + '.h5'
    model.save_weights(weights_path)
    print('Finished training')

### the main function

In [9]:
def main(depth, train_mode):
    """Train the network or generate a song, depending on ``train_mode``.

    Args:
        depth: number of hidden LSTM layers, forwarded to ``create_network``.
        train_mode: when truthy, build the dataset from ``text_file`` and
            train; otherwise generate ``dataset_size`` Markov lines, arrange
            them with the network's predictions and write the result to
            ``rap_file``.

    Note:
        ``create_network`` decides whether to load saved weights from the
        module-level ``train_mode``, not from this argument.
    """
    # create the network
    model = create_network(depth)

    if train_mode:
        bars = split_lyrics_file(text_file)
        x_data, y_data = build_dataset(bars)
        train(x_data, y_data, model)
        return

    # Generation mode: candidate lines come from the Markov generator.
    p = markov_speaking.Markov(text_file, 1)
    bars = [p.say() for _ in range(dataset_size)]
    vectors = compose_rap(text_file, model)
    rap = vectors_into_song(vectors, bars)
    # The context manager flushes and closes the output file reliably.
    with open(rap_file, 'w', encoding='UTF-8') as f:
        for bar in rap:
            f.write(bar)
            f.write('\n')

In [10]:
# Run the pipeline with the module-level settings: trains when train_mode is
# True, otherwise generates lyrics and writes them to rap_file.
main(depth, train_mode)

Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\MAO-CH~1\AppData\Local\Temp\jieba.cache


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 2, 4)              112       
_________________________________________________________________
lstm_1 (LSTM)                (None, 2, 8)              416       
_________________________________________________________________
lstm_2 (LSTM)                (None, 2, 8)              544       
_________________________________________________________________
lstm_3 (LSTM)                (None, 2, 8)              544       
_________________________________________________________________
lstm_4 (LSTM)                (None, 2, 8)              544       
_________________________________________________________________
lstm_5 (LSTM)                (None, 2, 2)              88        
Total params: 2,248
Trainable params: 2,248
Non-trainable params: 0
______________________________________________________

Loading model cost 0.825 seconds.
Prefix dict has been built successfully.


眼神可以交流
生命怎麼揮霍
風箏飛上天空
空氣突然安靜
好像好像好像好像好像應該有點
為何我還追憶
轟轟烈烈劇情
甘知當初男兒
當時地球年輕
我們終將分離
就是生活滋味
只要尊重這個文化
沸沸揚揚頒獎
委屈自己一秒
看到滿天金條
走過動盪日子
親愛甘放未記
今夜我要離去
給你那麼那麼相信
時候招惹麻煩
相遇一個也許螞蟻
不過可愛模樣
此生無知奔忙
一字一句完整說出
逃離這個瘋狂世界
不過就是一條
我們光腳越過人間荒唐
我們不開時候
曾經快樂上面擺盪
眼淚依賴臉龐



About to write rap (this could take a moment)...



微光
微光
走過動盪日子
甘知當初男兒
眼神可以交流
生命怎麼揮霍
我們不開時候
空氣突然安靜
轟轟烈烈劇情
當時地球年輕
為何我還追憶
我們終將分離
親愛甘放未記
今夜我要離去
風箏飛上天空
就是生活滋味
委屈自己一秒
看到滿天金條
不過就是一條
沸沸揚揚頒獎
不過可愛模樣
此生無知奔忙
眼淚依賴臉龐
時候招惹麻煩
一字一句完整說出
給你那麼那麼相信
逃離這個瘋狂世界
相遇一個也許螞蟻
曾經快樂上面擺盪
只要尊重這個文化
我們光腳越過人間荒唐
好像好像好像好像好像應該有點
