<a href="https://colab.research.google.com/github/bgtripp/Computational-Modeling-and-Simulation/blob/master/Textgenrnn_LTSM_Pop_Lyrics_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Initial Prep**

In [0]:
#Copyright Benjamin Tripp 2020
import pandas as pd 
import numpy as np 
!pip install -q textgenrnn
from google.colab import files
from textgenrnn import textgenrnn
from datetime import datetime
import os

#Helpful site: https://github.com/minimaxir/textgenrnn/blob/master/docs/textgenrnn-demo.ipynb

#Data set of billboard lyrics
url = 'https://raw.githubusercontent.com/walkerkq/musiclyrics/master/billboard_lyrics_1964-2015.csv'

song_data = pd.read_csv(url, encoding = "latin-1") #Pandas dataframe
song_data.head() #Shows the first 5 lines of the dataframe

Using TensorFlow backend.


Unnamed: 0,Rank,Song,Artist,Year,Lyrics,Source
0,1,wooly bully,sam the sham and the pharaohs,1965,sam the sham miscellaneous wooly bully wooly b...,3.0
1,2,i cant help myself sugar pie honey bunch,four tops,1965,sugar pie honey bunch you know that i love yo...,1.0
2,3,i cant get no satisfaction,the rolling stones,1965,,1.0
3,4,you were on my mind,we five,1965,when i woke up this morning you were on my mi...,1.0
4,5,youve lost that lovin feelin,the righteous brothers,1965,you never close your eyes anymore when i kiss...,1.0


#**Preparing Data**

In [0]:
def getLyrics(year):

  subset = song_data[song_data['Year'] == year]
  lyrics = subset['Lyrics']
  lyrics.to_csv(r'lyrics.txt', header=None, index=None, sep=' ', mode='a')

year = 1965
file_name = 'lyrics.txt'.format(year)
model_name = '{}PopLyrics'.format(year)

NameError: ignored

#**Configuring Recurrent Neural Network**

In [0]:
model_cfg = {
    'word_level': False,   # set to True if want to train a word-level model (requires more data and smaller max_length)
    'rnn_size': 128,   # number of LSTM cells of each layer (128/256 recommended)
    'rnn_layers': 3,   # number of LSTM layers (>=2 recommended)
    'rnn_bidirectional': False,   # consider text both forwards and backward, can give a training boost
    'max_length': 30,   # number of tokens to consider before predicting the next (20-40 for characters, 5-10 for words recommended)
    'max_words': 10000,   # maximum number of words to model; the rest will be ignored (word-level model only)
}

train_cfg = {
    'line_delimited': True,   # set to True if each text has its own line in the source file
    'num_epochs': 20,   # set higher to train the model for longer default 20
    'gen_epochs': 1,   # generates sample text from model after given number of epochs
    'train_size': 0.8,   # proportion of input data to train on: setting < 1.0 limits model from learning perfectly
    'dropout': 0.0,   # ignore a random proportion of source tokens each epoch, allowing model to generalize better
    'validation': False,   # If train__size < 1.0, test on holdout dataset; will make overall training slower
    'is_csv': False   # set to True if file is a CSV exported from Excel/BigQuery/pandas
}

  """


5000     this hit that ice cold michelle pfeiffer that...
5001     when your legs dont work like they used to be...
5002     its been a long day without you my friend and...
5003     im like hey wassup hello seen yo pretty ass s...
5004     im hurting baby im broken down i need your lo...
Name: Lyrics, dtype: object

#**Creating and Training Neural Network**

In [0]:
textgen = textgenrnn(name=model_name)

train_function = textgen.train_from_file if train_cfg['line_delimited'] else textgen.train_from_largetext_file

train_function(
    file_path=file_name,
    new_model=True,
    num_epochs=train_cfg['num_epochs'],
    gen_epochs=train_cfg['gen_epochs'],
    batch_size=1024,
    train_size=train_cfg['train_size'],
    dropout=train_cfg['dropout'],
    validation=train_cfg['validation'],
    is_csv=train_cfg['is_csv'],
    rnn_layers=model_cfg['rnn_layers'],
    rnn_size=model_cfg['rnn_size'],
    rnn_bidirectional=model_cfg['rnn_bidirectional'],
    max_length=model_cfg['max_length'],
    dim_embeddings=100,
    word_level=model_cfg['word_level'])












99 texts collected.
Training new model w/ 3-layer, 128-cell LSTMs
Training on 151,426 character sequences.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/20
####################
Temperature: 0.2
####################
""      ea        o                                           it                 a          e                         e                             ri   i                          ia a                    o  o                   o   a     e         a            at        e        t       o      oo  

"""            i                 a   a  o  a        e  t                   o         o                              t                       o                            o           e     e     h         oo                    i   i     e        o     h    ee     o n o                 e      n  s   

"""   "                             o          t         e   o               oe                o  a    e          t 

# **Testing Pre-trained model**

In [0]:
year = '1979' #Input your desired year here! (1965-2015)
decade = year[0:3]+'5'

model_name = '{}PopLyrics'.format(decade)

import urllib.request

#weights_url = 'https://raw.githubusercontent.com/bgtripp/Lyrics-Neural-Net/master/{}/{}_weights.hdf5'.format(year, model_name)
weights_url = 'https://raw.githubusercontent.com/bgtripp/Lyrics-Neural-Net/master/multi/{}/{}weights.hdf5'.format(year[0:3]+'0', year)
urllib.request.urlretrieve(weights_url, 'weights')

vocab_url = 'https://raw.githubusercontent.com/bgtripp/Lyrics-Neural-Net/master/{}/{}_vocab.json'.format(decade, model_name)
urllib.request.urlretrieve(vocab_url, 'vocab')

config_url = 'https://raw.githubusercontent.com/bgtripp/Lyrics-Neural-Net/master/{}/{}_config.json'.format(decade, model_name)
urllib.request.urlretrieve(config_url, 'config')

textgen = textgenrnn(weights_path = 'weights',
                      vocab_path = 'vocab',
                      config_path = 'config')














In [0]:
textgen.generate_samples(3, temperatures=[0.2,0.5,0.7,0.9, 1.1])

####################
Temperature: 0.2
####################
" i cant live without you baby what you know i can you give up me i think above i know ill never love this way again so i keep holdin on before the good is gone i know ill never love this way again so i want the thing for you wont settle down the fire of love is a friend to tell me didnt i was wro

" i cant go on its train the game that youre the only one i love and you cant change that youre the only one i need and you cant change that you feel nothin heart and then they be crumble dont try to tell me now i love you so right now i love you so right now i love you so right now i love you so 

" when we make love i cant see me now playin the night away oh oh oh oh oh oh aint no colol an end you and you cant change that youre the only one i love and you cant change that youre the one youre the one i wanna stop the winter when we make love i cant see me now playin the morning when i want 

####################
Temperature: 0.5
###

#**Fine-tuning Models on New Data**

In [0]:
textgen = textgenrnn(weights_path = '1977weights.hdf5',
                      vocab_path = 'vocab',
                      config_path = 'config')

In [0]:
textgen.generate_samples(3, temperatures=[0.2,0.5,0.7,0.9, 1.1])

####################
Temperature: 0.2
####################
" i can have i tomten anything i want you see these tears i cant hear a word you belong oh what a night to want you dear oh what a night to kiss you dear thats why i love you soight think all the time who cant see them steepperiding steels colors you and he hallerut world ended the moment you left

" i can have it fairest money you see whats way dont let my heart its not a realica wait cant be mine where you once belonged  get back get back  get back to where you once belonged  get back get back  get back to where you once belonged  get back get back  get back to where you once belonged  get

" i can have it fairectely to your heart beat its going to summer day and i cant see you need to show you what you said i dont need no other lover bonner what i do but you dont ever till the time of the never seemed so bad thats how it is thats the way love is sure enough how it is thats the way l

####################
Temperature: 0.5
###

In [0]:
year = 1974 #Additional year
getLyrics(year)

textgen.train_from_file('lyrics.txt', num_epochs=10)

99 texts collected.
Training on 97,903 character sequences.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


  import sys




Epoch 1/10
####################
Temperature: 0.2
####################
" my my my my my my my my my mind my mind in the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band 

" my my my my my my my mind in the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the runnif the love my my my my my my my my my my my my my my my my my my m

" my mind in the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band on the run band 

####################
Tempera

KeyboardInterrupt: ignored

In [0]:
!git status
!git clone https://github.com/bgtripp/Lyrics-Neural-Net.git

fatal: not a git repository (or any of the parent directories): .git
Cloning into 'Lyrics-Neural-Net'...
remote: Enumerating objects: 56, done.[K
remote: Counting objects: 100% (56/56), done.[K
remote: Compressing objects: 100% (47/47), done.[K
remote: Total 56 (delta 17), reused 46 (delta 9), pack-reused 0[K
Unpacking objects: 100% (56/56), done.
