# 06 Final workflow for deployment

This notebook prepares the deployment by determining which steps are needed to take a lyric text, generate its BERT tokens, and predict the genre.


## Data files needed:
- final_lyrics_model.pkl.gz

In [22]:
# setup
import sys
import subprocess
import pkg_resources
from collections import Counter
import re
from numpy import log, mean, matmul


# Third-party packages this notebook needs, written as pip requirement specifiers.
required = {'spacy', 'scikit-learn', 'numpy', 
            'pandas', 'torch', 'matplotlib',
            'transformers', 'allennlp==0.9.0'}
# Installed distributions, keyed by normalized project name, with their versions.
installed_versions = {pkg.key: pkg.version for pkg in pkg_resources.working_set}
installed = set(installed_versions)

# A plain `required - installed` never matches a pinned spec such as
# 'allennlp==0.9.0' (installed keys carry no version), so pip would run on
# every execution. Compare the parsed project name and, when a version is
# pinned, the installed version instead.
missing = set()
for spec in required:
    requirement = pkg_resources.Requirement.parse(spec)
    found_version = installed_versions.get(requirement.key)
    if found_version is None or (requirement.specs and found_version not in requirement):
        missing.add(spec)

if missing:
    python = sys.executable
    # sorted() only makes the pip command line deterministic.
    subprocess.check_call([python, '-m', 'pip', 'install', *sorted(missing)], stdout=subprocess.DEVNULL)
import spacy
import numpy as np
import pandas as pd

# SciKit Learn
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

import torch
# spaCy: instantiate the English language class.
# NOTE(review): `en` appears unused in the cells visible here - confirm before removing.
from spacy.lang.en import English
en = English()

from sklearn.model_selection import train_test_split

# File management
import os
from os import listdir
from pathlib import Path
import pickle
import gzip

import uuid

In [23]:
import transformers
# what we're used to: BERT
from transformers import BertTokenizer, BertModel 

# Checkpoint identifier shared by the tokenizer and the encoder
MODEL_NAME = 'bert-base-uncased'
# Tokenizer (vocabulary) that belongs to the checkpoint
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
# Pre-trained BERT encoder for the same checkpoint
model = BertModel.from_pretrained(MODEL_NAME)


In [24]:
# Runtime switch: True on Google Colab, False on a laptop.
COLAB = False

# Colab keeps the data files in the working directory; the laptop uses ./data/.
path = "./" if COLAB else "./data/"
# Colab runs on the first CUDA GPU; the laptop stays on the CPU.
device = torch.device("cuda:0" if COLAB else "cpu")

In [41]:
# CSV file (under `path`) that write_prediction() and update_prediction() read and write.
filename = f"{path}user_lyrics_data_tmp.csv"

In [42]:
# Load the pickled genre model; prediction() calls its .predict() method.
# NOTE: unpickling can execute arbitrary code - only load a model file from a trusted source.
prediction_model = pd.read_pickle(f'{path}final_lyrics_model.pkl.gz')

In [43]:
def clean_single_lyric(text):
  """Clean one raw lyric and wrap it in a single-row DataFrame.

  Leading/trailing whitespace is stripped first, then every square-bracketed
  span (e.g. section markers such as "[Chorus]") and every parenthesised span
  is removed. Whitespace left behind by a removed span is kept as is.

  Args:
    text: the raw lyric as a string.

  Returns:
    A pandas DataFrame with one row and one column named "Lyric".
  """
  stripped = text.strip()
  # Raw string literals: '\[' and '\(' are invalid escape sequences in a normal
  # string literal and trigger a warning on newer Python versions. The regex
  # patterns themselves are unchanged.
  without_brackets = re.sub(r'\[.*?\]', '', stripped)
  without_parens = re.sub(r'\(.*?\)', '', without_brackets)
  return pd.DataFrame([without_parens], columns=["Lyric"])

In [44]:
def generate_single_BERT_token(text):
  """Encode lyric text with BERT and return the embedding at token position 0.

  Args:
    text: the lyric(s) to encode. Accepts a single string, an iterable of
      strings, or a DataFrame with a "Lyric" column (as returned by
      clean_single_lyric).

  Returns:
    A numpy array holding, for each input text, position 0 of the model's
    first output (for BertModel that output is the last hidden state).
  """
  # Normalise the input to a list of strings. Iterating a DataFrame yields its
  # column labels, so handing the frame straight to the tokenizer would encode
  # the header "Lyric" instead of the lyric; a bare string would be iterated
  # character by character.
  if isinstance(text, str):
    batch = [text]
  elif isinstance(text, pd.DataFrame):
    batch = text["Lyric"].tolist()
  else:
    batch = list(text)

  tokens = tokenizer.batch_encode_plus(
          batch,
          padding="max_length",  # replaces the deprecated pad_to_max_length=True
          return_tensors="pt",
          max_length=512,
          truncation=True)
  tokens = tokens.to(device)
  # The inputs are moved to `device`, so the model has to live there as well.
  model.to(device)
  # Inference only: skip building the autograd graph.
  with torch.no_grad():
    outputs = model(**tokens)
  return outputs[0][:, 0].cpu().numpy()

In [45]:
def prediction(token):
  """Run the loaded `prediction_model` on `token` and return what its predict() returns."""
  predicted = prediction_model.predict(token)
  return predicted

In [46]:
def generate_id():
  """Return a fresh record id: the 32-character hex form of a version-1 UUID."""
  return uuid.uuid1().hex

In [47]:
def write_prediction(cleaned_lyrics, token, prediction, id):
  """Append one prediction record to the CSV at `filename`.

  The record holds the columns "id", "Prediction", "Truth" (left empty) and
  "Lyric", followed by one column per entry of `token`. If the file does not
  exist yet (or is empty) it is created; if a record with the same id is
  already stored, nothing is written.

  Args:
    cleaned_lyrics: list-like with the cleaned lyric text of the record.
    token: 2-D array-like with one row; assumed to be the embedding returned
      by generate_single_BERT_token - TODO confirm for other callers.
    prediction: list-like with the predicted genre of the record.
    id: identifier of the record.
  """
  # Use the `prediction` argument; the previous code read the notebook global
  # `predicted_genre` and silently ignored the parameter.
  df = pd.DataFrame.from_dict({"id": [id], "Prediction": prediction, "Truth": '', "Lyric": cleaned_lyrics})
  df_token = pd.DataFrame(token)
  # Column labels come back from CSV as strings; keep them strings from the
  # start so appended rows line up with rows read back from the file.
  df_token.columns = df_token.columns.astype(str)
  df_record = pd.concat([df, df_token], axis=1)

  try:
    # Read ids as text so type inference cannot turn them into numbers.
    df_in = pd.read_csv(filename, dtype={"id": str})
  except (FileNotFoundError, pd.errors.EmptyDataError):
    # No usable file yet: start it with this record.
    df_record.to_csv(filename, index=False)
    return

  if not (df_in["id"] == str(id)).any():
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    df_out = pd.concat([df_in, df_record], ignore_index=True)
    df_out.to_csv(filename, index=False)

  return

In [48]:
def update_prediction(id, new_value):
  """Store the user-supplied genre in the "Truth" column of one stored record.

  The CSV at `filename` is rewritten only when exactly one record has the
  given id; otherwise the file is left untouched.

  Args:
    id: identifier of the record to update.
    new_value: value to write into the record's "Truth" column.
  """
  # Read "id" and "Truth" as text: type inference could otherwise parse an
  # all-digit id as a number (so it never matches) and would load the empty
  # "Truth" column as float, which then cannot cleanly take a string value.
  df_in = pd.read_csv(filename, dtype={"id": str, "Truth": str})
  matches = df_in["id"] == str(id)
  if matches.sum() == 1:
    df_in.loc[matches, "Truth"] = new_value
    df_in.to_csv(filename, index=False)

# User enters Lyric

In [49]:
lyric = "I can't remember anything Can't tell if this is true or a dream Deep down inside I feel to scream This terrible silence stops me Now that the war is through with me I'm waking up, I cannot see That there's not much left of me Nothing is real but pain now  [Chorus] Hold my breath as I wish for death Oh please God, wake me  [Verse 2] Back in the womb it's much too real In pumps life that I must feel But can't look forward to reveal Look to the time when I'll live Fed through the tube that sticks in me Just like a wartime novelty Tied to machines that make me be Cut this life off from me  [Chorus] Hold my breath as I wish for death Oh please God, wake me  [Short Instrumental Break]  [Chorus] Now the world is gone, I'm just one Oh, God help me Hold my breath as I wish for death Oh please God, help me  [Instrumental Break]  [Bridge] Darkness imprisoning me All that I see, absolute horror I cannot live, I cannot die Trapped in myself, body my holding cell Landmine has taken my sight Taken my speech, taken my hearing Taken my arms, taken my legs Taken my soul, left me with life in Hell"

In [50]:
# Clean the entered lyric; `text` is a one-row DataFrame with a "Lyric" column
text = clean_single_lyric(lyric)
print(text["Lyric"].tolist())

["I can't remember anything Can't tell if this is true or a dream Deep down inside I feel to scream This terrible silence stops me Now that the war is through with me I'm waking up, I cannot see That there's not much left of me Nothing is real but pain now   Hold my breath as I wish for death Oh please God, wake me   Back in the womb it's much too real In pumps life that I must feel But can't look forward to reveal Look to the time when I'll live Fed through the tube that sticks in me Just like a wartime novelty Tied to machines that make me be Cut this life off from me   Hold my breath as I wish for death Oh please God, wake me     Now the world is gone, I'm just one Oh, God help me Hold my breath as I wish for death Oh please God, help me     Darkness imprisoning me All that I see, absolute horror I cannot live, I cannot die Trapped in myself, body my holding cell Landmine has taken my sight Taken my speech, taken my hearing Taken my arms, taken my legs Taken my soul, left me with li

In [51]:
# Generate token
# Pass the lyric strings rather than the DataFrame: iterating a DataFrame
# yields its column labels, so the tokenizer would encode the header "Lyric"
# instead of the lyric text.
token = generate_single_BERT_token(list(text["Lyric"]))

In [52]:
# Predict the genre from the BERT embedding with the loaded prediction_model
predicted_genre = prediction(token)

In [53]:
# Generate a unique id to identify the record.
# NOTE: the name `id` shadows Python's built-in id(); it is kept because later cells reference it.
id= generate_id()

In [54]:
# Extract the cleaned lyric as a list of strings for the stored record
raw_text=list(text["Lyric"])

In [55]:
# Persist the record (lyric, embedding, predicted genre) to the CSV under the generated id
write_prediction(raw_text, token, predicted_genre, id)

In [56]:
### The user sends the true genre back - store it in the record's "Truth" column
update_prediction(id, 'Metal')