In [1]:
!pip install transformers
!pip install gradio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 5.0 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 67.7 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 40.8 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gradio
  Downloading gradio-3.12.0-py3-none-any.whl (11.6 MB)
[K     |████████████████████████████████| 11.6 MB 5.0 MB/s 

In [2]:
from transformers import AutoTokenizer, GPT2Tokenizer, GPT2ForSequenceClassification, GPT2Model

import torch 
from torch import nn

import numpy as np

import gradio as gr

from google.colab import drive
# Mount Google Drive inside the Colab runtime so files stored there become
# reachable under /content/drive.
drive.mount('/content/drive')
# Make the model folder the working directory; the checkpoint is later opened
# by its bare file name, so it is resolved relative to this location.
%cd /content/drive/MyDrive/ECE1786 Project/Best model

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1fJllm6_aqmmawo8B6hq9rhewEDIBmSpR/ECE1786 Project/Best model


In [3]:
# Run on the GPU whenever PyTorch can see one, otherwise fall back to the CPU.
# `use_cuda` is kept as a separate flag because later cells branch on it.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

In [4]:
class GPT2Classifier(nn.Module):
  """GPT-2 backbone followed by a small MLP head over the flattened hidden states.

  The head's first layer takes ``embedding_size * max_seq_len`` features, i.e.
  the final hidden state of every token position concatenated together, so
  each input must be padded/truncated to exactly ``max_seq_len`` tokens.

  Args:
    embedding_size: Width of each hidden-state vector coming out of the backbone.
    num_classes: Number of logits produced by the head.
    max_seq_len: Fixed number of tokens in every input sequence.
    gpt_model_name: Name or path handed to ``GPT2Model.from_pretrained``.
  """

  def __init__(self, embedding_size, num_classes,max_seq_len, gpt_model_name):
    super().__init__()
    self.gpt2 = GPT2Model.from_pretrained(gpt_model_name)
    # Attribute names and the layer order inside the Sequential determine the
    # state-dict keys; keep them stable so saved checkpoints continue to load.
    self.classifier_head = nn.Sequential(
        nn.Linear(embedding_size * max_seq_len, 768),
        nn.ReLU(),
        nn.Dropout(0.1),
        nn.Linear(768, num_classes),
    )

  def forward(self, input_id, mask):
    """Return one row of class logits per sequence in the batch.

    Args:
      input_id: Token-id tensor; its first dimension is the batch.
      mask: Attention mask matching ``input_id``.

    Returns:
      Tensor of shape ``(batch_size, num_classes)``.
    """
    # With return_dict=False the backbone returns a plain tuple whose length
    # depends on its configuration (for instance, whether a cache entry is
    # included). The hidden states are always first, so index instead of
    # unpacking into a fixed number of names.
    gpt_out = self.gpt2(input_ids=input_id, attention_mask=mask, return_dict=False)[0]
    batch_size = gpt_out.shape[0]
    # reshape behaves like view here but also accepts non-contiguous tensors.
    return self.classifier_head(gpt_out.reshape(batch_size, -1))

In [13]:
model_name = "distilgpt2"

# Tokenizer: pad with the end-of-sequence token, appended on the right.
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Classifier over 8 genres, reading 256 tokens of 768-wide hidden states.
song_genre_classifier = GPT2Classifier(
    gpt_model_name=model_name,
    embedding_size=768,
    max_seq_len=256,
    num_classes=8,
)
if use_cuda:
  song_genre_classifier = song_genre_classifier.cuda()

Some weights of the model checkpoint at distilgpt2 were not used when initializing GPT2Model: ['lm_head.weight']
- This IS expected if you are initializing GPT2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [14]:
gpt_checkpoint_file = "distilgpt2-best-model.pt"
# map_location remaps stored tensors onto the device selected for this run, so
# weights saved from a GPU can still be restored when only a CPU is available.
# NOTE(review): torch.load unpickles the file; only open checkpoints you trust.
checkpoint = torch.load(gpt_checkpoint_file, map_location=device)
load_result = song_genre_classifier.load_state_dict(checkpoint)
# This notebook only runs inference: evaluation mode disables dropout so the
# same lyrics always yield the same prediction.
song_genre_classifier.eval()
# Display the key-matching report as the cell output.
load_result

<All keys matched successfully>

In [15]:
def process(text):
  """Normalise raw lyrics and tokenize them into fixed-length model inputs.

  A blank line (two consecutive newlines) is collapsed to a single newline;
  every other newline becomes a space.

  Args:
    text: Raw lyrics string.

  Returns:
    The tokenizer output as PyTorch tensors, padded/truncated to 256 tokens.
  """
  # Split on blank lines, flatten the newlines inside each piece, then
  # re-join. No placeholder token is involved, so lyrics containing arbitrary
  # symbols pass through unchanged.
  stanzas = text.split("\n\n")
  normalised = "\n".join(stanza.replace("\n", " ") for stanza in stanzas)
  return tokenizer(normalised, padding='max_length', max_length=256, truncation=True, return_tensors="pt")

# Display names for the model's classes; the predicted class index is used
# directly as a position in this list.
# NOTE(review): order presumably mirrors the label encoding used in training — verify.
genre_labels = [
    'Blues',
    'Country',
    'EDM',
    'Hip Hop',
    'Indie',
    'Pop',
    'R&B',
    'Rock',
]

def classifier(lyrics):
  """Predict the genre of a block of song lyrics.

  Args:
    lyrics: Raw lyrics text, as entered in the demo's input box.

  Returns:
    One entry of ``genre_labels``, or an empty string when ``lyrics`` is
    ``None``, empty, or only whitespace.
  """
  # Nothing to classify: skip the model instead of scoring pure padding.
  if not lyrics or not lyrics.strip():
    return ""

  encoded = process(lyrics)
  input_id = encoded['input_ids'].to(device)
  mask = encoded['attention_mask'].to(device)

  # Inference only, so there is no need to record an autograd graph.
  with torch.no_grad():
    logits = song_genre_classifier(input_id, mask)

  # The batch holds a single sequence; turn the one-element argmax tensor
  # into a plain int before using it as a list index.
  pred = logits.argmax(dim=1).item()
  return genre_labels[pred]

In [16]:
#GRADIO PART
def run_demo():
  """Build the lyrics-to-genre Gradio interface and launch it in debug mode."""
  demo = gr.Interface(
      fn=classifier,
      inputs=gr.Textbox(
          label="Enter Song Lyrics to Predict",
          lines=10,
          placeholder="Sentence needs to be longer than 3 words...",
      ),
      outputs=[gr.Textbox(label="Genre")],
  )
  # debug=True keeps the launching cell attached to the running app.
  demo.launch(debug=True)

  

In [None]:
# Start the demo UI. Because the interface is launched with debug=True, this
# cell keeps running until it is interrupted.
run_demo()

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>