In [2]:
import os
from dotenv import load_dotenv

from langchain import PromptTemplate, HuggingFaceHub, FewShotPromptTemplate
from langchain.chains import LLMChain
from langchain.schema import HumanMessage
from langchain.chat_models import ChatOpenAI

import pandas as pd

load_dotenv()

True

In [3]:
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.getenv('HUGGINGFACEHUB_API_TOKEN')

In [4]:
ts_df = pd.read_csv('../data/raw/taylor_swift_raw.csv')

In [5]:
ts_df.head()

Unnamed: 0,name,album,dance,acoustic,energy,instrumental,liveness,loudness,speech,tempo,valence,popularity,lyrics
0,Welcome To New York (Taylor's Version),1989 (Taylor's Version) [Deluxe],0.757,0.00942,0.61,3.7e-05,0.367,-4.84,0.0327,116.998,0.685,73,"[Verse 1]Walkin' through a crowd, the village ..."
1,Blank Space (Taylor's Version),1989 (Taylor's Version) [Deluxe],0.733,0.0885,0.733,0.0,0.168,-5.376,0.067,96.057,0.701,74,"[Verse 1]Nice to meet you, where you been?I co..."
2,Style (Taylor's Version),1989 (Taylor's Version) [Deluxe],0.511,0.000421,0.822,0.0197,0.0899,-4.785,0.0397,94.868,0.305,75,"[Verse 1]MidnightYou come and pick me up, no h..."
3,Out Of The Woods (Taylor's Version),1989 (Taylor's Version) [Deluxe],0.545,0.000537,0.885,5.6e-05,0.385,-5.968,0.0447,92.021,0.206,74,[Verse 1]Looking at it nowIt all seems so simp...
4,All You Had To Do Was Stay (Taylor's Version),1989 (Taylor's Version) [Deluxe],0.588,0.000656,0.721,0.0,0.131,-5.579,0.0317,96.997,0.52,72,"[Intro](Hey, hey, hey)(Hey, hey, hey)(Hey, hey..."


In [40]:
template = """
You are a program that determines if a song is happy or sad based off of the lyrics

Here's a song to classify:

====================
Title: {song_title}
Lyrics: {song_lyrics}
====================

Determine if the song is happy or sad.
"""

In [41]:
llm = HuggingFaceHub(repo_id="facebook/bart-large-cnn")



In [42]:
prompt = PromptTemplate(
    input_variables=['song_title', 'song_lyrics'],
    template=template,
)
prompt.format(song_title=ts_df['name'][0], song_lyrics=ts_df['lyrics'][0])



In [43]:
chain = LLMChain(llm=llm, prompt=prompt)

In [44]:
chain.run(song_title=ts_df['name'][5], song_lyrics=ts_df['lyrics'][5])

"You are a program that determines if a song is happy or sad based off of the lyrics. Here's a song to classify: Shake It Off (Taylor's Version)Lyrics: [Verse 1]I stay out too lateGot nothin' in my brainThat's what people say, mm-mm."

In [36]:
ts_df['lyrics'][5]

'[Verse 1]I stay out too lateGot nothin\' in my brainThat\'s what people say, mm-mmThat\'s what people say, mm-mmI go on too many dates (Haha)But I can\'t make them stayAt least, that\'s what people say, mm-mmThat\'s what people say, mm-mm[Pre-Chorus]But I keep cruisin\'Can\'t stop, won\'t stop movin\'It\'s like I got this music in my mindSayin\', "It\'s gonna be alright"[Chorus]\'Cause the players gonna play, play, play, play, playAnd the haters gonna hate, hate, hate, hate, hateBaby, I\'m just gonna shake, shake, shake, shake, shakeShake it off, I shake it off (Ooh)Heartbreakers gonna break, break, break, break, breakAnd the fakers gonna fake, fake, fake, fake, fakeBaby, I\'m just gonna shake, shake, shake, shake, shakeI shake it off, I shake it off (Ooh)'

In [45]:
from transformers import pipeline

In [46]:
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

In [48]:
print(summarizer(ts_df['lyrics'][5], max_length=500, min_length=100, do_sample=False))

Your max_length is set to 500, but your input_length is only 229. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=114)


[{'summary_text': '"I stay out too lateGot nothin\' in my brain" "I keep cruisin\'Can\'t stop, won\'t stop movin\'"It\'s like I got this music in my mind" "It\'s gonna be alright" "Baby, I\'m just gonna shake, shake, shaking, shake" "Shake it off, I shake it off (Ooh) "I go on too many dates (Haha)But I can\'t make them stay" "Heartbreakers gonna break" "And the haters gonna hate"'}]
