# BERT Model Outcomes from 1980-1989

In [1]:
import pandas as pd
import numpy as np 

In [None]:
# Importing cleaned and processed data
df = pd.read_csv('../../data/clean_processed.csv')

In [None]:
# Filtering data for only the 1980s
df = df[df['Year'].between(1980, 1989)]
df['Year'].unique()

array([1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989])

In [None]:
# Checking the demensions for filtered data 
df.shape

(981, 11)

In [5]:
df.head(1)

Unnamed: 0,Year,Artist,Song Title,Rank,Lyrics,Nouns,Verbs,Adverbs,Corpus,Word Counts,Unique Word Counts
1980,1980,Blondie,Call Me,1,"​ Colour me your colour, baby Colour me your...",​ colour baby car colour darling chart line da...,colour colour colour know come know come call ...,anytime never anytime anytime anywhere anytime...,colour colour baby colour car colour colour da...,388,125


In [None]:
import re
from string import punctuation

# Light cleaning function to clean lyrics
def light_clean(text):
    if pd.isna(text):
        return ""
    # converting to lowercase
    text = text.lower()
    # removing special characters
    text = ''.join([char for char in text if char not in punctuation])
    # removing numbers 
    text = re.sub(pattern = r'\d+',
                  repl = '',
                   string = text)
    # removing extra whitespace
    text = re.sub(pattern = r'\s+',
                  repl = ' ',
                  string = text)
    
    cleaned = text  
    return cleaned

df['clean_lyrics'] = df['Lyrics'].apply(light_clean)


In [None]:
# Creating a list of cleaned lyrics
lyrics = df['clean_lyrics'].tolist()

# Build and Fit BERTopic Model 

In [9]:
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords

In [None]:
# Creating custom stopword list for song lyrics
stopwords_nltk = stopwords.words('english')
lyrics_stopwords = stopwords_nltk + [
    'the', 'on', 'and', 'oh', 'ta', 'lo', 'la', 'du', 'yeah', 'baby',
    'let', 'pa', 'oop', 'got', 'na', 'ba', 'go', 'get', 'like', 'nah', 
    'ha', 'da', 'bop', 'ooh', 'ah', 'know', 'yah', 'whoo', 'shes', 'oo',
    'im', 'ohoh', 'whats', 'ya', 'huh', 'youre', 'girls', 'girl', 'boys', 
    'take', 'wanna', 'dont', 'self', 'want', 'isnt', 'yeh', 'jo', 'cant', 
    'hes', 'boy', 'uhuh', 'ive', 'la', 'lo', 'lola', 'du', 'oh', 'yeah', 'la la', 'lo lo', 'du du', 'got', 'gonna',
    'ooh', 'get', 'take', 'da', 'know', 'like', 'one', 'said', 'ron',
    'let', 'go', 'come', 'want', 'toke', 'say', 'day', 'good', 'see', 'make',
    'niggas', 'nigga', 'ya', 'choo', 'na', 'uh', 'yuh', 'hmm', 'wanna',
    'doo', 'dat', 'woo', 'nah', 'whoomp', 'yo', 'whoo', 'whatta', 'wa',
    'johnny', 'boaw', 'th', 'whoa', 'eh', 'ooo', 'um', 'dum', 'thy', 'aa',
    'the', 'on', 'and', 'ta', 'pa', 'oop', 'ha', 'bop', 'ah', 'yah', 'shes',
    'oo', 'im', 'ohoh', 'whats', 'huh', 'youre','dont', 'self', 'isnt', 'yeh', 
    'jo', 'cant', 'hes', 'uhuh', 'ive', 'yah', 'hmmmmm', 'ohh', 'woulda', 'nae',
    'lil', 'chh', 'ayy', 'aha', 'dit', 'ding', 'ling', 'feat', 'justin', 'mbabarara',
    'rainin', 'burnin', 'em', 'drake', 'sylvia', 'alejandro', 'mmm', 'watcha'
    'somethin', 'rumours', 'pop', 'mony', 'kyrie', 'eleison', 'bit', 'alright',
    'thats', 'give'
    
    ]

vectorizer = CountVectorizer(stop_words=lyrics_stopwords)
            

In [12]:
from bertopic import BERTopic
from umap import UMAP
from hdbscan import HDBSCAN

In [19]:
# Dimensionality reduction
umap_model = UMAP(
    n_neighbors=10,       # local vs global structure
    n_components=10,      # dimensionality of reduced space
    min_dist=0.0,         # tighter clusters
    metric="cosine",
    random_state=42)

# Density-based clustering
hdbscan_model = HDBSCAN(
    min_cluster_size=20,  # how many docs per topic (granularity)
    min_samples=10,        # how strict the clustering is
    prediction_data=True,
    cluster_selection_method="leaf") # how clusters are calculated -> "leaf" results in more granular clusters 

# Topic model
topic_model_v3 = BERTopic(
    embedding_model= 'all-distilroberta-v1',  # can also try another model, e.g. all-mpnet-base-v2
    vectorizer_model=vectorizer,          # custom stopwords here
    umap_model=umap_model,
    hdbscan_model=hdbscan_model,
    min_topic_size=20,                    # smallest amount of document allowed in a topic -> smaller number means more granular topics
    calculate_probabilities=True,
    verbose=True)

topics_v3, probabilities_v3 = topic_model_v3.fit_transform(lyrics)


2025-11-26 22:54:49,682 - BERTopic - Embedding - Transforming documents to embeddings.


Batches:   0%|          | 0/31 [00:00<?, ?it/s]

2025-11-26 23:02:00,110 - BERTopic - Embedding - Completed ✓
2025-11-26 23:02:00,113 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2025-11-26 23:02:03,861 - BERTopic - Dimensionality - Completed ✓
2025-11-26 23:02:03,863 - BERTopic - Cluster - Start clustering the reduced embeddings
2025-11-26 23:02:04,008 - BERTopic - Cluster - Completed ✓
2025-11-26 23:02:04,032 - BERTopic - Representation - Fine-tuning topics using representation models.
2025-11-26 23:02:04,264 - BERTopic - Representation - Completed ✓


In [20]:
topic_model_v3.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,585,-1_love_night_time_feel,"[love, night, time, feel, right, tell, heart, ...",[ i feel the night explode when were together ...
1,0,124,0_love_never_time_could,"[love, never, time, could, ill, always, hold, ...",[ill always love you for the rest of my days y...
2,1,71,1_love_fresh_step_tell,"[love, fresh, step, tell, shell, never, whos, ...",[ that girl thinks that shes so fine that soon...
3,2,58,2_back_love_wild_built,"[back, love, wild, built, right, city, elvis, ...",[ lets do it workin all week to for my money s...
4,3,41,3_love_cherish_heaven_hear,"[love, cherish, heaven, hear, lets, heart, nee...",[ with all my heart i love you baby stay with ...
5,4,28,4_wind_wild_running_ride,"[wind, wild, running, ride, going, city, glory...",[when the going gets tough the tough get going...
6,5,27,5_seeds_america_sowing_usa,"[seeds, america, sowing, usa, passion, rock, c...",[ theres a black man with a black cat livin in...
7,6,25,6_heart_change_remember_love,"[heart, change, remember, love, soul, listen, ...",[baby look at me and tell me what you see you ...
8,7,22,7_call_nasty_wild_love,"[call, nasty, wild, love, automatic, dress, me...",[ your love is like bad medicine bad medicine ...


### Review Interpretable Topics

In [None]:
# Love that provides comfort and reassurance 
# Cherishing love as time passes and world changes 
# Love that makes someone feel happy 
lyrics_per_topic = topic_model_v3.get_representative_docs()
lyrics_per_topic[3]

# Topic Theme = Emotionally Secure Love that Nurtures and Uplifts; Comforting Love 

[' with all my heart i love you baby stay with me and you will see my arms will hold you baby never leave cause i believe im in love sweet love hear me calling out your name i feel no shame im in love sweet love dont you ever go away itll always be this way your heart has called me closer to you i will be all that you need just trust in what were feeling never leave cause baby i believe in this love sweet love hear me calling out your name i feel no shame im in love sweet love dont you ever go away itll always be this way theres no stronger love in this world oh baby no youre my man im your girl ill never go wait and see cant be wrong dont you know this is where you belong oh the sweetest dream a lovely baby stay right here never fear i will be all that you need never leave cause baby i believe in this love sweet love hear me calling out your name i feel no shame im in love sweet love dont you ever go away itll always be this way sweet love oh no no no no no no no sweet love oh with al

In [None]:
# Rising to a challenge when things get tough
# Storytelling: Somone escaping a death sentence by fleeing to Mexico 
# Enduring the challenges of life and holding on to the connections made along the way 
lyrics_per_topic = topic_model_v3.get_representative_docs()
lyrics_per_topic[4]

# Topic Theme = Journey Through Hardship; Wind as a Metaphor 

['when the going gets tough the tough get going tough tough huh huh huh when the going gets tough the tough get ready yeah ooooh do da do da i got something to tell you i got something to say im gonna put this dream in motion never let nothing stand in my way when the going gets tough the tough get going im gonna get myself cross the river thats the price im willing to pay im gonna make you stand and deliver and give me love in the oldfashion way woooh darlin ill climb any mountain darlin ill do anything ooh ooh can i touch you can i touch you and do the things that lovers do ooh ooh want to hold you wanna hold you i gotta get it through to you oooh when the going gets tough the tough get going when the going gets rough the tough get rough hey hey hey hey hey ooooh baby im gonna buy me a oneway ticket nothins gonna hold me back your loves like a slow train coming slow train coming and i can feel it coming down the track woh darlin ill climb any mountain darlin ill do anything ooh ooh c

In [None]:
# Critique to the American Dream; expose struggle with race, class, and survival in America
# Celebrates freedoms and opportunities of life in the United States
# Message of Hope and call to action against greed and division 
lyrics_per_topic = topic_model_v3.get_representative_docs()
lyrics_per_topic[5]

# Topic Theme = Varying Perspectives on the American Dream

[' theres a black man with a black cat livin in a black neighborhood hes got an interstate runnin through his front yard you know he thinks that hes got it so good and theres a woman in the kitchen cleanin up the evenin slop and he looks at her and says hey darlin i can remember when you could stop a clock oh but aint that america for you and me aint that america somethin to see baby aint that america home of the free little pink houses for you and me theres a young man in a tshirt listenin to a rockin rollin station hes got greasy hair greasy smile he says lord this must be my destination cause they told me when i was younger boy youre gonna be president but just like everything else those old crazy dreams just kinda came and went oh but aint that america for you and me aint that america somethin to see baby aint that america home of the free little pink houses for you and me well theres people and more people what do they know know know go to work in some high rise and vacation down 

### Adding Topics to Dataframe

In [28]:
df['Topics'] = topics_v3
df.head()

Unnamed: 0,Year,Artist,Song Title,Rank,Lyrics,Nouns,Verbs,Adverbs,Corpus,Word Counts,Unique Word Counts,clean_lyrics,Topics
1980,1980,Blondie,Call Me,1,"​ Colour me your colour, baby Colour me your...",​ colour baby car colour darling chart line da...,colour colour colour know come know come call ...,anytime never anytime anytime anywhere anytime...,colour colour baby colour car colour colour da...,388,125,​ colour me your colour baby colour me your ca...,7
1981,1980,Pink Floyd,Another Brick In The Wall,2,One day not to far from this one We thought i...,day one back racist back health care racist fo...,think come rise come turn need need leave need...,to far never fast then right again all in all ...,day far think come el trumponation rise fast c...,171,69,one day not to far from this one we thought it...,5
1982,1980,Olivia Newton-John,Magic,3,Come take my hand You should know me I've alw...,hand mind dream road mistake nothing way hope ...,come take know know guide build have start be ...,always kind now ever home so anytime ever ever,come hand know mind know kind guide build drea...,244,85,come take my hand you should know me ive alwa...,6
1983,1980,Michael Jackson,Rock With You,4,"Girl, close your eyes Let that rhythm get int...",eye rhythm nothing mind heat boogie beat love ...,close let get try fight be relax groove got fe...,back away out there just so far away forever a...,girl close eye let rhythm try fight be relax m...,256,93,girl close your eyes let that rhythm get into...,6
1984,1980,Captain and Tennille,Do That To Me One More Time,5,Do that to me one more time Once is never eno...,time man time man baby time heart time word ba...,do do get kiss do pass hear tell hear get say ...,once never never just once again once just nev...,time man like time man like oh kiss like oh ba...,201,50,do that to me one more time once is never eno...,-1


In [None]:
#df.to_csv('bert1980s.csv', index=False)