In [2]:
pip install requests beautifulsoup4

Note: you may need to restart the kernel to use updated packages.


In [10]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install lyricsgenius

Collecting lyricsgenius
  Downloading lyricsgenius-3.3.1-py3-none-any.whl.metadata (6.2 kB)
Downloading lyricsgenius-3.3.1-py3-none-any.whl (45 kB)
Installing collected packages: lyricsgenius
Successfully installed lyricsgenius-3.3.1
Note: you may need to restart the kernel to use updated packages.


In [19]:
pip install vaderSentiment

Collecting vaderSentimentNote: you may need to restart the kernel to use updated packages.

  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [29]:
import lyricsgenius  # Import the lyricsgenius library
import time
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import pandas as pd  # For CSV output
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer # Import VADER for sentiment analysis

# --- Configuration ---
import os  # needed only to read the API token from the environment

# Security: the Genius token is read from the GENIUS_ACCESS_TOKEN environment
# variable and must never be pasted into the notebook. Any token that has been
# committed or shared inside a notebook should be considered leaked and revoked.
GENIUS_API_TOKEN = os.environ.get("GENIUS_ACCESS_TOKEN", "")
ARTIST_NAME = "Beyoncé"  # artist passed to every Genius search
OUTPUT_FILE_CSV = "beyonce_era_lyrics_sentiment_2songs.csv"  # destination of the per-song results

# --- Era-Based Song List (Two Prominent Songs per Decade - Hardcoded for Beyoncé) ---
# Maps an era label to its songs; each song is {"title": str, "year": int}.
# NOTE(review): the two 1990s titles are Destiny's Child recordings but are
# searched under ARTIST_NAME ("Beyoncé") - confirm Genius returns the intended
# tracks for them.
ERA_SONGS = {
    "1990s (Destiny's Child)": [
        {"title": "Say My Name", "year": 1999},  # Destiny's Child era
        {"title": "Bills, Bills, Bills", "year": 1999},  # Destiny's Child era
    ],
    "2000s (Early Solo)": [
        {"title": "Crazy in Love", "year": 2003},  # Early Solo era
        {"title": "Irreplaceable", "year": 2006},  # Early Solo era
    ],
    "2010s (Mid-Career Power)": [
        # Released in 2008; the author files it under the 2010s era on purpose.
        {"title": "Single Ladies (Put a Ring on It)", "year": 2008},
        {"title": "Formation", "year": 2016},  # Mid-Career Power era, politically charged
    ],
    "2020s (Mature & Renaissance)": [
        {"title": "Break My Soul", "year": 2022},  # Renaissance era
        {"title": "CUFF IT", "year": 2022},  # Renaissance era
    ],
}

def extract_lyrics_genius(artist_name, song_title):
    """Search Genius for a song and return the lyrics of the match.

    Args:
        artist_name: Artist name passed to the Genius search.
        song_title: Title of the song to look up.

    Returns:
        The ``lyrics`` attribute of the song returned by the search, or None
        when the search finds nothing or raises.
    """
    # The client is built outside the try block on purpose: a bad or missing
    # token is a configuration error and should surface immediately.
    genius = lyricsgenius.Genius(
        GENIUS_API_TOKEN,
        retries=3,
        # Ask lyricsgenius to strip bracketed section markers such as
        # "[Chorus]" so they do not end up counted as keywords or scored
        # as lyric text.
        remove_section_headers=True,
    )
    try:
        song = genius.search_song(song_title, artist=artist_name, get_full_info=False)
    except Exception as e:
        # Best effort: one failed lookup must not abort the remaining songs.
        print(f"  Error fetching lyrics for '{song_title}' by {artist_name} using lyricsgenius: {e}")
        return None

    if song:
        return song.lyrics
    print(f"  Lyrics not found for '{song_title}' by {artist_name} using lyricsgenius.")
    return None

def preprocess_lyrics(lyrics):
    """Turn raw lyrics into a list of lowercase content tokens.

    The text is lowercased and tokenized with NLTK's ``word_tokenize``; a token
    is kept only if it is purely alphanumeric and is not an English stop word.

    Args:
        lyrics: Raw lyrics text; may be None or empty.

    Returns:
        The list of kept tokens in their original order, or an empty list when
        ``lyrics`` is falsy.
    """
    if not lyrics:
        return []

    english_stopwords = set(stopwords.words('english'))
    kept_tokens = []
    for token in word_tokenize(lyrics.lower()):
        if token.isalnum() and token not in english_stopwords:
            kept_tokens.append(token)
    return kept_tokens

def extract_keywords_from_lyrics(processed_lyrics, top_n=10):
    """Return the ``top_n`` most frequent tokens, most frequent first.

    Args:
        processed_lyrics: Iterable of tokens (as produced by preprocessing).
        top_n: Maximum number of keywords to return.

    Returns:
        A list of tokens ordered by descending frequency, as ranked by
        ``collections.Counter.most_common``.
    """
    from collections import Counter

    ranked = Counter(processed_lyrics).most_common(top_n)
    return [term for term, _ in ranked]

def analyze_sentiment_vader(lyrics):
    """Score a text with VADER and map the compound score to a label.

    Args:
        lyrics: Text handed to ``SentimentIntensityAnalyzer.polarity_scores``.

    Returns:
        A ``(scores, label)`` tuple: ``scores`` is the dict returned by VADER
        and ``label`` is "Positive" when the compound score is >= 0.05,
        "Negative" when it is <= -0.05 and "Neutral" otherwise.

    NOTE(review): the whole lyric sheet is scored as one document; the recorded
    runs show compound scores close to +/-1 for most songs - consider scoring
    per line and averaging. TODO confirm against the VADER documentation.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(lyrics)
    compound = scores['compound']

    if compound >= 0.05:
        label = "Positive"
    elif compound <= -0.05:
        label = "Negative"
    else:
        label = "Neutral"
    return scores, label

def fetch_era_lyrics_and_sentiment(artist_name=None, era_songs=None):
    """Fetch lyrics for every listed song and build one result row per hit.

    Args:
        artist_name: Artist to search under. Defaults to the module-level
            ARTIST_NAME.
        era_songs: Mapping of era label -> list of song dicts with "title" and
            "year" keys. A song dict may also carry an optional "artist" key to
            search (and record) that song under a different act. Defaults to
            the module-level ERA_SONGS.

    Returns:
        A list of dicts, one per song whose lyrics were retrieved, holding the
        era, track and artist names, year, raw and preprocessed lyrics, top
        keywords, the four VADER scores and the sentiment label. Songs without
        lyrics are skipped.
    """
    # Resolve the defaults at call time so the function follows whichever
    # configuration cell was executed last, while still allowing explicit
    # arguments that do not depend on notebook-global state.
    if artist_name is None:
        artist_name = ARTIST_NAME
    if era_songs is None:
        era_songs = ERA_SONGS

    print(f"Starting to fetch era-based lyrics and sentiment for artist: {artist_name} (2 songs per era) using lyricsgenius")

    # 'punkt_tab' is the tokenizer resource used by recent NLTK releases;
    # 'punkt' is kept for older installations. quiet=True avoids flooding the
    # cell output with download logs on every run.
    for resource in ('stopwords', 'punkt', 'punkt_tab'):
        nltk.download(resource, quiet=True)

    era_data_list = []  # one dict per successfully processed song

    for era, song_list in era_songs.items():
        print(f"\n--- Era: {era} ---")
        for song_info in song_list:
            song_title = song_info["title"]
            expected_year = song_info["year"]
            # Optional per-song override, e.g. for songs released by a group.
            song_artist = song_info.get("artist", artist_name)

            print(f"\n  Fetching data for song: {song_title} ({expected_year})")

            lyrics = extract_lyrics_genius(song_artist, song_title)

            if lyrics:
                processed_lyrics = preprocess_lyrics(lyrics)
                keywords_lyrics = extract_keywords_from_lyrics(processed_lyrics)

                # Sentiment is computed on the raw lyrics, not the filtered tokens.
                sentiment_scores, sentiment_label = analyze_sentiment_vader(lyrics)

                era_data_list.append({
                    "era": era,
                    "track_name": song_title,
                    "artist_name": song_artist,
                    "year": expected_year,
                    "lyrics": lyrics,
                    "processed_lyrics": " ".join(processed_lyrics),
                    "keywords_lyrics": ", ".join(keywords_lyrics),
                    "sentiment_score_neg": sentiment_scores['neg'],
                    "sentiment_score_neu": sentiment_scores['neu'],
                    "sentiment_score_pos": sentiment_scores['pos'],
                    "sentiment_score_compound": sentiment_scores['compound'],
                    "sentiment_label": sentiment_label
                })
                print(f"    Retrieved lyrics, extracted keywords, performed sentiment analysis: {sentiment_label} (Compound Score: {sentiment_scores['compound']:.4f})")
            else:
                print(f"    Could not retrieve lyrics for: {song_title} ({expected_year})")
            time.sleep(1)  # pause between requests to respect rate limits

    return era_data_list

def save_era_data_to_csv(era_data_list, filename):
    """Write the collected song rows to a CSV file using pandas.

    Args:
        era_data_list: List of dicts, one per song; the dict keys become the
            CSV columns.
        filename: Destination path of the CSV file (written as UTF-8, without
            an index column).

    Returns:
        True when the file was written, False when writing failed. A failure is
        printed rather than raised, so callers should check the return value
        before relying on the file.
    """
    df = pd.DataFrame(era_data_list)
    try:
        df.to_csv(filename, index=False, encoding='utf-8')
    except Exception as e:
        print(f"Error saving era data to CSV file: {e}")
        return False
    print(f"Era data saved to CSV file: {filename}")
    return True

# --- Main Execution ---
if __name__ == "__main__":
    era_data = fetch_era_lyrics_and_sentiment()  # one dict per successfully fetched song
    if era_data:
        save_era_data_to_csv(era_data, OUTPUT_FILE_CSV)

        # Summarize from the rows already in memory instead of re-reading the
        # CSV: save_era_data_to_csv reports write failures without raising, so
        # a re-read could crash on a missing file or silently display the
        # contents of an earlier run.
        print("\n--- Theme & Sentiment Evolution Timeline (Console Output) ---")
        for row in era_data:
            print(f"Era: {row['era']} ({row['year']}), Song: {row['track_name']}, Sentiment: {row['sentiment_label']} (Compound: {row['sentiment_score_compound']:.4f}), Lyrics Keywords: {row['keywords_lyrics']}")

        print(f"\nEra-based data fetching, sentiment analysis, and saving to CSV complete for artist: {ARTIST_NAME} (2 songs per era)")
    else:
        print("No era data fetched. Check for errors during data retrieval.")

Starting to fetch era-based lyrics and sentiment for artist: Beyoncé (2 songs per era) using lyricsgenius

--- Era: 1990s (Destiny's Child) ---

  Fetching data for song: Say My Name (1999)
Searching for "Say My Name" by Beyoncé...


[nltk_data] Downloading package stopwords to C:\Users\A S U
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\A S U
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.9997)

  Fetching data for song: Bills, Bills, Bills (1999)
Searching for "Bills, Bills, Bills" by Beyoncé...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.9745)

--- Era: 2000s (Early Solo) ---

  Fetching data for song: Crazy in Love (2003)
Searching for "Crazy in Love" by Beyoncé...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.9828)

  Fetching data for song: Irreplaceable (2006)
Searching for "Irreplaceable" by Beyoncé...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.9429)

--- Era: 2010s (Mid-Career Power) ---

  Fetching data for song: Single Ladies (Put a Ring on It) (2008)
Searching for "Single Ladies (Put a Ring on It)" by Beyoncé...
Done.
    Retrieved lyrics, extracted keywords, performed sent

In [27]:
import lyricsgenius  # Import the lyricsgenius library
import time
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import pandas as pd  # For CSV output
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer # Import VADER for sentiment analysis

# --- Configuration ---
import os  # needed only to read the API token from the environment

# Security: the Genius token is read from the GENIUS_ACCESS_TOKEN environment
# variable and must never be pasted into the notebook. Any token that has been
# committed or shared inside a notebook should be considered leaked and revoked.
GENIUS_API_TOKEN = os.environ.get("GENIUS_ACCESS_TOKEN", "")
ARTIST_NAME = "Eminem"  # artist passed to every Genius search
OUTPUT_FILE_CSV = "eminem_era_lyrics_sentiment_2songs.csv"  # destination of the per-song results

# --- Era-Based Song List (Two Prominent Songs per Decade - Hardcoded) ---
# Maps an era label to its songs; each song is {"title": str, "year": int}.
ERA_SONGS = {
    "1990s": [
        {"title": "My Name Is", "year": 1999},
        {"title": "Role Model", "year": 1999},
    ],
    "2000s": [
        {"title": "Stan", "year": 2000},
        {"title": "Without Me", "year": 2002},
    ],
    "2010s": [
        {"title": "Not Afraid", "year": 2010},
        {"title": "Love the Way You Lie", "year": 2010},
    ],
    "2020s": [
        {"title": "Godzilla", "year": 2020},
        {"title": "From the D 2 The LBC", "year": 2022},
    ],
}

def extract_lyrics_genius(artist_name, song_title):
    """Search Genius for a song and return the lyrics of the match.

    Args:
        artist_name: Artist name passed to the Genius search.
        song_title: Title of the song to look up.

    Returns:
        The ``lyrics`` attribute of the song returned by the search, or None
        when the search finds nothing or raises.
    """
    # The client is built outside the try block on purpose: a bad or missing
    # token is a configuration error and should surface immediately.
    genius = lyricsgenius.Genius(
        GENIUS_API_TOKEN,
        retries=3,
        # Ask lyricsgenius to strip bracketed section markers such as
        # "[Chorus]" so they do not end up counted as keywords or scored
        # as lyric text.
        remove_section_headers=True,
    )
    try:
        song = genius.search_song(song_title, artist=artist_name, get_full_info=False)
    except Exception as e:
        # Best effort: one failed lookup must not abort the remaining songs.
        print(f"  Error fetching lyrics for '{song_title}' by {artist_name} using lyricsgenius: {e}")
        return None

    if song:
        return song.lyrics
    print(f"  Lyrics not found for '{song_title}' by {artist_name} using lyricsgenius.")
    return None

def preprocess_lyrics(lyrics):
    """Turn raw lyrics into a list of lowercase content tokens.

    The text is lowercased and tokenized with NLTK's ``word_tokenize``; a token
    is kept only if it is purely alphanumeric and is not an English stop word.

    Args:
        lyrics: Raw lyrics text; may be None or empty.

    Returns:
        The list of kept tokens in their original order, or an empty list when
        ``lyrics`` is falsy.
    """
    if not lyrics:
        return []

    english_stopwords = set(stopwords.words('english'))
    kept_tokens = []
    for token in word_tokenize(lyrics.lower()):
        if token.isalnum() and token not in english_stopwords:
            kept_tokens.append(token)
    return kept_tokens

def extract_keywords_from_lyrics(processed_lyrics, top_n=10):
    """Return the ``top_n`` most frequent tokens, most frequent first.

    Args:
        processed_lyrics: Iterable of tokens (as produced by preprocessing).
        top_n: Maximum number of keywords to return.

    Returns:
        A list of tokens ordered by descending frequency, as ranked by
        ``collections.Counter.most_common``.
    """
    from collections import Counter

    ranked = Counter(processed_lyrics).most_common(top_n)
    return [term for term, _ in ranked]

def analyze_sentiment_vader(lyrics):
    """Score a text with VADER and map the compound score to a label.

    Args:
        lyrics: Text handed to ``SentimentIntensityAnalyzer.polarity_scores``.

    Returns:
        A ``(scores, label)`` tuple: ``scores`` is the dict returned by VADER
        and ``label`` is "Positive" when the compound score is >= 0.05,
        "Negative" when it is <= -0.05 and "Neutral" otherwise.

    NOTE(review): the whole lyric sheet is scored as one document; the recorded
    runs show compound scores close to +/-1 for most songs - consider scoring
    per line and averaging. TODO confirm against the VADER documentation.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(lyrics)
    compound = scores['compound']

    if compound >= 0.05:
        label = "Positive"
    elif compound <= -0.05:
        label = "Negative"
    else:
        label = "Neutral"
    return scores, label

def fetch_era_lyrics_and_sentiment(artist_name=None, era_songs=None):
    """Fetch lyrics for every listed song and build one result row per hit.

    Args:
        artist_name: Artist to search under. Defaults to the module-level
            ARTIST_NAME.
        era_songs: Mapping of era label -> list of song dicts with "title" and
            "year" keys. A song dict may also carry an optional "artist" key to
            search (and record) that song under a different act. Defaults to
            the module-level ERA_SONGS.

    Returns:
        A list of dicts, one per song whose lyrics were retrieved, holding the
        era, track and artist names, year, raw and preprocessed lyrics, top
        keywords, the four VADER scores and the sentiment label. Songs without
        lyrics are skipped.
    """
    # Resolve the defaults at call time so the function follows whichever
    # configuration cell was executed last, while still allowing explicit
    # arguments that do not depend on notebook-global state.
    if artist_name is None:
        artist_name = ARTIST_NAME
    if era_songs is None:
        era_songs = ERA_SONGS

    print(f"Starting to fetch era-based lyrics and sentiment for artist: {artist_name} (2 songs per era) using lyricsgenius")

    # 'punkt_tab' is the tokenizer resource used by recent NLTK releases;
    # 'punkt' is kept for older installations. quiet=True avoids flooding the
    # cell output with download logs on every run.
    for resource in ('stopwords', 'punkt', 'punkt_tab'):
        nltk.download(resource, quiet=True)

    era_data_list = []  # one dict per successfully processed song

    for era, song_list in era_songs.items():
        print(f"\n--- Era: {era} ---")
        for song_info in song_list:
            song_title = song_info["title"]
            expected_year = song_info["year"]
            # Optional per-song override, e.g. for songs released by a group.
            song_artist = song_info.get("artist", artist_name)

            print(f"\n  Fetching data for song: {song_title} ({expected_year})")

            lyrics = extract_lyrics_genius(song_artist, song_title)

            if lyrics:
                processed_lyrics = preprocess_lyrics(lyrics)
                keywords_lyrics = extract_keywords_from_lyrics(processed_lyrics)

                # Sentiment is computed on the raw lyrics, not the filtered tokens.
                sentiment_scores, sentiment_label = analyze_sentiment_vader(lyrics)

                era_data_list.append({
                    "era": era,
                    "track_name": song_title,
                    "artist_name": song_artist,
                    "year": expected_year,
                    "lyrics": lyrics,
                    "processed_lyrics": " ".join(processed_lyrics),
                    "keywords_lyrics": ", ".join(keywords_lyrics),
                    "sentiment_score_neg": sentiment_scores['neg'],
                    "sentiment_score_neu": sentiment_scores['neu'],
                    "sentiment_score_pos": sentiment_scores['pos'],
                    "sentiment_score_compound": sentiment_scores['compound'],
                    "sentiment_label": sentiment_label
                })
                print(f"    Retrieved lyrics, extracted keywords, performed sentiment analysis: {sentiment_label} (Compound Score: {sentiment_scores['compound']:.4f})")
            else:
                print(f"    Could not retrieve lyrics for: {song_title} ({expected_year})")
            time.sleep(1)  # pause between requests to respect rate limits

    return era_data_list

def save_era_data_to_csv(era_data_list, filename):
    """Write the collected song rows to a CSV file using pandas.

    Args:
        era_data_list: List of dicts, one per song; the dict keys become the
            CSV columns.
        filename: Destination path of the CSV file (written as UTF-8, without
            an index column).

    Returns:
        True when the file was written, False when writing failed. A failure is
        printed rather than raised, so callers should check the return value
        before relying on the file.
    """
    df = pd.DataFrame(era_data_list)
    try:
        df.to_csv(filename, index=False, encoding='utf-8')
    except Exception as e:
        print(f"Error saving era data to CSV file: {e}")
        return False
    print(f"Era data saved to CSV file: {filename}")
    return True

# --- Main Execution ---
if __name__ == "__main__":
    era_data = fetch_era_lyrics_and_sentiment()  # one dict per successfully fetched song
    if era_data:
        save_era_data_to_csv(era_data, OUTPUT_FILE_CSV)

        # Summarize from the rows already in memory instead of re-reading the
        # CSV: save_era_data_to_csv reports write failures without raising, so
        # a re-read could crash on a missing file or silently display the
        # contents of an earlier run.
        print("\n--- Theme & Sentiment Evolution Timeline (Console Output) ---")
        for row in era_data:
            print(f"Era: {row['era']} ({row['year']}), Song: {row['track_name']}, Sentiment: {row['sentiment_label']} (Compound: {row['sentiment_score_compound']:.4f}), Lyrics Keywords: {row['keywords_lyrics']}")

        print(f"\nEra-based data fetching, sentiment analysis, and saving to CSV complete for artist: {ARTIST_NAME} (2 songs per era)")
    else:
        print("No era data fetched. Check for errors during data retrieval.")

Starting to fetch era-based lyrics and sentiment for artist: Eminem (2 songs per era) using lyricsgenius

--- Era: 1990s ---

  Fetching data for song: My Name Is (1999)
Searching for "My Name Is" by Eminem...


[nltk_data] Downloading package stopwords to C:\Users\A S U
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\A S U
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Negative (Compound Score: -0.9918)

  Fetching data for song: Role Model (1999)
Searching for "Role Model" by Eminem...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Negative (Compound Score: -0.9938)

--- Era: 2000s ---

  Fetching data for song: Stan (2000)
Searching for "Stan" by Eminem...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.9945)

  Fetching data for song: Without Me (2002)
Searching for "Without Me" by Eminem...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Negative (Compound Score: -0.9975)

--- Era: 2010s ---

  Fetching data for song: Not Afraid (2010)
Searching for "Not Afraid" by Eminem...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.9488)

  Fetching data for song: Love the Way You Lie (2010)
Searching for 

In [31]:
import lyricsgenius  # Import the lyricsgenius library
import time
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import pandas as pd  # For CSV output
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer # Import VADER for sentiment analysis

# --- Configuration ---
import os  # needed only to read the API token from the environment

# Security: the Genius token is read from the GENIUS_ACCESS_TOKEN environment
# variable and must never be pasted into the notebook. Any token that has been
# committed or shared inside a notebook should be considered leaked and revoked.
GENIUS_API_TOKEN = os.environ.get("GENIUS_ACCESS_TOKEN", "")
ARTIST_NAME = "Coldplay"  # artist passed to every Genius search
OUTPUT_FILE_CSV = "coldplay_era_lyrics_sentiment_2songs.csv"  # destination of the per-song results

# --- Era-Based Song List (Two Prominent Songs per Decade - Hardcoded for Coldplay) ---
# Maps an era label to its songs; each song is {"title": str, "year": int}.
ERA_SONGS = {
    "Early 2000s (Parachutes/AROBTTH)": [
        {"title": "Yellow", "year": 2000},  # Parachutes Era
        {"title": "Clocks", "year": 2002},  # AROBTTH Era
    ],
    "Mid 2000s (X&Y/Viva la Vida)": [
        {"title": "Fix You", "year": 2005},  # X&Y Era
        {"title": "Viva la Vida", "year": 2008},  # Viva la Vida Era
    ],
    "2010s (Mylo Xyloto/Ghost Stories)": [
        {"title": "Paradise", "year": 2011},  # Mylo Xyloto Era
        {"title": "Magic", "year": 2014},  # Ghost Stories Era (Adventure of a Lifetime could also be considered)
    ],
    "2020s (Everyday Life/MOTS)": [
        # Released late 2019; the author files it under the 2020s era on purpose.
        {"title": "Orphans", "year": 2019},  # Everyday Life Era
        {"title": "Higher Power", "year": 2021},  # Music of the Spheres Era
    ],
}

def extract_lyrics_genius(artist_name, song_title):
    """Search Genius for a song and return the lyrics of the match.

    Args:
        artist_name: Artist name passed to the Genius search.
        song_title: Title of the song to look up.

    Returns:
        The ``lyrics`` attribute of the song returned by the search, or None
        when the search finds nothing or raises.
    """
    # The client is built outside the try block on purpose: a bad or missing
    # token is a configuration error and should surface immediately.
    genius = lyricsgenius.Genius(
        GENIUS_API_TOKEN,
        retries=3,
        # Ask lyricsgenius to strip bracketed section markers such as
        # "[Chorus]" so they do not end up counted as keywords or scored
        # as lyric text.
        remove_section_headers=True,
    )
    try:
        song = genius.search_song(song_title, artist=artist_name, get_full_info=False)
    except Exception as e:
        # Best effort: one failed lookup must not abort the remaining songs.
        print(f"  Error fetching lyrics for '{song_title}' by {artist_name} using lyricsgenius: {e}")
        return None

    if song:
        return song.lyrics
    print(f"  Lyrics not found for '{song_title}' by {artist_name} using lyricsgenius.")
    return None

def preprocess_lyrics(lyrics):
    """Turn raw lyrics into a list of lowercase content tokens.

    The text is lowercased and tokenized with NLTK's ``word_tokenize``; a token
    is kept only if it is purely alphanumeric and is not an English stop word.

    Args:
        lyrics: Raw lyrics text; may be None or empty.

    Returns:
        The list of kept tokens in their original order, or an empty list when
        ``lyrics`` is falsy.
    """
    if not lyrics:
        return []

    english_stopwords = set(stopwords.words('english'))
    kept_tokens = []
    for token in word_tokenize(lyrics.lower()):
        if token.isalnum() and token not in english_stopwords:
            kept_tokens.append(token)
    return kept_tokens

def extract_keywords_from_lyrics(processed_lyrics, top_n=10):
    """Return the ``top_n`` most frequent tokens, most frequent first.

    Args:
        processed_lyrics: Iterable of tokens (as produced by preprocessing).
        top_n: Maximum number of keywords to return.

    Returns:
        A list of tokens ordered by descending frequency, as ranked by
        ``collections.Counter.most_common``.
    """
    from collections import Counter

    ranked = Counter(processed_lyrics).most_common(top_n)
    return [term for term, _ in ranked]

def analyze_sentiment_vader(lyrics):
    """Score a text with VADER and map the compound score to a label.

    Args:
        lyrics: Text handed to ``SentimentIntensityAnalyzer.polarity_scores``.

    Returns:
        A ``(scores, label)`` tuple: ``scores`` is the dict returned by VADER
        and ``label`` is "Positive" when the compound score is >= 0.05,
        "Negative" when it is <= -0.05 and "Neutral" otherwise.

    NOTE(review): the whole lyric sheet is scored as one document; the recorded
    runs show compound scores close to +/-1 for most songs - consider scoring
    per line and averaging. TODO confirm against the VADER documentation.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(lyrics)
    compound = scores['compound']

    if compound >= 0.05:
        label = "Positive"
    elif compound <= -0.05:
        label = "Negative"
    else:
        label = "Neutral"
    return scores, label

def fetch_era_lyrics_and_sentiment(artist_name=None, era_songs=None):
    """Fetch lyrics for every listed song and build one result row per hit.

    Args:
        artist_name: Artist to search under. Defaults to the module-level
            ARTIST_NAME.
        era_songs: Mapping of era label -> list of song dicts with "title" and
            "year" keys. A song dict may also carry an optional "artist" key to
            search (and record) that song under a different act. Defaults to
            the module-level ERA_SONGS.

    Returns:
        A list of dicts, one per song whose lyrics were retrieved, holding the
        era, track and artist names, year, raw and preprocessed lyrics, top
        keywords, the four VADER scores and the sentiment label. Songs without
        lyrics are skipped.
    """
    # Resolve the defaults at call time so the function follows whichever
    # configuration cell was executed last, while still allowing explicit
    # arguments that do not depend on notebook-global state.
    if artist_name is None:
        artist_name = ARTIST_NAME
    if era_songs is None:
        era_songs = ERA_SONGS

    print(f"Starting to fetch era-based lyrics and sentiment for artist: {artist_name} (2 songs per era) using lyricsgenius")

    # 'punkt_tab' is the tokenizer resource used by recent NLTK releases;
    # 'punkt' is kept for older installations. quiet=True avoids flooding the
    # cell output with download logs on every run.
    for resource in ('stopwords', 'punkt', 'punkt_tab'):
        nltk.download(resource, quiet=True)

    era_data_list = []  # one dict per successfully processed song

    for era, song_list in era_songs.items():
        print(f"\n--- Era: {era} ---")
        for song_info in song_list:
            song_title = song_info["title"]
            expected_year = song_info["year"]
            # Optional per-song override, e.g. for songs released by a group.
            song_artist = song_info.get("artist", artist_name)

            print(f"\n  Fetching data for song: {song_title} ({expected_year})")

            lyrics = extract_lyrics_genius(song_artist, song_title)

            if lyrics:
                processed_lyrics = preprocess_lyrics(lyrics)
                keywords_lyrics = extract_keywords_from_lyrics(processed_lyrics)

                # Sentiment is computed on the raw lyrics, not the filtered tokens.
                sentiment_scores, sentiment_label = analyze_sentiment_vader(lyrics)

                era_data_list.append({
                    "era": era,
                    "track_name": song_title,
                    "artist_name": song_artist,
                    "year": expected_year,
                    "lyrics": lyrics,
                    "processed_lyrics": " ".join(processed_lyrics),
                    "keywords_lyrics": ", ".join(keywords_lyrics),
                    "sentiment_score_neg": sentiment_scores['neg'],
                    "sentiment_score_neu": sentiment_scores['neu'],
                    "sentiment_score_pos": sentiment_scores['pos'],
                    "sentiment_score_compound": sentiment_scores['compound'],
                    "sentiment_label": sentiment_label
                })
                print(f"    Retrieved lyrics, extracted keywords, performed sentiment analysis: {sentiment_label} (Compound Score: {sentiment_scores['compound']:.4f})")
            else:
                print(f"    Could not retrieve lyrics for: {song_title} ({expected_year})")
            time.sleep(1)  # pause between requests to respect rate limits

    return era_data_list

def save_era_data_to_csv(era_data_list, filename):
    """Write the collected song rows to a CSV file using pandas.

    Args:
        era_data_list: List of dicts, one per song; the dict keys become the
            CSV columns.
        filename: Destination path of the CSV file (written as UTF-8, without
            an index column).

    Returns:
        True when the file was written, False when writing failed. A failure is
        printed rather than raised, so callers should check the return value
        before relying on the file.
    """
    df = pd.DataFrame(era_data_list)
    try:
        df.to_csv(filename, index=False, encoding='utf-8')
    except Exception as e:
        print(f"Error saving era data to CSV file: {e}")
        return False
    print(f"Era data saved to CSV file: {filename}")
    return True

# --- Main Execution ---
if __name__ == "__main__":
    era_data = fetch_era_lyrics_and_sentiment()  # one dict per successfully fetched song
    if era_data:
        save_era_data_to_csv(era_data, OUTPUT_FILE_CSV)

        # Summarize from the rows already in memory instead of re-reading the
        # CSV: save_era_data_to_csv reports write failures without raising, so
        # a re-read could crash on a missing file or silently display the
        # contents of an earlier run.
        print("\n--- Theme & Sentiment Evolution Timeline (Console Output) ---")
        for row in era_data:
            print(f"Era: {row['era']} ({row['year']}), Song: {row['track_name']}, Sentiment: {row['sentiment_label']} (Compound: {row['sentiment_score_compound']:.4f}), Lyrics Keywords: {row['keywords_lyrics']}")

        print(f"\nEra-based data fetching, sentiment analysis, and saving to CSV complete for artist: {ARTIST_NAME} (2 songs per era)")
    else:
        print("No era data fetched. Check for errors during data retrieval.")

Starting to fetch era-based lyrics and sentiment for artist: Coldplay (2 songs per era) using lyricsgenius

--- Era: Early 2000s (Parachutes/AROBTTH) ---

  Fetching data for song: Yellow (2000)
Searching for "Yellow" by Coldplay...


[nltk_data] Downloading package stopwords to C:\Users\A S U
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\A S U
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.9866)

  Fetching data for song: Clocks (2002)
Searching for "Clocks" by Coldplay...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Negative (Compound Score: -0.7399)

--- Era: Mid 2000s (X&Y/Viva la Vida) ---

  Fetching data for song: Fix You (2005)
Searching for "Fix You" by Coldplay...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.7879)

  Fetching data for song: Viva la Vida (2008)
Searching for "Viva la Vida" by Coldplay...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Negative (Compound Score: -0.0834)

--- Era: 2010s (Mylo Xyloto/Ghost Stories) ---

  Fetching data for song: Paradise (2011)
Searching for "Paradise" by Coldplay...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.9988)

  Fetching d

In [35]:
import lyricsgenius  # Import the lyricsgenius library
import time
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import pandas as pd  # For CSV output
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer # Import VADER for sentiment analysis

# --- Configuration ---
import os  # needed only to read the API token from the environment

# Security: the Genius token is read from the GENIUS_ACCESS_TOKEN environment
# variable instead of a placeholder that has to be edited in the notebook, so
# a real token never ends up saved in the file.
GENIUS_API_TOKEN = os.environ.get("GENIUS_ACCESS_TOKEN", "")
ARTIST_NAME = "Mariah Carey"  # artist passed to every Genius search
OUTPUT_FILE_CSV = "mariah_carey_era_lyrics_sentiment_2songs.csv"  # destination of the per-song results

# --- Era-Based Song List (Two Prominent Songs per Decade - Hardcoded for Mariah Carey) ---
# Maps an era label to its songs; each song is {"title": str, "year": int}.
# Several entries are deliberately filed under an era later than their release
# year (see the per-song comments).
ERA_SONGS = {
    "1990s (Early Career Dominance)": [
        {"title": "Hero", "year": 1993},  # Music Box Era
        {"title": "Fantasy", "year": 1995},  # Daydream Era
    ],
    "2000s (Comeback & Emancipation)": [
        {"title": "We Belong Together", "year": 2005},  # The Emancipation of Mimi Era
        {"title": "Touch My Body", "year": 2008},  # E=MC² Era
    ],
    "2010s (Continued Presence)": [
        # Released in 2009 (Memoirs of an Imperfect Angel); represents her 2010s presence.
        {"title": "Obsessed", "year": 2009},
        # Released in 2013 as "#Beautiful" (feat. Miguel); the previous value
        # of 2002 / Charmbracelet did not match this song.
        {"title": "Beautiful", "year": 2013},
    ],
    "2020s (Christmas & Legacy)": [
        # Evergreen Christmas classic from 1994; represents her continued relevance in the 2020s.
        {"title": "All I Want for Christmas Is You", "year": 1994},
        # Caution Era; released in 2018 but represents her 2020s activity.
        {"title": "GTFO", "year": 2018},
    ],
}

def extract_lyrics_genius(artist_name, song_title):
    """Search Genius for a song and return the lyrics of the match.

    Args:
        artist_name: Artist name passed to the Genius search.
        song_title: Title of the song to look up.

    Returns:
        The ``lyrics`` attribute of the song returned by the search, or None
        when the search finds nothing or raises.
    """
    # The client is built outside the try block on purpose: a bad or missing
    # token is a configuration error and should surface immediately.
    genius = lyricsgenius.Genius(
        GENIUS_API_TOKEN,
        retries=3,
        # Ask lyricsgenius to strip bracketed section markers such as
        # "[Chorus]" so they do not end up counted as keywords or scored
        # as lyric text.
        remove_section_headers=True,
    )
    try:
        song = genius.search_song(song_title, artist=artist_name, get_full_info=False)
    except Exception as e:
        # Best effort: one failed lookup must not abort the remaining songs.
        print(f"  Error fetching lyrics for '{song_title}' by {artist_name} using lyricsgenius: {e}")
        return None

    if song:
        return song.lyrics
    print(f"  Lyrics not found for '{song_title}' by {artist_name} using lyricsgenius.")
    return None

def preprocess_lyrics(lyrics):
    """Turn raw lyrics into a list of lowercase content tokens.

    The text is lowercased and tokenized with NLTK's ``word_tokenize``; a token
    is kept only if it is purely alphanumeric and is not an English stop word.

    Args:
        lyrics: Raw lyrics text; may be None or empty.

    Returns:
        The list of kept tokens in their original order, or an empty list when
        ``lyrics`` is falsy.
    """
    if not lyrics:
        return []

    english_stopwords = set(stopwords.words('english'))
    kept_tokens = []
    for token in word_tokenize(lyrics.lower()):
        if token.isalnum() and token not in english_stopwords:
            kept_tokens.append(token)
    return kept_tokens

def extract_keywords_from_lyrics(processed_lyrics, top_n=10):
    """Return the ``top_n`` most frequent tokens, most frequent first.

    Args:
        processed_lyrics: Iterable of tokens (as produced by preprocessing).
        top_n: Maximum number of keywords to return.

    Returns:
        A list of tokens ordered by descending frequency, as ranked by
        ``collections.Counter.most_common``.
    """
    from collections import Counter

    ranked = Counter(processed_lyrics).most_common(top_n)
    return [term for term, _ in ranked]

def analyze_sentiment_vader(lyrics):
    """Score a text with VADER and map the compound score to a label.

    Args:
        lyrics: Text handed to ``SentimentIntensityAnalyzer.polarity_scores``.

    Returns:
        A ``(scores, label)`` tuple: ``scores`` is the dict returned by VADER
        and ``label`` is "Positive" when the compound score is >= 0.05,
        "Negative" when it is <= -0.05 and "Neutral" otherwise.

    NOTE(review): the whole lyric sheet is scored as one document; the recorded
    runs show compound scores close to +/-1 for most songs - consider scoring
    per line and averaging. TODO confirm against the VADER documentation.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(lyrics)
    compound = scores['compound']

    if compound >= 0.05:
        label = "Positive"
    elif compound <= -0.05:
        label = "Negative"
    else:
        label = "Neutral"
    return scores, label

def fetch_era_lyrics_and_sentiment():
    """Collect lyrics, keywords and VADER sentiment for every song in ``ERA_SONGS``.

    For each era and each song entry, lyrics are requested for ``ARTIST_NAME``.
    When lyrics come back they are preprocessed, their top keywords are
    extracted, and sentiment is scored on the raw lyrics text. Songs whose
    lyrics cannot be retrieved are reported and left out of the result.
    Progress is printed throughout.

    Returns:
        List of dicts, one per song with lyrics, holding the era, title,
        artist, year, raw and processed lyrics, keywords, the four VADER
        scores and the sentiment label.
    """
    print(f"Starting to fetch era-based lyrics and sentiment for artist: {ARTIST_NAME} (2 songs per era) using lyricsgenius")

    # NLTK resources required by the preprocessing step (stop words + tokenizer).
    for resource in ('stopwords', 'punkt'):
        nltk.download(resource)

    rows = []  # one dict per successfully processed song

    for era, songs in ERA_SONGS.items():
        print(f"\n--- Era: {era} ---")
        for entry in songs:
            title, year = entry["title"], entry["year"]

            print(f"\n  Fetching data for song: {title} ({year})")

            lyrics = extract_lyrics_genius(ARTIST_NAME, title)

            if not lyrics:
                print(f"    Could not retrieve lyrics for: {title} ({year})")
            else:
                tokens = preprocess_lyrics(lyrics)
                keywords = extract_keywords_from_lyrics(tokens)

                # Sentiment is computed on the raw lyrics, not the filtered tokens.
                scores, label = analyze_sentiment_vader(lyrics)

                rows.append({
                    "era": era,
                    "track_name": title,
                    "artist_name": ARTIST_NAME,
                    "year": year,
                    "lyrics": lyrics,
                    "processed_lyrics": " ".join(tokens),
                    "keywords_lyrics": ", ".join(keywords),
                    "sentiment_score_neg": scores['neg'],
                    "sentiment_score_neu": scores['neu'],
                    "sentiment_score_pos": scores['pos'],
                    "sentiment_score_compound": scores['compound'],
                    "sentiment_label": label
                })
                print(f"    Retrieved lyrics, extracted keywords, performed sentiment analysis: {label} (Compound Score: {scores['compound']:.4f})")

            # Pause after every song, found or not, to respect rate limits.
            time.sleep(1)

    return rows

def save_era_data_to_csv(era_data_list, filename):
    """Write the collected song records to ``filename`` as a CSV file.

    Args:
        era_data_list: List of per-song dicts; each dict becomes one row.
        filename: Destination path of the CSV file.

    The file is written as UTF-8 without an index column. A confirmation is
    printed on success; any exception raised while writing is caught and
    printed instead of being propagated.
    """
    table = pd.DataFrame(era_data_list)
    try:
        table.to_csv(filename, index=False, encoding='utf-8')
        print(f"Era data saved to CSV file: {filename}")
    except Exception as err:
        print(f"Error saving era data to CSV file: {err}")

# --- Main Execution ---
# Run the full pipeline: fetch + analyze every song, persist the rows to CSV,
# then print a per-song summary read back from that CSV.
if __name__ == "__main__":
    era_data = fetch_era_lyrics_and_sentiment()
    if not era_data:
        # Nothing was collected, so there is nothing to save or display.
        print(f"No era data fetched. Check for errors during data retrieval.")
    else:
        save_era_data_to_csv(era_data, OUTPUT_FILE_CSV)

        print("\n--- Theme & Sentiment Evolution Timeline (Console Output - from CSV data) ---")
        # The summary is built from the file on disk, not from the in-memory list.
        df_output = pd.read_csv(OUTPUT_FILE_CSV)
        for index, row in df_output.iterrows():
            print(
                f"Era: {row['era']} ({row['year']}), Song: {row['track_name']}, "
                f"Sentiment: {row['sentiment_label']} (Compound: {row['sentiment_score_compound']:.4f}), "
                f"Lyrics Keywords: {row['keywords_lyrics']}"
            )

        print(f"\nEra-based data fetching, sentiment analysis, and saving to CSV complete for artist: {ARTIST_NAME} (2 songs per era)")

Starting to fetch era-based lyrics and sentiment for artist: Mariah Carey (2 songs per era) using lyricsgenius

--- Era: 1990s (Early Career Dominance) ---

  Fetching data for song: Hero (1993)
Searching for "Hero" by Mariah Carey...


[nltk_data] Downloading package stopwords to C:\Users\A S U
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\A S U
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.9921)

  Fetching data for song: Fantasy (1995)
Searching for "Fantasy" by Mariah Carey...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.9989)

--- Era: 2000s (Comeback & Emancipation) ---

  Fetching data for song: We Belong Together (2005)
Searching for "We Belong Together" by Mariah Carey...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Negative (Compound Score: -0.9795)

  Fetching data for song: Touch My Body (2008)
Searching for "Touch My Body" by Mariah Carey...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Positive (Compound Score: 0.9989)

--- Era: 2010s (Continued Presence) ---

  Fetching data for song: Obsessed (2009)
Searching for "Obsessed" by Mariah Carey...
Done.
    Retrieved lyrics, extracted keywords, performed sentiment analysis: Negative (