In [1]:
import billboard
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import base64
from requests import post, get
import json
import time
from datetime import datetime, timedelta

from client_info import CLIENT_ID
from client_info import CLIENT_SECRET

In [2]:
# Define start and end years
start_year = 2006
end_year = 2024

# Initialize a dictionary to store DataFrames by year
dataframes_by_year = {}

# Initialize an empty list to store all data
data = []

# Initialize a dictionary to store the title counts for each year
title_counts_by_year = {}

# Loop through each year and fetch the year-end chart data
for year in range(start_year, end_year + 1):
    print(f"Fetching year-end data for {year}...")
    try:
        # Initialize a list to store the current year's data
        year_data = []

        # Fetch the year-end chart for the given year
        chart = billboard.ChartData('hot-100-songs', year=year)
        for entry in chart:
            entry_data = {
                "Year": year,
                "Rank": entry.rank,
                "Title": entry.title,
                "Artist": entry.artist
            }
            data.append(entry_data)  # Add to overall data list
            year_data.append(entry_data)  # Add to year-specific list

        # Create a DataFrame for the current year
        year_df = pd.DataFrame(year_data)
        dataframes_by_year[year] = year_df

        # Count the number of titles for the current year
        title_counts_by_year[year] = len(year_df)

    except Exception as e:
        print(f"Error fetching data for {year}: {e}")

print("Year-end data fetching complete!")

# Display the total title counts for each year. \n is an escape sequence that inserts a newline character, creating a blank line before the message.
print("\nTitle counts by year:")
for year, count in title_counts_by_year.items():
    print(f"{year}: {count} titles")


Fetching year-end data for 2006...
Fetching year-end data for 2007...
Fetching year-end data for 2008...
Fetching year-end data for 2009...
Fetching year-end data for 2010...
Fetching year-end data for 2011...
Fetching year-end data for 2012...
Fetching year-end data for 2013...
Fetching year-end data for 2014...
Fetching year-end data for 2015...
Fetching year-end data for 2016...
Fetching year-end data for 2017...
Fetching year-end data for 2018...
Fetching year-end data for 2019...
Fetching year-end data for 2020...
Fetching year-end data for 2021...
Fetching year-end data for 2022...
Fetching year-end data for 2023...
Fetching year-end data for 2024...
Year-end data fetching complete!

Title counts by year:
2006: 100 titles
2007: 100 titles
2008: 100 titles
2009: 100 titles
2010: 100 titles
2011: 99 titles
2012: 100 titles
2013: 100 titles
2014: 100 titles
2015: 100 titles
2016: 99 titles
2017: 100 titles
2018: 100 titles
2019: 100 titles
2020: 100 titles
2021: 100 titles
2022: 100

In [5]:
# Convert the full data list to a DataFrame
df_all_years = pd.DataFrame(data)

In [7]:
# Define the two new rows
new_row1 = {
    "Year": 2011,
    "Rank": 7,
    "Title": "Fuck You (Forget You)",
    "Artist": "CeeLo Green"
}

new_row2 = {
    "Year": 2016,
    "Rank": 87,
    "Title": "All the Way Up",
    "Artist": "Fat Joe and Remy Ma featuring French Montana and Infared"
}

# Add the new rows to df_all_years
df_all_years = pd.concat([df_all_years, pd.DataFrame([new_row1, new_row2])], ignore_index=True)

# Sort the DataFrame by Year and Rank for consistency
df_all_years = df_all_years.sort_values(by=["Year", "Rank"]).reset_index(drop=True)

# Update the title counts for each year
title_counts_by_year = df_all_years.groupby("Year").size().to_dict()

# Display the updated title counts by year
print("\nUpdated Title counts by year:")
for year, count in title_counts_by_year.items():
    print(f"{year}: {count} titles")

# Display the updated DataFrame
print("\nUpdated DataFrame:")
print(df_all_years)


Updated Title counts by year:
2006: 100 titles
2007: 100 titles
2008: 100 titles
2009: 100 titles
2010: 100 titles
2011: 100 titles
2012: 100 titles
2013: 100 titles
2014: 100 titles
2015: 100 titles
2016: 100 titles
2017: 100 titles
2018: 100 titles
2019: 100 titles
2020: 100 titles
2021: 100 titles
2022: 100 titles
2023: 100 titles
2024: 100 titles

Updated DataFrame:
      Year  Rank             Title                                Artist
0     2006     1           Bad Day                         Daniel Powter
1     2006     2       Temperature                             Sean Paul
2     2006     3       Promiscuous     Nelly Furtado Featuring Timbaland
3     2006     4  You're Beautiful                           James Blunt
4     2006     5    Hips Don't Lie         Shakira Featuring Wyclef Jean
...    ...   ...               ...                                   ...
1895  2024    96       Bulletproof                            Nate Smith
1896  2024    97              FE!N  Travis

In [9]:
# Extract the Title column
titles = df_all_years['Title']

titles_list = df_all_years['Title'].tolist()

# Display the Title column
print(titles_list)

['Bad Day', 'Temperature', 'Promiscuous', "You're Beautiful", "Hips Don't Lie", 'Unwritten', 'Crazy', "Ridin'", 'SexyBack', 'Check On It', 'Be Without You', 'Grillz', 'Over My Head (Cable Car)', 'Me & U', 'Buttons', 'Run It!', 'So Sick', "It's Goin' Down", 'SOS', 'I Write Sins Not Tragedies', 'Move Along', 'London Bridge', 'Dani California', 'Snap Yo Fingers', 'Lean Wit It, Rock Wit It', 'What Hurts The Most', 'How To Save A Life', 'Unfaithful', 'Chasing Cars', 'Lips Of An Angel', 'Everytime We Touch', "Ain't No Other Man", 'Dance, Dance', 'Gold Digger', 'Money Maker', 'Ms. New Booty', '(When You Gonna) Give It Up To Me', 'Photograph', 'Because Of You', 'Stickwitu', "I'm N Luv (Wit A Stripper)", 'My Humps', "Where'd You Go", 'Yo (Excuse Me Miss)', 'Walk Away', 'Laffy Taffy', 'What You Know', 'Dirty Little Secret', "Savin' Me", "Don't Forget About Us", 'Sexy Love', 'U And Dat', 'Far Away', "What's Left Of Me", 'So What', 'Do It To It', 'Black Horse & The Cherry Tree', 'There It Go! (The

In [11]:
client_id = CLIENT_ID
client_secret = CLIENT_SECRET

In [13]:
def get_token():
    auth_string = client_id + ":" + client_secret
    auth_bytes = auth_string.encode("utf-8")
    auth_base64 = str(base64.b64encode(auth_bytes), "utf-8")

    url = "https://accounts.spotify.com/api/token"
    headers = {
    "Authorization": "Basic " + auth_base64,
    "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {"grant_type": "client_credentials"}
    result = post(url, headers=headers, data=data)
    json_result = json.loads(result.content)
    token = json_result["access_token"]
    return token

token = get_token()
print(token)

BQDPcIwGjxBngihr0IrJkJMdMnHbRrVogBLxpfZIYAGDw9jJVbbU7dgKad6h7KwfAPGelYiboaiyFt6Sx3B-QzQAkYP5VzwzGB0fOMug0UZnLZlNvpg


In [15]:
def get_auth_header(token):
    return {"Authorization": "Bearer " + token}

In [17]:
def search_for_song(token, song_name):
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    params = {
        "q": song_name,
        "type": "track",
        "limit": 1
    }
    
    # Perform the request
    response = requests.get(url, headers=headers, params=params)
    
    # Parse the JSON result
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error for song '{song_name}': {response.status_code}, {response.text}")
        return None

def search_all_songs(token, titles):
    results = []
    for title in titles:
        print(f"Searching for: {title}")
        result = search_for_song(token, title)
        if result:
            if result['tracks']['items']:
                track = result['tracks']['items'][0]
                artist_id = track['artists'][0]['id']
                duration_ms = track['duration_ms']
                popularity = track['popularity']
                release_date = track['album']['release_date']
                
                # Get the artist's genres using the artist's Spotify ID
                artist_url = f"https://api.spotify.com/v1/artists/{artist_id}"
                artist_response = requests.get(artist_url, headers=get_auth_header(token))
                
                genres = []
                if artist_response.status_code == 200:
                    artist_data = artist_response.json()
                    genres = artist_data.get('genres', [])

                #Convert ms to minutes and seconds format
                minutes, seconds = divmod(duration_ms // 1000, 60)
                duration_formatted = f"{minutes}:{seconds:02d}"
                
                results.append({
                    "Song Name": track['name'],
                    "Artist": ", ".join(artist['name'] for artist in track['artists']),
                    "Spotify ID": track['id'],
                    "Genres": ", ".join(genres),
                    "Duration": duration_formatted,
                    "Popularity": popularity,
                    "Release Date": release_date
                })
            else:
                results.append({"Song Name": title, "Error": "Track not found"})
        else:
            results.append({"Song Name": title, "Error": "No response from Spotify"})
        
        # Add a small delay between requests to prevent rate limiting
        time.sleep(1)

    sorted_results = sorted(results, key=lambda x: x.get("Release Date", "9999-99-99"))
    
    return results


In [19]:
access_token = token 

spotify_results = search_all_songs(access_token, titles_list)

# Convert results to JSON format
spotify_results_json = json.dumps(spotify_results, indent=4)

# Print the JSON data
print(spotify_results_json)

Searching for: Bad Day
Searching for: Temperature
Searching for: Promiscuous
Searching for: You're Beautiful
Searching for: Hips Don't Lie
Searching for: Unwritten
Searching for: Crazy
Searching for: Ridin'
Searching for: SexyBack
Searching for: Check On It
Searching for: Be Without You
Searching for: Grillz
Searching for: Over My Head (Cable Car)
Searching for: Me & U
Searching for: Buttons
Searching for: Run It!
Searching for: So Sick
Searching for: It's Goin' Down
Searching for: SOS
Searching for: I Write Sins Not Tragedies
Searching for: Move Along
Searching for: London Bridge
Searching for: Dani California
Searching for: Snap Yo Fingers
Searching for: Lean Wit It, Rock Wit It
Searching for: What Hurts The Most
Searching for: How To Save A Life
Searching for: Unfaithful
Searching for: Chasing Cars
Searching for: Lips Of An Angel
Searching for: Everytime We Touch
Searching for: Ain't No Other Man
Searching for: Dance, Dance
Searching for: Gold Digger
Searching for: Money Maker
Searc

In [29]:
with open("spotify_results_json.json", "w") as f:
    json.dump(spotify_results_json, f)

In [21]:
data_df = pd.DataFrame(spotify_results)
data_df.head(5)

Unnamed: 0,Song Name,Artist,Spotify ID,Genres,Duration,Popularity,Release Date
0,Bad Day,Daniel Powter,0mUyMawtxj1CJ76kn9gIZK,,3:53,71,2005-02-22
1,Temperature,Sean Paul,0k2GOhqsrxDTAbFFSdNJjT,dancehall,3:38,78,2005-09-27
2,Promiscuous,"Nelly Furtado, Timbaland",2gam98EZKrF9XuOkU13ApN,,4:02,83,2006-01-01
3,You're Beautiful,James Blunt,0vg4WnUWvze6pBOJDTq99k,,3:29,73,2005-08-08
4,Hips Don't Lie (feat. Wyclef Jean),"Shakira, Wyclef Jean",3ZFTkvIE7kyPt6Nu3PEa7V,"latin pop, latin",3:38,84,2005-11-28


In [27]:
data_df.to_json('data.json')

In [23]:
# Save the DataFrame to a CSV file
data_df.to_csv("data_df.csv", index=False)

In [25]:
spotify_ids = data_df['Spotify ID']

spotify_ids = data_df['Spotify ID'].tolist()

# Display the Title column
print(spotify_ids)

['0mUyMawtxj1CJ76kn9gIZK', '0k2GOhqsrxDTAbFFSdNJjT', '2gam98EZKrF9XuOkU13ApN', '0vg4WnUWvze6pBOJDTq99k', '3ZFTkvIE7kyPt6Nu3PEa7V', '3U5JVgI2x4rDyHGObzJfNf', '3AoEQRuFf8zVXWqSLo2UOi', '3kZoay4ANo86ehb6s4RwS9', '0O45fw2L5vsWpdsOdXwNAR', '2RWxrpsFshOBFwRBRstUlQ', '6Y3WvyUG9iE5bQYg38SPtQ', '0Aqlt4N5h6rUtKnd09VXpr', '1N62wozuHCvczCkY4QidpP', '7k6IzwMGpxnRghE7YosnXT', '3BxWKCI06eQ5Od8TY2JBeA', '7xYnUQigPoIDAMPVK79NEq', '6brl7bwOHmGFkNw3MBqssT', '6QdZhhQc4wYEi7amJKuWqu', '30cSNer6TV8x2utjULVeQ5', '4bPQs0PHn4xbipzdPfn6du', '2l57cfmCnOkwNX1tky02n1', '7jRoWfRlLnGYEIEn4t4kbq', '10Nmj3JCNoMeBQ87uw5j8k', '6o3s08kk2fQI37vxGZDrJ1', '7cVmKBwzPsh4Fmb6SplfEm', '4bVuIlGQBMWS7vIhcx8Ae4', '5fVZC9GiM4e8vu99W0Xf6J', '13xxBnXOuiBxVxJI458B0i', '5hnyJvgoWiQUYZttV4wXy6', '40LQiUUUKXVGyNs09lHVjW', '5YJtMNWKe55yr49cyJgxva', '7huo2wvrCgRucUsjdSDLQV', '0a7BloCiNzLDD9qSQHh5m7', '1PS1QMdUqOal0ai3Gt7sDQ', '2JpUkUR0OsOlUUfm6iS8ic', '01yJb0xHAvf0Sac9eJBy6O', '6k8uthjEbttfWXcGaNTQGD', '1HNkqx9Ahdgi1Ixy2xkKkL', '6CFPFnS9Ec