In [1]:
import os
import pickle
import re
from datetime import datetime, timedelta
from pprint import pprint

import billboard
import requests
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

## tests

In [2]:
# Sample date string in yyyy-mm-dd form.
# NOTE(review): no other cell in this notebook appears to read this module-level
# `date` (the chart cell passes its own literal) - confirm before relying on it.
date = '1995-05-15'

In [3]:
# Smoke test: fetch one Hot 100 chart and display its first ten entries.
chart = billboard.ChartData("hot-100", date="2019-04-07")
chart[:10]

[billboard.ChartEntry(title='Old Town Road', artist='Lil Nas X Featuring Billy Ray Cyrus'),
 billboard.ChartEntry(title='Sunflower (Spider-Man: Into The Spider-Verse)', artist='Post Malone & Swae Lee'),
 billboard.ChartEntry(title='7 Rings', artist='Ariana Grande'),
 billboard.ChartEntry(title='Wow.', artist='Post Malone'),
 billboard.ChartEntry(title='Without Me', artist='Halsey'),
 billboard.ChartEntry(title='Please Me', artist='Cardi B & Bruno Mars'),
 billboard.ChartEntry(title='Bad Guy', artist='Billie Eilish'),
 billboard.ChartEntry(title='Sucker', artist='Jonas Brothers'),
 billboard.ChartEntry(title='Happier', artist='Marshmello & Bastille'),
 billboard.ChartEntry(title='Middle Child', artist='J. Cole')]

In [4]:
# Smoke test for the Spotify search endpoint.
# SpotifyClientCredentials() called without arguments reads SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET from the environment, so no secret is stored in the notebook.
# NOTE(review): the key pair that used to be hard-coded in this cell has been
# exposed and should be rotated in the Spotify developer dashboard.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials())

results = sp.search(q='First Class Jack Harlow', type="track", limit=1)
for track in results['tracks']['items']:
    obj = {'track': track['uri'], 'popularity': track['popularity']}
    pprint(obj)

{'popularity': 88, 'track': 'spotify:track:1rDQ4oMwGJI7B4tovsBOxc'}


## work

In [5]:
# define spotify auth
#
# Credentials come from the environment instead of being committed with the notebook:
# export SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before starting the kernel.
# A missing variable yields None here; spotipy then reports the missing credential
# when the client is built.
# NOTE(review): the key pair previously hard-coded in this cell should be rotated.

spotify_auth = {
    'client_id': os.environ.get('SPOTIPY_CLIENT_ID'),
    'client_secret': os.environ.get('SPOTIPY_CLIENT_SECRET'),
}

In [6]:
def get_spotify_track(title, artist, spotify_auth):
    '''
    Search Spotify for a track and return its uri and popularity.

    title, artist: strings, joined with a space to form the search query
    spotify_auth: mapping with 'client_id' and 'client_secret' entries

    returns {'uri': ..., 'popularity': ...} for the first search hit,
    or None when the search comes back with no items
    '''
    credentials = SpotifyClientCredentials(
        client_id=spotify_auth['client_id'],
        client_secret=spotify_auth['client_secret'],
    )
    client = spotipy.Spotify(auth_manager=credentials)

    response = client.search(q=f'{title} {artist}', type="track", limit=1)
    hits = response['tracks']['items']

    # The search is capped at one result, so the first hit (if any) is the answer.
    for hit in hits:
        return {'uri': hit['uri'], 'popularity': hit['popularity']}
    return None

In [7]:
def get_conception_date(date):
    '''
    Shift a date string back by 270 days.

    date: a date_str yyyy-mm-dd
    returns: a date_str yyyy-mm-dd for the day 270 days earlier
    raises ValueError when date cannot be parsed with the yyyy-mm-dd format
    '''
    parsed = datetime.strptime(date, "%Y-%m-%d").date()
    shifted = parsed - timedelta(days=270)
    return shifted.strftime("%Y-%m-%d")

In [8]:
def get_conception_date_tracks(date, mixtape_len):
    '''
    a function to return a cc2000 track list for site utility

    function takes:
        - date: a date_str yyyy-mm-dd, passed to billboard as-is
          (the 270-day conception offset is NOT applied in this function)
        - mixtape_len: the maximum number of tracks to return

    and returns a list of dicts, in chart order, with the keys
    title, artist, spicy, spotify_uri and popularity.
    spicy counts down from mixtape_len, one step per track kept.

    Chart entries that Spotify cannot resolve are skipped and the next entry
    is tried, so the list is shorter than mixtape_len only when the chart
    itself runs out of entries.

    returns None after printing a message when the date is malformed or the
    chart cannot be fetched, and None when the chart is empty
    '''

    # validate date: the WHOLE string must look like yyyy-mm-dd
    if not isinstance(date, str) or not re.fullmatch(r"\d\d\d\d-\d\d-\d\d", date):
        print('incorrect date format')
        return

    # get hot 100 chart for date
    try:
        chart = billboard.ChartData('hot-100', date=date)
    except Exception:
        # Exception (not a bare except) so Ctrl-C still interrupts a long scrape
        print('chart error')
        return

    if not chart:
        return # return if no chart object

    tracks = []
    spicy = mixtape_len

    # Walk the chart exactly once. Bounding the loop by the chart itself means a
    # chart with too few resolvable tracks ends the loop instead of spinning forever.
    for entry in chart:
        if len(tracks) >= mixtape_len:
            break

        try:
            track_spotify_data = get_spotify_track(entry.title, entry.artist, spotify_auth)
            if track_spotify_data is None:
                continue # spotify found nothing, try the next chart entry

            track = {
                'title': entry.title,
                'artist': entry.artist,
                'spicy': spicy,
                'spotify_uri': track_spotify_data['uri'],
                'popularity': track_spotify_data['popularity'],
            }
        except Exception:
            # best effort: a failed lookup or malformed result skips this entry only
            continue

        tracks.append(track)
        spicy -= 1

    return tracks

In [9]:
"""
def create_conception_track_day_object(tracks):
    '''
    takes a list of track objects (artist, popularity, sporitfy_uri, title)
    and returns an object with keys relating to populatriy metric (for spicy mettric match)
    '''
    
    if not tracks:
        return {'error':'no tracks'}
    
    sorted_tracks = sorted(tracks, key=lambda x: x['popularity'])
    
    day_object = {}
    i=1
    
    for item in sorted_tracks:
        day_object[i] = item
        i += 1
    
    return day_object
"""

"\ndef create_conception_track_day_object(tracks):\n    '''\n    takes a list of track objects (artist, popularity, sporitfy_uri, title)\n    and returns an object with keys relating to populatriy metric (for spicy mettric match)\n    '''\n    \n    if not tracks:\n        return {'error':'no tracks'}\n    \n    sorted_tracks = sorted(tracks, key=lambda x: x['popularity'])\n    \n    day_object = {}\n    i=1\n    \n    for item in sorted_tracks:\n        day_object[i] = item\n        i += 1\n    \n    return day_object\n"

In [10]:
def make_day_object(date, mixtape_len):
    '''
    Build the cc2000 day object for one chart date.

    date: a date_str yyyy-mm-dd
    mixtape_len: mixtape length, forwarded unchanged

    Thin wrapper: whatever get_conception_date_tracks(date, mixtape_len)
    returns is handed back untouched; no re-ordering is applied here.
    '''
    return get_conception_date_tracks(date, mixtape_len)

In [None]:
def make_cc2000_data(start_date_str='2022-09-04', end_date_str='2011-01-01', mixtape_len=10):
    '''
    makes the output dictionary for cc2000

    starts at start_date_str and steps backwards one week at a time
    until the date falls before end_date_str (both yyyy-mm-dd)

    function takes:
        - start_date_str: first (most recent) date to fetch
        - end_date_str: earliest date to fetch; '1958-08-04' is the day
          the first billboard hot 100 was released
        - mixtape_len: number of tracks requested for each date

    returns object with dates yyyy-mm-dd as keys; each value is whatever
    make_day_object returned for that date, or None if it raised

    raises ValueError if either date string is not yyyy-mm-dd
    '''

    date = datetime.strptime(start_date_str, "%Y-%m-%d").date()
    end_date = datetime.strptime(end_date_str, "%Y-%m-%d").date()

    cc2000_data = {}

    while date >= end_date: # end once the requested range is exhausted
        # Formatting happens outside the try so date_str is always bound for the key.
        date_str = date.strftime("%Y-%m-%d")

        try:
            day_object = make_day_object(date_str, mixtape_len)
        except Exception:
            # one bad date must not abort the whole run
            day_object = None

        cc2000_data[date_str] = day_object

        # progress line: first track of the day, or a placeholder when there is none
        try:
            print(date, day_object[0]['title'], day_object[0]['artist'])
        except (TypeError, IndexError, KeyError):
            print(date, 'no data today')

        # step back one week
        date -= timedelta(days=7)

    return cc2000_data

In [None]:
# Run the full scrape over the date range set inside make_cc2000_data.
# Each date step calls billboard and Spotify, so this cell depends on network access.
data = make_cc2000_data()

In [None]:
# Persist the scraped data; the with-block closes the file even if dump() raises.
with open("billboard_2011+.pkl", "wb") as pickle_out:
    pickle.dump(data, pickle_out)