In [1]:
import pandas as pd
import numpy as np
import requests
import swifter
import re

In [2]:
# Read in 1.2 Million Spotify Songs dataset
spotify_1m = pd.read_csv('./Resources/tracks_features.csv')

In [3]:
# Strip brackets and single quotes from artist name
spotify_1m['artist'] = spotify_1m['artists'].str.strip("[']")

# Preview dataframe
spotify_1m.head()

Unnamed: 0,id,name,album,album_id,artists,artist_ids,track_number,disc_number,explicit,danceability,...,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year,release_date,artist
0,7lmeHLHBe4nmXzuXc0HDjk,Testify,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],1,1,False,0.47,...,0.0261,1.1e-05,0.356,0.503,117.906,210133,4.0,1999,1999-11-02,Rage Against The Machine
1,1wsRitfRRtWyEapl0q22o8,Guerrilla Radio,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],2,1,True,0.599,...,0.0129,7.1e-05,0.155,0.489,103.68,206200,4.0,1999,1999-11-02,Rage Against The Machine
2,1hR0fIFK2qRG3f3RF70pb7,Calm Like a Bomb,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],3,1,False,0.315,...,0.0234,2e-06,0.122,0.37,149.749,298893,4.0,1999,1999-11-02,Rage Against The Machine
3,2lbASgTSoDO7MTuLAXlTW0,Mic Check,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],4,1,True,0.44,...,0.163,4e-06,0.121,0.574,96.752,213640,4.0,1999,1999-11-02,Rage Against The Machine
4,1MQTmpYOZ6fcMQc56Hdo7T,Sleep Now In the Fire,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],5,1,False,0.426,...,0.00162,0.105,0.0789,0.539,127.059,205600,4.0,1999,1999-11-02,Rage Against The Machine


In [4]:
# Read in Billboard Top 100 dataset
df = pd.read_csv('./Resources/charts.csv')

# Convert date column to date type
df['date'] = pd.to_datetime(df['date'])
# Create new column for year
df['year'] = df['date'].dt.year
# Remove special characters from artist and song name
df['artist'] = df['artist'].str.replace(r"\(.*\)","")
# df['song'] = df['song'].str.replace(r"\(.*\)","")

df['song'] = df['song'].str.replace("\"", "") \
                        # .str.replace(",", "") \
                        # .str.replace("!", "")


# Create new column showing number of weeks in #1 spot (if exists)
df['weeks-at-no1'] = df[df['rank']==1].groupby(['song', 'artist', 'rank'], as_index=False).count()['date']

# Remove single quotes/apostrophes from song names
df['song'] = df['song'].str.replace("'", "")

# Preview dataframe
df.head()

Unnamed: 0,date,rank,song,artist,last-week,peak-rank,weeks-on-board,year,weeks-at-no1
0,2021-11-06,1,Easy On Me,Adele,1.0,1,3,2021,1.0
1,2021-11-06,2,Stay,The Kid LAROI & Justin Bieber,2.0,1,16,2021,7.0
2,2021-11-06,3,Industry Baby,Lil Nas X & Jack Harlow,3.0,1,14,2021,1.0
3,2021-11-06,4,Fancy Like,Walker Hayes,4.0,3,19,2021,4.0
4,2021-11-06,5,Bad Habits,Ed Sheeran,5.0,2,18,2021,2.0


In [5]:
# Create new dataframe of all unique Billboard charting songs
unique_billboard_tracks_df = df.groupby(['song','artist', 'year'], as_index=False).agg({'peak-rank': 'min', 
                                                                                        'weeks-on-board': 'max',
                                                                                        'weeks-at-no1': 'max'})

# Create separate dataframes for each decade
billboard_1960s = unique_billboard_tracks_df[(unique_billboard_tracks_df['year']<1970) & (unique_billboard_tracks_df['year']>=1960)]
billboard_1970s = unique_billboard_tracks_df[(unique_billboard_tracks_df['year']<1980) & (unique_billboard_tracks_df['year']>=1970)]
billboard_1980s = unique_billboard_tracks_df[(unique_billboard_tracks_df['year']<1990) & (unique_billboard_tracks_df['year']>=1980)]
billboard_1990s = unique_billboard_tracks_df[(unique_billboard_tracks_df['year']<2000) & (unique_billboard_tracks_df['year']>=1990)]
billboard_2000s = unique_billboard_tracks_df[(unique_billboard_tracks_df['year']<2010) & (unique_billboard_tracks_df['year']>=2000)]
billboard_2010s = unique_billboard_tracks_df[(unique_billboard_tracks_df['year']<2020) & (unique_billboard_tracks_df['year']>=2010)]
billboard_2020s = unique_billboard_tracks_df[unique_billboard_tracks_df['year']>=2020]

# Create list of dataframes
billboard_dfs = [billboard_1960s, billboard_1970s, billboard_1980s, billboard_1990s, billboard_2000s, billboard_2010s, billboard_2020s]

# Display dataframe
unique_billboard_tracks_df.head()
unique_billboard_tracks_df

# df[(df['artist']=='Beastie Boys') & (df['year']==1986)]

Unnamed: 0,song,artist,year,peak-rank,weeks-on-board,weeks-at-no1
0,#1,Nelly,2001,22,11,
1,#1,Nelly,2002,22,20,
2,#1 Dee Jay,Goody Goody,1978,82,5,
3,#9 Dream,John Lennon,1974,58,2,
4,#9 Dream,John Lennon,1975,9,12,
...,...,...,...,...,...,...
36078,whoknows,Musiq,2004,65,13,
36079,www.memory,Alan Jackson,2000,45,9,
36080,www.memory,Alan Jackson,2001,45,15,
36081,¿Dònde Està Santa Claus? (Where Is Santa Claus?),Augie Rios,1958,47,3,


In [6]:
# SPOTIFY API
# Import dependencies for Spotipy
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Import Client ID and Client Secret
from config import cid, secret


# Create objects for accessing Spotify API
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [7]:
def search_spotify(song_title, artist):
    try:
        # print(f"Trying '{song_title}' by {artist}...")
        searchResults = sp.search(q=f"artist:{artist} track:{song_title}", type="track")
        track_id = searchResults['tracks']['items'][0]['id']
        # print(f"{song_title} by {artist} was successful!")
        # print(track_id)
        return track_id
    except:
        pass
        # print(f"'{song_title}' by {artist} returned 0 results.")

In [8]:
def adjust_parens(song_title):
    combinations = []

    strip_parens = ' '.join(song_title.strip(')(').split(')'))
    strip_parens = ' '.join(strip_parens.strip(' (').split('(')).replace('  ',' ')
    combinations.append(strip_parens)

    try:
        inside_parens = re.findall(r'\(.*?\)', song_title)[0].strip('()').strip()
        
    except IndexError:
        pass
        
    try:
        left_parens = song_title.split(f"({inside_parens})")[0].strip()
        if (left_parens not in combinations) & (len(left_parens) > 0): 
            combinations.append(left_parens)
    except IndexError:
        left_parens = "NA"

    try:
        right_parens = song_title.split(f"({inside_parens})")[1].strip()
        if (right_parens not in combinations) & (len(right_parens) > 0): 
            combinations.append(right_parens)
    except IndexError:
        right_parens = "NA"

    return combinations

In [14]:
# Create function to retrieve track ID from Spotify given the artist and song title
def get_track_id(song_title, artist):
    while True:
        # Select first artist if multiple listed with "Featuring" keyword
        if 'Featuring' in artist:
            artist = artist.split(' Featuring ')[0]
        # Select first artist if multiple listed with "with" keyword
        elif ' with ' in artist:
            artist = artist.split(' with ')[0]
        elif ' With ' in artist:
            artist = artist.split(' With ')[0]
        elif "," in artist:
            artist = artist.split(',')[0]

        # Make API call
        found_id = search_spotify(song_title, artist)
        if found_id:
            return found_id

        # Search artist and song title (replacing words ending in "in" to "ing")
        song_title = re.sub(r"in\b", 'ing ', song_title)
        found_id = search_spotify(song_title, artist)
        if found_id:
            return found_id

        if '(' in song_title:
            for item in adjust_parens(song_title):
                found_id = search_spotify(item, artist)
                if found_id:
                    return found_id
        # Check for '/' character in song_title
        if '/' in song_title:
            # Try string on left side of '/'
            song_title = song_title.split('/')[0]
            found_id = search_spotify(song_title, artist)
            if found_id:
                return found_id

            # Try string on right side of '/'
            try:
                song_title = song_title.split('/')[1]
                found_id = search_spotify(song_title, artist)
                if found_id:
                    return found_id
            except:
                pass

        # Check for '&' character
        if ' & ' in artist:
            artist = artist.split(' & ')[0]
            found_id = search_spotify(song_title, artist)
            if found_id:
                return found_id
        
        if ' & ' in artist:
            artist = artist.split('X')[0]
            found_id = search_spotify(song_title, artist)
            if found_id:
                return found_id
            
        # Print song title and artist for non-match
        if found_id:
            return found_id
        
        break

            # print(f"No ID found for '{song_title}' by {artist}")
    

In [10]:
def get_audio_features(id):
    searchResults = sp.audio_features(id)[0]
    return searchResults

In [16]:
song = 'AM'
artist = "Nio Garcia X J Balvin X Bad Bunny"

result = sp.search(q=f"track:{song} artist:{artist} ", type="track")#['tracks']['items'][0]['id']

# result
# sp.search(q=f"track:{song} artist:{artist}", type='track', limit=1)
print(get_track_id(song, artist))
    # search_spotify(song, artist)
# len(result)

# adjust_parens(song)

05bfbizlM5AX6Mf1RRyMho


In [32]:
unique_billboard_tracks_df[unique_billboard_tracks_df['song']==song]

billboard_1960s

Unnamed: 0,song,artist,year,peak-rank,weeks-on-board,weeks-at-no1,track_id
10,(1-2-3-4-5-6-7) Count The Days,Inez & Charlie Foxx,1968,76,5,,
11,(A Ship Will Come) Ein Schiff Wird Kommen,Lale Anderson,1961,88,4,,
13,(All Of A Sudden) My Heart Sings,Mel Carter,1965,38,7,,
19,(Baby) Hully Gully,The Olympics,1960,72,7,,
29,(Come round Here) Im The One You Need,The Miracles,1966,17,9,,
...,...,...,...,...,...,...,...
36031,Zip Code,The Five Americans,1967,36,7,,
36032,Zip-A-Dee Doo-Dah,Bob B. Soxx And The Blue Jeans,1962,9,7,,
36033,Zip-A-Dee Doo-Dah,Bob B. Soxx And The Blue Jeans,1963,8,13,,
36039,Zorba The Greek,Herb Alpert & The Tijuana Brass,1965,82,1,,


In [33]:
billboard_1960s

Unnamed: 0,song,artist,year,peak-rank,weeks-on-board,weeks-at-no1,track_id
10,(1-2-3-4-5-6-7) Count The Days,Inez & Charlie Foxx,1968,76,5,,
11,(A Ship Will Come) Ein Schiff Wird Kommen,Lale Anderson,1961,88,4,,
13,(All Of A Sudden) My Heart Sings,Mel Carter,1965,38,7,,
19,(Baby) Hully Gully,The Olympics,1960,72,7,,
29,(Come round Here) Im The One You Need,The Miracles,1966,17,9,,
...,...,...,...,...,...,...,...
36031,Zip Code,The Five Americans,1967,36,7,,
36032,Zip-A-Dee Doo-Dah,Bob B. Soxx And The Blue Jeans,1962,9,7,,
36033,Zip-A-Dee Doo-Dah,Bob B. Soxx And The Blue Jeans,1963,8,13,,
36039,Zorba The Greek,Herb Alpert & The Tijuana Brass,1965,82,1,,


In [21]:

billboard_1960s['track_id'] = billboard_1960s[['song', 'artist']].swifter.apply(lambda row:get_track_id(row.song,row.artist),axis=1)
billboard_1960s.to_csv(f'./Resources/billboard_1960s.csv', index=False)

Pandas Apply:   0%|          | 0/7792 [00:00<?, ?it/s]

Trying '(1-2-3-4-5-6-7) Count The Days' by Inez & Charlie Foxx...
(1-2-3-4-5-6-7) Count The Days by Inez & Charlie Foxx was successful!
4wXNtDvLZl55vlQashWQrI
Trying '(A Ship Will Come) Ein Schiff Wird Kommen' by Lale Anderson...
'(A Ship Will Come) Ein Schiff Wird Kommen' by Lale Anderson returned 0 results.
Trying '(A Ship Will Come) Eing  Schiff Wird Kommen' by Lale Anderson...
'(A Ship Will Come) Eing  Schiff Wird Kommen' by Lale Anderson returned 0 results.
Trying 'A Ship Will Come Eing Schiff Wird Kommen' by Lale Anderson...
'A Ship Will Come Eing Schiff Wird Kommen' by Lale Anderson returned 0 results.
Trying 'Eing  Schiff Wird Kommen' by Lale Anderson...
'Eing  Schiff Wird Kommen' by Lale Anderson returned 0 results.
Trying '(All Of A Sudden) My Heart Sings' by Mel Carter...
(All Of A Sudden) My Heart Sings by Mel Carter was successful!
6x5BiQwNlbtisITsEHa8Eu
Trying '(Baby) Hully Gully' by The Olympics...
(Baby) Hully Gully by The Olympics was successful!
2CkbNJooPxUsHG4gFXKhxa

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [35]:
billboard_1970s['track_id'] = billboard_1970s[['song', 'artist']].swifter.apply(lambda row:get_track_id(row.song,row.artist),axis=1)
billboard_1970s.to_csv(f'./Resources/billboard_1970s.csv', index=False)

In [None]:
billboard_1980s['track_id'] = billboard_1980s[['song', 'artist']].swifter.apply(lambda row:get_track_id(row.song,row.artist),axis=1)
billboard_1980s.to_csv(f'./Resources/billboard_1980s.csv', index=False)

In [46]:
billboard_1990s['track_id'] = billboard_1990s[['song', 'artist']].swifter.apply(lambda row:get_track_id(row.song,row.artist),axis=1)
billboard_1990s.to_csv(f'./Resources/billboard_1990s.csv', index=False)

Unnamed: 0,year,peak-rank,weeks-on-board,weeks-at-no1,track_id
0,1978,82,5,,0MzHSJl9bHQiNPufYxJaab
1,1974,58,2,,4ZVWvCUwsOnIGmJMj71RkG
2,1975,9,12,,4ZVWvCUwsOnIGmJMj71RkG
3,1975,66,6,,
4,1978,79,4,,3e7bMUM2jFwEYBgDqWCBDs
...,...,...,...,...,...
6246,1978,73,6,,7jmU8QYtpfgDfi304r7klj
6247,1979,72,8,,7jmU8QYtpfgDfi304r7klj
6248,1972,64,11,,6QkdsyKK6Pq2wWuSgnpgQi
6249,1976,84,5,,0aJHZYjwbfTmeyUWF7zGxI


In [None]:
billboard_2000s['track_id'] = billboard_2000s[['song', 'artist']].swifter.apply(lambda row:get_track_id(row.song,row.artist),axis=1)
billboard_2000s.to_csv(f'./Resources/billboard_2000s.csv', index=False)

In [36]:
billboard_2010s['track_id'] = billboard_2010s[['song', 'artist']].swifter.apply(lambda row:get_track_id(row.song,row.artist),axis=1)
billboard_2010s.to_csv(f'./Resources/billboard_2010s.csv', index=False)

Pandas Apply:   0%|          | 0/5582 [00:00<?, ?it/s]

In [35]:
billboard_2020s['track_id'] = billboard_2020s[['song', 'artist']].swifter.apply(lambda row:get_track_id(row.song,row.artist),axis=1)
billboard_2020s.to_csv(f'./Resources/billboard_2020s.csv', index=False)

Pandas Apply:   0%|          | 0/1479 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [13]:
# df_1960s[df_1960s['track_id']]

df_2020s = pd.read_csv('./Resources/billboard_2020s.csv')
null_rows = df_2020s[df_2020s['track_id'].isnull()]

# null_rows[null_rows['song'].str.contains("\?")]
null_rows


# null_rows['track_id'] = null_rows[['song', 'artist']].swifter.apply(lambda row:get_track_id(row.song,row.artist),axis=1)
# null_rows.to_csv(f'./Resources/null1980s.csv', index=False)



Unnamed: 0,song,artist,year,peak-rank,weeks-on-board,weeks-at-no1,track_id
4,100.mil,J. Cole & Bas,2021,14,2,,
28,95.south,J. Cole,2021,8,3,,
33,AM,Nio Garcia X J Balvin X Bad Bunny,2021,41,10,5.0,
49,All Dat,Moneybagg Yo X Megan Thee Stallion,2020,70,4,,
57,All These N**gas,King Von Featuring Lil Durk,2020,77,1,,
...,...,...,...,...,...,...,...
1472,my.life,"J. Cole, 21 Savage & Morray",2021,2,14,2.0,
1475,pride.is.the.devil,J. Cole & Lil Baby,2021,7,6,,
1476,punchin.the.clock,J. Cole,2021,20,2,,
1477,the.climb.back,J. Cole,2021,25,4,,


In [28]:
missedvalues = pd.read_csv('./Resources/null1980s.csv')


missedvalues = missedvalues.drop(missedvalues.iloc[:,[0,1]], axis=1)

missedvalues.head()

Unnamed: 0,song,artist,year,peak-rank,weeks-on-board,weeks-at-no1,track_id
0,(A Ship Will Come) Ein Schiff Wird Kommen,Lale Anderson,1961,88,4,,
1,(Dance With The) Guitar Man,Duane Eddy and the Rebelettes,1962,12,13,,
2,(Dance With The) Guitar Man,Duane Eddy and the Rebelettes,1963,12,16,,
3,(Doin The) Lovers Leap,Webb Pierce,1960,93,2,,
4,(Hes) The Great Impostor,The Fleetwoods,1961,30,8,,5S96nzXyrDBAwDNfgUwvWW
