In [1]:
import os
import pandas as pd
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
from sqlalchemy import create_engine, MetaData, Table, select
import string

# Collect Track Release Dates

Connect to the database built in a previous project and load the track data into a DataFrame.

In [2]:
# Build the SQLAlchemy connection URL from environment variables so that no
# credentials are written into the notebook.
db_user = os.environ['zU']
db_password = os.environ['zP']
# NOTE(review): nothing is inserted between the password and this value, so
# AWS_PROJECT_KOJAK_EC2 presumably already starts with '@host...' -- confirm.
db_location = os.environ['AWS_PROJECT_KOJAK_EC2']

engine_name = 'postgresql://' + db_user + ':' + db_password + db_location
engine = create_engine(engine_name)

In [3]:
# One row per track, joined to its artist and its lyrics. Rows are kept only
# when both tracks.energy and lyrics.lyrics are non-null, the artist
# 'Kid Rock' is excluded, and the result is ordered by track name.
track_query = '''SELECT 
                          tracks.album_name,
                          tracks.album_id,
                          tracks.duration_ms,
                          tracks.track_id,
                          tracks.track_name,
                          artists.artist_name,
                          artists.artist_id,
                          lyrics.lyrics
                          FROM tracks
                          JOIN artists ON tracks.artist_id = artists.artist_id
                          JOIN lyrics ON tracks.track_id = lyrics.track_id
                          WHERE tracks.energy IS NOT NULL
                          AND lyrics.lyrics IS NOT NULL
                          AND artists.artist_name != 'Kid Rock'
                          ORDER BY tracks.track_name;'''

df = pd.read_sql_query(sql=track_query, con=engine)

Clean up the DataFrame by correcting capitalization, removing duplicates, and removing tracks that don't have usable lyrics (rows whose lyrics text contains `<span` markup).

In [4]:
# Normalise the capitalisation of every title first, so that copies of the
# same song that differ only in letter case compare equal below.
df['track_name'] = df['track_name'].apply(string.capwords)

# For each (track_name, artist_name) pair keep the first row and drop the
# later repeats, then renumber the index from 0.
is_repeat = df.duplicated(subset=['track_name', 'artist_name'], keep='first')
repeated_rows = df[is_repeat].index
df.drop(repeated_rows, inplace=True)
df.reset_index(drop=True, inplace=True)

In [5]:
# Rows whose lyrics text contains '<span' are discarded, then the index is
# renumbered from 0 so it stays contiguous.
has_span_markup = df['lyrics'].str.contains('<span')
rows_with_markup = df[has_span_markup].index
df.drop(rows_with_markup, inplace=True)
df.reset_index(drop=True, inplace=True)

Connect to the Spotify API and use each track's album release date as the track's release date.

In [6]:
# The Spotify application ID and secret are read from the environment rather
# than being written into the notebook.
spotify_client_id = os.environ['SPOTIFY_CLIENT_ID']
spotify_client_secret = os.environ['SPOTIFY_CLIENT_SECRET']

client_credentials_manager = SpotifyClientCredentials(
    client_id=spotify_client_id,
    client_secret=spotify_client_secret,
)

# Client object used for the Spotify lookups that follow.
spotify = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [None]:
# Look up the release date of every track's album.
#
# Many tracks share an album, so each distinct album ID is requested only
# once, and IDs are sent in batches through the "several albums" endpoint
# instead of issuing one HTTP request per track row.
ALBUM_BATCH_SIZE = 20  # maximum number of IDs Spotify accepts per albums() call

unique_album_ids = df['album_id'].unique().tolist()

album_release_dates = {}
for batch_start in range(0, len(unique_album_ids), ALBUM_BATCH_SIZE):
    id_batch = unique_album_ids[batch_start:batch_start + ALBUM_BATCH_SIZE]
    # Albums come back in the order they were requested, so pair each
    # response entry with the ID that was asked for.
    albums = spotify.albums(id_batch)['albums']
    for album_id, album in zip(id_batch, albums):
        if album is None:
            # An ID Spotify cannot resolve is returned as a null entry; fail
            # loudly rather than silently recording a missing date.
            raise LookupError('Spotify returned no album for id %r' % (album_id,))
        album_release_dates[album_id] = album['release_date']

# One entry per row of df, in row order.
release_dates = df['album_id'].map(album_release_dates).tolist()