In [1]:
# Import necessary libraries
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from api_credentials import client_id, client_secret
import pandas as pd
from functions import get_playlist_tracks_and_artists, find_tracks_positions_in_playlists
import json
import re
import matplotlib.pyplot as plt
from datetime import date, datetime
import logging
from time import sleep
import os

# Configure logging exactly once. `logging.basicConfig` does nothing when the root
# logger already has handlers, so a second call would silently ignore its `format`.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Load the playlists to track from a JSON file. Other cells iterate this dict as
# (playlist_name, playlist_id) pairs.
with open('playlists.json', 'r') as file:
    playlists_dict = json.load(file)

# Initialise the Spotify client with client credentials for public data access
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

### Fetch New Music Friday AU & NZ tracks, then find which additional playlists they are in (and their positions):

In [29]:
 # Spotify playlist ID of New Music Friday AU & NZ
playlist_id = '37i9dQZF1DWT2SPAYawYcO'

# Fetch track names and artist names from New Music Friday AU & NZ.
# Returns a list of tuples, each containing a track name and the concatenated artist names.
# Example: [('Foam', 'Royel Otis'), ('One More Night', 'KUČKA, Flume')]
track_details = get_playlist_tracks_and_artists(sp, playlist_id)

# Use `track_details` from above and `playlists_dict` (loaded from the JSON file)
# to find the position of each track in each of the tracked playlists.

track_positions = find_tracks_positions_in_playlists(sp, track_details, playlists_dict)

`track_positions` example:

```json
{
  "TEXAS HOLD 'EM - Beyoncé": {
    "track_name": "TEXAS HOLD 'EM",
    "artist_name": "Beyoncé",
    "playlists": [
      {"playlist": "Top 50 Australia", "position": 5},
      {"playlist": "New Music Friday AU & NZ", "position": 1},
      {"playlist": "Hot Hits Australia", "position": 10}
    ]
  }
}
```


### Fetch Follower Count For Each Playlist:

In [30]:
# Fetch the follower count of every tracked playlist.
# Maps playlist name -> total follower count.
playlist_followers = {}

for playlist_name, playlist_id in playlists_dict.items():
    playlist = sp.playlist(playlist_id)
    # Validate before indexing: reading playlist['followers']['total'] first would raise
    # KeyError, so the warning below could never be logged.
    followers_info = playlist.get('followers') or {}
    follower_count = followers_info.get('total')
    if follower_count is None:
        logging.warning(f"Followers data missing in the playlist data for {playlist_id}")
        continue  # left out of the dict; the DataFrame cell falls back to 0 via .get()
    playlist_followers[playlist_name] = follower_count
print("Playlist followers data have been fetched")
# # Printing the result

# for playlist_name, followers in playlist_followers.items():
#     print(f"{playlist_name}: {followers:,} followers")

Playlist followers data have been fetched


### Create and save Dataframe with Spotify data:

In [32]:

# Flatten `track_positions` into one record per (track, playlist) placement.
# A playlist with no entry in `playlist_followers` gets a follower count of 0.
rows = [
    {
        'Artist': info['artist_name'],
        'Title': info['track_name'],
        'Playlist': placement['playlist'],
        'Position': placement['position'],
        'Followers': playlist_followers.get(placement['playlist'], 0),
    }
    for info in track_positions.values()
    for placement in info['playlists']
]

# Build the DataFrame in a single step and persist it as streamlit.csv
df = pd.DataFrame(rows)
df.to_csv('streamlit.csv', index=False)

### Capture the `popularity` score the day after NMF releases (Saturday)
#### Tally up Sat - Fri popularity scores for all songs. 

In [82]:
# Reload the per-playlist track data from streamlit.csv (written by the DataFrame cell above)
df = pd.read_csv('streamlit.csv')

Unnamed: 0,Artist,Title,Playlist,Position,Followers
0,SZA,Saturn,Top 50 Australia,13,815731
1,SZA,Saturn,New Music Friday AU & NZ,1,370716
2,SZA,Saturn,Hot Hits Australia,8,1401693
3,SZA,Saturn,Breaking Hits,3,73033
4,Selena Gomez,Love On,New Music Friday AU & NZ,2,370716


In [50]:
def is_correct_track(track, artist, title):
    """Return True when a Spotify search hit is the track that was searched for.

    Args:
        track: Track object from a Spotify search result; must provide ``'name'`` and
            an ``'artists'`` list of dicts that each carry ``'name'``.
        artist: Expected artist string. Either a single name or several names joined
            with ``', '`` (e.g. ``'KUČKA, Flume'``).
        title: Expected track title.

    Returns:
        bool: True if the titles match and ``artist`` equals either the hit's first
        credited artist or all of its credited artists joined with ``', '``.
        Both comparisons ignore case.
    """
    if track['name'].lower() != title.lower():
        return False

    artist_names = [entry['name'].lower() for entry in track['artists']]
    if not artist_names:
        return False  # no credited artist to compare against

    wanted = artist.lower()
    # Callers pass concatenated names for collaborations ('A, B'); comparing only
    # against the first credited artist would reject every multi-artist track.
    return wanted == artist_names[0] or wanted == ', '.join(artist_names)

def update_popularity(artist_title):
    """Look up the Spotify popularity score for an ``'Artist - Title'`` string.

    Args:
        artist_title: String of the form ``'<artist> - <title>'``. It is split on the
            first ``' - '`` only, so the title part may itself contain ``' - '``.

    Returns:
        The ``'popularity'`` value of the top search hit when `is_correct_track`
        accepts it; otherwise None (no hit, a mismatching hit, or an error raised
        during the lookup).

    Raises:
        ValueError: If ``artist_title`` contains no ``' - '``; the split happens
            outside the ``try`` block.
    """
    artist, title = artist_title.split(' - ', 1)
    query = f'artist:{artist} track:{title}'
    try:
        hits = sp.search(q=query, type='track', limit=1)['tracks']['items']
        if not hits:
            logging.info(f"No results for {artist_title}")
            return None
        top_hit = hits[0]
        if is_correct_track(top_hit, artist, title):
            return top_hit['popularity']
        logging.warning(f"No accurate match found for {artist_title}")
    except Exception as e:
        logging.error(f"Error fetching data for {artist_title}: {e}")
        sleep(1)  # brief pause after a failed request; the lookup itself is not retried
    return None

def apply_update_popularity(row):
    """Row-wise adapter for ``DataFrame.apply(..., axis=1)``.

    Reads the row's ``'Artist_Title'`` value and returns whatever
    `update_popularity` returns for it.
    """
    artist_title = row['Artist_Title']
    return update_popularity(artist_title)

In [72]:
# Work on a per-track copy instead of overwriting `df`: later cells add a Date column
# to `df` and archive it with one row per (track, playlist), so de-duplicating `df`
# itself would silently drop those rows on a top-to-bottom run.
popularity_df = (
    df.assign(Artist_Title=df['Artist'] + ' - ' + df['Title'])
    # Drop duplicates right away so each track is looked up only once
    .drop_duplicates(subset='Artist_Title', keep='first')
    .reset_index(drop=True)
)

# Look up the popularity of every unique track (one Spotify search per row)
popularity_df['Popularity'] = popularity_df.apply(apply_update_popularity, axis=1)

# Sort by 'Popularity' in descending order and drop tracks without a score
df_sorted_cleaned = popularity_df.sort_values(by="Popularity", ascending=False).dropna(subset=['Popularity'])

# Keep only 'Artist_Title' and 'Popularity'; .copy() makes final_df an independent frame
final_df = df_sorted_cleaned[['Artist_Title', 'Popularity']].copy()

# Add a 'Date' column with today's date formatted as 'YYYY-MM-DD'
today_date = date.today().strftime('%Y-%m-%d')
final_df.insert(0, 'Date', today_date)

final_df = final_df.reset_index(drop=True)

# Display the top rows of the final DataFrame
final_df.head(10)


2024-02-25 00:52:58,321 - INFO - No results for Missy Higgins - You Should Run
2024-02-25 00:53:05,048 - INFO - No results for Curtis Cole, Mkada - I'm So Lit


Unnamed: 0,Date,Artist_Title,Popularity
0,2024-02-25,LE SSERAFIM - EASY,78.0
1,2024-02-25,IU - Shopper,59.0
2,2024-02-25,Central Cee - I Will,54.0
3,2024-02-25,Kings of Leon - Mustang,53.0
4,2024-02-25,Khruangbin - May Ninth,53.0
5,2024-02-25,Nemzzz - PTSD,48.0
6,2024-02-25,Adrianne Lenker - Fool,48.0
7,2024-02-25,Glass Beams - Mahal - Edit,43.0
8,2024-02-25,Xavier Rudd - World Order - Part 1,39.0
9,2024-02-25,Real Estate - Flowers,34.0


### Create initial `popularity_data` csv file

In [75]:
# # Define the CSV file name
# csv_file_path = 'popularity_data/popularity_data.csv'  # Generic name for ongoing use

# # Initially, save the DataFrame with the header (this step is for the initial setup or if you're starting a new cumulative file)
# final_df.to_csv(csv_file_path, index=False)


In [None]:
# Define the file path
csv_file_path = 'popularity_data/popularity_data.csv'  # cumulative file, appended to on every run

# Make sure the target folder exists; to_csv does not create missing directories
os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)

# Write the header only when starting a new file. An existing but empty file needs
# it too, otherwise the CSV would never get its column names.
file_exists = os.path.isfile(csv_file_path) and os.path.getsize(csv_file_path) > 0

# Append the DataFrame to the CSV file, without the header if the file already has content
final_df.to_csv(csv_file_path, mode='a', index=False, header=not file_exists)



### Fetch Playlist Images:

In [33]:
# Map each playlist name to the URL of its cover image
cover_art_dict = {}

for playlist_name, playlist_id in playlists_dict.items():
    images = sp.playlist(playlist_id)['images']
    # The first entry of `images` is used; a placeholder string marks playlists without any
    if images:
        cover_art_dict[playlist_name] = images[0]['url']
    else:
        cover_art_dict[playlist_name] = 'No image available'


### Fetch Cover Artist Details:

In [34]:
# Map each playlist name to the artist named after "Cover:" in the playlist description
cover_artist_dict = {}

for playlist_name, playlist_id in playlists_dict.items():
    # Fetch playlist data from Spotify
    playlist = sp.playlist(playlist_id)

    # `description` can be present but null; .get()'s default is not used in that case
    # and re.search would raise TypeError on None, so fall back to an empty string.
    playlist_description = playlist.get('description') or ''

    # Case-insensitive search for 'cover:' and capture everything after it up to the end
    match = re.search(r'cover:\s*(.*?)$', playlist_description, re.IGNORECASE)
    if not match:
        continue

    # Trim surrounding whitespace so it does not end up in the stored name
    cover_artist = match.group(1).strip()

    # Skip empty captures and the 'No cover artist found' placeholder
    if cover_artist and cover_artist.lower() != "no cover artist found":
        cover_artist_dict[playlist_name] = cover_artist

### Remove image URLs from `cover_art_dict` for playlists that don't have a cover artist. They will not be needed: only cover art featuring an artist is useful.

In [36]:
# Keep only the cover images of playlists that also have an entry in cover_artist_dict
filtered_cover_art_dict = {
    name: image_url
    for name, image_url in cover_art_dict.items()
    if name in cover_artist_dict
}

### Save the dictionaries to a JSON file for later import into Streamlit.

In [37]:
# Bundle both dictionaries under one top-level object, one key per dictionary
data = dict(
    filtered_cover_art_dict=filtered_cover_art_dict,
    cover_artist_dict=cover_artist_dict,
)

# Serialise with 4-space indentation and write the text to cover_art_data.json
with open('cover_art_data.json', 'w') as f:
    f.write(json.dumps(data, indent=4))

### Add Date to CSV file before storing in SQLite 

In [19]:
# Stamp every row with the snapshot date
date_to_add = datetime.strptime('2024-02-23', '%Y-%m-%d')
# DataFrame.insert raises ValueError when the column already exists, which makes the
# cell fail on a re-run; overwrite the existing column in that case instead.
if 'Date' in df.columns:
    df['Date'] = date_to_add
else:
    df.insert(0, 'Date', date_to_add)
df.head()

Unnamed: 0,Date,Artist,Title,Playlist,Position,Followers
0,2024-02-23,SZA,Saturn,Top 50 Australia,19,815745
1,2024-02-23,SZA,Saturn,New Music Friday AU & NZ,1,370765
2,2024-02-23,SZA,Saturn,Hot Hits Australia,8,1401895
3,2024-02-23,SZA,Saturn,Breaking Hits,3,73070
4,2024-02-23,Selena Gomez,Love On,New Music Friday AU & NZ,2,370765


In [21]:
# Check column dtypes and non-null counts after adding the Date column
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       193 non-null    datetime64[ns]
 1   Artist     193 non-null    object        
 2   Title      193 non-null    object        
 3   Playlist   193 non-null    object        
 4   Position   193 non-null    int64         
 5   Followers  193 non-null    int64         
dtypes: datetime64[ns](1), int64(2), object(3)
memory usage: 9.2+ KB


In [20]:
# Archive the dated snapshot. The archived_nmf_data/ folder must already exist.
# NOTE(review): the last cell writes merged_df to this same path, replacing this file.
df.to_csv('archived_nmf_data/2024-02-23.csv', index=False)

In [42]:
# Path to the JSON file holding the cover art and cover artist dictionaries
json_file_path = 'cover_art_data.json'

# Function to load the JSON file into a DataFrame
def load_json_to_dataframe(file_path):
    """Load the cover-art JSON export into a DataFrame.

    Args:
        file_path: Path to a JSON file holding two objects keyed by playlist name:
            ``'filtered_cover_art_dict'`` (playlist -> image URL) and
            ``'cover_artist_dict'`` (playlist -> cover artist).

    Returns:
        pandas.DataFrame with the columns ``'Playlist'``, ``'Cover Art URL'`` and
        ``'Featured Artist'``, one row per entry of ``'filtered_cover_art_dict'``.
        ``'Featured Artist'`` is None for a playlist that has no entry in
        ``'cover_artist_dict'``.
    """
    with open(file_path, 'r') as file:
        data = json.load(file)

    cover_art = data['filtered_cover_art_dict']
    cover_artists = data['cover_artist_dict']

    # Pair image and artist by playlist name, not by position: zipping the two dicts'
    # values mis-assigns artists when key order differs and fails when lengths differ.
    playlists = list(cover_art)
    dataframe = pd.DataFrame({
        'Playlist': playlists,
        'Cover Art URL': [cover_art[name] for name in playlists],
        'Featured Artist': [cover_artists.get(name) for name in playlists],
    })

    return dataframe

# Build the cover-art DataFrame from the JSON file at `json_file_path`
cover_info_df = load_json_to_dataframe(json_file_path)


In [43]:
# Preview the first rows of the loaded cover-art table
cover_info_df.head()

Unnamed: 0,Playlist,Cover Art URL,Featured Artist
0,New Music Friday AU & NZ,https://i.scdn.co/image/ab67706f000000031ca893...,SZA
1,Hot Hits Australia,https://i.scdn.co/image/ab67706f00000003ea3f74...,Selena Gomez
2,Front Left,https://i.scdn.co/image/ab67706f00000003649831...,Royel Otis
3,A1,https://i.scdn.co/image/ab67706f00000003993725...,Central Cee
4,Dance Generation,https://i.scdn.co/image/ab67706f00000003924d4d...,BENNETT


In [44]:
# Stamp every cover-art row with the snapshot date
date_to_add = datetime.strptime('2024-02-23', '%Y-%m-%d')
# DataFrame.insert raises ValueError when the column already exists, which makes the
# cell fail on a re-run; overwrite the existing column in that case instead.
if 'Date' in cover_info_df.columns:
    cover_info_df['Date'] = date_to_add
else:
    cover_info_df.insert(0, 'Date', date_to_add)

In [51]:
# Preview the cover-art table after adding the Date column.
# NOTE(review): the captured output shows columns Image_URL / Cover_Artist, while
# load_json_to_dataframe creates 'Cover Art URL' / 'Featured Artist'. A rename step
# appears to be missing from the notebook - confirm and restore it.
cover_info_df.head()

Unnamed: 0,Date,Playlist,Image_URL,Cover_Artist
0,2024-02-23,New Music Friday AU & NZ,https://i.scdn.co/image/ab67706f000000031ca893...,SZA
1,2024-02-23,Hot Hits Australia,https://i.scdn.co/image/ab67706f00000003ea3f74...,Selena Gomez
2,2024-02-23,Front Left,https://i.scdn.co/image/ab67706f00000003649831...,Royel Otis
3,2024-02-23,A1,https://i.scdn.co/image/ab67706f00000003993725...,Central Cee
4,2024-02-23,Dance Generation,https://i.scdn.co/image/ab67706f00000003924d4d...,BENNETT


In [52]:
# NOTE(review): the captured output has no Date column although an earlier cell added
# one, so `df` was presumably reloaded in a cell that is no longer present - confirm.
df.head()

Unnamed: 0,Artist,Title,Playlist,Position,Followers
0,SZA,Saturn,Top 50 Australia,19,815744
1,SZA,Saturn,New Music Friday AU & NZ,1,370765
2,SZA,Saturn,Hot Hits Australia,8,1401902
3,SZA,Saturn,Breaking Hits,3,73071
4,Selena Gomez,Love On,New Music Friday AU & NZ,2,370765


In [54]:
# Stamp every row with the snapshot date
date_to_add = datetime.strptime('2024-02-23', '%Y-%m-%d')
# An earlier cell performs the same insert on `df`; DataFrame.insert raises ValueError
# when the column already exists, so overwrite the existing column instead.
if 'Date' in df.columns:
    df['Date'] = date_to_add
else:
    df.insert(0, 'Date', date_to_add)

In [55]:
# Confirm that Date is now the first column
df.head()

Unnamed: 0,Date,Artist,Title,Playlist,Position,Followers
0,2024-02-23,SZA,Saturn,Top 50 Australia,19,815744
1,2024-02-23,SZA,Saturn,New Music Friday AU & NZ,1,370765
2,2024-02-23,SZA,Saturn,Hot Hits Australia,8,1401902
3,2024-02-23,SZA,Saturn,Breaking Hits,3,73071
4,2024-02-23,Selena Gomez,Love On,New Music Friday AU & NZ,2,370765


In [60]:
# Check dtypes before merging; Date is one of the merge keys
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       193 non-null    datetime64[ns]
 1   Artist     193 non-null    object        
 2   Title      193 non-null    object        
 3   Playlist   193 non-null    object        
 4   Position   193 non-null    int64         
 5   Followers  193 non-null    int64         
dtypes: datetime64[ns](1), int64(2), object(3)
memory usage: 9.2+ KB


In [61]:
# Left join on 'Date' and 'Playlist': every row of `df` is kept, and playlists without
# cover info get NaN in the columns coming from `cover_info_df`
merged_df = pd.merge(df, cover_info_df, on=['Date', 'Playlist'], how='left')

In [62]:
# Preview the merged result; rows for playlists without cover info show NaN
merged_df.head()

Unnamed: 0,Date,Artist,Title,Playlist,Position,Followers,Image_URL,Cover_Artist
0,2024-02-23,SZA,Saturn,Top 50 Australia,19,815744,,
1,2024-02-23,SZA,Saturn,New Music Friday AU & NZ,1,370765,https://i.scdn.co/image/ab67706f000000031ca893...,SZA
2,2024-02-23,SZA,Saturn,Hot Hits Australia,8,1401902,https://i.scdn.co/image/ab67706f00000003ea3f74...,Selena Gomez
3,2024-02-23,SZA,Saturn,Breaking Hits,3,73071,https://i.scdn.co/image/ab67706f00000003684c52...,KAROL G & Tiesto
4,2024-02-23,Selena Gomez,Love On,New Music Friday AU & NZ,2,370765,https://i.scdn.co/image/ab67706f000000031ca893...,SZA


In [59]:
# Archive the merged snapshot. This is the same path an earlier cell wrote `df` to,
# so that file is replaced. The archived_nmf_data/ folder must already exist.
merged_df.to_csv('archived_nmf_data/2024-02-23.csv', index=False)