In [14]:
import logging
import os
import re
import subprocess
from datetime import datetime

import spotipy
from bs4 import BeautifulSoup
from spotipy.oauth2 import SpotifyOAuth

# Grab the root logger (getLogger() with no name) and lower its threshold to INFO
# so the progress messages logged throughout this notebook are emitted.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Export the latest messages to html
Using a copy of the message exporter from https://github.com/cfinke/OSX-Messages-Exporter, we can export the transcript of the music chat, filtered by chat name and date range.

Exporter Arguments:
```
$ messages-exporter.php [-o|--output_directory output_directory]
                        [-f|--flush]
                        Optionally, flush the existing backup database, essentially starting over from scratch.
                        [-r|--rebuild]
                        Optionally, rebuild the HTML files from the existing database.
                        [-d|--database /path/to/chat/database]
                        Specify an alternate database file (i.e. another chat.db from a backup)
                        [--date-start YYYY-MM-DD]
                        Optionally, specify the first date that should be queried from the Messages database.
                        [--date-stop YYYY-MM-DD]
                        Optionally, specify the last date that should be queried from the Messages database.
                        [-t|--timezone "America/Los_Angeles"]
                        Optionally, supply a timezone to use for any dates and times that are displayed. Defaults to UTC.
                        [-p|--path-template "%Y-%m-%d - _CHAT_TITLE_"]
                        Optionally, supply a strftime-style format string to use for the exported chat files. **Use _CHAT_TITLE_ for the name of the chat.** For example, you can separate your chats into yearly files by using `--path-template "%Y - _CHAT_TITLE_"` or monthly files by using `--path-template "%Y-%m - _CHAT_TITLE_"`. You may also wish to use the date as a suffix so that chats from the same person are all organized together in Finder, in which case you might use `--path-template "_CHAT_TITLE_ - %Y-%m-%d"`
                        [--match "Conversation Title"]
                        Limit the output to conversations that include this argument somewhere in their title.
                        [--match_regex "/^Conversation Title$/"]
                        Limit the output to conversations whose titles match this regular expression.

```

### Required args

In [15]:
# Exporter script that gets invoked with `php`
SCRIPT_PATH = "messages-exporter-copy.php"
# Title of the music chat as it appears in MY local chat database. The chat's given name,
# "Music (A Little Spam)", does not work as a match value. The title is the participant list;
# it is unclear why some participants show up as contact names and others as phone numbers.
MUSIC_CHAT_NAME = "+15404495562, +15405531247, +15405778447, Ideen Ashraf, Jeffrey Smith, Josh Sternfeld, Marshall Hurst, Rustin Ahmadian, Wiatt Bingley"
# Directory the exported HTML transcript is written to
output_dir = "music_chat_exports"
# First day of messages to export
filter_start_date = "2024-06-20" # YYYY-MM-DD format
# Last day of messages to export: today
filter_stop_date = datetime.today().date().isoformat() # YYYY-MM-DD format

### Run the messages exporter

In [16]:
# Define the exporter arguments in a dictionary; a value of None leaves that flag out
args = {
    "--output_directory": output_dir,
    "--path-template": f"music_chat_backup_{filter_start_date}_{filter_stop_date if filter_stop_date else datetime.today().strftime('%Y-%m-%d')}",
    "--date-start": filter_start_date,
    "--date-stop": filter_stop_date,
    "--match": MUSIC_CHAT_NAME,
}

# Build the command as an argument list instead of a shell string, so values containing
# spaces, quotes, "$" or other shell metacharacters reach the exporter unchanged.
command = ["php", SCRIPT_PATH]
command += [f"{arg_name}={value}" for arg_name, value in args.items() if value is not None]

# Run the exporter and echo whatever it printed into the notebook
completed = subprocess.run(command, capture_output=True, text=True, errors="replace")
if completed.stdout:
    print(completed.stdout)

# Fail loudly on a non-zero exit status: a stale export left over from an earlier run
# would otherwise satisfy the file check below even though this run failed.
if completed.returncode != 0:
    raise RuntimeError(
        f"Messages exporter exited with status {completed.returncode}: {completed.stderr.strip()}"
    )

# double check the file was created
# (an explicit raise gives a real FileNotFoundError and is not stripped under `python -O`)
expected_output_path = os.path.join(args["--output_directory"], args["--path-template"] + ".html")
if not os.path.isfile(expected_output_path):
    raise FileNotFoundError(f"No file was exported to {expected_output_path}")
logging.info(f"Transcript successfully exported to {expected_output_path}")

INFO:root:Transcript successfully exported to music_chat_exports/music_chat_backup_2024-06-20_2024-07-01.html


# Update the playlist
Now we use the transcript we just exported to extract all Spotify track links sent within the filtered time frame. These tracks are then checked against the current list of tracks in the playlist, and any new ones are added to the playlist. Then we update the playlist description to reflect when it was last updated.

NOTE: We only add tracks that were sent in the chat to the playlist. Links to albums, artists, playlists, etc. are ignored.

### Load in the export

In [17]:
# Decode explicitly rather than relying on the platform's default encoding, which varies
# between machines. NOTE(review): assumes the exporter writes UTF-8 HTML — confirm.
with open(expected_output_path, 'r', encoding='utf-8') as file:
    html_str = file.read()
soup = BeautifulSoup(html_str, 'html.parser')

# soup.title is None when the document has no <title>; don't crash just to log a name
conversation_title = soup.title.text if soup.title is not None else "(no title)"
logging.info(f"Conversation loaded: {conversation_title}")

INFO:root:Conversation loaded: Conversation: music_chat_backup_2024-06-20_2024-07-01


### Filter out all tracks

In [18]:
# Find all <p> tags with class 'm' whose text starts with 'https://open.spotify.com/track'.
# The dots are escaped so they only match a literal "." instead of any character.
html_links = soup.find_all('p', class_='m', string=re.compile(r'^https://open\.spotify\.com/track'))
spotify_track_urls = [link.text.strip() for link in html_links]
logging.info(f"{len(spotify_track_urls)} Spotify track urls found")

# The track ID is the last path segment of the URL with any "?..." query string removed
track_ids = [url.split('/')[-1].split('?')[0] for url in spotify_track_urls]


# Filter to only unique track ids
# dict.fromkeys de-duplicates while keeping first-seen order, so the chronological order
# that the tracks were sent in is maintained (a plain set would lose it)
unique_ids = list(dict.fromkeys(track_ids))

logging.info(f"{len(unique_ids)} Unique IDs found")

INFO:root:51 Spotify track urls found
INFO:root:49 Unique IDs found


### Build the Spotify API connection

In [19]:
# how we use dotenv in a jupyter notebook
%load_ext dotenv
%dotenv
    
SPOTIFY_CLIENT_ID = os.getenv('SPOTIFY_CLIENT_ID')
SPOTIFY_CLIENT_SECRET = os.getenv('SPOTIFY_CLIENT_SECRET')
SPOTIFY_REDIRECT_URI = os.getenv('SPOTIFY_REDIRECT_URI')

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [20]:
# Authentication flow with Spotify: build the OAuth manager first, then hand it to the client
spotify_auth = SpotifyOAuth(
    scope='playlist-modify-public',
    # This should match the redirect URI in your Spotify Developer Dashboard
    redirect_uri=SPOTIFY_REDIRECT_URI,
    client_secret=SPOTIFY_CLIENT_SECRET,
    client_id=SPOTIFY_CLIENT_ID,
)
sp = spotipy.Spotify(auth_manager=spotify_auth)

### Load the tracks currently in the playlist

In [21]:
# Spotify ID of the playlist whose items are read and to which new tracks are added
playlist_id = "7hVMUyFFi6bNtjO4hubtJm"

In [None]:
# Page through the playlist and collect the ID of every track already in it
existing_track_ids = []
limit = 100 # can't go higher
offset_mult = 0 # offset multiplier
while True:
    # derive the offset from `limit` so the page size is defined in exactly one place
    offset = offset_mult*limit
    logging.info(f"loading tracks {offset} to {offset+limit} from playlist {playlist_id}")
    existing_tracks_chunk = sp.playlist_items(playlist_id, fields=["items"], limit=limit, offset=offset)["items"]
    logging.info(f"{len(existing_tracks_chunk)} tracks loaded")

    # if no tracks left, stop loading them
    if len(existing_tracks_chunk) == 0:
        break

    for item in existing_tracks_chunk:
        # Some playlist entries come back with "track": None; report those instead of
        # leaning on a broad `except Exception` to catch the resulting TypeError
        track_info = (item or {}).get("track")
        if not track_info:
            logging.error(f"Failed to get track ID from: {item}")
            continue

        track_id = track_info.get("id")
        # an entry without an ID can never match a track ID from the chat, so skip it
        if track_id is not None:
            existing_track_ids.append(track_id)

    # if fewer than `limit` tracks were loaded this was the last page, don't bother trying again
    if len(existing_tracks_chunk) < limit:
        break

    offset_mult+=1

logging.info(f"{len(existing_track_ids)} tracks currently in playlist {playlist_id}")

INFO:root:loading tracks 0 to 100 from playlist 7hVMUyFFi6bNtjO4hubtJm
INFO:root:100 tracks loaded
INFO:root:loading tracks 100 to 200 from playlist 7hVMUyFFi6bNtjO4hubtJm
INFO:root:100 tracks loaded
INFO:root:loading tracks 200 to 300 from playlist 7hVMUyFFi6bNtjO4hubtJm
INFO:root:100 tracks loaded
INFO:root:loading tracks 300 to 400 from playlist 7hVMUyFFi6bNtjO4hubtJm
INFO:root:100 tracks loaded
INFO:root:loading tracks 400 to 500 from playlist 7hVMUyFFi6bNtjO4hubtJm
INFO:root:100 tracks loaded
ERROR:root:Failed to get track ID from: {'added_at': '2024-06-28T16:39:52Z', 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/12161293301'}, 'href': 'https://api.spotify.com/v1/users/12161293301', 'id': '12161293301', 'type': 'user', 'uri': 'spotify:user:12161293301'}, 'is_local': False, 'primary_color': None, 'track': None, 'video_thumbnail': {'url': None}}
INFO:root:loading tracks 500 to 600 from playlist 7hVMUyFFi6bNtjO4hubtJm
INFO:root:100 tracks loaded
INFO:root:l

### Get list of new tracks to add

In [32]:
# Compare the de-duplicated IDs (not the raw list) against the playlist, so a track that was
# sent more than once in the chat is only added once. A set makes each membership test O(1).
existing_track_id_lookup = set(existing_track_ids)
track_ids_not_in_playlist = [track_id for track_id in unique_ids if track_id not in existing_track_id_lookup]
logging.info(f"{len(track_ids_not_in_playlist)} tracks found that are not in the playlist")

INFO:root:21 tracks found that are not in the playlist


### Add the new tracks to the playlist

In [33]:
# The API expects URIs rather than bare IDs, so prefix each ID with the track URI scheme
track_uris_to_add = ["spotify:track:{}".format(new_id) for new_id in track_ids_not_in_playlist]

In [34]:
def chunk(_list, size):
    """Yield consecutive slices of ``_list`` holding at most ``size`` items each.

    The final slice is shorter when ``len(_list)`` is not a multiple of ``size``;
    an empty ``_list`` yields nothing.
    """
    for i in range(0, len(_list), size):
        yield _list[i:i + size]

# Need to chunk the uris because we can only add 100 at a time
chunked = list(chunk(track_uris_to_add, 100))
logging.info(f"{len(chunked)} chunks made")
# Add tracks to the playlist
# (the loop variable is not named `chunk`, which would rebind the helper function above)
for uri_chunk in chunked:
    sp.playlist_add_items(playlist_id, uri_chunk)
    logging.info(f"Tried to add {len(uri_chunk)} tracks to playlist {playlist_id}")

logging.info(f'You can view your playlist here: https://open.spotify.com/playlist/{playlist_id}')

# update the playlist details so the description shows the date of this update
new_description = f'All songs sent in the "Music (A Little Spam)" group chat since I was added. Last updated on {filter_stop_date}.'
sp.playlist_change_details(playlist_id, description=new_description)

print(f'You can view your playlist here: https://open.spotify.com/playlist/{playlist_id}')

INFO:root:1 chunks made
INFO:root:Tried to add 21 tracks to playlist 7hVMUyFFi6bNtjO4hubtJm
INFO:root:You can view your playlist here: https://open.spotify.com/playlist/7hVMUyFFi6bNtjO4hubtJm


You can view your playlist here: https://open.spotify.com/playlist/7hVMUyFFi6bNtjO4hubtJm
