In [60]:
import re

import pandas as pd
from googleapiclient.discovery import build

import requests
from bs4 import BeautifulSoup

# Load the YouTube Data API developer key from a file kept outside the notebook.
# Surrounding whitespace is stripped because a trailing newline left by an
# editor would otherwise become part of the key passed to the API client.
with open('../secrets.txt', 'r') as file:
    api_key = file.read().strip()

In [15]:
# Read the full song table and narrow it to the columns used in this notebook.
full_csv = pd.read_csv('../data/full.csv')
to_keep = ["track_name", "peak_pos", "album_name", "key", "tempo"]
songs = full_csv.loc[:, to_keep]
songs.head()

Unnamed: 0,track_name,peak_pos,album_name,key,tempo
0,...Ready For It?,4.0,reputation,2.0,160.015
1,'tis the damn season,39.0,evermore,5.0,145.916
2,"""Slut!"" (Taylor's Version) [From The Vault]",3.0,1989 (Taylor's Version),2.0,155.875
3,22,20.0,Red,7.0,104.007
4,22 (Taylor's Version),52.0,Red (Taylor's Version),7.0,103.984


In [16]:
# Every distinct album in `songs` starts out mapped to the "INSERT" sentinel,
# which get_videos_from_playlist_helper treats as "no playlist known".
album_names = songs['album_name'].unique()
album_playlist_mapping = dict.fromkeys(album_names, "INSERT")

# Manually curated YouTube playlist IDs. Original releases reuse the playlist
# of their "(Taylor's Version)" counterpart where the same ID is listed twice.
album_playlist_mapping.update({
    "Midnights": "PLxA687tYuMWgXjGLvPvOWqXWgHEIA2JW6",
    "evermore": "PLmU8B4gZ41ifO00RpWcvv0vx_UEAyfx8U",
    "folklore": "PLmU8B4gZ41icKdheg4d2KZBgDR1wSWfbH",
    "Lover": "PLkqz3S84Tw-RdM_IQwtQrYJpkOmJH2Ngw",
    "reputation": "PLbf3ayzuvZVU9NvZVGv4EE_YLlli0IEpt",
    "1989 (Taylor's Version)": "PLxA687tYuMWiHaLuwtNz6edZMBBgrXTL_",
    "1989": "PLxA687tYuMWiHaLuwtNz6edZMBBgrXTL_",
    "Red (Taylor's Version)": "PLxA687tYuMWitUXCzppRcz3Tvnc763PPB",
    "Red": "PLxA687tYuMWitUXCzppRcz3Tvnc763PPB",
    "Speak Now (Taylor's Version)": "PLxA687tYuMWiQ8--6osItwaFfYm2Pj3ci",
    "Speak Now": "PLxA687tYuMWiQ8--6osItwaFfYm2Pj3ci",
    "Fearless (Taylor's Version)": "PLINj2JJM1jxNeeZ9lih8SNd_NJEkA22u0",
    "Fearless": "PLINj2JJM1jxNeeZ9lih8SNd_NJEkA22u0",
    "The Taylor Swift Holiday Collection": "PL9K04N6k-RchxPTSQ8_9J3kTiINsC3TpW",
    "Taylor Swift": "PL2mL2i76wIyZ0XVYrc6ZK76iE-lgNL9jm",
})

print(album_playlist_mapping)

{'reputation': 'PLbf3ayzuvZVU9NvZVGv4EE_YLlli0IEpt', 'evermore': 'PLmU8B4gZ41ifO00RpWcvv0vx_UEAyfx8U', "1989 (Taylor's Version)": 'PLxA687tYuMWiHaLuwtNz6edZMBBgrXTL_', 'Red': 'PLxA687tYuMWitUXCzppRcz3Tvnc763PPB', "Red (Taylor's Version)": 'PLxA687tYuMWitUXCzppRcz3Tvnc763PPB', 'Taylor Swift': 'PL2mL2i76wIyZ0XVYrc6ZK76iE-lgNL9jm', 'Lover': 'PLkqz3S84Tw-RdM_IQwtQrYJpkOmJH2Ngw', nan: 'INSERT', '1989': 'PLxA687tYuMWiHaLuwtNz6edZMBBgrXTL_', 'Midnights': 'PLxA687tYuMWgXjGLvPvOWqXWgHEIA2JW6', 'folklore': 'PLmU8B4gZ41icKdheg4d2KZBgDR1wSWfbH', 'Speak Now': 'PLxA687tYuMWiQ8--6osItwaFfYm2Pj3ci', "Speak Now (Taylor's Version)": 'PLxA687tYuMWiQ8--6osItwaFfYm2Pj3ci', 'Beautiful Eyes': 'INSERT', 'Fearless': 'PLINj2JJM1jxNeeZ9lih8SNd_NJEkA22u0', "Fearless (Taylor's Version)": 'PLINj2JJM1jxNeeZ9lih8SNd_NJEkA22u0', 'The Taylor Swift Holiday Collection': 'PL9K04N6k-RchxPTSQ8_9J3kTiINsC3TpW'}


In [78]:
# Module-level YouTube Data API v3 client built with the developer key read
# from the secrets file; get_videos_from_playlist_helper uses this global.
youtube = build('youtube', 'v3', developerKey=api_key)

def get_videos_from_playlist_helper(playlist_id):
    """Return watch URLs for every item of one YouTube playlist.

    Uses the module-level ``youtube`` client and follows ``nextPageToken``
    until the API stops returning one.

    Args:
        playlist_id: YouTube playlist ID, or the sentinel ``"INSERT"`` for an
            album that has no playlist assigned.

    Returns:
        A list of ``https://www.youtube.com/watch?v=<id>`` strings in the
        order the API returned them; an empty list for ``"INSERT"``.
    """
    # Sentinel means "no playlist known": skip the API entirely.
    if playlist_id == "INSERT":
        return []

    collected = []
    page_token = None  # None requests the first page
    more_pages = True

    while more_pages:
        response = youtube.playlistItems().list(
            part='id,snippet',
            playlistId=playlist_id,
            maxResults=50,
            pageToken=page_token,
        ).execute()

        # Turn each playlist item's video ID into a watch URL.
        video_ids = (
            entry['snippet']['resourceId']['videoId']
            for entry in response['items']
        )
        collected.extend(
            f'https://www.youtube.com/watch?v={video_id}' for video_id in video_ids
        )

        # A missing or empty token marks the last page.
        page_token = response.get('nextPageToken')
        more_pages = bool(page_token)

    return collected


def get_videos_from_playlists(album_playlist_mapping):
    """Fetch the video URLs of each album's playlist.

    Args:
        album_playlist_mapping: dict mapping album name to playlist ID.

    Returns:
        A dict with the same keys, each mapped to the list returned by
        ``get_videos_from_playlist_helper`` for that album's playlist ID.
    """
    return {
        album: get_videos_from_playlist_helper(playlist)
        for album, playlist in album_playlist_mapping.items()
    }


def get_video_title(url, timeout=10):
    """Fetch a page and return the text of its ``<title>`` element.

    Args:
        url: Address of the page to download (a YouTube watch URL here).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection, which would hang the whole matching loop.

    Returns:
        The title text with ``' - YouTube'`` removed, ``'Title not found'``
        when the page has no ``<title>`` element, or
        ``'Title retrieval failed'`` when any error occurs (the error is
        printed rather than raised, so callers can keep going).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        title_tag = soup.find('title')
        if title_tag is None:
            return 'Title not found'
        return title_tag.text.replace(' - YouTube', '')

    except Exception as e:
        # Deliberately best-effort: report the failure and return a sentinel.
        print(f"An error occurred: {e}")
        return 'Title retrieval failed'


def clean(text):
    """Normalise text for substring comparison.

    Deletes every character that is not a word character (letters, digits,
    underscore) along with all whitespace, then lower-cases what is left.
    """
    stripper = re.compile(r'[^\w\d]|\s')
    return stripper.sub('', text).lower()


def map_videos_to_songs(songs_df, video_urls):
    """Attach a video URL to each song whose name appears in a video title.

    For every video of an album, the page title is fetched and cleaned, then
    compared against the cleaned track names of that album's songs in row
    order. The first still-unassigned song whose cleaned name is a substring
    of the cleaned title receives the URL. Titles that match nothing are
    printed, and the overall matched proportion is printed at the end.

    Args:
        songs_df: DataFrame with at least ``track_name`` and ``album_name``
            columns. It is not modified.
        video_urls: dict mapping album name to a list of video URLs.

    Returns:
        A copy of ``songs_df`` with an added ``video_url`` column; songs with
        no matching video keep an empty string.
    """
    updated_songs_df = songs_df.copy()
    updated_songs_df['video_url'] = ''
    total = 0
    matched = 0
    assigned = set()  # index labels that already received a URL

    for album_name, videos in video_urls.items():
        if not videos:
            continue

        # Album rows and their cleaned names do not depend on the video, so
        # compute them once per album instead of once per video.
        album_rows = updated_songs_df[updated_songs_df['album_name'] == album_name]
        candidates = []
        for index, track_name in album_rows['track_name'].items():
            # Missing names (NaN) cannot be cleaned or matched.
            if not isinstance(track_name, str):
                continue
            track = clean(track_name)
            # An empty key is a substring of every title; never match on it.
            if track:
                candidates.append((index, track))

        for video_url in videos:
            video_title = clean(get_video_title(video_url))
            total += 1

            for index, track in candidates:
                if index in assigned:
                    continue
                if track in video_title:
                    updated_songs_df.loc[index, 'video_url'] = video_url
                    assigned.add(index)
                    matched += 1
                    break
            else:
                # No candidate song claimed this video.
                print(f"{video_title} not matched.")

    # Guard against division by zero when no videos were supplied.
    proportion = matched / total if total else 0.0
    print(f"Prop Matched: {proportion}")
    return updated_songs_df

In [36]:
# Fetch the video URLs of every album's playlist through the YouTube API.
# Albums still mapped to "INSERT" yield an empty list.
video_urls = get_videos_from_playlists(album_playlist_mapping)

In [79]:
# Match videos to songs by cleaned-title substring. This downloads one page
# per video (via get_video_title) and prints every title left unmatched.
mapped_songs = map_videos_to_songs(songs, video_urls)

1hourcleanpopsongsplaylistcleanpopplaylist2024cleanpopmusicmixcleanpopmix not matched.
1hourcleanpopsongsplaylistcleanpopplaylist2024cleanpopmusicmixcleanpopmix not matched.
1hourcleanpopsongsplaylistcleanpopplaylist2024cleanpopmusicmixcleanpopmix not matched.
1hourcleanpopsongsplaylistcleanpopplaylist2024cleanpopmusicmixcleanpopmix not matched.
1hourcleanpopsongsplaylistcleanpopplaylist2024cleanpopmusicmixcleanpopmix not matched.
ronantaylorsversion not matched.
1hourcleanpopsongsplaylistcleanpopplaylist2024cleanpopmusicmixcleanpopmix not matched.
bettermantaylorsversionfromthevault not matched.
nothingnewtaylorsversionfromthevault not matched.
babetaylorsversionfromthevault not matched.
messageinabottletaylorsversionfromthevault not matched.
ibetyouthinkaboutmetaylorsversionfromthevault not matched.
foreverwintertaylorsversionfromthevault not matched.
runtaylorsversionfromthevault not matched.
theveryfirstnighttaylorsversionfromthevault not matched.
alltoowell10minuteversiontaylorsve

In [163]:
# Manual patch: set the video URL by hand for the first row with this track name.
s = "Two Is Better Than One"
index_to_update = mapped_songs.index[mapped_songs['track_name'] == s][0]
mapped_songs.at[index_to_update, 'video_url'] = "https://www.youtube.com/watch?v=vSursMeCec4&pp=ygUdVHdvIGlzIGJldHRlciB0aGFuIG9uZSB0YXlsb3I%3D"


In [164]:
# Show the songs that still have no video URL (empty string).
mapped_songs[mapped_songs["video_url"] == ""]

Unnamed: 0,track_name,peak_pos,album_name,key,tempo,video_url
135,I'm Only Me When I'm With You,,Taylor Swift,8.0,143.964,
136,I'm Only Me When I'm With You,,Beautiful Eyes,,,
137,I'm Only Me When I'm With You,,Beautiful Eyes,,,
167,Lover (Remix),,,7.0,205.272,
205,Red (Original Demo Recording),,Red,1.0,124.998,
215,Safe & Sound (Taylor's Version),,,7.0,144.641,
223,Should've Said No (Alternate Version),,Beautiful Eyes,,,
252,Sweeter Than Fiction (Taylor's Version),,1989 (Taylor's Version),,,
260,The Alcott,,,0.0,156.008,
295,Treacherous (Original Demo Recording),,Red,2.0,109.993,


In [None]:
# Persist the song-to-video mapping to audio.csv (relative to the working
# directory) without the index column, so the links need not be re-fetched.
mapped_songs.to_csv('audio.csv', index=False)  # Save the links just in case