## HITS RADIO SCRAP

- In this project, I'll be scraping Top songs on Hits Radio website: https://onlineradiobox.com/ke/hitskenya/. 
- I'll be scraping the song name, artist name then searching the songs on spotify and generating a Playlist.
- I'll be using BeautifulSoup and requests libraries to scrape the website.

### Import libraries

In [1]:
from bs4 import BeautifulSoup
import requests as re
import csv
import os
from datetime import datetime

### Scrap website

In [2]:
# Hits Radio Playlist URL
url = 'https://onlineradiobox.com/ke/hitskenya/playlist/?cs=ke.xfmkenya'

# Requests to fetch tge url
html = re.get(url).text

# Parse the Url to BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

### Find songs and time

In [3]:
# Here I use the class name 'ajax' to find the songs
songs = soup.find_all('a', class_='ajax')

# Here I use the class name 'time--schedule' to find the time it was played
time = soup.find_all('span', class_='time--schedule')

### Get tracks and Get time functions 

In [5]:
# I then develop a function to get the tracks and strip the href
def get_tracks(soup):
    tracks = soup.find_all('a', class_='ajax', href=lambda x: x and x.startswith('/track'))
    return tracks
tracks = get_tracks(soup)

# Function to get time
def get_time(soup):
    time = soup.find_all('span', class_='time--schedule')
    return time
time = get_time(soup)

### Iterate though the get_tracks and get_time function to print corresponding Tracks, Time and Track ID 

In [6]:
# Create an empty list to store track details
track_list = []

# Loop through each track and its corresponding time
for track, t in zip(get_tracks(soup), get_time(soup)):  # Zip combines the track and t(time) elements pairwise
    track_title = track.text.strip()  # Extract track title
    track_href = track.get('href')  # Extract the href link
    track_id = track_href.split('/track/')[-1].strip('/')  # Extract track ID
    time = t.text.strip()  # Extract time

    # Store details in a dictionary and append to list
    track_list.append({
        "Time": time,
        "Track Title": track_title,
        "Track ID": track_id
    })

# Print the stored list (optional)
print(track_list)

[{'Time': 'Live', 'Track Title': 'Steel Banglez - Steel Banglez Fashion Week feat. AJ Tracey MoStack', 'Track ID': '1170572833495223199'}, {'Time': '14:17', 'Track Title': 'YFN Lucci - Both Of Us (feat. Rick Ross Layton Greene)', 'Track ID': '1891131398208258811'}, {'Time': '14:15', 'Track Title': 'KYLE - Optimistic (feat Dougie F)', 'Track ID': '1620510901518302452'}, {'Time': '14:11', 'Track Title': 'Dexta Daps - Wifi (TTRR )', 'Track ID': '432422557519354042'}, {'Time': '14:09', 'Track Title': 'Marshmello x Jonas Brothers - Leave Before You Love Me', 'Track ID': '1963171460897387007'}, {'Time': '14:05', 'Track Title': 'Falz Kamo Mphela Mpura - Squander (Remix) ft. Sayfar', 'Track ID': '1819056258839334610'}, {'Time': '14:02', 'Track Title': 'Mooski - Soul Bleed Clean', 'Track ID': '1242595616819564349'}, {'Time': '14:02', 'Track Title': 'Clean Bandit and Mabel - Tick Tock (feat. 24kGoldn)', 'Track ID': '1602900985190016573'}, {'Time': '12:58', 'Track Title': 'Justin Bieber - Justin 

In [7]:
# Loop through the list of track details and print each track
for track in track_list:
    print(track)

{'Time': 'Live', 'Track Title': 'Steel Banglez - Steel Banglez Fashion Week feat. AJ Tracey MoStack', 'Track ID': '1170572833495223199'}
{'Time': '14:17', 'Track Title': 'YFN Lucci - Both Of Us (feat. Rick Ross Layton Greene)', 'Track ID': '1891131398208258811'}
{'Time': '14:15', 'Track Title': 'KYLE - Optimistic (feat Dougie F)', 'Track ID': '1620510901518302452'}
{'Time': '14:11', 'Track Title': 'Dexta Daps - Wifi (TTRR )', 'Track ID': '432422557519354042'}
{'Time': '14:09', 'Track Title': 'Marshmello x Jonas Brothers - Leave Before You Love Me', 'Track ID': '1963171460897387007'}
{'Time': '14:05', 'Track Title': 'Falz Kamo Mphela Mpura - Squander (Remix) ft. Sayfar', 'Track ID': '1819056258839334610'}
{'Time': '14:02', 'Track Title': 'Mooski - Soul Bleed Clean', 'Track ID': '1242595616819564349'}
{'Time': '14:02', 'Track Title': 'Clean Bandit and Mabel - Tick Tock (feat. 24kGoldn)', 'Track ID': '1602900985190016573'}
{'Time': '12:58', 'Track Title': 'Justin Bieber - Justin Bieber - 

### Export Tracklist to csv

In [8]:
import pandas as pd
# Directory
directory = 'reports'

# Filename with timestamp
timestamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
filename = f'Full_Tracklist_{timestamp}.csv'
file_path = os.path.join(directory, filename)

# Ensure that the directory exists
os.makedirs(directory, exist_ok=True)

# Converting to Pandas df and saving
df = pd.DataFrame(track_list)
df.to_csv(file_path, index=False)

print(f'CSV file saved to {file_path}')

CSV file saved to reports/Full_Tracklist_2025-01-27_14:28:21.csv


### Extract music from the last hour, two hours e.t.c

#### a) First of all we need to get the time range for the whole tracklist

In [9]:
from datetime import datetime

def get_time_range(track_list):
    """
    Get the earliest and latest times in the track list.
    
    :param track_list: List of dictionaries containing track details and times.
    :return: Tuple of (earliest_time, latest_time) in 'HH:MM' format.
    """
    earliest_time = None
    latest_time = None

    # Loop through the track list to check all times
    for track in track_list:
        track_time = track["Time"]

        # If track_time is 'Live', use the current time
        if track_time == 'Live':
            track_time = datetime.now().strftime("%H:%M")

        try:
            # Convert track time to a datetime object
            track_dt = datetime.strptime(track_time, "%H:%M")

            # Update the earliest and latest times
            if earliest_time is None or track_dt < earliest_time:
                earliest_time = track_dt
            if latest_time is None or track_dt > latest_time:
                latest_time = track_dt
        except ValueError:
            # Skip tracks with invalid time data
            print(f"Skipping track with invalid time data: {track['Track Title']} ({track_time})")

    # Return the times in 'HH:MM' format
    return earliest_time.strftime("%H:%M") if earliest_time else None, latest_time.strftime("%H:%M") if latest_time else None


- Print the time range from fetched data

In [10]:
print(f'The time range for this tracklist is: {get_time_range(track_list)}')

The time range for this tracklist is: ('02:07', '14:29')


### b) Filter tracks from the last x amount of hours

In [41]:
from datetime import datetime, timedelta

def get_recent_tracks(track_list, hours=1):
    """
    Extracts tracks from the last 'hours' from the given track list.
    
    :param track_list: List of dictionaries with keys: "Track Title", "Time", "Track ID".
    :param hours: Number of past hours to filter tracks (default: 1).
    :return: Filtered list of track dictionaries.
    """
    recent_tracks = []
    current_time = datetime.now()
    cutoff_time = current_time - timedelta(hours=hours)

    for track in track_list:
        track_title = track["Track Title"]
        track_time = track["Time"]

        # Handle "Live" as current time
        if track_time == "Live":
            track_time = current_time.strftime("%H:%M")

        try:
            # Parse time without assuming the date
            parsed_time = datetime.strptime(track_time, "%H:%M").time()
            
            # Create candidate datetime for today and yesterday
            today_date = current_time.date()
            candidate_dt_today = datetime.combine(today_date, parsed_time)
            candidate_dt_yesterday = candidate_dt_today - timedelta(days=1)

            # Check which candidate is within the cutoff window
            if candidate_dt_today <= current_time and candidate_dt_today >= cutoff_time:
                valid_dt = candidate_dt_today
            elif candidate_dt_yesterday >= cutoff_time:
                valid_dt = candidate_dt_yesterday
            else:
                continue  # Track is outside the window

            recent_tracks.append({
                "Time": track_time,
                "Track Title": track_title,
                "Track ID": track["Track ID"]
            })

        except ValueError:
            print(f"Skipping invalid time format: {track_title} ({track_time})")

    return recent_tracks, hours

- print tracks from the set period using the set time

In [35]:
last_x_hours = 3 
Recent_tracks = get_recent_tracks(track_list,hours = last_x_hours )

- Save the output of the most tracks from the recent hours into a csv file

In [44]:
# import pandas as pd
# Directory
directory = 'reports'

# Filename with timestamp
timestamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
filename = f'Recent_{last_x_hours}H_tracks_{timestamp}.csv'
file_path = os.path.join(directory, filename)

# Ensure that the directory exists
os.makedirs(directory, exist_ok=True)

# Getting recent tracks (using only the list of tracks, not the tuple)
recent_tracks, _ = get_recent_tracks(track_list, hours=last_x_hours)

# Converting to Pandas df and saving
df = pd.DataFrame(recent_tracks)
df.to_csv(file_path, index=False)

print(f'CSV file saved to {file_path}')

CSV file saved to reports/Recent_3H_tracks_2025-01-26_16:41:52.csv


#### b) Filter tracks from a date range start_time to end_time 

In [11]:
from datetime import datetime

def get_tracks_in_range(track_list, start_time, end_time):
    """
    Extracts tracks within a specific time range from the track list.
    
    :param track_list: List of dictionaries containing track details and times.
    :param start_time: Start time in 'HH:MM' format.
    :param end_time: End time in 'HH:MM' format.
    :return: List of dictionaries with track details.
    """
    tracks_in_range = []

    # Convert start and end times to datetime objects
    start_dt = datetime.strptime(start_time, "%H:%M")
    end_dt = datetime.strptime(end_time, "%H:%M")

    for track in track_list:
        track_title = track["Track Title"]
        track_time = track["Time"]

        # Check if track_time is 'Live' (currently playing)
        if track_time == 'Live':
            track_time = datetime.now().strftime("%H:%M")  # Convert 'Live' to current time in HH:MM format

        try:
            # Convert track time to a datetime object
            track_dt = datetime.strptime(track_time, "%H:%M")

            # Check if the track falls within the given time range
            if start_dt <= track_dt <= end_dt:
                tracks_in_range.append({
                    "Time": track_time,
                    "Track Title": track_title,
                    "Track ID": track["Track ID"]
                })
        except ValueError:
            # Skip tracks with invalid time data
            print(f"Skipping track with invalid time data: {track_title} ({track_time})")

    return tracks_in_range

- Initialize start_date and end_date

In [12]:
start_time_x = '06:00'
end_time_y = '10:00'
get_tracks_in_range(track_list, start_time_x,end_time_y)

[{'Time': '09:58',
  'Track Title': 'Don t Clean - Bryson Tiller',
  'Track ID': '666134831798028050'},
 {'Time': '09:55',
  'Track Title': 'ZieZie - Blessed G46 GRIME',
  'Track ID': '738192331242851678'},
 {'Time': '09:52',
  'Track Title': 'Wes Nelson - Nice To Meet Ya (Lyrics) ft Yxng Bane',
  'Track ID': '449979559122487605'},
 {'Time': '09:26',
  'Track Title': 'Cheat Codes - Do It All Over (feat. Marc E. Bassy)',
  'Track ID': '1152998456193454283'},
 {'Time': '09:23',
  'Track Title': 'Diamond Platnumz ft Focalistic Costa Titch Pabi Cooper - Fresh (',
  'Track ID': '2269029313849538892'},
 {'Time': '09:20',
  'Track Title': 'One Acen - Monogram ft. Wauve',
  'Track ID': '1441228879897058741'},
 {'Time': '09:17',
  'Track Title': 'Adekunle Gold - Okay (Afro Pop Vol. 1)',
  'Track ID': '2161857512078794801'},
 {'Time': '09:14',
  'Track Title': 'Rema - Bounce',
  'Track ID': '1152998485684728645'},
 {'Time': '09:11',
  'Track Title': 'NSG Ft. LD - Roadblock',
  'Track ID': '35243

- Save the outputs of the time range into a csv file

In [13]:
# import pandas as pd
# Directory
directory = 'reports'

# Filename with timestamp
timestamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
filename = f'Time_range_{start_time_x}-{end_time_y}H_tracks_{timestamp}.csv'
file_path = os.path.join(directory, filename)

# Ensure that the directory exists
os.makedirs(directory, exist_ok=True)

# Getting recent tracks (usin# import pandas as pd
# Directory
directory = 'reports'

# Filename with timestamp
timestamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
filename = f'Time_range_{start_time_x}-{end_time_y}H_tracks_{timestamp}.csv'
file_path = os.path.join(directory, filename)

# Ensure that the directory exists
os.makedirs(directory, exist_ok=True)

# Getting recent tracks (using only the list of tracks, not the tuple)
time_range_tracks = get_tracks_in_range(track_list, start_time_x,end_time_y)

# Converting to Pandas df and saving
df = pd.DataFrame(time_range_tracks)
df.to_csv(file_path, index=False)

# print(f'CSV file saved to {file_path}') only the list of tracks, not the tuple)
time_range_tracks = get_tracks_in_range(track_list, start_time_x,end_time_y)

# Converting to Pandas df and saving
df = pd.DataFrame(time_range_tracks)
df.to_csv(file_path, index=False)

print(f'CSV file saved to {file_path}')

CSV file saved to reports/Time_range_06:00-10:00H_tracks_2025-01-27_14:31:20.csv


### Spotify’s search API expects a query in this format: 'https://api.spotify.com/v1/search?q=<query>&type=track'


In [14]:
import re
# clean the title
def clean_title(title):
    """Remove noise like (Lyrics), codes, etc."""
    title = re.sub(r'\([^)]*\)|\b\d+.*$', '', title)  # Remove () content and codes
    title = re.sub(r'\s+', ' ', title).strip()
    return title

# Parsing the track_list to a spotify query
def parse_tracklist_to_spotify_query(track_list):
    spotify_queries = []
    for track in track_list:
        track_title = track.get("Track Title", "Unknown Title")
        artist = track.get("Artist", "").strip()

        # Clean the title first
        track_title = clean_title(track_title)

        # Extract artist from title if missing
        if not artist:
            # Split by common delimiters (e.g., " - ", "feat", "ft", "&")
            parts = re.split(r'\s+[-–—]+\s+| feat\b| ft\b| & |/', track_title, flags=re.IGNORECASE)
            possible_artist = parts[0].strip() if parts else "Unknown Artist"
            # Allow apostrophes, hyphens, and & in artist names
            possible_artist = re.sub(r'[^a-zA-Z\s\'\-&]', '', possible_artist)
            artist = possible_artist if possible_artist else "Unknown Artist"

        # Extract featured artists from title (e.g., "Song (feat. Artist)")
        featured = re.findall(r'\(feat[.\s]*([^)]+)\)', track_title, re.IGNORECASE)
        if featured:
            featured_artist = re.sub(r'[^a-zA-Z\s\'\-&]', '', featured[0].strip())
            artist += f", {featured_artist}"

        # Final cleanup
        artist = re.sub(r'\s+', ' ', artist).strip()
        query = f"track:{track_title} artist:{artist}"
        spotify_queries.append(query)

    return spotify_queries

In [15]:
raw_data = parse_tracklist_to_spotify_query(time_range_tracks)
print(raw_data)

['track:Don t Clean - Bryson Tiller artist:Don t Clean', 'track:ZieZie - Blessed G46 GRIME artist:ZieZie', 'track:Wes Nelson - Nice To Meet Ya ft Yxng Bane artist:Wes Nelson', 'track:Cheat Codes - Do It All Over artist:Cheat Codes', 'track:Diamond Platnumz ft Focalistic Costa Titch Pabi Cooper - Fresh ( artist:Diamond Platnumz', 'track:One Acen - Monogram ft. Wauve artist:One Acen', 'track:Adekunle Gold - Okay artist:Adekunle Gold', 'track:Rema - Bounce artist:Rema', 'track:NSG Ft. LD - Roadblock artist:NSG', 'track:Fireboy DML Ed Sheeran - Peru artist:Fireboy DML Ed Sheeran', 'track:D-Block Europe - Ferrari Horses ft. Raye artist:D-Block Europe', 'track:Ariana Grande - One last time By La Mayor artist:Ariana Grande', 'track:Stefflon Don - Can t Let You Go artist:Stefflon Don', 'track:Yxng Bane ft. WSTRN - Fine Wine GRM Daily artist:Yxng Bane', 'track:Headie One - Siberia ft. Burna Boy artist:Headie One', 'track:Wauve - Monzo ft. One Acen artist:Wauve', 'track:Bayanni - Ta Ta Ta artist

In [16]:
for x in raw_data:
    print(x)

track:Don t Clean - Bryson Tiller artist:Don t Clean
track:ZieZie - Blessed G46 GRIME artist:ZieZie
track:Wes Nelson - Nice To Meet Ya ft Yxng Bane artist:Wes Nelson
track:Cheat Codes - Do It All Over artist:Cheat Codes
track:Diamond Platnumz ft Focalistic Costa Titch Pabi Cooper - Fresh ( artist:Diamond Platnumz
track:One Acen - Monogram ft. Wauve artist:One Acen
track:Adekunle Gold - Okay artist:Adekunle Gold
track:Rema - Bounce artist:Rema
track:NSG Ft. LD - Roadblock artist:NSG
track:Fireboy DML Ed Sheeran - Peru artist:Fireboy DML Ed Sheeran
track:D-Block Europe - Ferrari Horses ft. Raye artist:D-Block Europe
track:Ariana Grande - One last time By La Mayor artist:Ariana Grande
track:Stefflon Don - Can t Let You Go artist:Stefflon Don
track:Yxng Bane ft. WSTRN - Fine Wine GRM Daily artist:Yxng Bane
track:Headie One - Siberia ft. Burna Boy artist:Headie One
track:Wauve - Monzo ft. One Acen artist:Wauve
track:Bayanni - Ta Ta Ta artist:Bayanni
track:Maroon artist:Maroon
track:DJ Snake

In [17]:
import re
from thefuzz import fuzz, process
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import unicodedata
import logging


import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Set up Spotify authentication
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=os.getenv("SPOTIFY_CLIENT_ID"),
    client_secret=os.getenv("SPOTIFY_CLIENT_SECRET"),
    redirect_uri="http://localhost:8080/callback",
    scope="playlist-modify-public"
))

class SpotifyTrackMatcher:
    def __init__(self, sp_client):
        """
        Initialize the Spotify Track Matcher with enhanced search capabilities.
        
        :param sp_client: Authenticated Spotify client
        """
        self.sp = sp_client
        self.logger = logging.getLogger(__name__)
        logging.basicConfig(level=logging.INFO)

    def normalize_text(self, text):
        """
        Comprehensive text normalization:
        1. Convert to lowercase
        2. Remove diacritical marks
        3. Remove extra whitespaces
        4. Handle special characters
        
        :param text: Input text to normalize
        :return: Normalized text
        """
        # Remove diacritical marks
        text = unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('ASCII')
        
        # Lowercase and remove extra spaces
        return re.sub(r'\s+', ' ', text.lower()).strip()

    def extract_canonical_features(self, title):
        """
        Extract canonical features from track title:
        1. Remove parenthetical info 
        2. Remove common words
        3. Extract core title elements
        
        :param title: Original track title
        :return: Canonical title features
        """
        # Remove parenthetical content and metadata
        title = re.sub(r'\(.*?\)|\[.*?\]', '', title)
        
        # Remove common words and prefixes
        title = re.sub(r'\b(the|a|an|remix|remaster|live)\b', '', title, flags=re.IGNORECASE)
        
        return self.normalize_text(title)

    def advanced_artist_matching(self, track_candidates, search_artist):
        """
        Advanced artist matching with multiple strategies:
        1. Full artist name match
        2. Partial artist name match
        3. Multiple artist combinations
        
        :param track_candidates: List of track candidates
        :param search_artist: Artist search string
        :return: Best matching track
        """
        normalized_search_artist = self.normalize_text(search_artist)
        
        # Split potential multiple artists
        potential_artists = re.split(r'[&,x]|\bft\.|\bfeat\.', search_artist, flags=re.IGNORECASE)
        potential_artists = [self.normalize_text(artist.strip()) for artist in potential_artists]
        
        best_match = None
        best_score = 0
        
        for track in track_candidates:
            # Extract track artists
            track_artists = [self.normalize_text(artist['name']) for artist in track['artists']]
            
            # Score calculation with multiple strategies
            artist_match_score = max([
                max([fuzz.ratio(pa, ta) for ta in track_artists]) 
                for pa in potential_artists
            ])
            
            # Weighted scoring
            track_score = (
                0.6 * artist_match_score + 
                0.4 * fuzz.ratio(
                    self.extract_canonical_features(track['name']),
                    self.extract_canonical_features(search_artist)
                )
            )
            
            if track_score > best_score:
                best_score = track_score
                best_match = track
        
        # Confidence threshold
        return best_match if best_score > 70 else None

    def search_track(self, title, artist, max_results=20):
        """
        Comprehensive track search with multiple fallback strategies
        
        :param title: Track title
        :param artist: Artist name
        :param max_results: Maximum search results to consider
        :return: Best matching track URI or None
        """
        # Normalize inputs
        normalized_title = self.normalize_text(title)
        normalized_artist = self.normalize_text(artist)
        
        # Search strategies in order of specificity
        search_strategies = [
            f"track:{title} artist:{artist}",  # Most specific
            f"track:{normalized_title}",       # Title-only fallback
            f"artist:{normalized_artist}"      # Artist-only fallback
        ]
        
        for strategy in search_strategies:
            try:
                results = self.sp.search(q=strategy, type='track', limit=max_results)
                track_candidates = results['tracks']['items']
                
                if track_candidates:
                    # Advanced matching
                    best_match = self.advanced_artist_matching(track_candidates, artist)
                    
                    if best_match:
                        self.logger.info(f"Matched: {best_match['name']} - {best_match['artists'][0]['name']}")
                        return best_match['uri']
            
            except Exception as e:
                self.logger.error(f"Search error with strategy {strategy}: {e}")
        
        self.logger.warning(f"No match found for {title} by {artist}")
        return None

def parse_and_match_spotify(sp_client, raw_data):
    """
    Process multiple tracks with enhanced matching
    
    :param sp_client: Spotify client
    :param raw_data: Raw track data
    :return: List of matched track URIs
    """
    matcher = SpotifyTrackMatcher(sp_client)
    uris = []
    
    for entry in raw_data:
        try:
            # Split and clean entry similar to original implementation
            title, artist = parse_entry(entry)
            
            uri = matcher.search_track(title, artist)
            if uri:
                uris.append(uri)
        
        except Exception as e:
            matcher.logger.error(f"Error processing entry {entry}: {e}")
    
    return uris

def parse_entry(entry):
    """
    Parse raw entry into title and artist
    
    :param entry: Raw track entry
    :return: Tuple of (title, artist)
    """
    # Implement similar parsing logic to original code
    # This is a placeholder and should match your specific input format
    parts = entry.split("artist:")
    title = parts[0].replace("track:", "").strip()
    artist = parts[1].strip() if len(parts) > 1 else "Unknown"
    
    return title, artist


In [18]:
track_uris = parse_and_match_spotify(sp, raw_data)
print(f"\nA total of {
    len(track_uris)} out of the {len(raw_data)} played on Radio were matched on Spotify. \nThat's {
        len(track_uris)/len(raw_data)*100:.2F}% of the songs.\n")
# print("Matched URIs:", track_uris)

INFO:__main__:Matched: Fine Girl - ZieZie
INFO:__main__:Matched: Say Nothing - Wes Nelson
INFO:__main__:Matched: Yo Bunny - Cheat Codes Remix - prodbycpkshawn
INFO:__main__:Matched: African Beauty (feat. Omarion) - Diamond Platnumz
INFO:__main__:Matched: Hate on Me (feat. One Acen) - Tion Wayne
INFO:__main__:Matched: Done For Me (feat. Adekunle Gold) - Fridayy
INFO:__main__:Matched: OZEBA - Rema
INFO:__main__:Matched: Nyash (Current & Savings) - NSG
INFO:__main__:Matched: Rocket Science - Clavish
INFO:__main__:Matched: Bang Bang - Jessie J
INFO:__main__:Matched: The Reason Why - JP Cooper
INFO:__main__:Matched: Shape of You - Yxng Bane Remix - Ed Sheeran
INFO:__main__:Matched: The One - Headie One
INFO:__main__:Matched: Pavements - Wauve
INFO:__main__:Matched: Bukhaar - Aroob Khan
INFO:__main__:Matched: Marooned - Maroon
INFO:__main__:Matched: OZEBA - Rema
INFO:__main__:Matched: Radio Silence - Ryan Riback Remix - R3HAB
INFO:__main__:Matched: C'est Cuit - Major Lazer VIP Remix - Major 


A total of 36 out of the 51 played on Radio were matched on Spotify. 
That's 70.59% of the songs.



- Save matched tracks to csv file

In [93]:
# import os
# import pandas as pd
# from datetime import datetime

# Directory
directory = 'reports'

# Filename with timestamp
timestamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
filename = f'Matched_Spotify_tracks_{timestamp}.csv'
file_path = os.path.join(directory, filename)

# Ensure that the directory exists
os.makedirs(directory, exist_ok=True)

# Convert the list of queries into a DataFrame
df = pd.DataFrame(track_uris, columns=["Spotify Query"])

# Save the DataFrame to a CSV file
df.to_csv(file_path, index=False)

print(f'CSV file saved to {file_path}')

CSV file saved to reports/Matched_Spotify_tracks_2025-01-26_19:18:33.csv


In [19]:
# Create a new playlist
playlist_name = f"Hits Radio {timestamp}"
user_id = sp.me()['id']
playlist = sp.user_playlist_create(user_id, playlist_name, public=True)

# Add tracks to the playlist
sp.playlist_add_items(playlist['uri'], track_uris)

print(f"Playlist {playlist_name} created with {len(track_uris)} tracks!")

Playlist Hits Radio 2025-01-27_14:31:20 created with 36 tracks!


In [94]:
# Create a new playlist with a description
timestamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
playlist_name = f"Hits Radio - Morning {timestamp}"
playlist_description = "A curated list of the latest hits on played on Hits Radio from 6-10am everyday, by David Kibet."

# Get the user ID
user_id = sp.me()['id']

# Create the playlist with a description
playlist = sp.user_playlist_create(user_id, playlist_name, public=True, description=playlist_description)

# Add tracks to the playlist
sp.playlist_add_items(playlist['uri'], track_uris)

print(f"Playlist '{playlist_name}' created with {len(track_uris)} tracks!")


Playlist 'Hits Radio - Morning 2025-01-26_19:18:56' created with 37 tracks!
