From 685aa056b4ad1925e5a0d0e0259a313b034366dd Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 14:25:58 +0330 Subject: [PATCH 01/29] add readme.md for apple-music-scraper --- Apple-Music-Scraper/README.md | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 Apple-Music-Scraper/README.md diff --git a/Apple-Music-Scraper/README.md b/Apple-Music-Scraper/README.md new file mode 100644 index 0000000000..a1019ec8ff --- /dev/null +++ b/Apple-Music-Scraper/README.md @@ -0,0 +1,48 @@ +# Apple Music Scraper + +**Apple Music Scraper** is a powerful tool for extracting rich data from Apple Music web pages using web scraping and reverse-engineered structure of `serialized-server-data`. + +This project is unofficial and purely for educational purposes. No official Apple API used. + + +
+ + +## 🔧 Functions + +| Function | Parameters | Description | +|------------------------|------------------------------|------------------------------------------------------| +| `search(keyword)` | `keyword: str` | Search Apple Music for artists, songs, albums, playlists, and videos | +| `song_scrape(url)` | `url: str` | Get metadata and preview URL of a song | +| `album_scrape(url)` | `url: str` | Get album details including songs, artist, similar albums, and videos | +| `playlist_scrape(url)` | `url: str` | Extract song URLs from a playlist | +| `room_scrape(url)` | `url: str` | Extract song URLs from a shared room | +| `video_scrape(url)` | `url: str` | Get video metadata including direct video URL and related content | +| `artist_scrape(url)` | `url: str` | Get artist metadata including top songs, albums, videos, bio, and more | + +Each function returns structured JSON containing metadata, URLs, images, previews, and related data. + + +
+ +## 🧠 Usage + +Just import the scraper and call your function: + +```python + +result = search('night tapes') +artists = result['artists'] + +artist_url = artists[0]['url'] +artist = artist_scrape(artist_url) + +latest_night_tapes_song_url = artist['latest'] + +song = album_scrape(latest_night_tapes_song_url) +song_name = song['title'] +song_cover = song['image'] + +print(f"\nLatest Night Tapes Song: {song_name}\nCover Art: {song_cover}\n") + +``` From fbb0d9388bb7d4dba1874db3d7893529c864861f Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 14:26:30 +0330 Subject: [PATCH 02/29] add apple-music-scraper files --- Apple-Music-Scraper/main.py | 557 +++++++++++++++++++++++++++++++++++ Apple-Music-Scraper/utils.py | 44 +++ 2 files changed, 601 insertions(+) create mode 100644 Apple-Music-Scraper/main.py create mode 100644 Apple-Music-Scraper/utils.py diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py new file mode 100644 index 0000000000..e2e8ba69a2 --- /dev/null +++ b/Apple-Music-Scraper/main.py @@ -0,0 +1,557 @@ +from bs4 import BeautifulSoup +import requests, json +from utils import * + + +def room_scrape(link="https://music.apple.com/us/room/6748797380"): + result = [] + headers = { + "User-Agent": "Mozilla/5.0" + } + + rspn = requests.get(link, headers=headers) + sup = BeautifulSoup(rspn.text, "html.parser") + items = sup.find('script',{"id":"serialized-server-data"}) + our_json = json.loads(items.text) + sections = our_json[0]['data']['sections'] + + for i in sections: + if "copper-track-swoosh" in i['id']: + items = i['items'] + break + else: + items = [] + + for i in items: + song_url = i['playAction']['actionMetrics']['data'][0]['fields']['actionUrl'] + result.append(convert_album_to_song_url(song_url)) + + return result + +def playlist_scrape(link="https://music.apple.com/us/playlist/new-music-daily/pl.2b0e6e332fdf4b7a91164da3162127b5"): + result = [] + headers = { + "User-Agent": "Mozilla/5.0" + } + + rspn = 
requests.get(link, headers=headers) + sup = BeautifulSoup(rspn.text, "html.parser") + items = sup.find('script',{"id":"serialized-server-data"}) + our_json = json.loads(items.text) + sections = our_json[0]['data']['sections'] + + for i in sections: + if "track-list" in i['id']: + items = i['items'] + break + else: + items = [] + + for i in items: + song_url = i['playAction']['actionMetrics']['data'][0]['fields']['actionUrl'] + result.append(convert_album_to_song_url(song_url)) + + return result + +def search(keyword="sasha sloan"): + result = { + 'artists':[], + 'albums':[], + 'songs':[], + 'playlists':[], + 'videos':[] + } + link = "https://music.apple.com/us/search?term="+keyword + + headers = { + "User-Agent": "Mozilla/5.0" + } + + rspn = requests.get(link, headers=headers) + soup = BeautifulSoup(rspn.text, "html.parser") + items = soup.find('script', {'id': 'serialized-server-data'}) + our_json = json.loads(items.text) + sections = our_json[0]['data']['sections'] + + for i in sections: + if "artist" in i['id']: + artists = i + elif "album" in i['id']: + albums = i + elif "song" in i['id']: + songs = i + elif "playlist" in i['id']: + playlists = i + elif "music_video" in i['id']: + videos = i + + try: + artists_result = [] + + for i in artists['items']: + artist = i['title'] + try: + image_url = i['artwork']['dictionary']['url'] + image_width = i['artwork']['dictionary']['width'] + image_height = i[0]['artwork']['dictionary']['height'] + artwork = get_cover(image_url, image_width, image_height) + except: + artwork = "" + + url = i['contentDescriptor']['url'] + artists_result.append({'title':artist, 'url':url, 'image':artwork}) + result['artists'] = artists_result + + except: + pass + + + try: + albums_result = [] + + for i in albums['items']: + song = i['titleLinks'][0]['title'] + artist = i['subtitleLinks'][0]['title'] + try: + image_url = i['artwork']['dictionary']['url'] + image_width = i['artwork']['dictionary']['width'] + image_height = 
i[0]['artwork']['dictionary']['height'] + artwork = get_cover(image_url, image_width, image_height) + except: + artwork = "" + + url = i['contentDescriptor']['url'] + albums_result.append({'title':song, 'artist':artist, 'url':url, 'image':artwork}) + result['albums'] = albums_result + + except: + pass + + + try: + songs_result = [] + + for i in songs['items']: + song = i['title'] + artist = i['subtitleLinks'][0]['title'] + try: + image_url = i['artwork']['dictionary']['url'] + image_width = i['artwork']['dictionary']['width'] + image_height = i[0]['artwork']['dictionary']['height'] + artwork = get_cover(image_url, image_width, image_height) + except: + artwork = "" + + url = i['contentDescriptor']['url'] + songs_result.append({'title':song, 'artist':artist, 'url':url, 'image':artwork}) + result['songs'] = songs_result + except: + pass + + + + try: + playlists_result = [] + + for i in playlists['items']: + song = i['titleLinks'][0]['title'] + artist = i['subtitleLinks'][0]['title'] + try: + image_url = i['artwork']['dictionary']['url'] + image_width = i['artwork']['dictionary']['width'] + image_height = i[0]['artwork']['dictionary']['height'] + artwork = get_cover(image_url, image_width, image_height) + except: + artwork = "" + + url = i['contentDescriptor']['url'] + playlists_result.append({'title':song, 'artist':artist, 'url':url, 'image':artwork}) + result['playlists'] = playlists_result + except: + pass + + + try: + videos_results = [] + + for i in videos['items']: + song = i['titleLinks'][0]['title'] + artist = i['subtitleLinks'][0]['title'] + try: + image_url = i['artwork']['dictionary']['url'] + image_width = i['artwork']['dictionary']['width'] + image_height = i[0]['artwork']['dictionary']['height'] + artwork = get_cover(image_url, image_width, image_height) + except: + artwork = "" + + url = i['contentDescriptor']['url'] + videos_results.append({'title':song, 'artist':artist, 'url':url, 'image':artwork}) + result['videos'] = videos_results + except: + pass 
+ + return result + +def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): + result = { + 'title':'', + 'image':'', + 'kind':'', + 'album': { + 'title':'', + 'url':'' + }, + 'artist': { + 'title':'', + 'url':'' + }, + 'more':[], + 'preview-url':'' + } + + rspn = requests.get(url) + soup = BeautifulSoup(rspn.text, "html.parser") + items = soup.find('script', {'id': 'serialized-server-data'}) + our_json = json.loads(items.text) + + song_details = our_json[0]['data']['sections'][0] + + result['title'] = song_details['items'][0]['title'] + + image_url = song_details['items'][0]['artwork']['dictionary']['url'] + image_width = song_details['items'][0]['artwork']['dictionary']['width'] + image_height = song_details['items'][0]['artwork']['dictionary']['height'] + + result['image'] = get_cover(image_url, image_width, image_height) + + result['kind'] = song_details['presentation']['kind'] + result['album']['title'] = song_details['items'][0]['album'] + result['album']['url'] = song_details['items'][0]['albumLinks'][0]['segue']['actionMetrics']['data'][0]['fields']['actionUrl'] + result['artist']['title'] = song_details['items'][0]['artists'] + result['artist']['url'] = song_details['items'][0]['artistLinks'][0]['segue']['actionMetrics']['data'][0]['fields']['actionUrl'] + + json_tag = soup.find("script", {"id": "schema:song", "type": "application/ld+json"}) + data = json.loads(json_tag.string) + + preview_url = data['audio']['audio']['contentUrl'] + result['preview-url'] = preview_url + + more_songs = our_json[0]['data']['sections'][-1]['items'] + + more_songs_list = [] + + for i in more_songs: + more_songs_list.append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + + result['more'] = more_songs_list + + return result + +def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707585"): + result = { + 'title':'', + 'image':'', + 'caption':'', + 'artist': { + 'title':'', + 'url':'' + }, + 'songs':[], + 'info':'', + 
'more':[], + 'similar':[], + 'videos':[] + } + + headers = { + "User-Agent": "Mozilla/5.0" + } + + rspn = requests.get(url, headers=headers) + soup = BeautifulSoup(rspn.text, "html.parser") + items = soup.find('script', {'id': 'serialized-server-data'}) + our_json = json.loads(items.text) + sections = our_json[0]['data']['sections'] + + index=0 + for i in sections: + if "album-detail" in i['id']: + album_detail_index = index + elif "track-list " in i['id']: + track_list_index = index + elif "video" in i['id']: + video_index = index + elif "more" in i['id']: + more_index = index + elif "you-might-also-like" in i['id']: + similar_index = index + elif "track-list-section" in i['id']: + track_list_section_index = index + index+=1 + + try: + result['title'] = sections[album_detail_index]['items'][0]['title'] + except: + pass + + try: + image_url = sections[album_detail_index]['items'][0]['artwork']['dictionary']['url'] + image_width = sections[album_detail_index]['items'][0]['artwork']['dictionary']['width'] + image_height = sections[album_detail_index]['items'][0]['artwork']['dictionary']['height'] + result['image'] = get_cover(image_url, image_width, image_height) + except: + pass + + try: + result['caption'] = sections[album_detail_index]['items'][0]['modalPresentationDescriptor']['paragraphText'] + except: + pass + + try: + result['artist']['title'] = sections[album_detail_index]['items'][0]['subtitleLinks'][0]['title'] + result['artist']['url'] = sections[album_detail_index]['items'][0]['subtitleLinks'][0]['segue']['actionMetrics']['data'][0]['fields']['actionUrl'] + except: + pass + + try: + album_songs = sections[track_list_index]['items'] + for i in album_songs: + result['songs'].append(convert_album_to_song_url(i['contentDescriptor']['url'])) + except: + pass + + try: + result['info'] = sections[track_list_section_index]['items'][0]['description'] + more_songs = sections[more_index]['items'] + for i in more_songs: + 
result['more'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + try: + similar_songs = sections[similar_index]['items'] + for i in similar_songs: + result['similar'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + try: + videos = sections[video_index]['items'] + for i in videos: + result['videos'].append(i['contentDescriptor']['url']) + except: + pass + + return result + +def video_scrape(url="https://music.apple.com/us/music-video/gucci-mane-visualizer/1810547026"): + result = { + 'title': '', + 'image': '', + 'artist': { + 'title': '', + 'url': '' + }, + 'video-url': '', + 'more': [], + 'similar':[] + } + + headers = { + "User-Agent": "Mozilla/5.0" + } + + res = requests.get(url, headers=headers) + soup = BeautifulSoup(res.text, "html.parser") + items = soup.find('script', {'id': 'serialized-server-data'}) + our_json = json.loads(items.text) + + sections = our_json[0]['data']['sections'] + + for i in sections: + if "music-video-header" in i['id']: + music_video_header = i + elif "more-by-artist" in i['id']: + more = i + elif "more-in-genre" in i['id']: + similar = i + + try: + result['title'] = music_video_header['items'][0]['title'] + except: + pass + + try: + image_url = music_video_header['items'][0]['artwork']['dictionary']['url'] + image_width = music_video_header['items'][0]['artwork']['dictionary']['width'] + image_height = music_video_header['items'][0]['artwork']['dictionary']['height'] + result['image'] = get_cover(image_url, image_width, image_height) + except: + pass + + try: + result['artist']['title'] = music_video_header['items'][0]['subtitleLinks'][0]['title'] + result['artist']['url'] = music_video_header['items'][0]['subtitleLinks'][0]['segue']['actionMetrics']['data'][0]['fields']['actionUrl'] + except: + pass + + try: + json_tag = soup.find("script", {"id": "schema:music-video", "type": "application/ld+json"}) + data = json.loads(json_tag.string) + 
result['video-url'] = data['video']['contentUrl'] + except: + pass + + try: + for i in more['items']: + result['more'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + try: + for i in similar['items']: + result['similar'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + return result + +def artist_scrape(url="https://music.apple.com/us/artist/king-princess/1349968534"): + result = { + 'title':'', + 'image':'', + 'latest':'', + 'top':[], + 'albums':[], + 'singles_and_EP':[], + 'playlists':[], + 'videos':[], + 'similar':[], + 'appears_on':[], + 'more_to_see':[], + 'more_to_hear':[], + 'about':'', + 'info':'', + } + + headers = { + "User-Agent": "Mozilla/5.0" + } + + rspn = requests.get(url, headers=headers) + soup = BeautifulSoup(rspn.text, "html.parser") + items = soup.find('script', {'id': 'serialized-server-data'}) + our_json = json.loads(items.text) + + sections = our_json[0]['data']['sections'] + + for i in sections: + if "artist-detail-header-section" in i['id']: + artist_detail = i + elif "latest-release-and-top-songs" in i['id']: + latest_and_top = i + elif "full-albums" in i['id']: + albums = i + elif "playlists" in i['id']: + playlists = i + elif "music-videos" in i['id']: + videos = i + elif "singles" in i['id']: + singles = i + elif "appears-on" in i['id']: + appears_on = i + elif "more-to-see" in i['id']: + more_to_see = i + elif "more-to-hear" in i['id']: + more_to_hear = i + elif "artist-bio" in i['id']: + bio = i + elif "similar-artists" in i['id']: + similar = i + + try: + result['title'] = artist_detail['items'][0]['title'] + except: + pass + + try: + image_url = artist_detail['items'][0]['artwork']['dictionary']['url'] + image_width = artist_detail['items'][0]['artwork']['dictionary']['width'] + image_height = artist_detail['items'][0]['artwork']['dictionary']['height'] + result['image'] = get_cover(image_url, image_width, image_height) + except: + pass + + try: + 
result['latest'] = latest_and_top['pinnedLeadingItem']['item']['segue']['actionMetrics']['data'][0]['fields']['actionUrl'] + except: + pass + + try: + for i in latest_and_top['items']: + result['top'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + try: + for i in albums['items']: + result['albums'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + try: + result['singles_and_EP'] = get_all_singles(url) + except: + pass + + try: + for i in playlists['items']: + result['playlists'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + try: + for i in videos['items']: + result['videos'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + try: + for i in similar['items']: + result['similar'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + try: + for i in appears_on['items']: + result['appears_on'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + try: + for i in more_to_see['items']: + result['more_to_see'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + try: + for i in more_to_hear['items']: + result['more_to_hear'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) + except: + pass + + try: + result['about'] = bio['items'][0]['modalPresentationDescriptor']['paragraphText'] + except: + pass + + try: + result['info'] = bio['items'][0]['modalPresentationDescriptor']['headerSubtitle'] + except: + pass + + return result + + +print(search()) \ No newline at end of file diff --git a/Apple-Music-Scraper/utils.py b/Apple-Music-Scraper/utils.py new file mode 100644 index 0000000000..2264ba767e --- /dev/null +++ b/Apple-Music-Scraper/utils.py @@ -0,0 +1,44 @@ +import urllib.parse +import requests, json +from bs4 import BeautifulSoup + +def get_cover(url, width, height, 
format="jpg", crop_option=""): + new_url = url.replace("{w}", str(width)) + new_url = new_url.replace("{h}", str(height)) + new_url = new_url.replace("{c}", crop_option) + new_url = new_url.replace("{f}", format) + return new_url + +def convert_album_to_song_url(album_url): + parsed = urllib.parse.urlparse(album_url) + query_params = urllib.parse.parse_qs(parsed.query) + song_id = query_params.get('i', [None])[0] + + if not song_id: + return None + + parts = parsed.path.split('/') + country = parts[1] + title = parts[3] + + return f"https://music.apple.com/{country}/song/{title}/{song_id}" + +def get_all_singles(url="https://music.apple.com/us/artist/king-princess/1349968534"): + result = [] + url = url+"/see-all?section=singles" + + headers = { + "User-Agent": "Mozilla/5.0" + } + + res = requests.get(url, headers=headers) + soup = BeautifulSoup(res.text, "html.parser") + items = soup.find('script', {'id': 'serialized-server-data'}) + our_json = json.loads(items.text) + + sections = our_json[0]['data']['sections'][0]['items'] + + for i in sections: + result.append((i['segue']['actionMetrics']['data'][0]['fields']['actionUrl'])) + + return result \ No newline at end of file From 9cfabde8f66ca8efe5004eb6c12bb8b61db21cc0 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 14:33:09 +0330 Subject: [PATCH 03/29] add requirements.txt file for apple-music-scraper --- Apple-Music-Scraper/README.md | 7 ++++++- Apple-Music-Scraper/requirements.txt | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 Apple-Music-Scraper/requirements.txt diff --git a/Apple-Music-Scraper/README.md b/Apple-Music-Scraper/README.md index a1019ec8ff..1042769aab 100644 --- a/Apple-Music-Scraper/README.md +++ b/Apple-Music-Scraper/README.md @@ -27,8 +27,13 @@ Each function returns structured JSON containing metadata, URLs, images, preview ## 🧠 Usage -Just import the scraper and call your function: +install requirements +```bash +pip install -r requirements.txt +``` 
+ +use scraper! ```python result = search('night tapes') diff --git a/Apple-Music-Scraper/requirements.txt b/Apple-Music-Scraper/requirements.txt new file mode 100644 index 0000000000..d779dbe0dd --- /dev/null +++ b/Apple-Music-Scraper/requirements.txt @@ -0,0 +1,2 @@ +bs4 +requests \ No newline at end of file From d88ae3c3095a8e07375a16f868212ec80885b090 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 14:41:56 +0330 Subject: [PATCH 04/29] update apple-music-scraper readme to follow guidelines --- Apple-Music-Scraper/README.md | 117 +++++++++++++++++++++------------- 1 file changed, 74 insertions(+), 43 deletions(-) diff --git a/Apple-Music-Scraper/README.md b/Apple-Music-Scraper/README.md index 1042769aab..cd997d9cd3 100644 --- a/Apple-Music-Scraper/README.md +++ b/Apple-Music-Scraper/README.md @@ -1,53 +1,84 @@ # Apple Music Scraper -**Apple Music Scraper** is a powerful tool for extracting rich data from Apple Music web pages using web scraping and reverse-engineered structure of `serialized-server-data`. - -This project is unofficial and purely for educational purposes. No official Apple API used. - - -
- - -## 🔧 Functions - -| Function | Parameters | Description | -|------------------------|------------------------------|------------------------------------------------------| -| `search(keyword)` | `keyword: str` | Search Apple Music for artists, songs, albums, playlists, and videos | -| `song_scrape(url)` | `url: str` | Get metadata and preview URL of a song | -| `album_scrape(url)` | `url: str` | Get album details including songs, artist, similar albums, and videos | -| `playlist_scrape(url)` | `url: str` | Extract song URLs from a playlist | -| `room_scrape(url)` | `url: str` | Extract song URLs from a shared room | -| `video_scrape(url)` | `url: str` | Get video metadata including direct video URL and related content | -| `artist_scrape(url)` | `url: str` | Get artist metadata including top songs, albums, videos, bio, and more | - -Each function returns structured JSON containing metadata, URLs, images, previews, and related data. - - -
- -## 🧠 Usage - -install requirements - -```bash -pip install -r requirements.txt +A powerful unofficial scraper for extracting structured metadata from Apple Music web pages using web-scraping techniques and reverse-engineered `serialized-server-data`. +This tool is for **educational purposes only** and does **not** use any official Apple API. + +- **Functionalities** + - Search for artists, songs, albums, playlists, and videos + - Extract song metadata and preview URLs + - Fetch album details including tracks, artist info, similar albums, and videos + - Scrape playlist and shared room song URLs + - Retrieve video metadata and direct video links + - Fetch full artist information including top songs, albums, biography, and more + +--- + +## Setup Instructions + +1. Clone or download the project +2. Install dependencies: + ```bash + pip install -r requirements.txt + ``` +3. Import and use the scraper in your Python script: + ```python + result = search('night tapes') + artists = result['artists'] + + artist_url = artists[0]['url'] + artist = artist_scrape(artist_url) + + latest_night_tapes_song_url = artist['latest'] + + song = album_scrape(latest_night_tapes_song_url) + song_name = song['title'] + song_cover = song['image'] + + print(f"\nLatest Night Tapes Song: {song_name}\nCover Art: {song_cover}\n") + ``` + +--- + +## Detailed Explanation + +Each scraping function processes the `serialized-server-data` embedded in Apple Music’s webpage structure. +The scraper extracts metadata such as: +- Titles, URLs, artwork +- Track lists +- Preview links +- Album/artist relationships +- Related videos or albums +All results are returned as **structured JSON objects** for easy access in your applications. + +--- + +## Output + +The scraper returns JSON structures like: + +```json +{ + "title": "Example Song", + "artist": "Example Artist", + "image": "https://example-image.jpg", + "preview": "https://example-preview.m4a", + "related": [...], + "songs": [...] 
+} ``` -use scraper! -```python +You can log these results, display them in an interface, or process them however you like. -result = search('night tapes') -artists = result['artists'] +--- -artist_url = artists[0]['url'] -artist = artist_scrape(artist_url) +## Author -latest_night_tapes_song_url = artist['latest'] +- [**Abssdghi**](https://github.com/Abssdghi) -song = album_scrape(latest_night_tapes_song_url) -song_name = song['title'] -song_cover = song['image'] +--- -print(f"\nLatest Night Tapes Song: {song_name}\nCover Art: {song_cover}\n") +## Disclaimers -``` +- This project is **not affiliated with Apple Inc.** +- It uses **web scraping** and may break if Apple changes its internal web structure. +- For **educational and personal use only**. Redistribution of scraped content may violate Apple Music’s Terms of Service. From 3e15ab17fdad388c7a5c34fff81aa9e8952c91ae Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 14:45:01 +0330 Subject: [PATCH 05/29] add docstring for main.py in apple-music-scraper --- Apple-Music-Scraper/main.py | 173 +++++++++++++++++++++++++++++++++++- 1 file changed, 170 insertions(+), 3 deletions(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index e2e8ba69a2..cda74e1173 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -4,6 +4,25 @@ def room_scrape(link="https://music.apple.com/us/room/6748797380"): + """ + Scrape a shared Apple Music room and extract song URLs. + + Parameters + ---------- + link : str, optional + URL of the Apple Music room page. Defaults to an example room link. + + Returns + ------- + list[str] + List of converted song URLs extracted from the room. + + Notes + ----- + This function parses the `serialized-server-data` script tag within + the Apple Music room HTML, locates the 'copper-track-swoosh' section, + and extracts track URLs. 
+ """ result = [] headers = { "User-Agent": "Mozilla/5.0" @@ -29,6 +48,24 @@ def room_scrape(link="https://music.apple.com/us/room/6748797380"): return result def playlist_scrape(link="https://music.apple.com/us/playlist/new-music-daily/pl.2b0e6e332fdf4b7a91164da3162127b5"): + """ + Scrape an Apple Music playlist and extract all track URLs. + + Parameters + ---------- + link : str, optional + URL of the Apple Music playlist. Defaults to New Music Daily. + + Returns + ------- + list[str] + List of converted song URLs from the playlist. + + Notes + ----- + Uses the 'track-list' section from Apple Music's internal serialized + server data to extract song action URLs. + """ result = [] headers = { "User-Agent": "Mozilla/5.0" @@ -54,6 +91,28 @@ def playlist_scrape(link="https://music.apple.com/us/playlist/new-music-daily/pl return result def search(keyword="sasha sloan"): + """ + Search Apple Music for artists, songs, albums, playlists and videos. + + Parameters + ---------- + keyword : str, optional + Search query to send to Apple Music. Defaults to "sasha sloan". + + Returns + ------- + dict + Structured JSON-like dictionary containing search results: + - artists + - albums + - songs + - playlists + - videos + + Notes + ----- + Scrapes `serialized-server-data` to access Apple Music's internal search structure. + """ result = { 'artists':[], 'albums':[], @@ -194,6 +253,30 @@ def search(keyword="sasha sloan"): return result def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): + """ + Scrape a single Apple Music song page and extract metadata. + + Parameters + ---------- + url : str, optional + URL of the Apple Music song. Defaults to sample link. + + Returns + ------- + dict + Dictionary containing: + - title + - image (full resolution) + - kind (song type) + - album info (title + URL) + - artist info (title + URL) + - preview-url + - list of more songs + + Notes + ----- + Uses the `schema:song` JSON-LD tag to extract preview URL. 
+ """ result = { 'title':'', 'image':'', @@ -249,6 +332,37 @@ def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): return result def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707585"): + """ + Scrape an Apple Music album page and extract metadata, songs, related albums, videos, etc. + + Parameters + ---------- + url : str, optional + URL of the Apple Music album. Defaults to example album. + + Returns + ------- + dict + Dictionary containing: + - title + - image + - caption/description + - artist info + - song URLs + - album info text + - more songs (same artist) + - similar (recommended) albums + - videos related to the album + + Notes + ----- + Extracts multiple sections such as: + - album-detail + - track-list + - similar albums + - more by artist + - album videos + """ result = { 'title':'', 'image':'', @@ -346,6 +460,30 @@ def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707 return result def video_scrape(url="https://music.apple.com/us/music-video/gucci-mane-visualizer/1810547026"): + """ + Scrape Apple Music music-video page and extract metadata + video file URL. + + Parameters + ---------- + url : str, optional + URL of the Apple Music music-video. Defaults to example. + + Returns + ------- + dict + { + title, + image, + artist: {title, url}, + video-url, + more (same artist), + similar (same genre) + } + + Notes + ----- + Uses JSON-LD block `schema:music-video` to extract the direct video content URL. + """ result = { 'title': '', 'image': '', @@ -418,6 +556,38 @@ def video_scrape(url="https://music.apple.com/us/music-video/gucci-mane-visualiz return result def artist_scrape(url="https://music.apple.com/us/artist/king-princess/1349968534"): + """ + Scrape an Apple Music artist page and extract all available metadata. + + Parameters + ---------- + url : str, optional + Apple Music artist page URL. Defaults to King Princess sample link. 
+ + Returns + ------- + dict + Dictionary containing: + - title + - image + - latest release URL + - list of top songs + - all albums + - singles & EPs + - playlists + - videos + - similar artists + - appears on + - more-to-see (videos) + - more-to-hear (songs) + - about text + - extra info (bio subtitle) + + Notes + ----- + This is the most complex scraper and extracts ~12 different sections + from the artist page. + """ result = { 'title':'', 'image':'', @@ -552,6 +722,3 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 pass return result - - -print(search()) \ No newline at end of file From 2b777ff4137562728b2becdeddff5076c86a5ead Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 14:46:14 +0330 Subject: [PATCH 06/29] add docstring for utils.py in apple-music-scraper --- Apple-Music-Scraper/utils.py | 73 ++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/Apple-Music-Scraper/utils.py b/Apple-Music-Scraper/utils.py index 2264ba767e..44ee097414 100644 --- a/Apple-Music-Scraper/utils.py +++ b/Apple-Music-Scraper/utils.py @@ -3,6 +3,32 @@ from bs4 import BeautifulSoup def get_cover(url, width, height, format="jpg", crop_option=""): + """ + Generate a full Apple Music artwork URL with proper width, height, format, and crop settings. + + Parameters + ---------- + url : str + The original Apple Music artwork template URL containing `{w}`, `{h}`, `{f}`, `{c}`. + width : int or str + Target width of the image. + height : int or str + Target height of the image. + format : str, optional + Image format (jpg, png, etc.). Defaults to `"jpg"`. + crop_option : str, optional + Cropping mode used by Apple Music artwork URLs. Defaults to empty string. + + Returns + ------- + str + Fully formatted artwork URL. + + Notes + ----- + Apple Music uses dynamic artwork URLs where dimensions and format are embedded + in the URL as placeholders such as `{w}`, `{h}`, `{f}`, and `{c}`. 
+ """ new_url = url.replace("{w}", str(width)) new_url = new_url.replace("{h}", str(height)) new_url = new_url.replace("{c}", crop_option) @@ -10,6 +36,33 @@ def get_cover(url, width, height, format="jpg", crop_option=""): return new_url def convert_album_to_song_url(album_url): + """ + Convert an Apple Music album-track URL into a direct Apple Music song URL. + + Parameters + ---------- + album_url : str + Full Apple Music album URL that contains a track ID via the query parameter `?i=...`. + + Returns + ------- + str or None + Direct Apple Music song URL if `i` parameter exists. + Otherwise, returns `None`. + + Examples + -------- + Input: + https://music.apple.com/us/album/song-name/12345?i=67890 + + Output: + https://music.apple.com/us/song/song-name/67890 + + Notes + ----- + Apple Music album pages embed individual song IDs through the query parameter `i`, + which must be extracted and placed into a `/song/` URL. + """ parsed = urllib.parse.urlparse(album_url) query_params = urllib.parse.parse_qs(parsed.query) song_id = query_params.get('i', [None])[0] @@ -24,6 +77,26 @@ def convert_album_to_song_url(album_url): return f"https://music.apple.com/{country}/song/{title}/{song_id}" def get_all_singles(url="https://music.apple.com/us/artist/king-princess/1349968534"): + """ + Fetch all singles & EP URLs from an Apple Music artist page. + + Parameters + ---------- + url : str, optional + Base artist page URL. Defaults to the sample King Princess artist link. + + Returns + ------- + list[str] + A list of Apple Music URLs for all singles & EPs for the artist. + + Notes + ----- + - Apple Music loads singles under the `/see-all?section=singles` endpoint. + - This function retrieves the serialized server data, parses the `items` section, + and extracts the correct song/EP URLs. + - Used internally by `artist_scrape()`. 
+ """ result = [] url = url+"/see-all?section=singles" From adb95bab972f8ee9c023e663b5e1e3b39c2b04ba Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 15:08:04 +0330 Subject: [PATCH 07/29] change code style for ai checks --- Apple-Music-Scraper/main.py | 1208 +++++++++++++++++------------------ 1 file changed, 601 insertions(+), 607 deletions(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index cda74e1173..1c1109bb0b 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -2,723 +2,717 @@ import requests, json from utils import * - def room_scrape(link="https://music.apple.com/us/room/6748797380"): - """ - Scrape a shared Apple Music room and extract song URLs. - - Parameters - ---------- - link : str, optional - URL of the Apple Music room page. Defaults to an example room link. - - Returns - ------- - list[str] - List of converted song URLs extracted from the room. - - Notes - ----- - This function parses the `serialized-server-data` script tag within - the Apple Music room HTML, locates the 'copper-track-swoosh' section, - and extracts track URLs. 
- """ result = [] - headers = { - "User-Agent": "Mozilla/5.0" - } + headers = {"User-Agent": "Mozilla/5.0"} + + try: + rspn = requests.get(link, headers=headers, timeout=10) + rspn.raise_for_status() + except (requests.RequestException, Exception): + return result + + soup = BeautifulSoup(rspn.text, "html.parser") + items_tag = soup.find("script", {"id": "serialized-server-data"}) + if not items_tag: + return result + + try: + data = json.loads(items_tag.text) + sections = data[0]["data"]["sections"] + except (KeyError, IndexError, json.JSONDecodeError): + return result - rspn = requests.get(link, headers=headers) - sup = BeautifulSoup(rspn.text, "html.parser") - items = sup.find('script',{"id":"serialized-server-data"}) - our_json = json.loads(items.text) - sections = our_json[0]['data']['sections'] - - for i in sections: - if "copper-track-swoosh" in i['id']: - items = i['items'] + items = [] + for section in sections: + if "copper-track-swoosh" in section.get("id", ""): + items = section.get("items", []) break - else: - items = [] - - for i in items: - song_url = i['playAction']['actionMetrics']['data'][0]['fields']['actionUrl'] - result.append(convert_album_to_song_url(song_url)) - + + for item in items: + try: + action_url = item["playAction"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + song_url = convert_album_to_song_url(action_url) + if song_url: + result.append(song_url) + except (KeyError, IndexError, TypeError): + continue + return result def playlist_scrape(link="https://music.apple.com/us/playlist/new-music-daily/pl.2b0e6e332fdf4b7a91164da3162127b5"): - """ - Scrape an Apple Music playlist and extract all track URLs. - - Parameters - ---------- - link : str, optional - URL of the Apple Music playlist. Defaults to New Music Daily. - - Returns - ------- - list[str] - List of converted song URLs from the playlist. - - Notes - ----- - Uses the 'track-list' section from Apple Music's internal serialized - server data to extract song action URLs. 
- """ result = [] - headers = { - "User-Agent": "Mozilla/5.0" - } + headers = {"User-Agent": "Mozilla/5.0"} - rspn = requests.get(link, headers=headers) - sup = BeautifulSoup(rspn.text, "html.parser") - items = sup.find('script',{"id":"serialized-server-data"}) - our_json = json.loads(items.text) - sections = our_json[0]['data']['sections'] - - for i in sections: - if "track-list" in i['id']: - items = i['items'] + try: + rspn = requests.get(link, headers=headers, timeout=10) + rspn.raise_for_status() + except (requests.RequestException, Exception): + return result + + soup = BeautifulSoup(rspn.text, "html.parser") + items_tag = soup.find("script", {"id": "serialized-server-data"}) + if not items_tag: + return result + + try: + data = json.loads(items_tag.text) + sections = data[0]["data"]["sections"] + except (KeyError, IndexError, json.JSONDecodeError): + return result + + items = [] + for section in sections: + if "track-list" in section.get("id", ""): + items = section.get("items", []) break - else: - items = [] - for i in items: - song_url = i['playAction']['actionMetrics']['data'][0]['fields']['actionUrl'] - result.append(convert_album_to_song_url(song_url)) - + for item in items: + try: + action_url = item["playAction"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + song_url = convert_album_to_song_url(action_url) + if song_url: + result.append(song_url) + except (KeyError, IndexError, TypeError): + continue + return result def search(keyword="sasha sloan"): - """ - Search Apple Music for artists, songs, albums, playlists and videos. - - Parameters - ---------- - keyword : str, optional - Search query to send to Apple Music. Defaults to "sasha sloan". - - Returns - ------- - dict - Structured JSON-like dictionary containing search results: - - artists - - albums - - songs - - playlists - - videos - - Notes - ----- - Scrapes `serialized-server-data` to access Apple Music's internal search structure. 
- """ + result = {"artists": [], "albums": [], "songs": [], "playlists": [], "videos": []} + link = f"https://music.apple.com/us/search?term={keyword}" + headers = {"User-Agent": "Mozilla/5.0"} + + try: + rspn = requests.get(link, headers=headers, timeout=10) + rspn.raise_for_status() + except (requests.RequestException, Exception): + return result + + soup = BeautifulSoup(rspn.text, "html.parser") + tag = soup.find("script", {"id": "serialized-server-data"}) + if not tag: + return result + + try: + data = json.loads(tag.text) + sections = data[0]["data"]["sections"] + except (KeyError, IndexError, json.JSONDecodeError): + return result + + artists = {"items": []} + albums = {"items": []} + songs_block = {"items": []} + playlists = {"items": []} + videos = {"items": []} + + for sec in sections: + sec_id = sec.get("id", "") + if "artist" in sec_id: + artists = sec + elif "album" in sec_id: + albums = sec + elif "song" in sec_id: + songs_block = sec + elif "playlist" in sec_id: + playlists = sec + elif "music_video" in sec_id: + videos = sec + + # Artists + for item in artists.get("items", []): + try: + url = item["contentDescriptor"]["url"] + title = item.get("title", "") + artwork_dict = item.get("artwork", {}).get("dictionary", {}) + img = get_cover( + artwork_dict.get("url", ""), + artwork_dict.get("width", 0), + artwork_dict.get("height", 0), + ) + result["artists"].append({"title": title, "url": url, "image": img}) + except (KeyError, TypeError): + continue + + # Albums + for item in albums.get("items", []): + try: + url = item["contentDescriptor"]["url"] + title = item["titleLinks"][0]["title"] + artist = item["subtitleLinks"][0]["title"] + artwork_dict = item.get("artwork", {}).get("dictionary", {}) + img = get_cover( + artwork_dict.get("url", ""), + artwork_dict.get("width", 0), + artwork_dict.get("height", 0), + ) + result["albums"].append({"title": title, "artist": artist, "url": url, "image": img}) + except (KeyError, TypeError, IndexError): + continue + 
+ # Songs + for item in songs_block.get("items", []): + try: + url = item["contentDescriptor"]["url"] + title = item.get("title", "") + artist = item["subtitleLinks"][0]["title"] + artwork_dict = item.get("artwork", {}).get("dictionary", {}) + img = get_cover( + artwork_dict.get("url", ""), + artwork_dict.get("width", 0), + artwork_dict.get("height", 0), + ) + result["songs"].append({"title": title, "artist": artist, "url": url, "image": img}) + except (KeyError, TypeError, IndexError): + continue + + # Playlists + for item in playlists.get("items", []): + try: + url = item["contentDescriptor"]["url"] + title = item["titleLinks"][0]["title"] + artist = item["subtitleLinks"][0]["title"] + artwork_dict = item.get("artwork", {}).get("dictionary", {}) + img = get_cover( + artwork_dict.get("url", ""), + artwork_dict.get("width", 0), + artwork_dict.get("height", 0), + ) + result["playlists"].append({"title": title, "artist": artist, "url": url, "image": img}) + except (KeyError, TypeError, IndexError): + continue + + # Videos + for item in videos.get("items", []): + try: + url = item["contentDescriptor"]["url"] + title = item["titleLinks"][0]["title"] + artist = item["subtitleLinks"][0]["title"] + artwork_dict = item.get("artwork", {}).get("dictionary", {}) + img = get_cover( + artwork_dict.get("url", ""), + artwork_dict.get("width", 0), + artwork_dict.get("height", 0), + ) + result["videos"].append({"title": title, "artist": artist, "url": url, "image": img}) + except (KeyError, TypeError, IndexError): + continue + + return result + +def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): result = { - 'artists':[], - 'albums':[], - 'songs':[], - 'playlists':[], - 'videos':[] - } - link = "https://music.apple.com/us/search?term="+keyword - - headers = { - "User-Agent": "Mozilla/5.0" + "title": "", + "image": "", + "kind": "", + "album": {"title": "", "url": ""}, + "artist": {"title": "", "url": ""}, + "more": [], + "preview-url": "", } - - rspn = 
requests.get(link, headers=headers) + + try: + rspn = requests.get(url, timeout=10) + rspn.raise_for_status() + except (requests.RequestException, Exception): + return result + soup = BeautifulSoup(rspn.text, "html.parser") - items = soup.find('script', {'id': 'serialized-server-data'}) - our_json = json.loads(items.text) - sections = our_json[0]['data']['sections'] - - for i in sections: - if "artist" in i['id']: - artists = i - elif "album" in i['id']: - albums = i - elif "song" in i['id']: - songs = i - elif "playlist" in i['id']: - playlists = i - elif "music_video" in i['id']: - videos = i - - try: - artists_result = [] - - for i in artists['items']: - artist = i['title'] - try: - image_url = i['artwork']['dictionary']['url'] - image_width = i['artwork']['dictionary']['width'] - image_height = i[0]['artwork']['dictionary']['height'] - artwork = get_cover(image_url, image_width, image_height) - except: - artwork = "" - - url = i['contentDescriptor']['url'] - artists_result.append({'title':artist, 'url':url, 'image':artwork}) - result['artists'] = artists_result - - except: - pass + tag = soup.find("script", {"id": "serialized-server-data"}) + if not tag: + return result + try: + data = json.loads(tag.text) + sections = data[0]["data"]["sections"] + details = sections[0] + except (KeyError, IndexError, json.JSONDecodeError): + return result try: - albums_result = [] - - for i in albums['items']: - song = i['titleLinks'][0]['title'] - artist = i['subtitleLinks'][0]['title'] - try: - image_url = i['artwork']['dictionary']['url'] - image_width = i['artwork']['dictionary']['width'] - image_height = i[0]['artwork']['dictionary']['height'] - artwork = get_cover(image_url, image_width, image_height) - except: - artwork = "" - - url = i['contentDescriptor']['url'] - albums_result.append({'title':song, 'artist':artist, 'url':url, 'image':artwork}) - result['albums'] = albums_result - - except: - pass + item = details["items"][0] + artwork_dict = item.get("artwork", 
{}).get("dictionary", {}) + except (KeyError, IndexError, TypeError): + return result + result["title"] = item.get("title", "") + + result["image"] = get_cover( + artwork_dict.get("url", ""), + artwork_dict.get("width", 0), + artwork_dict.get("height", 0), + ) + + result["kind"] = details.get("presentation", {}).get("kind", "") + result["album"]["title"] = item.get("album", "") try: - songs_result = [] - - for i in songs['items']: - song = i['title'] - artist = i['subtitleLinks'][0]['title'] - try: - image_url = i['artwork']['dictionary']['url'] - image_width = i['artwork']['dictionary']['width'] - image_height = i[0]['artwork']['dictionary']['height'] - artwork = get_cover(image_url, image_width, image_height) - except: - artwork = "" - - url = i['contentDescriptor']['url'] - songs_result.append({'title':song, 'artist':artist, 'url':url, 'image':artwork}) - result['songs'] = songs_result - except: + result["album"]["url"] = item["albumLinks"][0]["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + except (KeyError, IndexError, TypeError): pass - + result["artist"]["title"] = item.get("artists", "") try: - playlists_result = [] - - for i in playlists['items']: - song = i['titleLinks'][0]['title'] - artist = i['subtitleLinks'][0]['title'] - try: - image_url = i['artwork']['dictionary']['url'] - image_width = i['artwork']['dictionary']['width'] - image_height = i[0]['artwork']['dictionary']['height'] - artwork = get_cover(image_url, image_width, image_height) - except: - artwork = "" - - url = i['contentDescriptor']['url'] - playlists_result.append({'title':song, 'artist':artist, 'url':url, 'image':artwork}) - result['playlists'] = playlists_result - except: + result["artist"]["url"] = item["artistLinks"][0]["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + except (KeyError, IndexError, TypeError): pass + try: + json_tag = soup.find("script", {"id": "schema:song", "type": "application/ld+json"}) + schema_data = json.loads(json_tag.string) + 
result["preview-url"] = schema_data["audio"]["audio"]["contentUrl"] + except (AttributeError, KeyError, TypeError, json.JSONDecodeError): + result["preview-url"] = "" try: - videos_results = [] - - for i in videos['items']: - song = i['titleLinks'][0]['title'] - artist = i['subtitleLinks'][0]['title'] + more_items = sections[-1]["items"] + for m in more_items: try: - image_url = i['artwork']['dictionary']['url'] - image_width = i['artwork']['dictionary']['width'] - image_height = i[0]['artwork']['dictionary']['height'] - artwork = get_cover(image_url, image_width, image_height) - except: - artwork = "" - - url = i['contentDescriptor']['url'] - videos_results.append({'title':song, 'artist':artist, 'url':url, 'image':artwork}) - result['videos'] = videos_results - except: + url = m["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["more"].append(url) + except (KeyError, IndexError, TypeError): + continue + except (KeyError, IndexError, TypeError): pass - - return result -def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): - """ - Scrape a single Apple Music song page and extract metadata. - - Parameters - ---------- - url : str, optional - URL of the Apple Music song. Defaults to sample link. - - Returns - ------- - dict - Dictionary containing: - - title - - image (full resolution) - - kind (song type) - - album info (title + URL) - - artist info (title + URL) - - preview-url - - list of more songs - - Notes - ----- - Uses the `schema:song` JSON-LD tag to extract preview URL. 
- """ - result = { - 'title':'', - 'image':'', - 'kind':'', - 'album': { - 'title':'', - 'url':'' - }, - 'artist': { - 'title':'', - 'url':'' - }, - 'more':[], - 'preview-url':'' - } - - rspn = requests.get(url) - soup = BeautifulSoup(rspn.text, "html.parser") - items = soup.find('script', {'id': 'serialized-server-data'}) - our_json = json.loads(items.text) - - song_details = our_json[0]['data']['sections'][0] - - result['title'] = song_details['items'][0]['title'] - - image_url = song_details['items'][0]['artwork']['dictionary']['url'] - image_width = song_details['items'][0]['artwork']['dictionary']['width'] - image_height = song_details['items'][0]['artwork']['dictionary']['height'] - - result['image'] = get_cover(image_url, image_width, image_height) - - result['kind'] = song_details['presentation']['kind'] - result['album']['title'] = song_details['items'][0]['album'] - result['album']['url'] = song_details['items'][0]['albumLinks'][0]['segue']['actionMetrics']['data'][0]['fields']['actionUrl'] - result['artist']['title'] = song_details['items'][0]['artists'] - result['artist']['url'] = song_details['items'][0]['artistLinks'][0]['segue']['actionMetrics']['data'][0]['fields']['actionUrl'] - - json_tag = soup.find("script", {"id": "schema:song", "type": "application/ld+json"}) - data = json.loads(json_tag.string) - - preview_url = data['audio']['audio']['contentUrl'] - result['preview-url'] = preview_url - - more_songs = our_json[0]['data']['sections'][-1]['items'] - - more_songs_list = [] - - for i in more_songs: - more_songs_list.append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - - result['more'] = more_songs_list - return result def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707585"): - """ - Scrape an Apple Music album page and extract metadata, songs, related albums, videos, etc. - - Parameters - ---------- - url : str, optional - URL of the Apple Music album. Defaults to example album. 
- - Returns - ------- - dict - Dictionary containing: - - title - - image - - caption/description - - artist info - - song URLs - - album info text - - more songs (same artist) - - similar (recommended) albums - - videos related to the album - - Notes - ----- - Extracts multiple sections such as: - - album-detail - - track-list - - similar albums - - more by artist - - album videos - """ result = { - 'title':'', - 'image':'', - 'caption':'', - 'artist': { - 'title':'', - 'url':'' - }, - 'songs':[], - 'info':'', - 'more':[], - 'similar':[], - 'videos':[] - } - - headers = { - "User-Agent": "Mozilla/5.0" + "title": "", + "image": "", + "caption": "", + "artist": {"title": "", "url": ""}, + "songs": [], + "info": "", + "more": [], + "similar": [], + "videos": [], } - rspn = requests.get(url, headers=headers) + headers = {"User-Agent": "Mozilla/5.0"} + + try: + rspn = requests.get(url, headers=headers, timeout=10) + rspn.raise_for_status() + except (requests.RequestException, Exception): + return result + soup = BeautifulSoup(rspn.text, "html.parser") - items = soup.find('script', {'id': 'serialized-server-data'}) - our_json = json.loads(items.text) - sections = our_json[0]['data']['sections'] - - index=0 - for i in sections: - if "album-detail" in i['id']: - album_detail_index = index - elif "track-list " in i['id']: - track_list_index = index - elif "video" in i['id']: - video_index = index - elif "more" in i['id']: - more_index = index - elif "you-might-also-like" in i['id']: - similar_index = index - elif "track-list-section" in i['id']: - track_list_section_index = index - index+=1 - - try: - result['title'] = sections[album_detail_index]['items'][0]['title'] - except: + tag = soup.find("script", {"id": "serialized-server-data"}) + if not tag: + return result + + try: + data = json.loads(tag.text) + sections = data[0]["data"]["sections"] + except (KeyError, IndexError, json.JSONDecodeError): + return result + + album_detail_index = None + track_list_index = None + 
video_index = None + more_index = None + similar_index = None + track_list_section_index = None + + for idx, sec in enumerate(sections): + sec_id = sec.get("id", "") + if "album-detail" in sec_id: + album_detail_index = idx + elif "track-list " in sec_id: + track_list_index = idx + elif "video" in sec_id: + video_index = idx + elif "more" in sec_id: + more_index = idx + elif "you-might-also-like" in sec_id: + similar_index = idx + elif "track-list-section" in sec_id: + track_list_section_index = idx + + # TITLE + try: + item = sections[album_detail_index]["items"][0] + result["title"] = item.get("title", "") + except Exception: pass - + + # IMAGE try: - image_url = sections[album_detail_index]['items'][0]['artwork']['dictionary']['url'] - image_width = sections[album_detail_index]['items'][0]['artwork']['dictionary']['width'] - image_height = sections[album_detail_index]['items'][0]['artwork']['dictionary']['height'] - result['image'] = get_cover(image_url, image_width, image_height) - except: + artwork = item.get("artwork", {}).get("dictionary", {}) + result["image"] = get_cover( + artwork.get("url", ""), + artwork.get("width", 0), + artwork.get("height", 0), + ) + except Exception: pass - + + # CAPTION try: - result['caption'] = sections[album_detail_index]['items'][0]['modalPresentationDescriptor']['paragraphText'] - except: + result["caption"] = item.get("modalPresentationDescriptor", {}).get("paragraphText", "") + except Exception: pass - + + # ARTIST try: - result['artist']['title'] = sections[album_detail_index]['items'][0]['subtitleLinks'][0]['title'] - result['artist']['url'] = sections[album_detail_index]['items'][0]['subtitleLinks'][0]['segue']['actionMetrics']['data'][0]['fields']['actionUrl'] - except: + sl = item.get("subtitleLinks", [])[0] + result["artist"]["title"] = sl.get("title", "") + result["artist"]["url"] = sl["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + except Exception: pass - + + # SONG LIST try: - album_songs = 
sections[track_list_index]['items'] - for i in album_songs: - result['songs'].append(convert_album_to_song_url(i['contentDescriptor']['url'])) - except: + track_items = sections[track_list_index].get("items", []) + for it in track_items: + try: + url = it["contentDescriptor"]["url"] + song_url = convert_album_to_song_url(url) + if song_url: + result["songs"].append(song_url) + except Exception: + continue + except Exception: pass - + + # INFO + MORE SONGS try: - result['info'] = sections[track_list_section_index]['items'][0]['description'] - more_songs = sections[more_index]['items'] - for i in more_songs: - result['more'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - except: + desc_item = sections[track_list_section_index]["items"][0] + result["info"] = desc_item.get("description", "") + + more_items = sections[more_index].get("items", []) + for m in more_items: + try: + url = m["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["more"].append(url) + except Exception: + continue + except Exception: pass - + + # SIMILAR try: - similar_songs = sections[similar_index]['items'] - for i in similar_songs: - result['similar'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - except: + sim_items = sections[similar_index].get("items", []) + for s in sim_items: + try: + url = s["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["similar"].append(url) + except Exception: + continue + except Exception: pass - + + # VIDEOS try: - videos = sections[video_index]['items'] - for i in videos: - result['videos'].append(i['contentDescriptor']['url']) - except: + vid_items = sections[video_index].get("items", []) + for v in vid_items: + try: + url = v["contentDescriptor"]["url"] + result["videos"].append(url) + except Exception: + continue + except Exception: pass - + return result def video_scrape(url="https://music.apple.com/us/music-video/gucci-mane-visualizer/1810547026"): - """ - Scrape Apple 
Music music-video page and extract metadata + video file URL. - - Parameters - ---------- - url : str, optional - URL of the Apple Music music-video. Defaults to example. - - Returns - ------- - dict - { - title, - image, - artist: {title, url}, - video-url, - more (same artist), - similar (same genre) - } - - Notes - ----- - Uses JSON-LD block `schema:music-video` to extract the direct video content URL. - """ result = { - 'title': '', - 'image': '', - 'artist': { - 'title': '', - 'url': '' - }, - 'video-url': '', - 'more': [], - 'similar':[] + "title": "", + "image": "", + "artist": {"title": "", "url": ""}, + "video-url": "", + "more": [], + "similar": [], } - headers = { - "User-Agent": "Mozilla/5.0" - } + headers = {"User-Agent": "Mozilla/5.0"} + + try: + rspn = requests.get(url, headers=headers, timeout=10) + rspn.raise_for_status() + except (requests.RequestException, Exception): + return result + + soup = BeautifulSoup(rspn.text, "html.parser") + tag = soup.find("script", {"id": "serialized-server-data"}) + if not tag: + return result - res = requests.get(url, headers=headers) - soup = BeautifulSoup(res.text, "html.parser") - items = soup.find('script', {'id': 'serialized-server-data'}) - our_json = json.loads(items.text) - - sections = our_json[0]['data']['sections'] - - for i in sections: - if "music-video-header" in i['id']: - music_video_header = i - elif "more-by-artist" in i['id']: - more = i - elif "more-in-genre" in i['id']: - similar = i - - try: - result['title'] = music_video_header['items'][0]['title'] - except: + try: + data = json.loads(tag.text) + sections = data[0]["data"]["sections"] + except (KeyError, IndexError, json.JSONDecodeError): + return result + + music_video_header = None + more = None + similar = None + + for sec in sections: + sec_id = sec.get("id", "") + if "music-video-header" in sec_id: + music_video_header = sec + elif "more-by-artist" in sec_id: + more = sec + elif "more-in-genre" in sec_id: + similar = sec + + # TITLE + 
try: + item = music_video_header["items"][0] + result["title"] = item.get("title", "") + except Exception: pass - + + # IMAGE try: - image_url = music_video_header['items'][0]['artwork']['dictionary']['url'] - image_width = music_video_header['items'][0]['artwork']['dictionary']['width'] - image_height = music_video_header['items'][0]['artwork']['dictionary']['height'] - result['image'] = get_cover(image_url, image_width, image_height) - except: + artwork = item.get("artwork", {}).get("dictionary", {}) + result["image"] = get_cover( + artwork.get("url", ""), + artwork.get("width", 0), + artwork.get("height", 0), + ) + except Exception: pass - + + # ARTIST try: - result['artist']['title'] = music_video_header['items'][0]['subtitleLinks'][0]['title'] - result['artist']['url'] = music_video_header['items'][0]['subtitleLinks'][0]['segue']['actionMetrics']['data'][0]['fields']['actionUrl'] - except: + sl = item.get("subtitleLinks", [])[0] + result["artist"]["title"] = sl.get("title", "") + result["artist"]["url"] = sl["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + except Exception: pass - + + # VIDEO URL try: json_tag = soup.find("script", {"id": "schema:music-video", "type": "application/ld+json"}) - data = json.loads(json_tag.string) - result['video-url'] = data['video']['contentUrl'] - except: + schema_data = json.loads(json_tag.string) + result["video-url"] = schema_data["video"]["contentUrl"] + except (AttributeError, KeyError, TypeError, json.JSONDecodeError): pass - + + # MORE BY ARTIST try: - for i in more['items']: - result['more'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - except: + for m in more.get("items", []): + try: + url = m["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["more"].append(url) + except Exception: + continue + except Exception: pass - + + # SIMILAR try: - for i in similar['items']: - result['similar'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - 
except: + for s in similar.get("items", []): + try: + url = s["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["similar"].append(url) + except Exception: + continue + except Exception: pass - + return result def artist_scrape(url="https://music.apple.com/us/artist/king-princess/1349968534"): - """ - Scrape an Apple Music artist page and extract all available metadata. - - Parameters - ---------- - url : str, optional - Apple Music artist page URL. Defaults to King Princess sample link. - - Returns - ------- - dict - Dictionary containing: - - title - - image - - latest release URL - - list of top songs - - all albums - - singles & EPs - - playlists - - videos - - similar artists - - appears on - - more-to-see (videos) - - more-to-hear (songs) - - about text - - extra info (bio subtitle) - - Notes - ----- - This is the most complex scraper and extracts ~12 different sections - from the artist page. - """ result = { - 'title':'', - 'image':'', - 'latest':'', - 'top':[], - 'albums':[], - 'singles_and_EP':[], - 'playlists':[], - 'videos':[], - 'similar':[], - 'appears_on':[], - 'more_to_see':[], - 'more_to_hear':[], - 'about':'', - 'info':'', + "title": "", + "image": "", + "latest": "", + "top": [], + "albums": [], + "singles_and_EP": [], + "playlists": [], + "videos": [], + "similar": [], + "appears_on": [], + "more_to_see": [], + "more_to_hear": [], + "about": "", + "info": "", } - - headers = { - "User-Agent": "Mozilla/5.0" - } - - rspn = requests.get(url, headers=headers) - soup = BeautifulSoup(rspn.text, "html.parser") - items = soup.find('script', {'id': 'serialized-server-data'}) - our_json = json.loads(items.text) - - sections = our_json[0]['data']['sections'] - - for i in sections: - if "artist-detail-header-section" in i['id']: - artist_detail = i - elif "latest-release-and-top-songs" in i['id']: - latest_and_top = i - elif "full-albums" in i['id']: - albums = i - elif "playlists" in i['id']: - playlists = i - elif "music-videos" in 
i['id']: - videos = i - elif "singles" in i['id']: - singles = i - elif "appears-on" in i['id']: - appears_on = i - elif "more-to-see" in i['id']: - more_to_see = i - elif "more-to-hear" in i['id']: - more_to_hear = i - elif "artist-bio" in i['id']: - bio = i - elif "similar-artists" in i['id']: - similar = i - - try: - result['title'] = artist_detail['items'][0]['title'] - except: - pass - - try: - image_url = artist_detail['items'][0]['artwork']['dictionary']['url'] - image_width = artist_detail['items'][0]['artwork']['dictionary']['width'] - image_height = artist_detail['items'][0]['artwork']['dictionary']['height'] - result['image'] = get_cover(image_url, image_width, image_height) - except: - pass - - try: - result['latest'] = latest_and_top['pinnedLeadingItem']['item']['segue']['actionMetrics']['data'][0]['fields']['actionUrl'] - except: - pass - try: - for i in latest_and_top['items']: - result['top'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - except: - pass + headers = {"User-Agent": "Mozilla/5.0"} try: - for i in albums['items']: - result['albums'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - except: + rspn = requests.get(url, headers=headers, timeout=10) + rspn.raise_for_status() + except (requests.RequestException, Exception): + return result + + soup = BeautifulSoup(rspn.text, "html.parser") + tag = soup.find("script", {"id": "serialized-server-data"}) + if not tag: + return result + + try: + data = json.loads(tag.text) + sections = data[0]["data"]["sections"] + except (KeyError, IndexError, json.JSONDecodeError): + return result + + artist_detail = None + latest_and_top = None + albums = None + playlists = None + videos = None + singles = None + appears_on = None + more_to_see = None + more_to_hear = None + bio = None + similar = None + + for sec in sections: + sec_id = sec.get("id", "") + if "artist-detail-header-section" in sec_id: + artist_detail = sec + elif "latest-release-and-top-songs" in 
sec_id: + latest_and_top = sec + elif "full-albums" in sec_id: + albums = sec + elif "playlists" in sec_id: + playlists = sec + elif "music-videos" in sec_id: + videos = sec + elif "singles" in sec_id: + singles = sec + elif "appears-on" in sec_id: + appears_on = sec + elif "more-to-see" in sec_id: + more_to_see = sec + elif "more-to-hear" in sec_id: + more_to_hear = sec + elif "artist-bio" in sec_id: + bio = sec + elif "similar-artists" in sec_id: + similar = sec + + # HEADER + try: + item = artist_detail["items"][0] + result["title"] = item.get("title", "") + artwork = item.get("artwork", {}).get("dictionary", {}) + result["image"] = get_cover( + artwork.get("url", ""), + artwork.get("width", 0), + artwork.get("height", 0), + ) + except Exception: + pass + + # LATEST + try: + result["latest"] = latest_and_top["pinnedLeadingItem"]["item"]["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + except Exception: + pass + + # TOP SONGS + try: + for it in latest_and_top.get("items", []): + try: + url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["top"].append(url) + except Exception: + continue + except Exception: pass + # ALBUMS try: - result['singles_and_EP'] = get_all_singles(url) - except: + for it in albums.get("items", []): + try: + url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["albums"].append(url) + except Exception: + continue + except Exception: pass + # SINGLES & EP try: - for i in playlists['items']: - result['playlists'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - except: + result["singles_and_EP"] = get_all_singles(url) + except Exception: pass + # PLAYLISTS try: - for i in videos['items']: - result['videos'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - except: + for it in playlists.get("items", []): + try: + url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["playlists"].append(url) + except Exception: + 
continue + except Exception: pass + # VIDEOS try: - for i in similar['items']: - result['similar'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - except: + for it in videos.get("items", []): + try: + url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["videos"].append(url) + except Exception: + continue + except Exception: pass + # SIMILAR try: - for i in appears_on['items']: - result['appears_on'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - except: + for it in similar.get("items", []): + try: + url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["similar"].append(url) + except Exception: + continue + except Exception: pass + # APPEARS ON try: - for i in more_to_see['items']: - result['more_to_see'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - except: + for it in appears_on.get("items", []): + try: + url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["appears_on"].append(url) + except Exception: + continue + except Exception: pass + # MORE TO SEE try: - for i in more_to_hear['items']: - result['more_to_hear'].append(i['segue']['actionMetrics']['data'][0]['fields']['actionUrl']) - except: + for it in more_to_see.get("items", []): + try: + url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["more_to_see"].append(url) + except Exception: + continue + except Exception: pass + # MORE TO HEAR try: - result['about'] = bio['items'][0]['modalPresentationDescriptor']['paragraphText'] - except: + for it in more_to_hear.get("items", []): + try: + url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["more_to_hear"].append(url) + except Exception: + continue + except Exception: pass + # ABOUT try: - result['info'] = bio['items'][0]['modalPresentationDescriptor']['headerSubtitle'] - except: + item = bio["items"][0] + mpd = item.get("modalPresentationDescriptor", {}) + 
result["about"] = mpd.get("paragraphText", "") + result["info"] = mpd.get("headerSubtitle", "") + except Exception: pass return result From 9a1a02c82689631c76e754f305524b1de2aeb524 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 15:09:41 +0330 Subject: [PATCH 08/29] add test func for apple-music-scraper --- Apple-Music-Scraper/main.py | 67 +++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index 1c1109bb0b..370a048c3b 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -716,3 +716,70 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 pass return result + +def test_all_functions(): + """ + Test all scraper functions with sample URLs. + Prints results count / key fields to verify basic functionality. + """ + + print("\n=== TEST: room_scrape ===") + try: + r = room_scrape() + print("Room items:", len(r)) + except Exception as e: + print("room_scrape ERROR:", e) + + print("\n=== TEST: playlist_scrape ===") + try: + p = playlist_scrape() + print("Playlist items:", len(p)) + except Exception as e: + print("playlist_scrape ERROR:", e) + + print("\n=== TEST: search ===") + try: + s = search("night tapes") + print("Artists:", len(s.get("artists", []))) + print("Albums:", len(s.get("albums", []))) + print("Songs:", len(s.get("songs", []))) + print("Playlists:", len(s.get("playlists", []))) + print("Videos:", len(s.get("videos", []))) + except Exception as e: + print("search ERROR:", e) + + print("\n=== TEST: song_scrape ===") + try: + song = song_scrape("https://music.apple.com/us/song/california/1821538031") + print("Song title:", song.get("title")) + print("Preview URL exists:", bool(song.get("preview-url"))) + except Exception as e: + print("song_scrape ERROR:", e) + + print("\n=== TEST: album_scrape ===") + try: + album = album_scrape("https://music.apple.com/us/album/1965/1817707266?i=1817707585") + 
print("Album title:", album.get("title")) + print("Songs:", len(album.get("songs", []))) + except Exception as e: + print("album_scrape ERROR:", e) + + print("\n=== TEST: video_scrape ===") + try: + video = video_scrape("https://music.apple.com/us/music-video/gucci-mane-visualizer/1810547026") + print("Video title:", video.get("title")) + print("Video URL exists:", bool(video.get("video-url"))) + except Exception as e: + print("video_scrape ERROR:", e) + + print("\n=== TEST: artist_scrape ===") + try: + artist = artist_scrape("https://music.apple.com/us/artist/king-princess/1349968534") + print("Artist title:", artist.get("title")) + print("Top songs:", len(artist.get("top", []))) + print("Albums:", len(artist.get("albums", []))) + print("Videos:", len(artist.get("videos", []))) + except Exception as e: + print("artist_scrape ERROR:", e) + + print("\n=== ALL TESTS COMPLETED ===") From d9f30104a0703eb5090b62366a4d536150008002 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 15:12:19 +0330 Subject: [PATCH 09/29] change code style for ai checks --- Apple-Music-Scraper/utils.py | 159 ++++++++++++++--------------------- 1 file changed, 65 insertions(+), 94 deletions(-) diff --git a/Apple-Music-Scraper/utils.py b/Apple-Music-Scraper/utils.py index 44ee097414..8d118cbe0d 100644 --- a/Apple-Music-Scraper/utils.py +++ b/Apple-Music-Scraper/utils.py @@ -1,117 +1,88 @@ import urllib.parse -import requests, json +import requests +import json from bs4 import BeautifulSoup + def get_cover(url, width, height, format="jpg", crop_option=""): """ Generate a full Apple Music artwork URL with proper width, height, format, and crop settings. - - Parameters - ---------- - url : str - The original Apple Music artwork template URL containing `{w}`, `{h}`, `{f}`, `{c}`. - width : int or str - Target width of the image. - height : int or str - Target height of the image. - format : str, optional - Image format (jpg, png, etc.). Defaults to `"jpg"`. 
- crop_option : str, optional - Cropping mode used by Apple Music artwork URLs. Defaults to empty string. - - Returns - ------- - str - Fully formatted artwork URL. - - Notes - ----- - Apple Music uses dynamic artwork URLs where dimensions and format are embedded - in the URL as placeholders such as `{w}`, `{h}`, `{f}`, and `{c}`. """ - new_url = url.replace("{w}", str(width)) - new_url = new_url.replace("{h}", str(height)) - new_url = new_url.replace("{c}", crop_option) - new_url = new_url.replace("{f}", format) - return new_url + if not isinstance(url, str): + return url + + try: + new_url = ( + url.replace("{w}", str(width)) + .replace("{h}", str(height)) + .replace("{c}", crop_option) + .replace("{f}", format) + ) + return new_url + except (TypeError, AttributeError): + return url + def convert_album_to_song_url(album_url): """ Convert an Apple Music album-track URL into a direct Apple Music song URL. - - Parameters - ---------- - album_url : str - Full Apple Music album URL that contains a track ID via the query parameter `?i=...`. - - Returns - ------- - str or None - Direct Apple Music song URL if `i` parameter exists. - Otherwise, returns `None`. - - Examples - -------- - Input: - https://music.apple.com/us/album/song-name/12345?i=67890 - - Output: - https://music.apple.com/us/song/song-name/67890 - - Notes - ----- - Apple Music album pages embed individual song IDs through the query parameter `i`, - which must be extracted and placed into a `/song/` URL. 
""" - parsed = urllib.parse.urlparse(album_url) - query_params = urllib.parse.parse_qs(parsed.query) - song_id = query_params.get('i', [None])[0] + try: + parsed = urllib.parse.urlparse(album_url) + query_params = urllib.parse.parse_qs(parsed.query) + song_id = query_params.get("i", [None])[0] - if not song_id: - return None + if not song_id: + return None + + parts = parsed.path.split("/") + if len(parts) < 4: + return None - parts = parsed.path.split('/') - country = parts[1] - title = parts[3] + country = parts[1] + title = parts[3] + + return f"https://music.apple.com/{country}/song/{title}/{song_id}" + + except (IndexError, KeyError, TypeError, AttributeError, ValueError): + return None - return f"https://music.apple.com/{country}/song/{title}/{song_id}" def get_all_singles(url="https://music.apple.com/us/artist/king-princess/1349968534"): """ Fetch all singles & EP URLs from an Apple Music artist page. - - Parameters - ---------- - url : str, optional - Base artist page URL. Defaults to the sample King Princess artist link. - - Returns - ------- - list[str] - A list of Apple Music URLs for all singles & EPs for the artist. - - Notes - ----- - - Apple Music loads singles under the `/see-all?section=singles` endpoint. - - This function retrieves the serialized server data, parses the `items` section, - and extracts the correct song/EP URLs. - - Used internally by `artist_scrape()`. 
""" result = [] - url = url+"/see-all?section=singles" - - headers = { - "User-Agent": "Mozilla/5.0" - } - res = requests.get(url, headers=headers) + full_url = f"{url}/see-all?section=singles" + headers = {"User-Agent": "Mozilla/5.0"} + + try: + res = requests.get(full_url, headers=headers, timeout=10) + res.raise_for_status() + except requests.RequestException: + return result + soup = BeautifulSoup(res.text, "html.parser") - items = soup.find('script', {'id': 'serialized-server-data'}) - our_json = json.loads(items.text) - - sections = our_json[0]['data']['sections'][0]['items'] - - for i in sections: - result.append((i['segue']['actionMetrics']['data'][0]['fields']['actionUrl'])) - - return result \ No newline at end of file + script_tag = soup.find("script", {"id": "serialized-server-data"}) + if not script_tag: + return result + + try: + data = json.loads(script_tag.text) + sections = data[0]["data"]["sections"] + if not sections: + return result + + items = sections[0].get("items", []) + except (json.JSONDecodeError, KeyError, IndexError, TypeError): + return result + + for item in items: + try: + action_url = item["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result.append(action_url) + except (KeyError, IndexError, TypeError): + continue + + return result From d6bc2e74be8d9cf7246b8e668a10cb57cab8dc37 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 15:17:22 +0330 Subject: [PATCH 10/29] add docstring for room scrape fun in apple-music-scraper --- Apple-Music-Scraper/main.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index 370a048c3b..ea7778a922 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -2,7 +2,27 @@ import requests, json from utils import * + def room_scrape(link="https://music.apple.com/us/room/6748797380"): + """ + Scrape a shared Apple Music room and extract song URLs. 
+ + Parameters + ---------- + link : str, optional + URL of the Apple Music room page. Defaults to an example room link. + + Returns + ------- + list[str] + List of converted song URLs extracted from the room. + + Notes + ----- + This function parses the `serialized-server-data` script tag within + the Apple Music room HTML, locates the 'copper-track-swoosh' section, + and extracts track URLs. + """ result = [] headers = {"User-Agent": "Mozilla/5.0"} @@ -40,6 +60,7 @@ def room_scrape(link="https://music.apple.com/us/room/6748797380"): return result + def playlist_scrape(link="https://music.apple.com/us/playlist/new-music-daily/pl.2b0e6e332fdf4b7a91164da3162127b5"): result = [] headers = {"User-Agent": "Mozilla/5.0"} @@ -78,6 +99,7 @@ def playlist_scrape(link="https://music.apple.com/us/playlist/new-music-daily/pl return result + def search(keyword="sasha sloan"): result = {"artists": [], "albums": [], "songs": [], "playlists": [], "videos": []} link = f"https://music.apple.com/us/search?term={keyword}" @@ -200,6 +222,7 @@ def search(keyword="sasha sloan"): return result + def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): result = { "title": "", @@ -278,6 +301,7 @@ def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): return result + def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707585"): result = { "title": "", @@ -419,6 +443,7 @@ def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707 return result + def video_scrape(url="https://music.apple.com/us/music-video/gucci-mane-visualizer/1810547026"): result = { "title": "", @@ -519,6 +544,7 @@ def video_scrape(url="https://music.apple.com/us/music-video/gucci-mane-visualiz return result + def artist_scrape(url="https://music.apple.com/us/artist/king-princess/1349968534"): result = { "title": "", From 648485e29daafc00dbeec976872ebba41c5a175b Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 
15:21:18 +0330 Subject: [PATCH 11/29] update docstring for main.py in apple-music-scraper --- Apple-Music-Scraper/main.py | 176 +++++++++++++++++++++++++++++++++++- 1 file changed, 174 insertions(+), 2 deletions(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index ea7778a922..65e2e4af8e 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -62,6 +62,24 @@ def room_scrape(link="https://music.apple.com/us/room/6748797380"): def playlist_scrape(link="https://music.apple.com/us/playlist/new-music-daily/pl.2b0e6e332fdf4b7a91164da3162127b5"): + """ + Scrape an Apple Music playlist and extract all track URLs. + + Parameters + ---------- + link : str, optional + URL of the Apple Music playlist. Defaults to New Music Daily. + + Returns + ------- + list[str] + List of converted song URLs from the playlist. + + Notes + ----- + Uses the 'track-list' section from Apple Music's internal serialized + server data to extract song action URLs. + """ result = [] headers = {"User-Agent": "Mozilla/5.0"} @@ -101,6 +119,28 @@ def playlist_scrape(link="https://music.apple.com/us/playlist/new-music-daily/pl def search(keyword="sasha sloan"): + """ + Search Apple Music for artists, songs, albums, playlists and videos. + + Parameters + ---------- + keyword : str, optional + Search query to send to Apple Music. Defaults to "sasha sloan". + + Returns + ------- + dict + Structured JSON-like dictionary containing search results: + - artists + - albums + - songs + - playlists + - videos + + Notes + ----- + Scrapes `serialized-server-data` to access Apple Music's internal search structure. 
+ """ result = {"artists": [], "albums": [], "songs": [], "playlists": [], "videos": []} link = f"https://music.apple.com/us/search?term={keyword}" headers = {"User-Agent": "Mozilla/5.0"} @@ -224,6 +264,30 @@ def search(keyword="sasha sloan"): def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): + """ + Scrape a single Apple Music song page and extract metadata. + + Parameters + ---------- + url : str, optional + URL of the Apple Music song. Defaults to sample link. + + Returns + ------- + dict + Dictionary containing: + - title + - image (full resolution) + - kind (song type) + - album info (title + URL) + - artist info (title + URL) + - preview-url + - list of more songs + + Notes + ----- + Uses the `schema:song` JSON-LD tag to extract preview URL. + """ result = { "title": "", "image": "", @@ -303,6 +367,37 @@ def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707585"): + """ + Scrape an Apple Music album page and extract metadata, songs, related albums, videos, etc. + + Parameters + ---------- + url : str, optional + URL of the Apple Music album. Defaults to example album. + + Returns + ------- + dict + Dictionary containing: + - title + - image + - caption/description + - artist info + - song URLs + - album info text + - more songs (same artist) + - similar (recommended) albums + - videos related to the album + + Notes + ----- + Extracts multiple sections such as: + - album-detail + - track-list + - similar albums + - more by artist + - album videos + """ result = { "title": "", "image": "", @@ -445,6 +540,30 @@ def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707 def video_scrape(url="https://music.apple.com/us/music-video/gucci-mane-visualizer/1810547026"): + """ + Scrape Apple Music music-video page and extract metadata + video file URL. 
+ + Parameters + ---------- + url : str, optional + URL of the Apple Music music-video. Defaults to example. + + Returns + ------- + dict + { + title, + image, + artist: {title, url}, + video-url, + more (same artist), + similar (same genre) + } + + Notes + ----- + Uses JSON-LD block `schema:music-video` to extract the direct video content URL. + """ result = { "title": "", "image": "", @@ -546,6 +665,38 @@ def video_scrape(url="https://music.apple.com/us/music-video/gucci-mane-visualiz def artist_scrape(url="https://music.apple.com/us/artist/king-princess/1349968534"): + """ + Scrape an Apple Music artist page and extract all available metadata. + + Parameters + ---------- + url : str, optional + Apple Music artist page URL. Defaults to King Princess sample link. + + Returns + ------- + dict + Dictionary containing: + - title + - image + - latest release URL + - list of top songs + - all albums + - singles & EPs + - playlists + - videos + - similar artists + - appears on + - more-to-see (videos) + - more-to-hear (songs) + - about text + - extra info (bio subtitle) + + Notes + ----- + This is the most complex scraper and extracts ~12 different sections + from the artist page. + """ result = { "title": "", "image": "", @@ -745,8 +896,29 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 def test_all_functions(): """ - Test all scraper functions with sample URLs. - Prints results count / key fields to verify basic functionality. + Run integration-style tests for all scraper functions. + + This function executes each scraper with sample inputs to verify that: + - The function runs without raising exceptions. + - The returned structures contain expected keys. + - Basic counts (number of items, presence of preview/video URLs, etc.) + match minimal sanity expectations. + + Tests performed: + 1. room_scrape() – prints number of room items. + 2. playlist_scrape() – prints number of playlist items. + 3. 
search() – searches for "night tapes" and prints result counts. + 4. song_scrape() – scrapes a sample Apple Music song URL. + 5. album_scrape() – scrapes a sample Apple Music album URL. + 6. video_scrape() – scrapes a sample Apple Music video URL. + 7. artist_scrape() – scrapes a sample Apple Music artist page. + + This is not a formal unit test suite, but a quick manual verification tool + intended to confirm scraper functionality during development. + + Prints: + - Counts of returned items. + - Key fields such as title, preview-url existence, etc. """ print("\n=== TEST: room_scrape ===") From 06a83ec678fb890a163216994955b73cabf8f999 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 15:22:11 +0330 Subject: [PATCH 12/29] update docstring for utils.py in apple-music-scraper --- Apple-Music-Scraper/utils.py | 64 ++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/Apple-Music-Scraper/utils.py b/Apple-Music-Scraper/utils.py index 8d118cbe0d..2cdb86c69f 100644 --- a/Apple-Music-Scraper/utils.py +++ b/Apple-Music-Scraper/utils.py @@ -7,6 +7,29 @@ def get_cover(url, width, height, format="jpg", crop_option=""): """ Generate a full Apple Music artwork URL with proper width, height, format, and crop settings. + + Parameters + ---------- + url : str + The original Apple Music artwork template URL containing `{w}`, `{h}`, `{f}`, `{c}`. + width : int or str + Target width of the image. + height : int or str + Target height of the image. + format : str, optional + Image format (jpg, png, etc.). Defaults to `"jpg"`. + crop_option : str, optional + Cropping mode used by Apple Music artwork URLs. Defaults to empty string. + + Returns + ------- + str + Fully formatted artwork URL. + + Notes + ----- + Apple Music uses dynamic artwork URLs where dimensions and format are embedded + in the URL as placeholders such as `{w}`, `{h}`, `{f}`, and `{c}`. 
""" if not isinstance(url, str): return url @@ -26,6 +49,30 @@ def get_cover(url, width, height, format="jpg", crop_option=""): def convert_album_to_song_url(album_url): """ Convert an Apple Music album-track URL into a direct Apple Music song URL. + + Parameters + ---------- + album_url : str + Full Apple Music album URL that contains a track ID via the query parameter `?i=...`. + + Returns + ------- + str or None + Direct Apple Music song URL if `i` parameter exists. + Otherwise, returns `None`. + + Examples + -------- + Input: + https://music.apple.com/us/album/song-name/12345?i=67890 + + Output: + https://music.apple.com/us/song/song-name/67890 + + Notes + ----- + Apple Music album pages embed individual song IDs through the query parameter `i`, + which must be extracted and placed into a `/song/` URL. """ try: parsed = urllib.parse.urlparse(album_url) @@ -51,6 +98,23 @@ def convert_album_to_song_url(album_url): def get_all_singles(url="https://music.apple.com/us/artist/king-princess/1349968534"): """ Fetch all singles & EP URLs from an Apple Music artist page. + + Parameters + ---------- + url : str, optional + Base artist page URL. Defaults to the sample King Princess artist link. + + Returns + ------- + list[str] + A list of Apple Music URLs for all singles & EPs for the artist. + + Notes + ----- + - Apple Music loads singles under the `/see-all?section=singles` endpoint. + - This function retrieves the serialized server data, parses the `items` section, + and extracts the correct song/EP URLs. + - Used internally by `artist_scrape()`. 
""" result = [] From 76db52a96641a652b2cf469a5014ec86fd7c0f7d Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 15:27:30 +0330 Subject: [PATCH 13/29] update artist_scrape func for Code Quality Check --- Apple-Music-Scraper/main.py | 47 ++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index 65e2e4af8e..8785f3a637 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -738,7 +738,6 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 albums = None playlists = None videos = None - singles = None appears_on = None more_to_see = None more_to_hear = None @@ -757,8 +756,6 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 playlists = sec elif "music-videos" in sec_id: videos = sec - elif "singles" in sec_id: - singles = sec elif "appears-on" in sec_id: appears_on = sec elif "more-to-see" in sec_id: @@ -793,8 +790,9 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 try: for it in latest_and_top.get("items", []): try: - url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - result["top"].append(url) + result["top"].append( + it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + ) except Exception: continue except Exception: @@ -804,14 +802,15 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 try: for it in albums.get("items", []): try: - url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - result["albums"].append(url) + result["albums"].append( + it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + ) except Exception: continue except Exception: pass - # SINGLES & EP + # SINGLES & EPs try: result["singles_and_EP"] = get_all_singles(url) except Exception: @@ -821,8 +820,9 @@ def 
artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 try: for it in playlists.get("items", []): try: - url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - result["playlists"].append(url) + result["playlists"].append( + it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + ) except Exception: continue except Exception: @@ -832,19 +832,21 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 try: for it in videos.get("items", []): try: - url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - result["videos"].append(url) + result["videos"].append( + it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + ) except Exception: continue except Exception: pass - # SIMILAR + # SIMILAR ARTISTS try: for it in similar.get("items", []): try: - url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - result["similar"].append(url) + result["similar"].append( + it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + ) except Exception: continue except Exception: @@ -854,8 +856,9 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 try: for it in appears_on.get("items", []): try: - url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - result["appears_on"].append(url) + result["appears_on"].append( + it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + ) except Exception: continue except Exception: @@ -865,8 +868,9 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 try: for it in more_to_see.get("items", []): try: - url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - result["more_to_see"].append(url) + result["more_to_see"].append( + it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + ) except Exception: continue except Exception: @@ -876,8 +880,9 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 
try: for it in more_to_hear.get("items", []): try: - url = it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - result["more_to_hear"].append(url) + result["more_to_hear"].append( + it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + ) except Exception: continue except Exception: From 9d4b80abbf665f490938ea735dfb82e1b0fe9d75 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 16:45:30 +0330 Subject: [PATCH 14/29] change code style for ai checks --- Apple-Music-Scraper/main.py | 1 + Apple-Music-Scraper/utils.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index 8785f3a637..57cd216d88 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -899,6 +899,7 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 return result + def test_all_functions(): """ Run integration-style tests for all scraper functions. diff --git a/Apple-Music-Scraper/utils.py b/Apple-Music-Scraper/utils.py index 2cdb86c69f..59975f32b3 100644 --- a/Apple-Music-Scraper/utils.py +++ b/Apple-Music-Scraper/utils.py @@ -4,7 +4,7 @@ from bs4 import BeautifulSoup -def get_cover(url, width, height, format="jpg", crop_option=""): +def get_cover(url, width, height, img_format="jpg", crop_option=""): """ Generate a full Apple Music artwork URL with proper width, height, format, and crop settings. @@ -16,8 +16,8 @@ def get_cover(url, width, height, format="jpg", crop_option=""): Target width of the image. height : int or str Target height of the image. - format : str, optional - Image format (jpg, png, etc.). Defaults to `"jpg"`. + img_format : str, optional + Image format (jpg, png, etc.). Defaults to "jpg". crop_option : str, optional Cropping mode used by Apple Music artwork URLs. Defaults to empty string. 
@@ -39,7 +39,7 @@ def get_cover(url, width, height, format="jpg", crop_option=""): url.replace("{w}", str(width)) .replace("{h}", str(height)) .replace("{c}", crop_option) - .replace("{f}", format) + .replace("{f}", img_format) ) return new_url except (TypeError, AttributeError): From 1b153b3fcd0a0b8b324a7ee120b53ba60622b323 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 16:48:39 +0330 Subject: [PATCH 15/29] change code style for ai checks --- Apple-Music-Scraper/main.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index 57cd216d88..e8507ed265 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -29,7 +29,7 @@ def room_scrape(link="https://music.apple.com/us/room/6748797380"): try: rspn = requests.get(link, headers=headers, timeout=10) rspn.raise_for_status() - except (requests.RequestException, Exception): + except Exception: return result soup = BeautifulSoup(rspn.text, "html.parser") @@ -86,7 +86,7 @@ def playlist_scrape(link="https://music.apple.com/us/playlist/new-music-daily/pl try: rspn = requests.get(link, headers=headers, timeout=10) rspn.raise_for_status() - except (requests.RequestException, Exception): + except Exception: return result soup = BeautifulSoup(rspn.text, "html.parser") @@ -148,7 +148,7 @@ def search(keyword="sasha sloan"): try: rspn = requests.get(link, headers=headers, timeout=10) rspn.raise_for_status() - except (requests.RequestException, Exception): + except Exception: return result soup = BeautifulSoup(rspn.text, "html.parser") @@ -301,7 +301,7 @@ def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): try: rspn = requests.get(url, timeout=10) rspn.raise_for_status() - except (requests.RequestException, Exception): + except Exception: return result soup = BeautifulSoup(rspn.text, "html.parser") @@ -415,7 +415,7 @@ def 
album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707 try: rspn = requests.get(url, headers=headers, timeout=10) rspn.raise_for_status() - except (requests.RequestException, Exception): + except Exception: return result soup = BeautifulSoup(rspn.text, "html.parser") @@ -578,7 +578,7 @@ def video_scrape(url="https://music.apple.com/us/music-video/gucci-mane-visualiz try: rspn = requests.get(url, headers=headers, timeout=10) rspn.raise_for_status() - except (requests.RequestException, Exception): + except Exception: return result soup = BeautifulSoup(rspn.text, "html.parser") @@ -719,7 +719,7 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 try: rspn = requests.get(url, headers=headers, timeout=10) rspn.raise_for_status() - except (requests.RequestException, Exception): + except Exception: return result soup = BeautifulSoup(rspn.text, "html.parser") From 4b2b5e196245321f212c3d31d276e830349ba409 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 16:49:54 +0330 Subject: [PATCH 16/29] change code style for ai checks --- Apple-Music-Scraper/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index e8507ed265..ac693d8b7a 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -1,6 +1,6 @@ from bs4 import BeautifulSoup import requests, json -from utils import * +from utils import convert_album_to_song_url, get_cover, get_all_singles def room_scrape(link="https://music.apple.com/us/room/6748797380"): From 7b2fd8710af3bb97c55d2ed2e36a8adb40899839 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 17:06:18 +0330 Subject: [PATCH 17/29] change code style for ai checks --- Apple-Music-Scraper/main.py | 123 +++++++++++++++++++++++++++++------ Apple-Music-Scraper/utils.py | 5 +- 2 files changed, 107 insertions(+), 21 deletions(-) diff --git a/Apple-Music-Scraper/main.py 
b/Apple-Music-Scraper/main.py index ac693d8b7a..56d6c5b60d 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -1,5 +1,6 @@ from bs4 import BeautifulSoup -import requests, json +import requests +import json from utils import convert_album_to_song_url, get_cover, get_all_singles @@ -51,7 +52,10 @@ def room_scrape(link="https://music.apple.com/us/room/6748797380"): for item in items: try: - action_url = item["playAction"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + action_url = ( + item["playAction"]["actionMetrics"] + ["data"][0]["fields"]["actionUrl"] + ) song_url = convert_album_to_song_url(action_url) if song_url: result.append(song_url) @@ -61,7 +65,12 @@ def room_scrape(link="https://music.apple.com/us/room/6748797380"): return result -def playlist_scrape(link="https://music.apple.com/us/playlist/new-music-daily/pl.2b0e6e332fdf4b7a91164da3162127b5"): +def playlist_scrape( + link=( + "https://music.apple.com/us/playlist" + "/new-music-daily/pl.2b0e6e332fdf4b7a91164da3162127b5" + ), +): """ Scrape an Apple Music playlist and extract all track URLs. 
@@ -108,7 +117,10 @@ def playlist_scrape(link="https://music.apple.com/us/playlist/new-music-daily/pl for item in items: try: - action_url = item["playAction"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + action_url = ( + item["playAction"]["actionMetrics"] + ["data"][0]["fields"]["actionUrl"] + ) song_url = convert_album_to_song_url(action_url) if song_url: result.append(song_url) @@ -208,7 +220,14 @@ def search(keyword="sasha sloan"): artwork_dict.get("width", 0), artwork_dict.get("height", 0), ) - result["albums"].append({"title": title, "artist": artist, "url": url, "image": img}) + result["albums"].append( + { + "title": title, + "artist": artist, + "url": url, + "image": img + } + ) except (KeyError, TypeError, IndexError): continue @@ -224,7 +243,14 @@ def search(keyword="sasha sloan"): artwork_dict.get("width", 0), artwork_dict.get("height", 0), ) - result["songs"].append({"title": title, "artist": artist, "url": url, "image": img}) + result["songs"].append( + { + "title": title, + "artist": artist, + "url": url, + "image": img + } + ) except (KeyError, TypeError, IndexError): continue @@ -240,7 +266,14 @@ def search(keyword="sasha sloan"): artwork_dict.get("width", 0), artwork_dict.get("height", 0), ) - result["playlists"].append({"title": title, "artist": artist, "url": url, "image": img}) + result["playlists"].append( + { + "title": title, + "artist": artist, + "url": url, + "image": img + } + ) except (KeyError, TypeError, IndexError): continue @@ -256,7 +289,14 @@ def search(keyword="sasha sloan"): artwork_dict.get("width", 0), artwork_dict.get("height", 0), ) - result["videos"].append({"title": title, "artist": artist, "url": url, "image": img}) + result["videos"].append( + { + "title": title, + "artist": artist, + "url": url, + "image": img + } + ) except (KeyError, TypeError, IndexError): continue @@ -334,19 +374,31 @@ def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): result["album"]["title"] = item.get("album", 
"") try: - result["album"]["url"] = item["albumLinks"][0]["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["album"]["url"] = ( + item["albumLinks"][0]["segue"]["actionMetrics"] + ["data"][0]["fields"]["actionUrl"] + ) except (KeyError, IndexError, TypeError): pass result["artist"]["title"] = item.get("artists", "") try: - result["artist"]["url"] = item["artistLinks"][0]["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["artist"]["url"] = ( + item["artistLinks"][0]["segue"]["actionMetrics"] + ["data"][0]["fields"]["actionUrl"] + ) except (KeyError, IndexError, TypeError): pass try: - json_tag = soup.find("script", {"id": "schema:song", "type": "application/ld+json"}) + json_tag = soup.find( + "script", + { + "id": "schema:song", + "type": "application/ld+json" + } + ) schema_data = json.loads(json_tag.string) result["preview-url"] = schema_data["audio"]["audio"]["contentUrl"] except (AttributeError, KeyError, TypeError, json.JSONDecodeError): @@ -471,7 +523,10 @@ def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707 # CAPTION try: - result["caption"] = item.get("modalPresentationDescriptor", {}).get("paragraphText", "") + result["caption"] = item.get( + "modalPresentationDescriptor", + {} + ).get("paragraphText", "") except Exception: pass @@ -479,7 +534,10 @@ def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707 try: sl = item.get("subtitleLinks", [])[0] result["artist"]["title"] = sl.get("title", "") - result["artist"]["url"] = sl["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["artist"]["url"] = ( + sl["segue"]["actionMetrics"] + ["data"][0]["fields"]["actionUrl"] + ) except Exception: pass @@ -539,7 +597,12 @@ def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707 return result -def video_scrape(url="https://music.apple.com/us/music-video/gucci-mane-visualizer/1810547026"): +def video_scrape( + url=( + 
"https://music.apple.com/us/music-video/" + "gucci-mane-visualizer/1810547026" + ), +): """ Scrape Apple Music music-video page and extract metadata + video file URL. @@ -627,13 +690,22 @@ def video_scrape(url="https://music.apple.com/us/music-video/gucci-mane-visualiz try: sl = item.get("subtitleLinks", [])[0] result["artist"]["title"] = sl.get("title", "") - result["artist"]["url"] = sl["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["artist"]["url"] = ( + sl["segue"]["actionMetrics"] + ["data"][0]["fields"]["actionUrl"] + ) except Exception: pass # VIDEO URL try: - json_tag = soup.find("script", {"id": "schema:music-video", "type": "application/ld+json"}) + json_tag = soup.find( + "script", + { + "id": "schema:music-video", + "type": "application/ld+json" + } + ) schema_data = json.loads(json_tag.string) result["video-url"] = schema_data["video"]["contentUrl"] except (AttributeError, KeyError, TypeError, json.JSONDecodeError): @@ -782,7 +854,10 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 # LATEST try: - result["latest"] = latest_and_top["pinnedLeadingItem"]["item"]["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + result["latest"] = ( + latest_and_top["pinnedLeadingItem"]["item"]["segue"] + ["actionMetrics"]["data"][0]["fields"]["actionUrl"] + ) except Exception: pass @@ -962,7 +1037,9 @@ def test_all_functions(): print("\n=== TEST: album_scrape ===") try: - album = album_scrape("https://music.apple.com/us/album/1965/1817707266?i=1817707585") + album = album_scrape( + "https://music.apple.com/us/album/1965/1817707266?i=1817707585" + ) print("Album title:", album.get("title")) print("Songs:", len(album.get("songs", []))) except Exception as e: @@ -970,7 +1047,9 @@ def test_all_functions(): print("\n=== TEST: video_scrape ===") try: - video = video_scrape("https://music.apple.com/us/music-video/gucci-mane-visualizer/1810547026") + video = video_scrape( + 
"https://music.apple.com/us/music-video/gucci-mane-visualizer/1810547026" + ) print("Video title:", video.get("title")) print("Video URL exists:", bool(video.get("video-url"))) except Exception as e: @@ -978,7 +1057,9 @@ def test_all_functions(): print("\n=== TEST: artist_scrape ===") try: - artist = artist_scrape("https://music.apple.com/us/artist/king-princess/1349968534") + artist = artist_scrape( + "https://music.apple.com/us/artist/king-princess/1349968534" + ) print("Artist title:", artist.get("title")) print("Top songs:", len(artist.get("top", []))) print("Albums:", len(artist.get("albums", []))) @@ -987,3 +1068,5 @@ def test_all_functions(): print("artist_scrape ERROR:", e) print("\n=== ALL TESTS COMPLETED ===") + +test_all_functions() \ No newline at end of file diff --git a/Apple-Music-Scraper/utils.py b/Apple-Music-Scraper/utils.py index 59975f32b3..9cd9cb3baf 100644 --- a/Apple-Music-Scraper/utils.py +++ b/Apple-Music-Scraper/utils.py @@ -144,7 +144,10 @@ def get_all_singles(url="https://music.apple.com/us/artist/king-princess/1349968 for item in items: try: - action_url = item["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + action_url = ( + item["segue"]["actionMetrics"] + ["data"][0]["fields"]["actionUrl"] + ) result.append(action_url) except (KeyError, IndexError, TypeError): continue From 74b365ff7242f24e4cd76d4141b93d3e021efda4 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 17:21:07 +0330 Subject: [PATCH 18/29] change code style for ai checks --- Apple-Music-Scraper/main.py | 20 +++++++++----------- Apple-Music-Scraper/utils.py | 9 ++++++--- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index 56d6c5b60d..5d0af16de0 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -20,8 +20,8 @@ def room_scrape(link="https://music.apple.com/us/room/6748797380"): Notes ----- - This function parses the `serialized-server-data` 
script tag within - the Apple Music room HTML, locates the 'copper-track-swoosh' section, + This function parses the `serialized-server-data` script tag within + the Apple Music room HTML, locates the 'copper-track-swoosh' section, and extracts track URLs. """ result = [] @@ -222,9 +222,9 @@ def search(keyword="sasha sloan"): ) result["albums"].append( { - "title": title, - "artist": artist, - "url": url, + "title": title, + "artist": artist, + "url": url, "image": img } ) @@ -247,7 +247,7 @@ def search(keyword="sasha sloan"): { "title": title, "artist": artist, - "url": url, + "url": url, "image": img } ) @@ -420,7 +420,8 @@ def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707585"): """ - Scrape an Apple Music album page and extract metadata, songs, related albums, videos, etc. + Scrape an Apple Music album page + and extract metadata, songs, related albums, videos, etc. Parameters ---------- @@ -766,7 +767,7 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 Notes ----- - This is the most complex scraper and extracts ~12 different sections + This is the most complex scraper and extracts ~12 different sections from the artist page. """ result = { @@ -1001,7 +1002,6 @@ def test_all_functions(): - Counts of returned items. - Key fields such as title, preview-url existence, etc. 
""" - print("\n=== TEST: room_scrape ===") try: r = room_scrape() @@ -1068,5 +1068,3 @@ def test_all_functions(): print("artist_scrape ERROR:", e) print("\n=== ALL TESTS COMPLETED ===") - -test_all_functions() \ No newline at end of file diff --git a/Apple-Music-Scraper/utils.py b/Apple-Music-Scraper/utils.py index 9cd9cb3baf..8faac597b6 100644 --- a/Apple-Music-Scraper/utils.py +++ b/Apple-Music-Scraper/utils.py @@ -6,12 +6,14 @@ def get_cover(url, width, height, img_format="jpg", crop_option=""): """ - Generate a full Apple Music artwork URL with proper width, height, format, and crop settings. + Generate a full Apple Music artwork URL\ + with proper width, height, format, and crop settings. Parameters ---------- url : str - The original Apple Music artwork template URL containing `{w}`, `{h}`, `{f}`, `{c}`. + The original Apple Music artwork template URL + containing `{w}`, `{h}`, `{f}`, `{c}`. width : int or str Target width of the image. height : int or str @@ -53,7 +55,8 @@ def convert_album_to_song_url(album_url): Parameters ---------- album_url : str - Full Apple Music album URL that contains a track ID via the query parameter `?i=...`. + Full Apple Music album URL that + contains a track ID via the query parameter `?i=...`. 
Returns ------- From a44bd620f9a33057d4b4893bcfcbf1413dcf5e3f Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 17:53:27 +0330 Subject: [PATCH 19/29] add some helper funcs to reduce complexity --- Apple-Music-Scraper/main.py | 210 +++++++++++------------------------- 1 file changed, 65 insertions(+), 145 deletions(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index 5d0af16de0..8b7f64a431 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -4,6 +4,36 @@ from utils import convert_album_to_song_url, get_cover, get_all_singles +def safe_action_url(item): + try: + # segue-based URLs (most items) + return item["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + except Exception: + pass + + try: + # fallback: plain contentDescriptor + return item["contentDescriptor"]["url"] + except Exception: + return None + + +def find_section(sections, key): + for sec in sections: + if key in sec.get("id", ""): + return sec + return None + + +def append_urls_from_section(section, target_list): + if not section: + return + for it in section.get("items", []): + url = safe_action_url(it) + if url: + target_list.append(url) + + def room_scrape(link="https://music.apple.com/us/room/6748797380"): """ Scrape a shared Apple Music room and extract song URLs. 
@@ -407,11 +437,9 @@ def song_scrape(url="https://music.apple.com/us/song/california/1821538031"): try: more_items = sections[-1]["items"] for m in more_items: - try: - url = m["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + url = safe_action_url(m) + if url: result["more"].append(url) - except (KeyError, IndexError, TypeError): - continue except (KeyError, IndexError, TypeError): pass @@ -563,11 +591,9 @@ def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707 more_items = sections[more_index].get("items", []) for m in more_items: - try: - url = m["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + url = safe_action_url(m) + if url: result["more"].append(url) - except Exception: - continue except Exception: pass @@ -575,11 +601,9 @@ def album_scrape(url="https://music.apple.com/us/album/1965/1817707266?i=1817707 try: sim_items = sections[similar_index].get("items", []) for s in sim_items: - try: - url = s["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + url = safe_action_url(s) + if url: result["similar"].append(url) - except Exception: - continue except Exception: pass @@ -670,11 +694,9 @@ def video_scrape( similar = sec # TITLE - try: - item = music_video_header["items"][0] - result["title"] = item.get("title", "") - except Exception: - pass + item = (music_video_header or {}).get("items", [{}])[0] + result["title"] = item.get("title", "") + # IMAGE try: @@ -715,22 +737,18 @@ def video_scrape( # MORE BY ARTIST try: for m in more.get("items", []): - try: - url = m["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + url = safe_action_url(m) + if url: result["more"].append(url) - except Exception: - continue except Exception: pass # SIMILAR try: for s in similar.get("items", []): - try: - url = s["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + url = safe_action_url(s) + if url: result["similar"].append(url) - except Exception: - continue except Exception: pass @@ -806,39 +824,16 
@@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 except (KeyError, IndexError, json.JSONDecodeError): return result - artist_detail = None - latest_and_top = None - albums = None - playlists = None - videos = None - appears_on = None - more_to_see = None - more_to_hear = None - bio = None - similar = None - - for sec in sections: - sec_id = sec.get("id", "") - if "artist-detail-header-section" in sec_id: - artist_detail = sec - elif "latest-release-and-top-songs" in sec_id: - latest_and_top = sec - elif "full-albums" in sec_id: - albums = sec - elif "playlists" in sec_id: - playlists = sec - elif "music-videos" in sec_id: - videos = sec - elif "appears-on" in sec_id: - appears_on = sec - elif "more-to-see" in sec_id: - more_to_see = sec - elif "more-to-hear" in sec_id: - more_to_hear = sec - elif "artist-bio" in sec_id: - bio = sec - elif "similar-artists" in sec_id: - similar = sec + artist_detail = find_section(sections, "artist-detail-header-section") + latest_and_top = find_section(sections, "latest-release-and-top-songs") + albums = find_section(sections, "full-albums") + playlists = find_section(sections, "playlists") + videos = find_section(sections, "music-videos") + appears_on = find_section(sections, "appears-on") + more_to_see = find_section(sections, "more-to-see") + more_to_hear = find_section(sections, "more-to-hear") + bio = find_section(sections, "artist-bio") + similar = find_section(sections, "similar-artists") # HEADER try: @@ -863,106 +858,28 @@ def artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 pass # TOP SONGS - try: - for it in latest_and_top.get("items", []): - try: - result["top"].append( - it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - ) - except Exception: - continue - except Exception: - pass + append_urls_from_section(latest_and_top, result["top"]) # ALBUMS - try: - for it in albums.get("items", []): - try: - result["albums"].append( - 
it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - ) - except Exception: - continue - except Exception: - pass - - # SINGLES & EPs - try: - result["singles_and_EP"] = get_all_singles(url) - except Exception: - pass + append_urls_from_section(albums, result["albums"]) # PLAYLISTS - try: - for it in playlists.get("items", []): - try: - result["playlists"].append( - it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - ) - except Exception: - continue - except Exception: - pass + append_urls_from_section(playlists, result["playlists"]) # VIDEOS - try: - for it in videos.get("items", []): - try: - result["videos"].append( - it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - ) - except Exception: - continue - except Exception: - pass + append_urls_from_section(videos, result["videos"]) - # SIMILAR ARTISTS - try: - for it in similar.get("items", []): - try: - result["similar"].append( - it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - ) - except Exception: - continue - except Exception: - pass + # SIMILAR + append_urls_from_section(similar, result["similar"]) # APPEARS ON - try: - for it in appears_on.get("items", []): - try: - result["appears_on"].append( - it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - ) - except Exception: - continue - except Exception: - pass + append_urls_from_section(appears_on, result["appears_on"]) # MORE TO SEE - try: - for it in more_to_see.get("items", []): - try: - result["more_to_see"].append( - it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - ) - except Exception: - continue - except Exception: - pass + append_urls_from_section(more_to_see, result["more_to_see"]) # MORE TO HEAR - try: - for it in more_to_hear.get("items", []): - try: - result["more_to_hear"].append( - it["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - ) - except Exception: - continue - except Exception: - pass + append_urls_from_section(more_to_hear, 
result["more_to_hear"]) # ABOUT try: @@ -1068,3 +985,6 @@ def test_all_functions(): print("artist_scrape ERROR:", e) print("\n=== ALL TESTS COMPLETED ===") + + +test_all_functions() \ No newline at end of file From 856d0b37341f2dac56f8b70e64665ed6ae7990de Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 18:00:41 +0330 Subject: [PATCH 20/29] change code style for ai checks --- Apple-Music-Scraper/main.py | 56 ++++------------- Apple-Music-Scraper/utils.py | 114 +++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 43 deletions(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index 8b7f64a431..c0476768cf 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -1,37 +1,8 @@ from bs4 import BeautifulSoup import requests import json -from utils import convert_album_to_song_url, get_cover, get_all_singles - - -def safe_action_url(item): - try: - # segue-based URLs (most items) - return item["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] - except Exception: - pass - - try: - # fallback: plain contentDescriptor - return item["contentDescriptor"]["url"] - except Exception: - return None - - -def find_section(sections, key): - for sec in sections: - if key in sec.get("id", ""): - return sec - return None - - -def append_urls_from_section(section, target_list): - if not section: - return - for it in section.get("items", []): - url = safe_action_url(it) - if url: - target_list.append(url) +from utils import convert_album_to_song_url, get_cover +from utils import safe_action_url, find_section, append_urls_from_section def room_scrape(link="https://music.apple.com/us/room/6748797380"): @@ -697,7 +668,6 @@ def video_scrape( item = (music_video_header or {}).get("items", [{}])[0] result["title"] = item.get("title", "") - # IMAGE try: artwork = item.get("artwork", {}).get("dictionary", {}) @@ -824,16 +794,16 @@ def 
artist_scrape(url="https://music.apple.com/us/artist/king-princess/134996853 except (KeyError, IndexError, json.JSONDecodeError): return result - artist_detail = find_section(sections, "artist-detail-header-section") - latest_and_top = find_section(sections, "latest-release-and-top-songs") - albums = find_section(sections, "full-albums") - playlists = find_section(sections, "playlists") - videos = find_section(sections, "music-videos") - appears_on = find_section(sections, "appears-on") - more_to_see = find_section(sections, "more-to-see") - more_to_hear = find_section(sections, "more-to-hear") - bio = find_section(sections, "artist-bio") - similar = find_section(sections, "similar-artists") + artist_detail = find_section(sections, "artist-detail-header-section") + latest_and_top = find_section(sections, "latest-release-and-top-songs") + albums = find_section(sections, "full-albums") + playlists = find_section(sections, "playlists") + videos = find_section(sections, "music-videos") + appears_on = find_section(sections, "appears-on") + more_to_see = find_section(sections, "more-to-see") + more_to_hear = find_section(sections, "more-to-hear") + bio = find_section(sections, "artist-bio") + similar = find_section(sections, "similar-artists") # HEADER try: @@ -987,4 +957,4 @@ def test_all_functions(): print("\n=== ALL TESTS COMPLETED ===") -test_all_functions() \ No newline at end of file +# test_all_functions() \ No newline at end of file diff --git a/Apple-Music-Scraper/utils.py b/Apple-Music-Scraper/utils.py index 8faac597b6..2a573ff655 100644 --- a/Apple-Music-Scraper/utils.py +++ b/Apple-Music-Scraper/utils.py @@ -156,3 +156,117 @@ def get_all_singles(url="https://music.apple.com/us/artist/king-princess/1349968 continue return result + + +def safe_action_url(item): + """ + Safely extract an Apple Music "actionUrl" from a section item. + + This function attempts to extract a playable or navigational URL from + Apple Music's internal JSON structure. 
It first looks for URLs provided + via `segue -> actionMetrics`, which is the most common structure. If that + fails, it falls back to the `contentDescriptor` URL when available. + + Parameters + ---------- + item : dict + A dictionary representing an Apple Music content item inside a section. + + Returns + ------- + str or None + The extracted URL if available, otherwise None. + + Notes + ----- + This helper prevents repetitive try/except blocks throughout all scraper + functions and gracefully handles missing keys, unexpected formats, or + incomplete items. + """ + try: + # segue-based URLs (most items) + return item["segue"]["actionMetrics"]["data"][0]["fields"]["actionUrl"] + except Exception: + pass + + try: + # fallback: plain contentDescriptor + return item["contentDescriptor"]["url"] + except Exception: + return None + + +def find_section(sections, key): + """ + Locate a specific Apple Music section by matching a substring in its ID. + + This utility searches through the list of sections extracted from + Apple Music's `serialized-server-data` and returns the first section + whose "id" field contains the provided key substring. + + Parameters + ---------- + sections : list[dict] + List of section dictionaries parsed from Apple Music page data. + key : str + Substring to search for inside the section ID. + + Returns + ------- + dict or None + The matching section dictionary if found, otherwise None. + + Notes + ----- + Apple Music uses structured section IDs such as: + - "artist-detail-header-section" + - "track-list" + - "music-videos" + - "similar-artists" + This function simplifies section lookup and reduces repeated loops and + conditional chains in scraper functions. + """ + for sec in sections: + if key in sec.get("id", ""): + return sec + return None + + +def append_urls_from_section(section, target_list): + """ + Extract URLs from a section and append them to a target list. 
+ + This helper iterates through all items inside a given Apple Music + section, uses `safe_action_url()` to safely extract their URLs, + and appends each valid URL to the provided list. + + Parameters + ---------- + section : dict or None + The section dictionary containing an "items" list. If None, the + function does nothing. + target_list : list + The list to which valid extracted URLs will be appended. + + Returns + ------- + None + This function modifies target_list in-place. + + Notes + ----- + Many Apple Music sections such as: + - top songs + - albums + - playlists + - videos + - similar artists + share the same internal structure. This helper removes code duplication + and ensures unified URL extraction behavior. + """ + if not section: + return + for it in section.get("items", []): + url = safe_action_url(it) + if url: + target_list.append(url) \ No newline at end of file From 1ebd7f222adc5d48e080c7d28ba4d25283067d81 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 18:16:43 +0330 Subject: [PATCH 21/29] remove function call line --- Apple-Music-Scraper/main.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index c0476768cf..7e6a829d6f 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -955,6 +955,3 @@ def test_all_functions(): print("artist_scrape ERROR:", e) print("\n=== ALL TESTS COMPLETED ===") - - -# test_all_functions() \ No newline at end of file From b0537cd512ee442b4166e0d0886e0b61e95d573f Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 18:19:20 +0330 Subject: [PATCH 22/29] add newline at end of file - fix FLK-W292 --- Apple-Music-Scraper/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Apple-Music-Scraper/utils.py b/Apple-Music-Scraper/utils.py index 2a573ff655..b9db7caf57 100644 --- a/Apple-Music-Scraper/utils.py +++ b/Apple-Music-Scraper/utils.py @@ -269,4 +269,4 @@ def 
append_urls_from_section(section, target_list): for it in section.get("items", []): url = safe_action_url(it) if url: - target_list.append(url) \ No newline at end of file + target_list.append(url) From 3cc5053992b5fa9a7265f57bc71f34650e6760b2 Mon Sep 17 00:00:00 2001 From: Abbas Sadeghi Date: Mon, 24 Nov 2025 18:28:19 +0330 Subject: [PATCH 23/29] reduce complexity for video_scrape func --- Apple-Music-Scraper/main.py | 135 ++++++++++----------------------- Apple-Music-Scraper/utils.py | 143 +++++++++++++++++++++++++++++++++++ 2 files changed, 183 insertions(+), 95 deletions(-) diff --git a/Apple-Music-Scraper/main.py b/Apple-Music-Scraper/main.py index 7e6a829d6f..416c248aed 100644 --- a/Apple-Music-Scraper/main.py +++ b/Apple-Music-Scraper/main.py @@ -3,6 +3,8 @@ import json from utils import convert_album_to_song_url, get_cover from utils import safe_action_url, find_section, append_urls_from_section +from utils import fetch_page, parse_server_data, extract_header_sections +from utils import extract_video_header, extract_video_url, extract_urls def room_scrape(link="https://music.apple.com/us/room/6748797380"): @@ -623,104 +625,47 @@ def video_scrape( ----- Uses JSON-LD block `schema:music-video` to extract the direct video content URL. 
""" - result = { - "title": "", - "image": "", - "artist": {"title": "", "url": ""}, - "video-url": "", - "more": [], - "similar": [], - } - - headers = {"User-Agent": "Mozilla/5.0"} - - try: - rspn = requests.get(url, headers=headers, timeout=10) - rspn.raise_for_status() - except Exception: - return result - - soup = BeautifulSoup(rspn.text, "html.parser") - tag = soup.find("script", {"id": "serialized-server-data"}) - if not tag: - return result - - try: - data = json.loads(tag.text) - sections = data[0]["data"]["sections"] - except (KeyError, IndexError, json.JSONDecodeError): - return result - - music_video_header = None - more = None - similar = None - - for sec in sections: - sec_id = sec.get("id", "") - if "music-video-header" in sec_id: - music_video_header = sec - elif "more-by-artist" in sec_id: - more = sec - elif "more-in-genre" in sec_id: - similar = sec - - # TITLE - item = (music_video_header or {}).get("items", [{}])[0] - result["title"] = item.get("title", "") - - # IMAGE - try: - artwork = item.get("artwork", {}).get("dictionary", {}) - result["image"] = get_cover( - artwork.get("url", ""), - artwork.get("width", 0), - artwork.get("height", 0), - ) - except Exception: - pass - - # ARTIST - try: - sl = item.get("subtitleLinks", [])[0] - result["artist"]["title"] = sl.get("title", "") - result["artist"]["url"] = ( - sl["segue"]["actionMetrics"] - ["data"][0]["fields"]["actionUrl"] - ) - except Exception: - pass + html = fetch_page(url) + if not html: + return { + "title": "", + "image": "", + "artist": {"title": "", "url": ""}, + "video-url": "", + "more": [], + "similar": [], + } - # VIDEO URL - try: - json_tag = soup.find( - "script", - { - "id": "schema:music-video", - "type": "application/ld+json" - } - ) - schema_data = json.loads(json_tag.string) - result["video-url"] = schema_data["video"]["contentUrl"] - except (AttributeError, KeyError, TypeError, json.JSONDecodeError): - pass + sections = parse_server_data(html) + if not sections: + 
return {} - # MORE BY ARTIST - try: - for m in more.get("items", []): - url = safe_action_url(m) - if url: - result["more"].append(url) - except Exception: - pass + header, more_sec, similar_sec = extract_header_sections(sections) + info = extract_video_header(header) - # SIMILAR - try: - for s in similar.get("items", []): - url = safe_action_url(s) - if url: - result["similar"].append(url) - except Exception: - pass + # Build result + result = { + "title": info["title"], + "image": get_cover( + info["artwork"].get("url", ""), + info["artwork"].get("width", 0), + info["artwork"].get("height", 0), + ), + "artist": { + "title": info["artist_link"].get("title", ""), + "url": ( + info["artist_link"] + .get("segue", {}) + .get("actionMetrics", {}) + .get("data", [{}])[0] + .get("fields", {}) + .get("actionUrl", "") + ), + }, + "video-url": extract_video_url(html), + "more": extract_urls(more_sec), + "similar": extract_urls(similar_sec), + } return result diff --git a/Apple-Music-Scraper/utils.py b/Apple-Music-Scraper/utils.py index b9db7caf57..d1c3c9d788 100644 --- a/Apple-Music-Scraper/utils.py +++ b/Apple-Music-Scraper/utils.py @@ -270,3 +270,146 @@ def append_urls_from_section(section, target_list): url = safe_action_url(it) if url: target_list.append(url) + + +def fetch_page(url): + """ + Fetch the HTML content of a web page. + + Args: + url (str): The target URL to request. + + Returns: + str or None: The text content of the page if the request succeeds, + otherwise None. + """ + headers = {"User-Agent": "Mozilla/5.0"} + try: + rspn = requests.get(url, headers=headers, timeout=10) + rspn.raise_for_status() + return rspn.text + except Exception: + return None + + +def parse_server_data(html): + """ + Parse serialized server data from an Apple Music–like HTML page. + + The function looks for a