In [None]:
import requests
import pandas as pd
import re
from collections import defaultdict  

def extract_album_id(album_link):
    """Pull the album id out of an album URL.

    The id is the run of word characters / hyphens that follows a slash and
    is immediately followed by ``.html``, somewhere after ``/album/``.
    A falsy ``album_link`` (``None``, ``""``) is searched as an empty string.

    Returns the captured id, or ``"N/A"`` when the link does not match.
    """
    link_text = album_link if album_link else ""
    found = re.search(r'/album/.*?/([\w-]+)\.html', link_text)
    if found is None:
        return "N/A"
    return found.group(1)

def determine_album_type(track_count):
    """Classify a release by its number of tracks.

    Returns ``"Single"`` for 3 tracks or fewer, ``"EP"`` for 4 to 6 tracks
    (inclusive) and ``"Regular"`` for anything else.
    """
    if track_count <= 3:
        return "Single"
    return "EP" if 4 <= track_count <= 6 else "Regular"

def fetch_artist_songs(
    artist_name,
    *,
    base_url="http://localhost:5000/api/artistsongs",
    timeout=30,
):
    """Fetch an artist's songs from the API and flatten them to one row per track.

    Args:
        artist_name: Value sent as the ``name`` query parameter.
        base_url: Endpoint to query; defaults to the local server.
        timeout: Seconds to wait for the server before ``requests`` raises
            instead of blocking indefinitely.

    Returns:
        A DataFrame with the columns ``album_id``, ``album_name``,
        ``tracklist`` (the track title), ``release_date``, ``provided_by``,
        ``featured_artists``, ``album_artist``, ``ZingMP3`` (the track link)
        and ``album_type``, de-duplicated on (``album_id``, ``tracklist``)
        and sorted by album name then track title. The frame is empty, but
        still has these columns, when the API reports no songs.
        ``None`` (after printing the status code) when the response status
        is not 200.
    """
    # Passing the name through ``params`` lets requests URL-encode it, so
    # names containing "&", "#" or "+" no longer corrupt the query string.
    response = requests.get(base_url, params={"name": artist_name}, timeout=timeout)

    if response.status_code != 200:
        print("Error fetching data:", response.status_code)
        return None

    # ``or []`` also covers an explicit ``"songs": null`` in the payload.
    songs = response.json().get("songs") or []

    columns = [
        "album_id",
        "album_name",
        "tracklist",
        "release_date",
        "provided_by",
        "featured_artists",
        "album_artist",
        "ZingMP3",
        "album_type",
    ]
    records = []
    group_keys = []  # group_keys[i] is the album grouping key of records[i]
    album_tracks = defaultdict(set)  # unique track titles seen per album group

    for song in songs:
        album_id = extract_album_id(song.get("albumLink", "") or "")
        # Fall back to the song title when the album is missing, null or empty.
        album_name = song.get("album") or song.get("title") or "N/A"
        # A song without a track list is treated as its own single track.
        tracklist = song.get("tracklist") or [
            {"title": song.get("title", "N/A"), "link": song.get("link", "N/A")}
        ]

        # Every song without a parsable album link gets the id "N/A". Count
        # those per album name so unrelated standalone songs are not lumped
        # into one oversized pseudo-album and mislabelled as EP/Regular.
        group_key = (album_id, album_name if album_id == "N/A" else None)

        for track in tracklist:
            track_title = track.get("title", "N/A")
            # A set keeps repeated titles from inflating the track count.
            album_tracks[group_key].add(track_title)
            group_keys.append(group_key)

            records.append({
                "album_id": album_id,
                "album_name": album_name,
                "tracklist": track_title,
                "release_date": song.get("releaseDate", "Unknown"),
                "provided_by": song.get("providedBy", "Unknown"),
                "featured_artists": song.get("featuredArtists", "Unknown"),
                "album_artist": song.get("albumOwner", "Unknown"),
                "ZingMP3": track.get("link", "N/A"),
            })

    # Classify each row by the number of distinct tracks in its album group;
    # this must wait until every song has been seen.
    for record, group_key in zip(records, group_keys):
        record["album_type"] = determine_album_type(len(album_tracks[group_key]))

    # Explicit columns keep the frame well-formed when there are no records;
    # otherwise drop_duplicates/sort_values would raise KeyError.
    df = pd.DataFrame(records, columns=columns)
    df = df.drop_duplicates(subset=["album_id", "tracklist"])  # avoid duplicate songs
    df = df.sort_values(by=["album_name", "tracklist"], ascending=[True, True])

    return df

# Export one artist's songs to an Excel workbook and preview the first rows.
artist_name = 'Rapper-Ngan'
df = fetch_artist_songs(artist_name)
# fetch_artist_songs returns None on a non-200 response; fail with a clear
# message here rather than an AttributeError on the next line.
if df is None:
    raise RuntimeError(f"Could not fetch songs for artist {artist_name!r}")
df.to_excel(f'{artist_name}_songZingMP3.xlsx', index=False)
df.head()
