In [73]:
from rdflib import Graph, Namespace, Literal, RDF, URIRef
from rdflib.namespace import FOAF, DC, XSD

# Définition des namespaces
EX = Namespace("http://example.org/")
SCHEMA = Namespace("http://schema.org/")
MO = Namespace("http://purl.org/ontology/mo/")

# Création du graphe
g = Graph()
g.bind("foaf", FOAF)
g.bind("dc", DC)
g.bind("schema", SCHEMA)
g.bind("mo", MO)
g.bind("ex", EX)

In [74]:
import musicbrainzngs

musicbrainzngs.set_useragent("musicbrainzngs","0.7.0")

In [75]:
artists = []

In [88]:
import requests
import time

HEADERS = {
    "User-Agent": "RapCollabParser/1.0 ( contact@example.org )"
}

BASE_URL_MB = "https://musicbrainz.org/ws/2"

def search_artist(name):
    url = f"{BASE_URL_MB}/artist"
    params = {
        "query": name,
        "fmt": "json",
        "limit": 1
    }
    response = requests.get(url, params=params, headers=HEADERS)
    response.raise_for_status()
    results = response.json()
    if results["artists"]:
        return results["artists"][0]
    return None

def get_artist_releases(mbid):
    url = f"{BASE_URL_MB}/release"
    params = {
        "artist": mbid,
        "inc": "release-groups",
        "fmt": "json",
        "limit": 100
    }
    response = requests.get(url, params=params, headers=HEADERS)
    response.raise_for_status()
    return response.json()["releases"]

def get_artist_details(mbid):
    url = f"{BASE_URL_MB}/artist/{mbid}"
    params = {
        "inc": "aliases",
        "fmt": "json"
    }
    response = requests.get(url, params=params, headers=HEADERS)
    time.sleep(1)  # Respecter les limites de l'API
    response.raise_for_status()
    return response.json()



In [77]:
# Exemple d'utilisation
if __name__ == "__main__":
    name = "Nekfeu"
    artist = search_artist(name)

    if artist:
        mbid = artist["id"]
        print(f"🎤 Artiste trouvé : {artist['name']} (MBID: {mbid})")

        details = get_artist_details(mbid)
        time.sleep(1)  # Respecter les limites de l'API
        releases = get_artist_releases(mbid)
        
        print("Colonnes disponibles dans 'artist':", artist.keys())

        print("Nom complet :", details.get("name"))
        print("Alias :", [alias["name"] for alias in details.get("aliases", [])])
        print("Pays :", details.get("area", {}).get("name"))
        print("Lieu de naissance :", details.get("begin-area", {}).get("name"))
        print("Date de naissance :", details.get("life-span", {}).get("begin"))

        print("\n🎶 Albums / Releases :")
        for r in releases[:10]:  # pour limiter l'affichage
            print("-", r["title"], "(date:", r.get("date", "n/a") + ")")
            
        artists.append(artist)

    else:
        print("Aucun artiste trouvé.")


🎤 Artiste trouvé : Nekfeu (MBID: d721866d-5640-44ee-87f7-23dd062abd8a)
Colonnes disponibles dans 'artist': dict_keys(['id', 'type', 'type-id', 'score', 'gender-id', 'name', 'sort-name', 'gender', 'country', 'area', 'begin-area', 'isnis', 'life-span', 'aliases', 'tags'])
Nom complet : Nekfeu
Alias : ['Ken Samaras', 'Nekfeu', 'Κεν Σαμαράς']
Pays : France
Lieu de naissance : La Trinité
Date de naissance : 1990-04-03

🎶 Albums / Releases :
- Cyborg (date: 2016-12-02)
- Au coeur du G (date: 2019-10-11)
- Feu (date: 2015-06-08)
- En sous-marin (date: 2011-10-11)
- Expansion (date: 2019-06-21)
- Les Étoiles vagabondes / Expansion (date: 2019-06-21)
- Nique les clones (date: 2011-12-15)
- Feu (date: 2015-06-08)
- Expansion (date: 2019-06-21)
- Cyborg (date: 2016-12-02)


In [78]:
# === Construction RDF

# Artistes
for artist in artists:
    mbid = artist["id"]
    details = get_artist_details(mbid)
    
    artist_uri = EX[artist["id"]]
    g.add((artist_uri, RDF.type, MO.MusicArtist))
    g.add((artist_uri, FOAF.name, Literal(details.get("name"))))
    g.add((artist_uri, SCHEMA.birthPlace, Literal(details.get("begin-area", {}).get("name"))))
    birth_date = details.get("life-span", {}).get("begin")
    if birth_date: g.add((artist_uri, SCHEMA.birthDate, Literal(birth_date, datatype=XSD.date)))


    # releases = get_artist_releases(artist["id"])

    # for release in releases:
    #     album_id = release["id"]
    #     album_title = release.get("title")
        
    #     album_uri = EX[album_id]
        
    #     # Lien entre artiste et album
    #     g.add((artist_uri, MO.produced, album_uri))
    #     g.add((artist_uri, MO.published_album, album_uri))
        
    #     # Déclaration du nœud album et ajout de son titre
    #     g.add((album_uri, RDF.type, MO.Release))
    #     if album_title:
    #         g.add((album_uri, DC.title, Literal(album_title)))

    # for song_id in artist["songs"]:
    #     g.add((artist_uri, MO.produced, EX[song_id]))

    # for feat_id in artist["features"]:
    #     g.add((artist_uri, MO.featured_on, EX[feat_id]))

# Musiques
# for song_id, song in songs.items():
#     song_uri = EX[song_id]
#     g.add((song_uri, RDF.type, MO.MusicalWork))
#     g.add((song_uri, DC.title, Literal(song["title"])))
#     g.add((song_uri, MO.performer, EX[song["author"]]))
#     g.add((song_uri, DC.date, Literal(song["release_date"], datatype=XSD.date)))

# # Albums
# for album_id, album in albums.items():
#     album_uri = EX[album_id]
#     g.add((album_uri, RDF.type, MO.Release))
#     g.add((album_uri, DC.title, Literal(album["title"])))
#     g.add((album_uri, MO.producer, EX[album["author"]]))
#     g.add((album_uri, DC.date, Literal(album["release_date"], datatype=XSD.date)))

# === Export en Turtle
g.serialize("music_graph.ttl", format="turtle")
print("✅ RDF exporté dans 'music_graph.ttl'")


✅ RDF exporté dans 'music_graph.ttl'


In [None]:
def search_french_rappers(limit=100, offset=0):
    url = f"{BASE_URL_MB}/artist"
    params = {
        "query": '(tag:"rap" OR tag:"hip hop" OR tag:"trap") AND country:FR',
        "fmt": "json",
        "limit": limit,
        "offset": offset
    }
    response = requests.get(url, params=params, headers=HEADERS)
    time.sleep(0.5)  # Respect API rate limits
    response.raise_for_status()
    return response.json().get("artists", [])

In [None]:
def extract_artist_names(limit=100, offset=0):
    url = f"{BASE_URL_MB}/artist"
    query = '(tag:"rap" OR tag:"hip hop" OR tag:"trap" OR tag:"cloud rap") AND country:FR'
    params = {
        "query": query,
        "fmt": "json",
        "limit": limit,
        "offset": offset
    }
    response = requests.get(url, params=params, headers=HEADERS)
    time.sleep(0.5)  # Respecter les limites d’usage
    response.raise_for_status()
    
    artists = response.json().get("artists", [])
    return [artist["name"] for artist in artists]


In [81]:
import string

def collect_french_rappers_by_letter():
    all_artists = set()

    for letter in string.ascii_lowercase:
        query = f'artist:{letter}* AND (tag:"rap" OR tag:"hip hop" OR tag:"trap" OR "cloud rap" ) AND (country:FR OR country:BE)'
        params = {
            "query": query,
            "fmt": "json",
            "limit": 100
        }

        response = requests.get(f"{BASE_URL_MB}/artist", params=params, headers=HEADERS)
        time.sleep(1)
        response.raise_for_status()
        results = response.json().get("artists", [])
        
        for artist in results:
            name = artist.get("name")
            if name:
                all_artists.add((name, artist.get("id")))

        print(f"✅ {len(results)} artistes récupérés pour '{letter.upper()}*'")

    return list(all_artists)


In [82]:
all_french_rappers = collect_french_rappers_by_letter()

✅ 48 artistes récupérés pour 'A*'
✅ 60 artistes récupérés pour 'B*'
✅ 44 artistes récupérés pour 'C*'
✅ 77 artistes récupérés pour 'D*'
✅ 25 artistes récupérés pour 'E*'
✅ 32 artistes récupérés pour 'F*'
✅ 21 artistes récupérés pour 'G*'
✅ 13 artistes récupérés pour 'H*'
✅ 14 artistes récupérés pour 'I*'
✅ 22 artistes récupérés pour 'J*'
✅ 31 artistes récupérés pour 'K*'
✅ 70 artistes récupérés pour 'L*'
✅ 59 artistes récupérés pour 'M*'
✅ 41 artistes récupérés pour 'N*'
✅ 17 artistes récupérés pour 'O*'
✅ 45 artistes récupérés pour 'P*'
✅ 2 artistes récupérés pour 'Q*'
✅ 26 artistes récupérés pour 'R*'
✅ 90 artistes récupérés pour 'S*'
✅ 31 artistes récupérés pour 'T*'
✅ 5 artistes récupérés pour 'U*'
✅ 11 artistes récupérés pour 'V*'
✅ 8 artistes récupérés pour 'W*'
✅ 1 artistes récupérés pour 'X*'
✅ 11 artistes récupérés pour 'Y*'
✅ 3 artistes récupérés pour 'Z*'


In [83]:
print(len(all_french_rappers))

def find_name_duplicates(artist_tuples):
    name_count = {}
    for name, _ in artist_tuples:
        name_count[name] = name_count.get(name, 0) + 1
    return [name for name, count in name_count.items() if count > 1]

find_name_duplicates(all_french_rappers)  # Trouver les doublons de noms

523


[]

La liste obtenue est sans doublons !! Il reste à filtrer plus finement sur le genre et sur le nombre d'auditeurs

In [84]:
import jellyfish
import unicodedata

def normalize_string(s):
    # Supprime les accents, met en minuscules et enlève les caractères non alphanumériques
    s = s.lower()
    s = unicodedata.normalize('NFD', s)
    s = ''.join(c for c in s if unicodedata.category(c) != 'Mn')  # enlève les diacritiques
    return s

def find_similar_names(artist_tuples, name):
    similar_names = []
    for artist in artist_tuples:
        name = normalize_string(name)
        artist_name = normalize_string(artist[0])
        similarity = jellyfish.jaro_winkler_similarity(artist_name, name)
        if similarity > 0.8:
            similar_names.append((artist, similarity))
    return similar_names

def find_artist(artist_tuples, name):
    for artist in artist_tuples:
        if jellyfish.jaro_winkler_similarity(artist[0], name) > 0.8:
            return artist, jellyfish.jaro_winkler_similarity(artist[0], name)
    return None

In [85]:
find_similar_names(all_french_rappers, "")  # Trouver un artiste par son nom

[]

# Tests avec l'API last.fm

In [None]:
import os
from dotenv import load_dotenv
import requests

# Charge le fichier .env (ou .md) à la racine du projet
load_dotenv("API.md")

# Récupère la clé
API_KEY = os.getenv("LASTFM_API_KEY")

if not API_KEY:
    raise ValueError("Clé API Last.fm non trouvée")

BASE_URL_LFM = "http://ws.audioscrobbler.com/2.0/"

def get_similar_artists(artist_name, limit=25):
    params = {
        "method": "artist.getsimilar",
        "artist": artist_name,
        "api_key": API_KEY,
        "format": "json",
        "limit": limit
    }
    response = requests.get(BASE_URL_LFM, params=params)
    response.raise_for_status()
    return response.json()

def get_artist_mbid(artist_name):
    params = {
        "method": "artist.getInfo",
        "artist": artist_name,
        "api_key": API_KEY,
        "format": "json"
    }

    response = requests.get(BASE_URL_LFM, params=params)
    response.raise_for_status()
    data = response.json()

    if "artist" in data and "mbid" in data["artist"]:
        return data["artist"]["mbid"]
    return None

def get_artist_by_mbid(mbid):
    url = f"{BASE_URL_MB}/artist/{mbid}"
    params = {
        "fmt": "json",
        "inc": "aliases"
    }
    response = requests.get(url, headers=HEADERS, params=params)
    time.sleep(1)  # Pour respecter les limites de l'API
    response.raise_for_status()
    return response.json()

In [None]:
artist = "Nekfeu"
similar_artists = get_similar_artists(artist, 25) # ATTENTION : 250 est la limite de l'API Last.fm
mbid = get_artist_mbid(artist)

print(f"🎵 Artistes similaires à {artist} :")
for similar in similar_artists["similarartists"]["artist"]:
    print("-", similar["name"], f"({float(similar['match']):.2f})")  # score de similarité

{'id': 'd721866d-5640-44ee-87f7-23dd062abd8a', 'life-span': {'end': None, 'begin': '1990-04-03', 'ended': False}, 'end-area': None, 'country': 'FR', 'type': 'Person', 'isnis': ['0000000407569240', '0000000451231028'], 'name': 'Nekfeu', 'gender': 'Male', 'type-id': 'b6e035f4-3ce9-331c-97df-83397230b0df', 'begin-area': {'disambiguation': '', 'type-id': None, 'type': None, 'sort-name': 'La Trinité', 'id': '1f7dde23-c6ce-4d08-8e72-e435e7d7197b', 'name': 'La Trinité'}, 'area': {'type-id': None, 'type': None, 'sort-name': 'France', 'name': 'France', 'disambiguation': '', 'id': '08310658-51eb-3801-80de-5a0739207115', 'iso-3166-1-codes': ['FR']}, 'gender-id': '36d3d30a-839d-3eda-8cb3-29be4384e4a9', 'disambiguation': '', 'sort-name': 'Nekfeu', 'aliases': [{'sort-name': 'Samaras, Ken', 'type': 'Legal name', 'name': 'Ken Samaras', 'end': None, 'locale': None, 'type-id': 'd4dcd0c0-b341-3612-a332-c0ce797b25cf', 'begin': None, 'ended': False, 'primary': None}, {'ended': False, 'primary': None, 'name