# Projet IC05 – Analyse des Top50 Spotify
## Collecte et traitement des données des artistes issus des Top50

### Imports

In [1]:
import requests
import csv
import pandas as pd
from credentials import ClientIDCO, ClientSecretCO

def get_acces_token(client_id, client_secret):
    """Request an access token from the Spotify accounts service.

    The credentials are posted as form fields using the
    ``client_credentials`` grant type.

    Args:
        client_id: Client id of the Spotify application.
        client_secret: Client secret of the Spotify application.

    Returns:
        dict: The decoded JSON body of the response. Callers in this file
        read its ``access_token`` key.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status,
            so a failed authentication is reported here rather than as a
            ``KeyError`` when the token is first used.
        requests.Timeout: If the service does not answer in time.
    """
    url = "https://accounts.spotify.com/api/token"
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    data = {
        "grant_type": "client_credentials",
        "client_id": str(client_id),
        "client_secret": str(client_secret),
    }

    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(url, headers=headers, data=data, timeout=30)
    response.raise_for_status()

    return response.json()

# Fetch the access token used by the API calls further down.
AccessToken = get_acces_token(
    client_id=ClientIDCO,
    client_secret=ClientSecretCO,
)
# Debug helper: print(AccessToken['access_token'])





### Collecte des données sur les artistes

In [3]:
# Mapping between artist ids and countries
def extract_artist_country_mapping(src_file):
    """Build a mapping from artist id to country out of a CSV file.

    The first line of the file is treated as a header and skipped. For every
    other row, the third column is read as a bracketed, comma-separated list
    of artist ids and the last column as the country.

    Args:
        src_file: Path of the CSV file to read (UTF-8).

    Returns:
        dict: ``{artist_id: country}``. When an artist id appears in several
        rows, the country of the last such row wins. Keys keep the order in
        which ids were first seen.
    """
    artist_country_mapping = {}
    # newline='' is what the csv module expects for files it reads.
    with open(src_file, 'r', encoding='utf-8', newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header; an empty file yields {}
        for row in reader:
            # csv.reader yields [] for blank lines; such rows (and any row
            # too short to hold the artists column) carry no usable data.
            if len(row) < 3:
                continue
            country = row[-1]  # "country" column
            artist_ids = row[2].strip('"[]').replace(" ", "").split(",")
            for artist_id in artist_ids:
                # An empty list cell ("[]") splits into a single "" entry.
                if artist_id:
                    artist_country_mapping[artist_id] = country
    return artist_country_mapping

In [4]:
# Write the JSON data to a CSV file
def json_to_csv(json_data, output_filename, artist_country_mapping):
    """Append the artists of a JSON payload to a CSV file.

    The file is opened in append mode; the header row is written only when
    the file is empty, so repeated calls accumulate rows under one header.

    Args:
        json_data: Dict with an ``"artists"`` key holding a list of artist
            objects (dicts). ``None`` entries in that list are skipped.
        output_filename: Path of the CSV file to append to (UTF-8).
        artist_country_mapping: Dict mapping artist id to country; ids that
            are missing from it are written with country ``"Unknown"``.

    Raises:
        KeyError: If ``json_data`` has no ``"artists"`` key.
    """
    fieldnames = ["artists", "followers", "genres", "name", "popularity", "country"]
    with open(output_filename, mode='a+', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        if file.tell() == 0:  # write the header only if the file is empty
            writer.writeheader()

        for artist in json_data["artists"]:
            # The list may contain null entries (e.g. for ids the API could
            # not resolve); there is nothing to write for those.
            if not artist:
                continue

            # The artist id is the last path segment of the Spotify URL.
            full_url = (artist.get("external_urls") or {}).get("spotify", "")
            artist_id = full_url.split("/")[-1] if full_url else ""

            writer.writerow({
                "artists": artist_id,
                "followers": (artist.get("followers") or {}).get("total", 0),
                "genres": ", ".join(artist.get("genres") or []),
                "name": artist.get("name", ""),
                "popularity": artist.get("popularity", 0),
                "country": artist_country_mapping.get(artist_id, "Unknown"),
            })


In [6]:
# Retrieve artist info (name, genres, popularity, followers) with the country
def get_artists_info(AccessToken, src_file, dst_file):
    """Fetch details for every artist of a source CSV and append them to a CSV.

    Artist ids are taken from ``src_file`` through
    ``extract_artist_country_mapping``, de-duplicated, and requested from
    the Spotify ``/v1/artists`` endpoint in batches. Each successful response
    is appended to ``dst_file`` by ``json_to_csv``; a failed batch is
    reported on stdout and the remaining batches are still processed.

    Args:
        AccessToken: Dict holding the bearer token under ``"access_token"``.
        src_file: Path of the CSV file listing the artists per row.
        dst_file: Path of the CSV file the artist rows are appended to.
    """
    # Batch size used by the collection; the Spotify "Get Several Artists"
    # endpoint documents a maximum of 50 ids per request.
    batch_size = 50

    # Build the mapping between artist id and country.
    artist_country_mapping = extract_artist_country_mapping(src_file)

    # The mapping keys are exactly the ids found in the file: already unique
    # and in first-seen order. Empty ids are dropped so that no blank entry
    # ends up in the "ids" query parameter.
    artists_ids_list = [
        artist_id for artist_id in artist_country_mapping if artist_id
    ]

    headers = {"Authorization": 'Bearer ' + AccessToken['access_token']}

    for i in range(0, len(artists_ids_list), batch_size):
        chunk = artists_ids_list[i:i + batch_size]
        response = requests.get(
            url="https://api.spotify.com/v1/artists?ids=" + ','.join(chunk),
            headers=headers,
            timeout=30,  # do not hang forever on a stalled connection
        )
        if response.status_code == 200:
            json_to_csv(response.json(), dst_file, artist_country_mapping)
        else:
            print(f"Erreur lors de la requête pour le lot {i // batch_size + 1}: {response.status_code}")
# (source CSV, destination CSV) pairs, processed in this order: one pair per
# continent file, then the top50_features_avec_USA file.
_ARTIST_COLLECTION_JOBS = (
    ("data/continent_features/Africa.csv", "data/artists/artists_africa.csv"),
    ("data/continent_features/Asia.csv", "data/artists/artists_asia.csv"),
    ("data/continent_features/Central_America.csv", "data/artists/artists_central_america.csv"),
    ("data/continent_features/South_America.csv", "data/artists/artists_south_america.csv"),
    ("data/continent_features/North_America.csv", "data/artists/artists_north_america.csv"),
    ("data/continent_features/Europe.csv", "data/artists/artists_europe.csv"),
    ("data/continent_features/Oceania.csv", "data/artists/artists_oceania.csv"),
    ("data/top50_features_avec_USA.csv", "data/artists/artists.csv"),
)
for _src_path, _dst_path in _ARTIST_COLLECTION_JOBS:
    get_artists_info(AccessToken, _src_path, _dst_path)



### Collecte de tous les artistes liés


### Suppression des artistes liés hors Top50