In [3]:
import time  # Pour introduire des pauses entre les requêtes

GENIUS_API_TOKEN = "XXX"
genius = lg.Genius(GENIUS_API_TOKEN)

def get_artist_id(artist_name):
    """Récupère l'ID d'un artiste en utilisant l'API Genius."""
    search_url = f"https://api.genius.com/search?q={artist_name}"
    headers = {'Authorization': 'Bearer ' + GENIUS_API_TOKEN}

    try:
        response = requests.get(search_url, headers=headers)
        response.raise_for_status()  # Vérifie les erreurs HTTP
        data = response.json()

        if 'response' in data and data['response']['hits']:
            return data['response']['hits'][0]['result']['primary_artist']['id']

        print(f"Aucune donnée trouvée pour l'artiste: {artist_name}")
        return None
    except Exception as e:
        print(f"Erreur lors de la récupération de l'ID pour {artist_name}: {e}")
        return None

def get_songs(artist_id, max_songs=100):
    """Récupère les titres des chansons d'un artiste donné en les triant par popularité."""
    timeout_duration = 20
    songs = []
    page = 1
    
    # Paramètre pour trier par popularité
    sort_by = "popularity"  # Peut être "title" si tu veux trier par ordre alphabétique des titres
    
    while True:
        url = f"https://api.genius.com/artists/{artist_id}/songs?page={page}&sort={sort_by}"
        headers = {'Authorization': 'Bearer ' + GENIUS_API_TOKEN}
        
        try:
            response = requests.get(url, headers=headers, timeout=timeout_duration)
            response.raise_for_status()  # Vérifie les erreurs HTTP
            songs_data = response.json()['response']['songs']
        except requests.exceptions.Timeout:
            print(f"Requête timeout pour la page {page}, tentative de nouvelle requête...")
            continue
        except requests.exceptions.RequestException as e:
            print(f"Erreur de connexion : {e}")
            break

        if not songs_data:
            break

        for song in songs_data:
            if len(songs) >= max_songs:
                return songs
            songs.append(song['title'])

        page += 1
        time.sleep(1)  # Pause entre les requêtes pour éviter les blocages API
    return songs

def download_lyrics(song_title, artist_name):
    """Télécharge les paroles d'une chanson donnée."""
    try:
        song = genius.search_song(song_title, artist_name)
        if song:
            return song.lyrics
        else:
            print(f"Paroles non trouvées pour {song_title}")
            return None
    except Exception as e:
        print(f"Erreur lors du téléchargement des paroles de {song_title}: {e}")
        return None

def all_artist_songs(artist_to_scrape):
    """Télécharge les paroles de toutes les chansons pour chaque artiste et les stocke dans un fichier par artiste."""
    for artist in artist_to_scrape:
        artist_id = get_artist_id(artist)
        if artist_id:
            songs = get_songs(artist_id)
            if songs:
                os.makedirs(artist, exist_ok=True)
                artist_lyrics = []

                for song in songs:
                    lyrics = download_lyrics(song, artist)
                    if lyrics:
                        artist_lyrics.append(f"{song}\n{lyrics}\n")
                        print(f"Téléchargé : {song} de {artist}")
                    else:
                        print(f"Paroles non trouvées pour {song} de {artist}")

                with open(f"{artist}/{artist}_songs.txt", 'w', encoding='utf-8') as file:
                    file.write("\n".join(artist_lyrics))
                    print(f"Fichier créé : {artist}/{artist}_songs.txt")
            else:
                print(f"Aucune chanson trouvée pour {artist}")
        else:
            print(f"ID d'artiste non trouvé pour {artist}")

# Liste d'artistes à scraper
artist_to_scrape = ['KANYE WEST']

# Appeler la fonction pour télécharger les paroles
all_artist_songs(artist_to_scrape)

Searching for "Mercy" by KANYE WEST...
Done.
Téléchargé : Mercy de KANYE WEST
Searching for "Niggas in Paris" by KANYE WEST...
Done.
Téléchargé : Niggas in Paris de KANYE WEST
Searching for "Panda" by KANYE WEST...
Done.
Téléchargé : Panda de KANYE WEST
Searching for "Monster" by KANYE WEST...
Done.
Téléchargé : Monster de KANYE WEST
Searching for "Father Stretch My Hands Pt. 1" by KANYE WEST...
Done.
Téléchargé : Father Stretch My Hands Pt. 1 de KANYE WEST
Searching for "Ultralight Beam" by KANYE WEST...
Done.
Téléchargé : Ultralight Beam de KANYE WEST
Searching for "Bound 2" by KANYE WEST...
Done.
Téléchargé : Bound 2 de KANYE WEST
Searching for "Clique" by KANYE WEST...
Done.
Téléchargé : Clique de KANYE WEST
Searching for "I Don’t Fuck with You" by KANYE WEST...
Done.
Téléchargé : I Don’t Fuck with You de KANYE WEST
Searching for "I Love It" by KANYE WEST...
Done.
Téléchargé : I Love It de KANYE WEST
Searching for "No Church in the Wild" by KANYE WEST...
Done.
Téléchargé : No Churc

Done.
Téléchargé : Takeover de KANYE WEST
Searching for "Wouldn’t Leave" by KANYE WEST...
Done.
Téléchargé : Wouldn’t Leave de KANYE WEST
Searching for "Lollipop (Remix)" by KANYE WEST...
Done.
Téléchargé : Lollipop (Remix) de KANYE WEST
Searching for "Two Birds, One Stone" by KANYE WEST...
Done.
Téléchargé : Two Birds, One Stone de KANYE WEST
Searching for "Pop Style" by KANYE WEST...
Done.
Téléchargé : Pop Style de KANYE WEST
Searching for "Reborn" by KANYE WEST...
Done.
Téléchargé : Reborn de KANYE WEST
Searching for "Moon" by KANYE WEST...
Done.
Téléchargé : Moon de KANYE WEST
Searching for "Follow God" by KANYE WEST...
Done.
Téléchargé : Follow God de KANYE WEST
Searching for "SMUCKERS" by KANYE WEST...
Done.
Téléchargé : SMUCKERS de KANYE WEST
Searching for "Closed on Sunday" by KANYE WEST...
Done.
Téléchargé : Closed on Sunday de KANYE WEST
Searching for "SKELETONS" by KANYE WEST...
Done.
Téléchargé : SKELETONS de KANYE WEST
Searching for "Eazy" by KANYE WEST...
Done.
Téléchargé

In [5]:
import os

# Load the lyrics from the saved file
def load_lyrics(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        lyrics = f.read()
    return lyrics

artist_name = "KANYE WEST"
lyrics_file = f"{artist_name}/{artist_name}_songs.txt"
lyrics = load_lyrics(lyrics_file)

In [7]:
## Inutile ?
from transformers import GPT2Tokenizer

# Load GPT-2 tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Tokenize the lyrics
lyrics_tokens = tokenizer(lyrics, return_tensors='pt', max_length=1024, truncation=True)



In [13]:
import torch
from torch.utils.data import Dataset

class LyricsDataset(Dataset):
    def __init__(self, lyrics_tokens, block_size):
        self.input_ids = lyrics_tokens['input_ids']
        self.attention_mask = lyrics_tokens['attention_mask']
        self.block_size = block_size

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, i):
        input_ids = self.input_ids[i][:self.block_size]
        attention_mask = self.attention_mask[i][:self.block_size]

        # Shift input_ids by one position to create labels
        labels = input_ids.clone()
        
        # Return input_ids, attention_mask, and labels for the training step
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': labels
        }

# Define block size (the maximum sequence length for each input)
block_size = 1024

# Prepare the dataset
train_dataset = LyricsDataset(lyrics_tokens, block_size)

In [14]:
from transformers import Trainer, TrainingArguments

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',          
    overwrite_output_dir=True,       
    num_train_epochs=3,              
    per_device_train_batch_size=1,   
    save_steps=500,                  
    save_total_limit=2,              
)

# Initialize Trainer with the updated dataset containing labels
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset
)

# Fine-tune the model
trainer.train()

Step,Training Loss


TrainOutput(global_step=3, training_loss=2.4951213200887046, metrics={'train_runtime': 22.0621, 'train_samples_per_second': 0.136, 'train_steps_per_second': 0.136, 'total_flos': 1567752192000.0, 'train_loss': 2.4951213200887046, 'epoch': 3.0})

In [16]:
# Texte de départ pour générer une nouvelle chanson
prompt = "I miss the old Kanye"  # Vous pouvez changer ça

# Tokenizer le prompt
input_ids = tokenizer.encode(prompt, return_tensors="pt")

# Générer du texte à partir du modèle fine-tuné
output = model.generate(
    input_ids,
    max_length=200,            # Nombre maximum de tokens générés
    num_return_sequences=1,    # Nombre de séquences à générer
    no_repeat_ngram_size=2,    # Éviter de répéter des phrases identiques
    do_sample=True,            # Activer l'échantillonnage pour rendre la génération plus créative
    top_k=50,                  # Paramètre top-k sampling
    top_p=0.95,                # Paramètre top-p nucleus sampling
    temperature=0.7            # Contrôler la "créativité" du modèle (0.7 est une valeur équilibrée)
)

# Décoder la sortie générée
generated_lyrics = tokenizer.decode(output[0], skip_special_tokens=True)

# Afficher les paroles générées
print(generated_lyrics)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


I miss the old Kanye West.

It's hard to take a job that's been good for me for so long, but it's an old one. And I can't believe how hard it is to get back. But it can be a long road, and I'm going to do it. It's long enough to make me a millionaire, as long as it takes to earn that. (Laughs.)
. I got a lot of people to be in on my ideas and ideas, so that was the end of my career. He was trying to put me in his place. His name was Jay Z. The first thing I did was like, "I got this idea. This." I was thinking, 'Is this a good idea? Is this going too far?' And he was, like 'I think I'll make it work.' I just kept trying. If I don't, I'd do this. They're going crazy. Why? Because this is the only
