# Dataset / Imports:

In [None]:
# Mount Google Drive under /content/drive/ (Colab only); every path used below lives there.
from google.colab import drive
drive.mount('/content/drive/')

## Libraries


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.decomposition import PCA
import warnings
import librosa
import librosa.display
import numpy as np
import glob
import os

# Report the versions of the key numerical/audio libraries used by this run.
print("Versions des librairies importantes :")
print(f"  Librosa:      {librosa.__version__}")
print(f"  NumPy:        {np.__version__}")

## Dataset

In [None]:
# Root folder of the audio dataset on the mounted Drive.
dataset_path = "/content/drive/MyDrive/Artishow/Data/genres_original"

# Entries found directly under the dataset root.
subfolders = os.listdir(dataset_path)
print("Sous-dossiers:", subfolders)

# Recursively collect every .wav file; `audio_files` is consumed by later cells.
audio_files = glob.glob(f"{dataset_path}/**/*.wav", recursive=True)
print(f"Nombre de fichiers audio: {len(audio_files)}")

# PHASE 1 : Classificateurs de genres - Genre Classifiers


## Approche 1 : Machine Learning Classifiers

### Extraction des features


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import joblib
import os
import zipfile
from IPython.display import FileLink

# Module-level accumulators: extract_features() appends one entry to each per call.
X = [] # feature vectors
Y = [] # labels

def extract_features(file_path, duration=30, sr=22050):
    """Extract a summary feature vector from one audio file and record it.

    The vector holds, for each frame-level librosa feature (MFCC, RMS,
    spectral centroid, bandwidth, contrast, flatness, roll-off, tonnetz,
    zero-crossing rate), the per-row means followed by the per-row standard
    deviations, and finally the estimated tempo.

    Side effects: appends the vector to the module-level list ``X`` and the
    label (text before the first '.' of the file's base name) to ``Y``.
    Nothing is returned.

    Args:
        file_path: Path of the audio file to analyse.
        duration: Maximum number of seconds loaded from the file.
        sr: Sampling rate passed to ``librosa.load``.
    """
    y, sr = librosa.load(file_path, sr=sr, duration=duration)

    def stats(feature):
        # Mean and standard deviation along the frame axis, as plain floats.
        return list(map(float, np.mean(feature, axis=1))) + list(map(float, np.std(feature, axis=1)))

    # Frame-level features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    print(len(mfcc[0]))  # number of analysis frames
    rms = librosa.feature.rms(y=y)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    flatness = librosa.feature.spectral_flatness(y=y)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
    zero_crossing = librosa.feature.zero_crossing_rate(y=y)
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

    # Build the feature vector
    features = []
    for f in [mfcc, rms, spectral_centroid, bandwidth, contrast, flatness, rolloff, tonnetz, zero_crossing]:
        features.extend(stats(f))
    # Recent librosa versions return the tempo as a 1-element array; take the
    # element explicitly instead of relying on the deprecated implicit
    # array-to-scalar conversion done by float().
    features.append(float(np.ravel(tempo)[0]))

    genre = (os.path.splitext(os.path.basename(file_path))[0]).split('.')[0]
    print("Genre connu : " + genre)
    print("")

    X.append(features)
    Y.append(genre)

# Extract the features of every audio file (fills X and Y), printing a running counter.
i = 0
for file in audio_files:
    i+=1
    print(i)
    extract_features(file)

# Assemble the feature table and encode the genre names as integer codes.
df = pd.DataFrame(X)
df['genre'] = Y
df['genre'] = df['genre'].astype('category')
genre_mapping = dict(enumerate(df['genre'].cat.categories))  # code -> genre name
df['genre'] = df['genre'].astype('category').cat.codes

# Stratified 80/20 split on the encoded label column.
X_train, X_test, Y_train, Y_test = train_test_split(
    df.drop(columns=['genre']),
    df['genre'],
    test_size=0.2,
    random_state=42,
    stratify=df['genre'],
)
print(Y_train.tolist())
print(Y_test.tolist())

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML'
# Create the target folder if needed; joblib.dump does not create missing directories.
os.makedirs(save_dir, exist_ok=True)

# Save the artefacts (note: X_train / X_test are stored already standardised)
joblib.dump(X_train, os.path.join(save_dir, "X_train.pkl"))
joblib.dump(X_test, os.path.join(save_dir, "X_test.pkl"))
joblib.dump(Y_train, os.path.join(save_dir, "Y_train.pkl"))
joblib.dump(Y_test, os.path.join(save_dir, "Y_test.pkl"))
joblib.dump(scaler, os.path.join(save_dir, "scaler.pkl"))
joblib.dump(genre_mapping, os.path.join(save_dir, "genre_mapping.pkl"))

files_to_zip = [
    "X_train.pkl",
    "X_test.pkl",
    "Y_train.pkl",
    "Y_test.pkl",
    "scaler.pkl",
    "genre_mapping.pkl"
]

zip_filename = "features_data.zip"
zip_path = os.path.join(save_dir, zip_filename)

# Bundle the saved artefacts into a single archive next to them.
with zipfile.ZipFile(zip_path, "w") as zipf:
    for filename in files_to_zip:
        file_path = os.path.join(save_dir, filename)
        zipf.write(file_path, arcname=filename)

# Download link
display(FileLink(zip_path))

In [None]:
# Stocker les features/labels

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import joblib
import os
import zipfile
from IPython.display import FileLink

# Module-level accumulators: extract_features() appends one entry to each per call.
X = [] # feature vectors
Y = [] # labels

def extract_features(file_path, duration=30, sr=22050):
    """Extract a summary feature vector from one audio file and record it.

    The vector holds, for each frame-level librosa feature (MFCC, RMS,
    spectral centroid, bandwidth, contrast, flatness, roll-off, tonnetz,
    zero-crossing rate), the per-row means followed by the per-row standard
    deviations, and finally the estimated tempo.

    Side effects: appends the vector to the module-level list ``X`` and the
    label (text before the first '.' of the file's base name) to ``Y``.
    Nothing is returned.

    Args:
        file_path: Path of the audio file to analyse.
        duration: Maximum number of seconds loaded from the file.
        sr: Sampling rate passed to ``librosa.load``.
    """
    y, sr = librosa.load(file_path, sr=sr, duration=duration)

    def stats(feature):
        # Mean and standard deviation along the frame axis, as plain floats.
        return list(map(float, np.mean(feature, axis=1))) + list(map(float, np.std(feature, axis=1)))

    # Frame-level features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    print(len(mfcc[0]))  # number of analysis frames
    rms = librosa.feature.rms(y=y)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    flatness = librosa.feature.spectral_flatness(y=y)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
    zero_crossing = librosa.feature.zero_crossing_rate(y=y)
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

    # Build the feature vector
    features = []
    for f in [mfcc, rms, spectral_centroid, bandwidth, contrast, flatness, rolloff, tonnetz, zero_crossing]:
        features.extend(stats(f))
    # Recent librosa versions return the tempo as a 1-element array; take the
    # element explicitly instead of relying on the deprecated implicit
    # array-to-scalar conversion done by float().
    features.append(float(np.ravel(tempo)[0]))

    genre = (os.path.splitext(os.path.basename(file_path))[0]).split('.')[0]
    print("Genre connu : " + genre)
    print("")

    X.append(features)
    Y.append(genre)

### Fichiers Préentrainés

In [None]:
# Fichiers Préentrainés (GTZAN)
import joblib
import os

# Load the train/test splits from the same paths the feature-extraction cell writes to.
# joblib.load unpickles its input: only load files you trust.
X_train = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/X_train.pkl')
Y_train = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/Y_train.pkl')
X_test = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/X_test.pkl')
Y_test = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/Y_test.pkl')
# Integer code -> genre name, hard-coded here rather than read back from genre_mapping.pkl.
genre_mapping = {0: 'blues', 1: 'classical', 2: 'country', 3: 'disco', 4: 'hiphop', 5: 'jazz', 6: 'metal', 7: 'pop', 8: 'reggae', 9: 'rock'}

print(Y_train)
print(genre_mapping)

In [None]:
# Fichiers Préentrainés (MTG-JAMENDO)
import joblib
import os

# Load the mood train/test splits from the ML_moods folder.
# NOTE(review): the code that produced these files is not visible here; presumably they were
# built with the same feature pipeline and scaling as the genre data — confirm.
# joblib.load unpickles its input: only load files you trust.
X_train_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/X_train.pkl')
Y_train_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/Y_train.pkl')
X_test_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/X_test.pkl')
Y_test_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/Y_test.pkl')
# Integer code -> mood name (hard-coded).
mood_mapping = {0: 'dark', 1: 'deep', 2: 'dream', 3: 'emotional', 4: 'epic', 5: 'happy', 6: 'motivational', 7: 'relaxing', 8: 'romantic', 9: 'sad'}

print(Y_train_mood)
print(mood_mapping)

### Classifiers

In [None]:
#SVM

from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score


# Genre: train a linear SVM, save it, then report predictions and accuracy on the test split.
model = SVC(kernel='linear')
model.fit(X_train, Y_train)
save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML'
joblib.dump(model, os.path.join(save_dir, "svm_model.pkl"))

pred = model.predict(X_test)
print("Prédiction (Genre) : " + str(pred))

accuracy = accuracy_score(Y_test, pred)
print(f'Précision (Genre) : {accuracy:.2%}')


# Mood: same procedure on the mood data; note that `save_dir` and `accuracy` are rebound here.
model_mood = SVC(kernel='linear')
model_mood.fit(X_train_mood, Y_train_mood)
save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML_moods'
joblib.dump(model_mood, os.path.join(save_dir, "svm_model_mood.pkl"))

pred_mood = model_mood.predict(X_test_mood)
print("Prédiction (Mood) : " + str(pred_mood))

accuracy = accuracy_score(Y_test_mood, pred_mood)
print(f'Précision (Mood) : {accuracy:.2%}')




In [None]:
#LogisticRegression

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Genre: train a logistic regression, save it, then report predictions and accuracy.
model = LogisticRegression()
model.fit(X_train, Y_train)

pred = model.predict(X_test)
save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML'
joblib.dump(model, os.path.join(save_dir, "LogisticRegModel.pkl"))

print("Prédiction (Genre) : " + str(pred))
accuracy = accuracy_score(Y_test, pred)
print(f'Précision (Genre) : {accuracy:.2%}')


# Mood: same procedure on the mood data (the output is labelled "Mood",
# not "Genre", so the two result blocks can be told apart).
model_mood = LogisticRegression()
model_mood.fit(X_train_mood, Y_train_mood)

pred_mood = model_mood.predict(X_test_mood)
save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML_moods'
joblib.dump(model_mood, os.path.join(save_dir, "LogisticRegModel_mood.pkl"))

print("Prédiction (Mood) : " + str(pred_mood))
accuracy = accuracy_score(Y_test_mood, pred_mood)
print(f'Précision (Mood) : {accuracy:.2%}')

In [None]:
#RandomForest

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier


# Genre: train a random forest, save it, then report predictions and accuracy.
rf_model = RandomForestClassifier(n_estimators=103, random_state=42)

rf_model.fit(X_train, Y_train)
Y_pred = rf_model.predict(X_test)
save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML'
joblib.dump(rf_model, os.path.join(save_dir, "rf_model.pkl"))

accuracy = accuracy_score(Y_test, Y_pred)
print("Prédiction (Genre)", Y_pred)
print(f"Précision (Genre) : {accuracy:.2%}")

# Mood: same procedure on the mood data.
rf_model_mood = RandomForestClassifier(n_estimators=103, random_state=42)

rf_model_mood.fit(X_train_mood, Y_train_mood)
Y_pred_mood = rf_model_mood.predict(X_test_mood)
save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML_moods'
# Persist the mood forest itself (not the genre forest) under the mood file name.
joblib.dump(rf_model_mood, os.path.join(save_dir, "rf_model_mood.pkl"))

accuracy = accuracy_score(Y_test_mood, Y_pred_mood)
print("Prédiction (Mood)", Y_pred_mood)
print(f"Précision (Mood) : {accuracy:.2%}")


In [None]:
#GradientBoosting

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Genre: train a gradient-boosting classifier, save it, then report predictions and accuracy.
model_gbt = GradientBoostingClassifier()
model_gbt.fit(X_train, Y_train)
save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML'
joblib.dump(model_gbt, os.path.join(save_dir, "gbt_model.pkl"))
Y_pred_gbt = model_gbt.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred_gbt)
# Show this model's own predictions (Y_pred belongs to the random-forest cell).
print("Prédiction (Genre)", Y_pred_gbt)
print(f"Précision (Genre): {accuracy:.2%}")

# Mood: same procedure on the mood data.
model_gbt_mood = GradientBoostingClassifier()
model_gbt_mood.fit(X_train_mood, Y_train_mood)
save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML_moods'
joblib.dump(model_gbt_mood, os.path.join(save_dir, "gbt_model_mood.pkl"))
Y_pred_gbt_mood = model_gbt_mood.predict(X_test_mood)
accuracy = accuracy_score(Y_test_mood, Y_pred_gbt_mood)
print("Prédiction (Mood)", Y_pred_gbt_mood)
print(f"Précision (Mood): {accuracy:.2%}")

In [None]:
#K-Nearest Neighbors

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Genre: train a 5-nearest-neighbours classifier, save it, then report predictions and accuracy.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, Y_train)

pred_k = knn.predict(X_test)
save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML'
joblib.dump(knn, os.path.join(save_dir, "knn_model.pkl"))

print("Prédiction (Genre) :" + str(pred_k))
accuracy = accuracy_score(Y_test, pred_k)
print(f'Précision (Genre): {accuracy:.2%}')


# Mood: same procedure on the mood data.
knn_mood = KNeighborsClassifier(n_neighbors=5)
knn_mood.fit(X_train_mood, Y_train_mood)

pred_k_mood = knn_mood.predict(X_test_mood)
save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML_moods'
# Persist the mood classifier itself (not the genre one) under the mood file name.
joblib.dump(knn_mood, os.path.join(save_dir, "knn_model_mood.pkl"))

print("Prédiction (Mood) :" + str(pred_k_mood))
accuracy = accuracy_score(Y_test_mood, pred_k_mood)
print(f'Précision (Mood): {accuracy:.2%}')

In [None]:
#Version 2 pas fini : Pour tester sur une musique de son choix - To try on a chosen music

import joblib

# Empty placeholders; no visible code appends to them (extract_features() fills `X` instead).
X_genre = []
X_mood = []

def pred_musique2(file_path, model_genre):
  """Print the genre that ``model_genre`` predicts for one audio file.

  Features are computed by ``extract_features`` (which appends to the global
  ``X``), standardised with the scaler saved at ML/scaler.pkl and passed to
  the given classifier. ``X`` is cleared afterwards.

  Args:
    file_path: Path of the audio file to classify.
    model_genre: Fitted classifier exposing ``predict``.
  """
  extract_features(file_path, duration=30, sr=22050)
  # Only the row just appended belongs to this file; X may still hold older rows.
  df_test = pd.DataFrame(X[-1:])
  scaler = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/scaler.pkl')
  df_test_scaled = scaler.transform(df_test)
  # Use the classifier passed in, not whatever the global `model` currently is.
  prediction = model_genre.predict(df_test_scaled)
  print(f"Prédiction (Genre): {genre_mapping[int(prediction[0])]}")
  X.clear()




def pred_musique(file_path, model):
  """Predict, print and return the genre of one audio file.

  Features are computed by ``extract_features`` (which appends to the global
  ``X``), standardised with the scaler saved at ML/scaler.pkl and passed to
  ``model``. ``X`` is cleared afterwards.

  Args:
    file_path: Path of the audio file to classify.
    model: Fitted classifier exposing ``predict``.

  Returns:
    The genre name looked up in ``genre_mapping``.
  """
  extract_features(file_path, duration=30, sr=22050)
  # Only the row just appended belongs to this file; X may still hold older
  # rows (e.g. after the dataset extraction loop), which would otherwise make
  # prediction[0] refer to a different track.
  df_test = pd.DataFrame(X[-1:])
  scaler = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/scaler.pkl')
  df_test_scaled = scaler.transform(df_test)
  prediction = model.predict(df_test_scaled)
  print(f"Prédiction (Genre): {genre_mapping[int(prediction[0])]}")
  X.clear()
  return genre_mapping[int(prediction[0])]


# Reload the fitted genre classifiers from Drive.
# joblib.load unpickles its input: only load files you trust.
svm_model = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/svm_model.pkl')
knn_model = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/knn_model.pkl')
gbt_model = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/gbt_model.pkl')
rf_model = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/rf_model.pkl')
LogisticRegModel = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/LogisticRegModel.pkl')

# Reload the fitted mood classifiers from Drive.
svm_model_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/svm_model_mood.pkl')
knn_model_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/knn_model_mood.pkl')
gbt_model_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/gbt_model_mood.pkl')
rf_model_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/rf_model_mood.pkl')
LogisticRegModel_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/LogisticRegModel_mood.pkl')

def pred_globale(file_path):
  """Run each loaded genre classifier on ``file_path``, printing a banner before every prediction."""
  lineup = (
      ("==== SVM ====", svm_model),
      ("==== KNN ====", knn_model),
      ("==== Gradient Boosting ====", gbt_model),
      ("==== Random Forest ====", rf_model),
      ("==== Logistic Regression ====", LogisticRegModel),
  )
  for banner, classifier in lineup:
    print(banner)
    pred_musique2(file_path, classifier)

# Example: run every loaded classifier on one personal track.
pred_globale('/content/drive/MyDrive/Artishow/Musique perso/Dua Lipa - Houdini.mp3')

In [None]:
#V1 pour tester sur une musique de son choix (GENRE)

# Empty placeholders; no visible code appends to them (extract_features() fills `X` instead).
X_genre = []
X_mood = []

def pred_musique_2(file_path, model_genre, model_mood):
  """Print the genre and the mood predicted for one audio file.

  The features are extracted once by ``extract_features`` (which appends to
  the global ``X``), then standardised separately with the genre scaler
  (ML/scaler.pkl) and the mood scaler (ML_moods/scaler.pkl).

  Args:
    file_path: Path of the audio file to classify.
    model_genre: Fitted genre classifier exposing ``predict``.
    model_mood: Fitted mood classifier exposing ``predict``.
  """
  extract_features(file_path, duration=30, sr=22050)
  # extract_features() stores its result in X (X_genre / X_mood are never
  # filled); keep only the row just appended, then reset the accumulator.
  df_test = pd.DataFrame(X[-1:])
  X.clear()

  # Genre
  scaler = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/scaler.pkl')
  df_test_scaled = scaler.transform(df_test)
  prediction = model_genre.predict(df_test_scaled)
  print(f"Prédiction (Genre): {genre_mapping[int(prediction[0])]}")

  # Mood
  scaler_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/scaler.pkl')
  df_test_scaled_mood = scaler_mood.transform(df_test)
  prediction = model_mood.predict(df_test_scaled_mood)
  print(prediction)
  print(f"Prédiction (Mood): {mood_mapping[int(prediction[0])]}")


def pred_musique(file_path, model):
  """Predict, print and return the genre of one audio file.

  Features are computed by ``extract_features`` (which appends to the global
  ``X``), standardised with the scaler saved at ML/scaler.pkl and passed to
  ``model``. ``X`` is cleared afterwards.

  Args:
    file_path: Path of the audio file to classify.
    model: Fitted classifier exposing ``predict``.

  Returns:
    The genre name looked up in ``genre_mapping``.
  """
  extract_features(file_path, duration=30, sr=22050)
  # Only the row just appended belongs to this file; X may still hold older
  # rows (e.g. after the dataset extraction loop), which would otherwise make
  # prediction[0] refer to a different track.
  df_test = pd.DataFrame(X[-1:])
  scaler = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/scaler.pkl')
  df_test_scaled = scaler.transform(df_test)
  prediction = model.predict(df_test_scaled)
  print(f"Prédiction (Genre): {genre_mapping[int(prediction[0])]}")
  X.clear()
  return genre_mapping[int(prediction[0])]

def pred_musique_mood(file_path, model):
  """Predict, print and return the mood of one audio file.

  Features are computed by ``extract_features`` (which appends to the global
  ``X``), standardised with the scaler saved at ML_moods/scaler.pkl and
  passed to ``model``. ``X`` is cleared afterwards.

  Args:
    file_path: Path of the audio file to classify.
    model: Fitted mood classifier exposing ``predict``.

  Returns:
    The mood name looked up in ``mood_mapping``.
  """
  extract_features(file_path, duration=30, sr=22050)
  # Only the row just appended belongs to this file; X may still hold older
  # rows, which would otherwise make prediction[0] refer to a different track.
  df_test = pd.DataFrame(X[-1:])
  scaler = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/scaler.pkl')
  df_test_scaled = scaler.transform(df_test)
  prediction = model.predict(df_test_scaled)
  print(prediction)
  print(f"Prédiction (Mood) : {mood_mapping[int(prediction[0])]}")
  X.clear()
  return mood_mapping[int(prediction[0])]

# Reload the fitted genre classifiers from Drive.
# joblib.load unpickles its input: only load files you trust.
svm_model = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/svm_model.pkl')
knn_model = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/knn_model.pkl')
gbt_model = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/gbt_model.pkl')
rf_model = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/rf_model.pkl')
LogisticRegModel = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/LogisticRegModel.pkl')

# Reload the fitted mood classifiers from Drive.
svm_model_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/svm_model_mood.pkl')
knn_model_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/knn_model_mood.pkl')
gbt_model_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/gbt_model_mood.pkl')
rf_model_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/rf_model_mood.pkl')
LogisticRegModel_mood = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/LogisticRegModel_mood.pkl')

def pred_globale(file_path):
  """Run each loaded genre/mood classifier pair on ``file_path``, printing a banner before every pair."""
  lineup = (
      ("==== SVM ====", svm_model, svm_model_mood),
      ("==== KNN ====", knn_model, knn_model_mood),
      ("==== Gradient Boosting ====", gbt_model, gbt_model_mood),
      ("==== Random Forest ====", rf_model, rf_model_mood),
      ("==== Logistic Regression ====", LogisticRegModel, LogisticRegModel_mood),
  )
  for banner, genre_clf, mood_clf in lineup:
    print(banner)
    pred_musique(file_path, genre_clf)
    pred_musique_mood(file_path, mood_clf)

# Example: run every loaded genre and mood classifier on one personal track.
pred_globale('/content/drive/MyDrive/Artishow/Musique perso/Dua Lipa - Houdini.mp3')

## Approche 2 : CNN + Spectrograms


### Conversion en spectrogramme

In [None]:
#Spectrogramme

import shutil
import os
import random


def audio_to_mel_spec(audio, save_path, sr=22050, n_mels=128, hop_length=512):
    """Render the mel spectrogram of an audio file, save it as an image and display it.

    Args:
        audio: Path of the audio file; at most its first 30 seconds are loaded.
        save_path: Destination path handed to ``plt.savefig``.
        sr: Sampling rate passed to ``librosa.load``.
        n_mels: Number of mel bands.
        hop_length: Hop length (in samples) between spectrogram frames.
    """
    y, sr = librosa.load(audio, sr=sr, duration=30)
    melspec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels,
                                             hop_length=hop_length)
    # Power spectrogram -> decibels relative to its maximum.
    melspec_db = librosa.power_to_db(melspec, ref=np.max)

    # Plot & save spectrogram: 4.32 x 2.88 inches at 100 dpi, i.e. 432 x 288 px
    # before the tight crop (not 224 x 224).
    fig = plt.figure(figsize=(4.32, 2.88), dpi=100)
    plt.imshow(melspec_db, aspect='auto', origin='lower', cmap='magma', vmin=-42, vmax=0)

    plt.axis('off')
    plt.tight_layout(pad=0)
    plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
    plt.show()
    # Release the figure explicitly so calling this in a long loop does not
    # accumulate open figures (a no-op if the backend already closed it).
    plt.close(fig)

output_dir = "/content/drive/MyDrive/Artishow/spectrogramStorage"
# Create the target folder if needed; savefig() fails when the directory is missing.
os.makedirs(output_dir, exist_ok=True)
i=0

# Render one spectrogram image per audio file (first 1000 files), printing a running counter.
for audio in audio_files[:1000]:
    i+=1
    print(i)
    file_name = os.path.basename(audio).replace('.mp3','.png').replace('.wav','.png')
    save_path = os.path.join(output_dir, file_name)
    print(file_name)
    audio_to_mel_spec(audio, save_path)

# NOTE(review): the archive goes to a Kaggle-style path while everything else
# in this notebook lives on the Colab Drive mount — confirm the intended destination.
shutil.make_archive("/kaggle/working/dataset_spectrograms", 'zip', output_dir)
print("fichier créé pret a download")

In [None]:
#Mettre en forme les données

import tensorflow as tf
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

spectrograms_dir = '/content/drive/MyDrive/Artishow/Data/images_original'
data = []

# One sub-folder per genre; collect (image path, genre) pairs in a deterministic order.
for genre in sorted(os.listdir(spectrograms_dir)):
    genre_dir = os.path.join(spectrograms_dir, genre)
    if not os.path.isdir(genre_dir):
        continue
    for file in sorted(os.listdir(genre_dir)):
        if file.endswith(".png"):
            file_path = os.path.join(genre_dir, file)
            data.append([file_path, genre])

df = pd.DataFrame(data, columns=["filename", "genre"])
# flow_from_dataframe applies validation_split as a contiguous slice of the
# rows. The frame is built genre by genre, so without shuffling the validation
# subset would consist of whole genres that are then missing from training.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
display(df.head())

IMG_SIZE = (224,224)
BATCH_SIZE = 32
DATASET_DIR = "dataset_spectrograms"

# Scale pixel values to [0, 1] and reserve 20% of the rows for validation.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

train_generator = datagen.flow_from_dataframe(
    dataframe = df,
    x_col="filename",
    y_col="genre",
    target_size = IMG_SIZE,
    batch_size = BATCH_SIZE,
    class_mode = "categorical",
    subset = "training"
)

val_generator = datagen.flow_from_dataframe(
    dataframe = df,
    x_col="filename",
    y_col="genre",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode = "categorical",
    subset = "validation",
    # Keep a fixed order so predictions line up with val_generator.classes.
    shuffle = False
)


### Convolutional Neural Network

In [None]:
#VGG-16

import os
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

# Local copy of the VGG16 convolutional weights (no classification head).
filepath = "/content/drive/MyDrive/Artishow/Modèle/IA/vgg16_weights_tf_dim_ordering_tf_kernels_notop (1).h5"
base_model = VGG16(weights=filepath,
                include_top=False,
                input_shape=((224,224, 3))
)

# Freeze the convolutional base; only the dense head added below is trained.
base_model.trainable = False
model = models.Sequential([
    base_model,
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    # One output unit per distinct genre present in `df`.
    layers.Dense(df['genre'].nunique(),activation='softmax')])

model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)

model.save("/content/drive/MyDrive/Artishow/Modèle/IA/vgg16_genre_classification.h5")

# Note: `accuracy` rebinds the name used by the scikit-learn cells above.
loss, accuracy = model.evaluate(val_generator)

In [None]:
from sklearn.metrics import accuracy_score

model = tf.keras.models.load_model("/content/drive/MyDrive/Artishow/Modèle/IA/vgg16_genre_classification.h5")

# Walk the validation generator batch by batch so every prediction is compared
# with the label of the very same image. Comparing model.predict(val_generator)
# with val_generator.classes is only valid when the generator does not shuffle.
pred_batches = []
true_batches = []
for batch_index in range(len(val_generator)):
    images, onehot_labels = val_generator[batch_index]
    pred_batches.append(model.predict_on_batch(images))
    true_batches.append(onehot_labels)

pred = np.concatenate(pred_batches)
true_classes = np.argmax(np.concatenate(true_batches), axis=1)
pred_classes = np.argmax(pred, axis=1)
acc = accuracy_score(true_classes, pred_classes)
print(f"Accuracy sur validation set : {acc:.2%}")

In [None]:
# Render the spectrogram of a personal track; the save path has no extension here, while the code below reads "specPerso.png".
audio_to_mel_spec("/content/drive/MyDrive/Artishow/Musique perso/Dua Lipa - Houdini.mp3", "/content/drive/MyDrive/Artishow/spectrogramStorage/MusiquePerso/specPerso")
import keras.preprocessing.image as image
# Charger l’image et la préparer pour le modèle

def load_image(img_path):
    """Load an image resized to 224x224 and return it as a one-image batch scaled by 1/255."""
    pixels = image.img_to_array(image.load_img(img_path, target_size=(224, 224)))
    # Add the leading batch axis, then apply the same 1/255 rescaling as the training generator.
    batch = np.expand_dims(pixels, axis=0)
    return batch / 255.

# Load the trained model
model = tf.keras.models.load_model("/content/drive/MyDrive/Artishow/Modèle/IA/vgg16_genre_classification.h5")

# Spectrogram image of the track to classify
spec_path = "/content/drive/MyDrive/Artishow/spectrogramStorage/MusiquePerso/specPerso.png"
img_array = load_image(spec_path)

# Prediction
prediction = model.predict(img_array)
print(prediction)
# Indices of the two highest-scoring classes, best first (class indices, not genre names).
predicted_indices = np.argsort(prediction)[0][-2:][::-1]
print(f"Genres prédits : {predicted_indices}")

## Benchmark

In [None]:
#Benchmark

from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.preprocessing import label_binarize
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier

# Classifiers compared in the benchmark, keyed by display name.
# NOTE(review): this rebinds `models`, the name the VGG-16 cell imports from tensorflow.keras.
models = {
    "Logistic Regression": LogisticRegression(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC(kernel='linear', probability=True),
    "Gradient Boosting": GradientBoostingClassifier(),
    "Random Forest": RandomForestClassifier(n_estimators=103, random_state=42)
}

# One (name, accuracy, f1, auc) tuple per model, filled by the loop below.
results = []

def compute_f1_score(y_true, y_pred):
    """Return the weighted-average F1 score of ``y_pred`` against ``y_true``."""
    return f1_score(y_true, y_pred, average='weighted')

def compute_auc(y_true, y_prob):
    """Return the weighted one-vs-rest ROC AUC, or None when it cannot be computed.

    None is returned when ``y_prob`` is None or when computing the score
    raises ``ValueError``.
    """
    if y_prob is None:
        return None
    try:
        # Binarise the labels over the classes actually present in y_true.
        classes_present = np.unique(y_true)
        y_true_bin = label_binarize(y_true, classes=classes_present)
        return roc_auc_score(y_true_bin, y_prob, average='weighted', multi_class='ovr')
    except ValueError:
        return None

print("Réponse :", Y_test.tolist())

# Training: fit each model on the genre data and collect accuracy, weighted F1 and AUC.

for name, model in models.items():
    model.fit(X_train, Y_train)
    pred = model.predict(X_test)
    # Class probabilities are only requested from models that expose predict_proba.
    prob = model.predict_proba(X_test) if hasattr(model, "predict_proba") else None
    accuracy = accuracy_score(Y_test, pred)
    f1 = compute_f1_score(Y_test, pred)
    auc = compute_auc(Y_test, prob)
    print(f"Prédiction {name} : {pred}")
    results.append((name, accuracy, f1, auc))

# Display: the "Precision" column holds the accuracy score computed above.
results_df = pd.DataFrame(results, columns=["Modele", "Precision", "F1-Score", "AUC"])
print(results_df.sort_values(by="F1-Score", ascending=False))


In [None]:
#Matrice de confusion
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt


use_mood = False  # True: mood data and labels, False: genre data and labels

mood_mapping = {0: 'dark', 1: 'deep', 2: 'dream', 3: 'emotional', 4: 'epic', 5: 'happy', 6: 'motivational', 7: 'relaxing', 8: 'romantic', 9: 'sad'}
genre_mapping = {0: 'blues', 1: 'classical', 2: 'country', 3: 'disco', 4: 'hiphop', 5: 'jazz', 6: 'metal', 7: 'pop', 8: 'reggae', 9: 'rock'}

mapping = mood_mapping if use_mood else genre_mapping
class_ids = list(mapping)                    # integer codes, in axis order
labels = [mapping[cle] for cle in mapping]   # matching display names

# Pick the dataset that matches the chosen label set, so the tick labels
# always describe the data actually being classified.
if use_mood:
    cm_X_train, cm_Y_train, cm_X_test, cm_Y_test = X_train_mood, Y_train_mood, X_test_mood, Y_test_mood
else:
    cm_X_train, cm_Y_train, cm_X_test, cm_Y_test = X_train, Y_train, X_test, Y_test

models = {
    "Logistic Regression": LogisticRegression(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC(kernel='linear', probability=True),
    "Gradient Boosting": GradientBoostingClassifier(),
    "Random Forest": RandomForestClassifier(n_estimators=103, random_state=42)
}

for name, model in models.items():
    model.fit(cm_X_train, cm_Y_train)
    pred = model.predict(cm_X_test)
    # Fix the class order explicitly so rows/columns line up with the tick
    # labels even if a class is absent from the test split or the predictions.
    cm = confusion_matrix(cm_Y_test, pred, labels=class_ids)
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", # fmt="d" shows integer counts
                xticklabels=labels, yticklabels=labels)
    plt.xlabel('Prédictions')
    plt.ylabel('Vraies valeurs')
    plt.title(name)
    plt.show()


## Ensemble learning

In [None]:
# Display names of the predictors, in the order model_predictions() returns them (the ensemble vote is last).
model_mapping = {0:"SVM", 1:"Logistic Regression", 2:"Random Forest", 3:"Gradient Boosting", 4:"k-Nearest Neighbors", 5:"Ensemble Learnings"}

In [None]:
# ENSEMBLE LEARNING
def pos_max(t):
    """Return the index of the first largest element of ``t`` (0 when ``t`` is empty)."""
    # max() keeps the earliest candidate on ties, like a strict '>' scan.
    return max(range(len(t)), key=lambda position: t[position], default=0)

# Fonction qui prend les N_t (5) tableaux en entrées et renvoie un tableau
# ensemble learning : e.l.
def ensemble_learning(Preds, n_classes=None):
  """Combine several models' predictions by majority vote.

  Args:
    Preds: Sequence of prediction sequences, one per model; all must have the
      same length and contain integer class codes in ``range(n_classes)``.
    n_classes: Number of classes. Defaults to ``len(genre_mapping)`` so that
      existing callers keep working; pass it explicitly for other label sets
      (e.g. moods).

  Returns:
    A list with one class code per sample: the most voted class, ties going
    to the smallest class code. An empty ``Preds`` yields an empty list.
  """
  if len(Preds) == 0:
    return []
  if n_classes is None:
    n_classes = len(genre_mapping)

  Y_pred_el = []
  for i in range(len(Preds[0])):
      total_par_genre = [0] * n_classes
      for model_preds in Preds:
          total_par_genre[model_preds[i]] += 1
      # list.index(max(...)) returns the first maximum, i.e. the smallest class code on ties.
      Y_pred_el.append(total_par_genre.index(max(total_par_genre)))
  return Y_pred_el

In [None]:
#All Classifiers
# couleur : \033[1;3Om #30 à 37 avant le m

# IMPORTS
from time import sleep
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler

# FONCTION GENERALE --- UTILISABLE POUR PRÉDIRE LE GENRE DE N'IMPORTE QUELLE MUSIQUE
def model_predictions(X_input, scale=True):
  """Predict with every trained classifier and with their majority vote.

  Args:
    X_input: Feature rows, one per track.
    scale: When True (default) the rows are raw features and are standardised
      with the scaler saved at ML/scaler.pkl first. Pass False for data that
      is already standardised, such as ``X_test``.

  Returns:
    A list of six prediction sequences, in this order: SVM, logistic
    regression, random forest, gradient boosting, k-nearest neighbours,
    ensemble vote.
  """
  if scale:
    scaler = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/scaler.pkl')
    X_input = scaler.transform(X_input)
  Preds = [clf.predict(X_input) for clf in (model_svm, model_lr, model_rf, model_gbt, model_knn)]
  # The ensemble vote is derived from the five individual predictions.
  Preds.append(ensemble_learning(Preds))
  return Preds

# TRAINING
model_svm = SVC(kernel='linear')
model_svm.fit(X_train, Y_train)
model_lr = LogisticRegression()
model_lr.fit(X_train, Y_train)
model_rf = RandomForestClassifier(n_estimators=103, random_state=42)
model_rf.fit(X_train, Y_train)
model_gbt = GradientBoostingClassifier()
model_gbt.fit(X_train, Y_train)
model_knn = KNeighborsClassifier(n_neighbors=5)
model_knn.fit(X_train, Y_train)

# ON THE TEST SET
resultats = False    # compute and print the accuracies
affichage = False    # also print the predictions
comparaison = True   # print the predictions side by side, one sample per line
if resultats:
  # X_test is already standardised, so it must not go through the scaler again.
  Preds = model_predictions(X_test, scale=False)
  N_t = len(Preds)
  N = len(Preds[0])
  # ACCURACY
  accuracy_svm = accuracy_score(Y_test, Preds[0])
  print(f'\033[1;31m Précision SVM : {accuracy_svm:.2%}')
  accuracy_lr = accuracy_score(Y_test, Preds[1])
  print(f'\033[1;32m Précision LogisticRegression : {accuracy_lr:.2%}')
  accuracy_rf = accuracy_score(Y_test, Preds[2])
  print(f"\033[1;33m Précision RandomForest: {accuracy_rf:.2%}")
  accuracy_gbt = accuracy_score(Y_test, Preds[3])
  print(f"\033[1;34m Précision GradientBoosting: {accuracy_gbt:.2%}")
  accuracy_knn = accuracy_score(Y_test, Preds[4])
  print(f'\033[1;35m Précision k-NearestNeighbours : {accuracy_knn:.2%}')
  accuracy_el = accuracy_score(Y_test, Preds[N_t-1])
  print(f'\033[1;36m Précision EnsembleLearning : {accuracy_el:.2%}')

  # PREDICTIONS
  if affichage:
    if not comparaison:
      print("\033[1;31m prédictions SVM : " + str(Preds[0]))
      print("\033[1;32m prédictions LogisticRegression : " + str(Preds[1]))
      print("\033[1;33m prédictions RandomForest : " + str(Preds[2]))
      print("\033[1;34m prédictions GradientBoosting : " + str(Preds[3]))
      print("\033[1;35m prédictions K-Nearest Neighbors : " + str(Preds[4]))
      print("\033[1;36m prédictions EnsembleLearning : " + str(Preds[N_t-1]))
    else:
      print("\033[1;30m Toutes les prédictions : ")
      for i in range(N):
        for j in range(N_t):
          # One ANSI colour per model (31 + model index).
          string = ''
          string += f'\033[1;{31+j}m'
          string += f'{Preds[j][i]} '
          print(string, end='')
        print(f'\033[1;30m \n')

In [None]:
# -- on a besoin de extract features sans ajouter à X et Y

def array_features(files, duration=30, sr=22050):
  """Extract feature vectors and labels for a list of audio files.

  Unlike ``extract_features`` this does not touch the global ``X`` / ``Y``.
  Each vector holds, for every frame-level librosa feature, the per-row means
  followed by the per-row standard deviations, and finally the estimated tempo.

  Args:
    files: Paths of the audio files to analyse.
    duration: Maximum number of seconds loaded from each file.
    sr: Sampling rate passed to ``librosa.load``.

  Returns:
    ``(X_features, Y_genres)``: the list of feature vectors and the list of
    labels (text before the first '.' of each file's base name).
  """
  def stats(feature):
      # Mean and standard deviation along the frame axis, as plain floats.
      return list(map(float, np.mean(feature, axis=1))) + list(map(float, np.std(feature, axis=1)))

  X_features = []
  Y_genres = []
  print('Extracting features :', end='')
  for i, file_path in enumerate(files):
    y, sr = librosa.load(file_path, sr=sr, duration=duration)
    # Frame-level features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    rms = librosa.feature.rms(y=y)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    flatness = librosa.feature.spectral_flatness(y=y)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
    zero_crossing = librosa.feature.zero_crossing_rate(y=y)
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # Build the feature vector
    features = []
    for f in [mfcc, rms, spectral_centroid, bandwidth, contrast, flatness, rolloff, tonnetz, zero_crossing]:
        features.extend(stats(f))
    # Recent librosa versions return the tempo as a 1-element array; take the
    # element explicitly instead of relying on the deprecated implicit
    # array-to-scalar conversion done by float().
    features.append(float(np.ravel(tempo)[0]))
    X_features.append(features)
    genre = (os.path.splitext(os.path.basename(file_path))[0]).split('.')[0]
    Y_genres.append(genre)
    print(f'{i+1}✓', end='')
  print('')
  return X_features, Y_genres

In [None]:
# SAMUEL -- on a besoin de extract features sans ajouter à X et Y

def array_features(files, duration=30, sr=22050):
  """Extract one feature vector and one genre label per audio file.

  NOTE(review): this cell re-defines `array_features`, already defined in the
  previous cell; the later definition is the one that stays in effect.

  Args:
    files: sequence of audio file paths.
    duration: maximum number of seconds loaded from each file.
    sr: target sampling rate passed to ``librosa.load``.

  Returns:
    (X_features, Y_genres): a list of feature vectors (per-row mean and
    standard deviation of each spectral feature, followed by the tempo) and
    the list of genre labels, taken as the part of each file name before the
    first '.'.
  """
  def stats(feature):
    # Per-row mean followed by per-row standard deviation along the frame axis.
    return list(map(float, np.mean(feature, axis=1))) + list(map(float, np.std(feature, axis=1)))

  X_features = []
  Y_genres = []
  print('Extracting features :', end='')
  for i, file_path in enumerate(files):
    # Keep the caller's `sr` untouched: rebinding it here would leak the first
    # file's rate into the following loads when sr=None is requested.
    y, file_sr = librosa.load(file_path, sr=sr, duration=duration)
    # Feature extraction
    mfcc = librosa.feature.mfcc(y=y, sr=file_sr, n_mfcc=13)
    rms = librosa.feature.rms(y=y)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=file_sr)
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=file_sr)
    contrast = librosa.feature.spectral_contrast(y=y, sr=file_sr)
    flatness = librosa.feature.spectral_flatness(y=y)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=file_sr)
    tonnetz = librosa.feature.tonnetz(y=y, sr=file_sr)
    zero_crossing = librosa.feature.zero_crossing_rate(y=y)
    tempo, _ = librosa.beat.beat_track(y=y, sr=file_sr)
    # Build the feature vector
    features = []
    for f in [mfcc, rms, spectral_centroid, bandwidth, contrast, flatness, rolloff, tonnetz, zero_crossing]:
      features.extend(stats(f))
    # Recent librosa versions return the tempo as a 1-element array; take the
    # element explicitly rather than relying on float(array), which NumPy deprecates.
    features.append(float(np.atleast_1d(tempo)[0]))
    X_features.append(features)
    genre = (os.path.splitext(os.path.basename(file_path))[0]).split('.')[0]
    Y_genres.append(genre)
    print(f'{i+1}✓', end='')
  print('')
  return X_features, Y_genres

# Juste le genre svp

def extract_known_genre(files, duration=30, sr=22050):
  """Return, for every path in `files`, the part of its file name before the first '.'.

  `duration` and `sr` are accepted but not used by this function.
  """
  return [os.path.basename(path).split('.')[0] for path in files]

In [None]:
# FINAL EL FOR PREDICTION
# pour le moment tqt
# -----------------------------------------------------------------------------------------------------------------------------
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Files used for the ensemble experiment. Alternative kept from the original
# author: np.random.choice(audio_files, 100, replace=False), or any other list of paths.
F = audio_files
# Genre label of every file, read from its file name.
W = extract_known_genre(F)

# One row per file: column 0 holds the path, 'genre' holds the label.
df2 = pd.DataFrame(F)
df2['genre'] = W
df2['genre'] = df2['genre'].astype('category')
# code -> genre name, in the category order used by `.cat.codes` below.
genre_mapping = dict(enumerate(df2['genre'].cat.categories))
df2['genre'] = df2['genre'].astype('category').cat.codes
# Stratified 80/20 split of the file paths (labels are the integer codes).
F_train, F_test, W_train, W_test = train_test_split(df2.drop(columns=['genre'],), df2['genre'], test_size=0.2, random_state = 42,stratify = W)
# Back to plain Python lists of paths / codes.
F_train = F_train.squeeze().tolist()
F_test = F_test.squeeze().tolist()
W_train = W_train.tolist()
W_test = W_test.tolist()
# The 20% hold-out is split again (80/20, stratified) into the meta-model's
# own train / test sets.
# NOTE(review): presumably done so the meta-model is fitted on files the base
# models were not trained on; confirm against how X_train / Y_train were built.
F_meta_train, F_meta_test, W_meta_train, W_meta_test = train_test_split(F_test, W_test, test_size=0.2, random_state=42, stratify=W_test)
#save_dir = '/content/drive/MyDrive/Artishow/Modèle/ML' ça s'était pour logistic regression
#joblib.dump(model, os.path.join(save_dir, "Logi.pkl")) ça s'était pour logistic regression

In [None]:
# essais ensemble learning 2
# partie cnn
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
import keras.preprocessing.image as image
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import joblib

# Keras model loaded from Drive; used by model_probas() for the image-based prediction.
model_cnn = tf.keras.models.load_model("/content/drive/MyDrive/Artishow/Modèle/IA/vgg16_genre_classification.h5")
# Folder where pre_process_cnn() writes the spectrogram images.
save_dir = "/content/drive/MyDrive/Artishow/spectrogramStorage/MusiquePerso"

def audio_to_mel_spec(audio_path, save_dir, sr=22050, n_mels=128, hop_length=512):
    """Render the mel spectrogram of an audio file to a PNG image.

    Args:
        audio_path: path of the audio file; only its first 30 seconds are loaded.
        save_dir: folder receiving the image (created if it does not exist).
        sr: sampling rate used when loading the audio.
        n_mels: number of mel bands.
        hop_length: hop length of the spectrogram.

    Returns:
        Path of the saved image: `save_dir/<audio file name without extension>.png`.
    """
    y, sr = librosa.load(audio_path, sr=sr, duration=30)
    melspec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, hop_length=hop_length)
    melspec_db = librosa.power_to_db(melspec, ref=np.max)

    # Always produce a .png name, whatever the audio extension is (.flac, .WAV, ...);
    # the previous chained replace() only handled lowercase '.mp3' / '.wav'.
    stem, _ = os.path.splitext(os.path.basename(audio_path))
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, stem + '.png')

    # 4.32 x 2.88 inches at 100 dpi; load_image() resizes to 224x224 afterwards.
    fig = plt.figure(figsize=(4.32, 2.88), dpi=100)
    try:
        plt.imshow(melspec_db, aspect='auto', origin='lower', cmap='magma', vmin=-42, vmax=0)
        plt.axis('off')
        plt.tight_layout(pad=0)
        plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Close this figure even if rendering or saving fails, so figures do
        # not accumulate when many files are processed.
        plt.close(fig)
    return save_path

def load_image(img_path):
    """Load an image as a (1, 224, 224, channels) batch with values divided by 255."""
    pixels = image.img_to_array(image.load_img(img_path, target_size=(224, 224)))
    batch = np.expand_dims(pixels, axis=0)
    # Same scaling as a `rescale=1./255` generator, per the original author's note.
    return batch / 255.

def pre_process_cnn(files):
  """Turn audio files into one stacked image batch for the CNN.

  Returns:
    (batch, spec_paths): the stacked image arrays and the paths of the
    spectrogram files written to `save_dir`, so the caller can delete them.
  """
  batches, spec_paths = [], []
  for audio_path in files:
    png_path = audio_to_mel_spec(audio_path, save_dir)
    batches.append(load_image(png_path))
    spec_paths.append(png_path)
  stacked = np.vstack(batches)
  return stacked, spec_paths

# partie machine learning
from time import sleep
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
# TRAINING
# (to be checked whether re-training is needed)
# Every scikit-learn estimator returns itself from fit(), so each base model is
# created and fitted in a single statement.
model_svm = SVC(kernel='linear', probability=True).fit(X_train, Y_train)
model_lr = LogisticRegression().fit(X_train, Y_train)
model_rf = RandomForestClassifier(n_estimators=103, random_state=42).fit(X_train, Y_train)
model_gbt = GradientBoostingClassifier().fit(X_train, Y_train)
model_knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)

# general
def model_probas(files): #CNN !!!!!!!
  """Build the meta-model input: per file, the class probabilities of every base model.

  For each file the row is the concatenation of the probability vectors of
  the SVM, logistic regression, random forest, gradient boosting, k-NN and
  CNN models, in that order.

  Args:
    files: sequence of audio file paths.

  Returns:
    A list with one row (list of probabilities) per file.
  """
  scaler = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/scaler.pkl')
  X_features, _ = array_features(files) # second value is the genres, unused here
  X_input = scaler.transform(X_features)
  per_model = [
      clf.predict_proba(X_input)
      for clf in (model_svm, model_lr, model_rf, model_gbt, model_knn)
  ]
  im_arrays, spec_paths = pre_process_cnn(files)
  try:
    per_model.append(model_cnn.predict(im_arrays))
  finally:
    # Remove the temporary spectrograms even when the CNN prediction fails;
    # the existence check keeps duplicate paths from raising on the second removal.
    for spec_path in spec_paths:
      if os.path.exists(spec_path):
        os.remove(spec_path)
  # zip(*per_model) yields, for each file, the tuple of its probability vectors.
  return [[p for probas in file_probas for p in probas] for file_probas in zip(*per_model)]

#essai
#files1 = audio_files[:3]
#files2 = ["/content/drive/MyDrive/Artishow/Musique perso/Dua Lipa - Houdini.mp3"]
#Z = model_probas(files1)

In [None]:
# ENSEMBLE LEARNING
def pos2_max(t):
  """Return the indices of the largest and second-largest values of `t`.

  Ties are resolved in favour of the lowest index. Sequences with fewer than
  two elements return (0, 0), as before.

  The previous single-pass version started both indices at 0, so whenever the
  maximum was at index 0 the runner-up was compared against the maximum
  itself and never updated (e.g. [5, 1, 3] gave (0, 0) instead of (0, 2)).
  """
  n = len(t)
  if n < 2:
    return (0, 0)
  # sorted() is stable, also with reverse=True, so equal values keep index order.
  ranked = sorted(range(n), key=lambda i: t[i], reverse=True)
  return (ranked[0], ranked[1])

In [None]:
# META MODEL TRAINING
# Base-model probabilities for the meta train / test files (slow: runs the
# feature extraction and the CNN on every file).
Z_meta_train = model_probas(F_meta_train)
Z_meta_test = model_probas(F_meta_test)

# Standardize the stacked probabilities; the scaler is fitted on the meta
# training set only and then applied to the meta test set.
scaler = StandardScaler()
Z_meta_train = scaler.fit_transform(Z_meta_train)
Z_meta_test = scaler.transform(Z_meta_test)

# The meta-model is fitted on the standardized Z, so any later input must go
# through `scaler.transform` first.
meta_model = LogisticRegression()
meta_model.fit(Z_meta_train, W_meta_train)
#W_meta_pred_test = meta_model.predict(Z_meta_test)
#accuracy = accuracy_score(W_meta_pred_test, W_meta_test)
#print(f'Précision du modèle : {accuracy:.2%}')
#W_2rep = meta_model_predictions(Z_meta_test)

In [None]:
def meta_model_predictions(Z_input):
  """Return, for each row of `Z_input`, the pair of class indices computed by pos2_max.

  The pair is taken from the row's `meta_model.predict_proba` output.
  """
  return list(map(pos2_max, meta_model.predict_proba(Z_input)))

In [None]:
# Debug: show the file paths used to train the meta-model.
print(F_meta_train)

In [None]:
# Debug: show two of the dataset file paths.
print(audio_files[2:4])

# PHASE 2 : Playlist


## Tri d'un tableau par genre - Sorting a table by genre


In [None]:
# Algorithme qui prend plein de musiques et en détermine le genre
from tabulate import tabulate
import random

# Pick some files (to be replaced by a folder later)
# NOTE(review): affiche_tab_t_g is defined in the next cell, so this cell
# fails on a fresh kernel unless that cell is run first.
subset = random.sample(audio_files, 10)
X_subset, Y_genres = array_features(subset)
X_names = [
    os.path.basename(file).replace('.mp3','').replace('.wav','')
    for file in subset
]

# Table of (title, known genre)
Songs = list(zip(X_names, Y_genres))
affiche_tab_t_g(Songs, statut="connu")

# One table of (title, predicted genre) per model
tab = model_predictions(X_subset)
for i in range(len(tab)):
  print(model_mapping[i])
  Songs = [(name, genre_mapping[code]) for name, code in zip(X_names, tab[i])]
  affiche_tab_t_g(Songs)

In [None]:
# Algorithme qui affiche un tableau (titre, genre) trié par genre
from tabulate import tabulate
import random

# L'algo
def affiche_tab_t_g(couples, statut="prédit"):
  """Print a (title, genre) table sorted by genre.

  `couples` is sorted in place on its second element; `statut` is shown in
  the genre column header.
  """
  couples.sort(key=lambda couple: couple[1])
  table = tabulate(
      couples,
      headers=['Title', f'Genre ({statut})'],
      tablefmt="rounded_grid",
  )
  print(table)

# Example
# Build a few (title, genre) pairs from the dataset file names.
subset = random.sample(audio_files, 100)
Songs = []
for i in range(10):
  file_path = subset[i]
  genre = (os.path.splitext(os.path.basename(file_path))[0]).split('.')[0]
  # Title = file name without its audio extension. The previous code replaced
  # the extension with '.png' (copied from the spectrogram naming), so the
  # table showed titles such as 'blues.00012.png'.
  file_name = os.path.splitext(os.path.basename(file_path))[0]
  Songs.append((file_name, genre))
affiche_tab_t_g(Songs, statut="connu")

## Imports Deezer

In [None]:
import os
import pandas as pd
import requests
import time
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Base folder where the downloaded files are stored
# NOTE(review): written 'My Drive' here but 'MyDrive' in other cells; confirm
# both spellings resolve to the same folder in the runtime used.
base_path = '/content/drive/My Drive/Artishow/Deezer'
os.makedirs(base_path, exist_ok=True)

# Music genres to process
genres = ['jazz', 'country', 'pop', 'reggae', 'blues',
          'hiphop', 'classical', 'disco', 'metal', 'rock']

# Fonction pour rechercher les playlists liées à un genre
def get_playlists_for_genre(genre, limit=5):
    """Search Deezer playlists matching `genre`.

    Args:
        genre: search text.
        limit: maximum number of playlists returned.

    Returns:
        The first `limit` entries of the response's 'data' list; an empty list
        when the request fails or the response has no 'data'.
    """
    try:
        # `params` URL-encodes the query; the timeout prevents an unbounded hang.
        response = requests.get(
            'https://api.deezer.com/search/playlist',
            params={'q': genre},
            timeout=10,
        )
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Erreur lors de la recherche de playlists pour {genre} : {exc}")
        return []
    return payload.get('data', [])[:limit]

# Fonction pour extraire les morceaux d'une playlist
def get_tracks_from_playlist(playlist_id):
    """Fetch a Deezer playlist and return its tracks as plain dicts.

    Each dict has the keys 'track_id', 'title', 'artist', 'rank' and
    'preview'. Request errors and missing track fields propagate to the caller.
    """
    url = f'https://api.deezer.com/playlist/{playlist_id}'
    # Timeout added so a stalled connection cannot block the download loop forever.
    response = requests.get(url, timeout=10).json()
    tracks = response.get('tracks', {}).get('data', [])
    return [
        {
            'track_id': track['id'],
            'title': track['title'],
            'artist': track['artist']['name'],
            'rank': track['rank'],
            'preview': track['preview'],
        }
        for track in tracks
    ]

# Main loop over every genre: collect tracks from playlists, keep the 100
# highest-ranked ones, save them as CSV and download their MP3 previews.
for genre in genres:
    print(f"\n Traitement du genre : {genre}")
    all_tracks = []

    # Create the folder for this genre
    genre_folder = os.path.join(base_path, genre)
    os.makedirs(genre_folder, exist_ok=True)

    # Playlists found for the genre
    playlists = get_playlists_for_genre(genre, limit=5)
    if not playlists:
        print(f"Aucune playlist trouvée pour {genre}")
        continue

    # Tracks of each playlist
    for playlist in playlists:
        playlist_id = playlist['id']
        try:
            tracks = get_tracks_from_playlist(playlist_id)
            all_tracks.extend(tracks)
        except Exception as exc:
            # Best effort: skip the playlist, but report why. Unlike a bare
            # `except`, this does not swallow KeyboardInterrupt.
            print(f"Erreur lors du traitement de la playlist {playlist_id} : {exc}")
        time.sleep(0.5)  # Small delay to go easy on the API

    if not all_tracks:
        print(f" Aucun morceau récupéré pour {genre}")
        continue

    # Clean-up: drop duplicates, sort by popularity (rank, descending), keep the first 100
    df = (
        pd.DataFrame(all_tracks)
        .drop_duplicates(subset='track_id')
        .sort_values(by='rank', ascending=False)
    )
    df_top100 = df.head(100)

    # Save the list as CSV (useful for auditing)
    csv_path = os.path.join(genre_folder, f'{genre}_top100.csv')
    df_top100.to_csv(csv_path, index=False)

    # Download the MP3 previews into the genre folder
    print(f"Téléchargement des previews pour {genre}")
    for _, row in df_top100.iterrows():
        track_id = row['track_id']
        title = row['title']
        preview_url = row['preview']

        # Skip tracks without a usable preview link
        if not isinstance(preview_url, str) or not preview_url.startswith("http"):
            continue

        # Build a file name without path separators; str() guards against a
        # missing (non-string) title.
        safe_title = str(title)[:30].replace('/', '_').replace('\\', '_')
        filename = f"{track_id}_{safe_title}.mp3"
        filepath = os.path.join(genre_folder, filename)

        # Download only if not already present
        if os.path.exists(filepath):
            continue

        try:
            r = requests.get(preview_url, timeout=30)
            # Do not save an HTTP error page as if it were audio.
            r.raise_for_status()
            with open(filepath, 'wb') as f:
                f.write(r.content)
            print(f"{filename}")
        except (requests.RequestException, OSError) as exc:
            print(f"Échec pour {title} : {exc}")

In [None]:
files2 = filepaths = ['/content/drive/My Drive/Artishow/Deezer/pop/3242688291_NINAO.mp3']
Ze = model_probas(files2)
# meta_model was fitted on standardized probabilities (see the meta-model
# training cell), so the same fitted `scaler` must be applied before predicting;
# feeding the raw probabilities gives predictions on a different scale.
Ws = meta_model_predictions(scaler.transform(Ze))
print(files2)
print(Ws)

## Génération de playlist - Playlist Generation

### Génération de playlist à partir d'un mood

### Approche 1 : simpliste avec des boutons

In [None]:
# Candidate mood labels (used as the zero-shot classification labels further down).
moods = ['happy', 'relaxing', 'dark', 'epic', 'dream', 'sad', 'motivational', 'deep', 'romantic', 'emotional']

In [None]:
def playlist_generator_mood_1(mood):
    """Draw up to 15 tracks of `df_audio` whose 'mood' entry contains `mood`.

    Returns the 'filename' and 'filepath' columns of the drawn rows, or an
    empty DataFrame (after printing a message) when nothing matches.
    """
    # Keep the rows whose 'mood' value contains the requested mood.
    has_mood = df_audio['mood'].apply(lambda track_moods: mood in track_moods)
    candidates = df_audio[has_mood]

    if candidates.empty:
        print(f"Aucun morceau trouvé pour le mood : {mood}")
        return pd.DataFrame()

    # Seeded draw of at most 15 tracks (fewer when not enough match).
    n_tracks = min(15, len(candidates))
    return candidates.sample(n=n_tracks, random_state=42)[['filename', 'filepath']]


In [None]:
# Callback run when a mood button is clicked
def on_button_click(b):
    """Redraw the button row and show the playlist for the clicked button's mood.

    NOTE(review): `generate_playlist`, `df_audio_features`, `buttons`, `output`,
    `widgets` and `clear_output` are not defined in the visible cells; confirm
    they exist before this cell runs.
    """
    # The button label, lower-cased, is used as the mood.
    mood_clicked = b.description.lower()
    clear_output(wait=True)
    display(widgets.HBox(buttons), output)
    with output:
        print(f"🎧 Playlist pour le mood : {mood_clicked}")
        playlist = generate_playlist(mood_clicked, df_audio_features)
        display(playlist)

# Attach the click handler to every button
for button in buttons:
    button.on_click(on_button_click)

# Show the interface
display(widgets.HBox(buttons), output)

## Approche 2 : A partir d'une phrase - Generate from a sentence

In [None]:
from transformers import pipeline

In [None]:
# Recursively (-r) and forcibly (-f) delete the Hugging Face cache folder.
# The leading "!" runs the line as a shell command in Colab.
!rm -rf ~/.cache/huggingface

In [None]:
# Zero-shot classification pipeline built on a pre-trained model (no training here).
classifier = pipeline("zero-shot-classification",
                      model="facebook/bart-large-mnli")

In [None]:
# Sample sentence classified in the next cell.
phrase_utilisateur = "I'm doing an incredible battle"

In [None]:
### Run the classification

resultat = classifier(phrase_utilisateur, moods)
print(f"Phrase à analyser : '{resultat['sequence']}'")
print("\nRésultats du classement des moods :")
# Per the original author's note, the labels come back sorted from most to
# least probable; labels and scores are printed pairwise.
for label, score in zip(resultat['labels'], resultat['scores']):
    print(f"- Mood : {label}, Score : {score:.2%}")

In [None]:
def generator_playlist_mood():
  """Ask the user for a sentence and return a playlist for its best-ranked mood.

  When the playlist of the first label has fewer than 5 rows, the playlist of
  the second label is appended to it.
  """
  ranked_moods = classifier(str(input("Quel est votre mood actuel ? ")), moods)['labels']
  playlist = playlist_generator_mood_1(ranked_moods[0])
  if len(playlist) >= 5:
    return playlist
  fallback = playlist_generator_mood_1(ranked_moods[1])
  return pd.concat([playlist, fallback])

In [None]:
# Interactive run: prompts for a sentence; the returned playlist is the cell output.
generator_playlist_mood()

## Génération à partir d'une musique

In [None]:
import requests
import json

# --- CONFIGURATION ---
# Base URL of the Deezer API (assigned again, identically, in the next cell).
API_URL = "https://api.deezer.com"

In [None]:
### Recherche de la musique

import requests
import json

# --- CONFIGURATION ---
# Base URL of the Deezer API, used by search_track() below.
API_URL = "https://api.deezer.com"


# Fonction pour chercher une chanson et retourner son ID et son titre
def search_track(song_name: str):
    """Search Deezer for a song and describe the first result.

    Args:
        song_name: free-text query.

    Returns:
        A dict with the keys 'id', 'title' and 'url' for the first result, or
        None when the request fails, the status is not 200, or nothing is found.
    """
    print(f"1. Recherche de la chanson : '{song_name}'...")

    try:
        response = requests.get(
            f"{API_URL}/search/track", params={"q": song_name}, timeout=10
        )
    except requests.RequestException as exc:
        # Network failures are reported like the other failure paths (None)
        # instead of raising out of the notebook cell.
        print(f"Erreur lors de la recherche : {exc}")
        return None

    if response.status_code != 200:
        print("Erreur lors de la recherche.")
        return None

    data = response.json()
    if not data.get('data'):
        print("Aucune chanson trouvée.")
        return None

    # Take the first result (per the original author's note, usually the most relevant)
    first_result = data['data'][0]
    track_id = first_result['id']
    track_title = first_result['title']
    artist_name = first_result['artist']['name']
    url = first_result['link']

    print(f"   -> Chanson trouvée : '{track_title}' par {artist_name} (ID: {track_id})")
    return {"id": track_id, "title": track_title, 'url' : url}

In [None]:
import os
import pandas as pd

# Recognized audio file extensions
AUDIO_EXTENSIONS = ('.mp3', '.wav', '.flac', '.ogg', '.aac', '.m4a')

# Genres available in the Deezer folder (one sub-folder per genre is expected)
genres = [
    "rock", "reggae", "pop", "metal", "jazz",
    "hiphop", "disco", "country", "classical", "blues"
]

# Path of the Deezer folder in Drive
base_path = '/content/drive/MyDrive/Artishow/Deezer'

# Fonction de collecte
def collect_audio_files_info(base_path, genre_names=None, extensions=None):
    """List the audio files found under `base_path/<genre>` for every genre.

    Args:
        base_path: folder containing one sub-folder per genre.
        genre_names: genres to scan; defaults to the notebook-level `genres`.
        extensions: accepted file extensions (lower case, with the dot);
            defaults to the notebook-level `AUDIO_EXTENSIONS`.

    Returns:
        A DataFrame with the columns 'filename', 'filepath' and 'genre'. The
        columns are present even when no file is found, so code selecting
        them on the result does not fail. Missing genre folders are reported
        and skipped.
    """
    if genre_names is None:
        genre_names = genres
    if extensions is None:
        extensions = AUDIO_EXTENSIONS
    extensions = tuple(extensions)  # str.endswith needs a tuple, not a list

    records = []
    for genre in genre_names:
        genre_folder = os.path.join(base_path, genre)
        if not os.path.exists(genre_folder):
            print(f"[Avertissement] Dossier introuvable : {genre_folder}")
            continue

        for root, _, files in os.walk(genre_folder):
            for file in files:
                if file.lower().endswith(extensions):
                    records.append({
                        "filename": file,
                        "filepath": os.path.join(root, file),
                        "genre": genre
                    })

    return pd.DataFrame(records, columns=["filename", "filepath", "genre"])

# Inventory of the Deezer audio files: one row per file (filename, filepath, genre).
df_audio = collect_audio_files_info(base_path)

# Plain list of the file paths.
audio_files_2 = df_audio['filepath']
audio_files_2 = audio_files_2.tolist()

# Quick look at the first entries.
print(audio_files_2[:5])
print(df_audio.head())

In [None]:
def playlist_generator_music(song_name: str):
  """Draw up to 15 tracks of `df_audio` matching a predicted mood and genre.

  When no row matches both, the draw is made among the rows of the genre only.

  NOTE(review): the genre is predicted from `audio_files[:2]`, not from the
  searched song, and 'mood' is not among the columns created by
  collect_audio_files_info; confirm both against the intended data flow.
  NOTE(review): a later cell defines this function again.
  """
  track_id = search_track(song_name)['id']
  genre = genre_decoding(p_vote_maj(Votes_of(models_Genres_probas_of(audio_files[:2], "paths"))))
  mood = f_mood_tristan(search_track(track_id))

  mood_mask = df_audio['mood'].apply(lambda track_moods: mood in track_moods)
  genre_mask = df_audio['genre'] == genre[0]
  filtered_df = df_audio[mood_mask & genre_mask]
  if filtered_df.empty:
    # Fall back to every track of the genre.
    filtered_df = df_audio[df_audio['genre'] == genre[0]]
  # Seeded draw of at most 15 tracks (fewer when not enough are available).
  return filtered_df.sample(n=min(15, len(filtered_df)), random_state=42)

In [None]:
def playlist_generator_music(song_name: str):
    """Build a playlist from `df_audio` for the mood and genre tied to a song.

    With at least 5 tracks matching both mood and genre, up to 15 of them are
    drawn. Otherwise the matches are topped up with other tracks of the same
    genre, to at most 8 tracks in total.

    Returns:
        A DataFrame of selected rows; an empty DataFrame when the song search fails.

    NOTE(review): the genre is predicted from `audio_files`, not from the
    searched song, and `search_track` is called a second time with the
    numeric id; confirm the intended inputs.
    """
    track = search_track(song_name)
    if track is None:
        # search_track already printed the reason.
        return pd.DataFrame()
    id = track['id']
    genre = genre_decoding(p_vote_maj(Votes_of(models_Genres_probas_of(audio_files, "paths"))))
    mood = f_mood_tristan(search_track(id))

    filtered_df = df_audio[
        (df_audio['mood'] == mood) &
        (df_audio['genre'] == genre[0])
    ]

    if len(filtered_df) < 5:
        # Start from the mood+genre matches. The previous code copied the
        # whole df_audio here, which excluded every row from the top-up and
        # could request a negative sample size.
        base_df = filtered_df.copy()

        already_selected = base_df.index
        genre_df = df_audio[
            (df_audio['genre'] == genre[0]) &
            (~df_audio.index.isin(already_selected))
        ]

        to_add = max(0, min(8 - len(base_df), len(genre_df)))
        additional_df = genre_df.sample(n=to_add, random_state=42)

        final_df = pd.concat([base_df, additional_df])
        final_df = final_df.sample(n=min(8, len(final_df)), random_state=42)
    else:
        final_df = filtered_df.sample(n=min(15, len(filtered_df)), random_state=42)

    return final_df


In [None]:
# Example run; the returned playlist is the cell output.
playlist_generator_music("7 rings")

In [None]:
# Quick look at the first rows of the audio inventory.
df_audio.head()

## Génération de playlist à partir d'un audio

In [None]:
# Install the Python bindings imported as `ffmpeg` in the next cell.
!pip install ffmpeg-python

In [None]:
# Importations nécessaires
from IPython.display import HTML, Audio, display
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np
from scipy.io.wavfile import read, write
import io
import ffmpeg # Pour la conversion de format

# --- Core of the recorder: the HTML/JavaScript template ---
# This code is not executed by Python; it is sent to the browser by record_audio().
# It builds a button and a progress bar, records the microphone for up to
# DURATION milliseconds (or until the button is clicked again) and sends the
# recording, base64-encoded, to the Python callback 'notebook.save_audio'.
AUDIO_HTML = """
<script>
var my_div = document.createElement("DIV");
var my_p = document.createElement("P");
var my_btn = document.createElement("BUTTON");
var my_progress = document.createElement("PROGRESS");
var my_span = document.createElement("SPAN");

// Configuration de l'interface utilisateur
my_p.innerHTML = "Appuyez sur le bouton pour démarrer l'enregistrement :";
my_btn.innerHTML = "Enregistrer (30s)";
my_progress.value = 0;
my_progress.max = 100;
my_span.innerHTML = " 0%";

// Ajout des éléments à la page
document.body.appendChild(my_div);
my_div.appendChild(my_p);
my_div.appendChild(my_btn);
my_div.appendChild(my_progress);
my_div.appendChild(my_span);

var recorder;
var interval;
var startTime;
const DURATION = 30000; // Durée en millisecondes (30s)

// Fonction pour mettre à jour la barre de progression
function updateProgress() {
    let elapsedTime = new Date() - startTime;
    let progress = (elapsedTime / DURATION) * 100;
    my_progress.value = Math.min(progress, 100);
    my_span.innerHTML = ` ${Math.round(Math.min(progress, 100))}%`;
}

// Fonction pour démarrer l'enregistrement
function startRecording() {
    my_btn.innerHTML = "En cours... Cliquez pour arrêter";
    my_btn.onclick = stopRecording; // Le bouton sert maintenant à arrêter

    // Demande l'accès au microphone
    navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
        recorder = new MediaRecorder(stream);
        recorder.start();
        startTime = new Date();

        // Mettre à jour la barre de progression toutes les 100ms
        interval = setInterval(updateProgress, 100);

        // Récupérer les données audio
        let chunks = [];
        recorder.ondataavailable = e => chunks.push(e.data);

        // Quand l'enregistrement s'arrête (manuellement ou à la fin)
        recorder.onstop = e => {
            let blob = new Blob(chunks, { 'type' : 'audio/webm; codecs=opus' });
            let reader = new FileReader();
            reader.onload = () => {
                // Retourner l'audio à Python
                var b64 = reader.result.split(',')[1];
                google.colab.kernel.invokeFunction('notebook.save_audio', [b64], {});
            };
            reader.readAsDataURL(blob);
        };

        // Arrêter automatiquement après DURATION
        setTimeout(stopRecording, DURATION);
    });
}

// Fonction pour arrêter l'enregistrement
function stopRecording() {
    if (recorder && recorder.state === "recording") {
        recorder.stop();
        clearInterval(interval);
        my_btn.innerHTML = "Terminé !";
        my_btn.disabled = true;
    }
}

// Lier la fonction de démarrage au clic initial
my_btn.onclick = startRecording;

</script>
"""

# --- LA FONCTION PYTHON QUI ORCHESTRE LE TOUT ---
def record_audio(filename="enregistrement.wav"):
    """Record audio from the browser (up to 30 s) and save it as a WAV file.

    Displays the AUDIO_HTML interface, waits for the browser to send the
    recording back, converts it to WAV with ffmpeg and writes it to `filename`.
    The recording can be stopped manually from the interface.

    Args:
        filename: path of the WAV file to write.

    Returns:
        `filename` on success, None when the ffmpeg conversion fails.
    """
    # Local imports: this cell only imports `eval_js` from google.colab.output,
    # and the name `output` is used for a display widget elsewhere in the
    # notebook, so the Colab module is bound here under an unambiguous name.
    import time
    from google.colab import output as colab_output

    # Step 1: callback that JavaScript invokes to hand the audio back
    audio_data = None
    def save_audio(b64_data):
        nonlocal audio_data
        audio_data = b64decode(b64_data)

    colab_output.register_callback('notebook.save_audio', save_audio)

    # Step 2: show the HTML/JS interface
    print("Préparation de l'interface d'enregistrement...")
    display(HTML(AUDIO_HTML))

    # Step 3: wait until the callback has filled audio_data. Sleeping between
    # checks avoids pinning a CPU core while waiting.
    # NOTE(review): confirm that Colab delivers the callback while this cell is
    # still running; if it does not, this loop never ends.
    while audio_data is None:
        time.sleep(0.1)

    print("\nEnregistrement terminé. Conversion et sauvegarde en cours...")

    # Step 4: the browser sends webm/opus; convert it to WAV in memory with ffmpeg
    try:
        proc = (
            ffmpeg
            .input('pipe:0')
            .output('pipe:1', format='wav')
            .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)
        )
        out, err = proc.communicate(input=audio_data)
    except ffmpeg.Error as e:
        print("Erreur ffmpeg (assurez-vous que ffmpeg est installé) :")
        print(e.stderr.decode())
        return None
    except OSError as e:
        # Raised when the ffmpeg executable cannot be started.
        print("Erreur ffmpeg (assurez-vous que ffmpeg est installé) :")
        print(e)
        return None

    # communicate() does not raise when ffmpeg itself fails: check the exit code.
    if proc.returncode != 0:
        print("Erreur ffmpeg (assurez-vous que ffmpeg est installé) :")
        print(err.decode(errors='replace'))
        return None

    # Read the converted WAV data and write it to disk
    rate, data = read(io.BytesIO(out))
    write(filename, rate, data)
    print(f"Fichier audio sauvegardé sous le nom : {filename}")
    return filename

In [None]:
def generer_playlist_audio():
  """Record audio, predict its genre and mood, and draw a matching playlist.

  Returns:
    Up to 15 rows of `df_audio` whose mood and genre match the predictions
    (case-insensitive); an empty DataFrame when the recording failed.

  NOTE(review): `df_audio` is assumed to be the intended catalogue (the other
  playlist generators use it); the previous code read a local `df` before
  assigning it, which raised UnboundLocalError.
  """
  audio_filename = record_audio()
  if audio_filename is None:
    return pd.DataFrame()
  # The decoders return one label per input file; a single file is passed, so
  # take the first element before calling .lower() on it.
  genre = genre_decoding(p_vote_maj(Votes_of(models_Genres_probas_of([audio_filename], "paths"))))[0]
  mood = mood_decoding(p_vote_maj(Votes_of(models_Moods_probas_of([audio_filename], "paths"))))[0]
  filtered_df = df_audio[
    (df_audio['mood'].str.lower() == mood.lower()) &
    (df_audio['genre'].str.lower() == genre.lower())
  ]
  # Seeded draw of at most 15 tracks (fewer when not enough match)
  playlist = filtered_df.sample(n=min(15, len(filtered_df)), random_state=42)
  return playlist

# PHASE 3 : Application

### PREDICTION

In [None]:
# 3 ---------- dataset location and list of its .wav files
dataset_path = "/content/drive/MyDrive/Artishow/Data/genres_original"
subfolders = os.listdir(dataset_path)
print("Sous-dossiers:", subfolders)
audio_files = glob.glob(f"{dataset_path}/**/*.wav", recursive=True)
# 4 ---------- label mappings
# class code -> genre name
genre_mapping = {0: 'blues', 1: 'classical', 2: 'country', 3: 'disco', 4: 'hiphop', 5: 'jazz', 6: 'metal', 7: 'pop', 8: 'reggae', 9: 'rock'}
# model number -> display name; keys start at 1, not 0
model_mapping = {1:"SVM", 2:"Logistic Regression", 3:"Random Forest", 4:"Gradient Boosting", 5:"K-Nearest Neighbors", 6:"VGG-16"}
# genre name -> class code (inverse of genre_mapping)
inv_genre_mapping = {'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}

In [None]:
#deezer
import requests
import io
def load_audio_from_url(url, sr=22050, duration=30):
    """Download an audio file and decode it with librosa.

    Args:
        url: address of the audio file.
        sr: target sampling rate.
        duration: maximum number of seconds decoded.

    Returns:
        (y, sr): the audio samples and the sampling rate reported by librosa.

    Raises:
        requests.RequestException: on network failure, timeout or HTTP error status.
    """
    response = requests.get(url, timeout=30)
    # Fail here rather than handing an HTML error page to the audio decoder.
    response.raise_for_status()
    audio_bytes = io.BytesIO(response.content)
    y, sr = librosa.load(audio_bytes, sr=sr, duration=duration)
    return y, sr

# X / features
def Features(y, duration=30, sr=22050):
  """Build the feature vector of one audio signal.

  Args:
    y: audio samples.
    duration: not used; kept so existing calls stay valid.
    sr: sampling rate of `y`.

  Returns:
    A list of floats: per-row mean and standard deviation of each spectral
    feature, followed by the estimated tempo.
  """
  def stats(feature):
    # Per-row mean followed by per-row standard deviation along the frame axis.
    return list(map(float, np.mean(feature, axis=1))) + list(map(float, np.std(feature, axis=1)))

  # Feature extraction
  mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
  rms = librosa.feature.rms(y=y)
  spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
  bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
  contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
  flatness = librosa.feature.spectral_flatness(y=y)
  rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
  tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
  zero_crossing = librosa.feature.zero_crossing_rate(y=y)
  tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

  features = []
  for f in [mfcc, rms, spectral_centroid, bandwidth, contrast, flatness, rolloff, tonnetz, zero_crossing]:
    features.extend(stats(f))
  # Recent librosa versions return the tempo as a 1-element array; take the
  # element explicitly rather than relying on float(array), which NumPy deprecates.
  features.append(float(np.atleast_1d(tempo)[0]))
  return features

def Features_of(container, category, duration=30, sr=22050):
  """Extract the feature vector of every item of `container`.

  Args:
    container: sequence of file paths or of URLs.
    category: "paths" to load from disk, "urls" to download first.
    duration: maximum number of seconds loaded per item.
    sr: target sampling rate.

  Returns:
    The list of feature vectors, in the order of `container`.

  Raises:
    ValueError: if `category` is neither "paths" nor "urls". Previously an
      unknown category silently reused the audio of the previous item, or
      failed on an undefined variable for the first one.
  """
  print('Extracting features :', end='')
  X = [] # features
  for i, item in enumerate(container):
    if category == "paths":
      y, item_sr = librosa.load(item, sr=sr, duration=duration)
    elif category == "urls":
      # Forward sr / duration so both categories honour the same settings.
      y, item_sr = load_audio_from_url(item, sr=sr, duration=duration)
    else:
      raise ValueError(f"Unknown category {category!r}: expected 'paths' or 'urls'")
    # Pass the actual sampling rate of the loaded signal.
    X.append(Features(y, sr=item_sr))
    print(f'{i+1}✓', end='')
  print('')
  return X

def gtzan_Genres_of(files, duration=30, sr=22050):
  """Return the genre prefix (text before the first '.') of every file name.

  Prints a progress mark per file. `duration` and `sr` are accepted but unused.
  """
  print('Extracting genres :', end='')
  labels = []
  for position, file_path in enumerate(files, start=1):
    labels.append(os.path.basename(file_path).split('.')[0])
    print(f'{position}✓', end='')
  print('')
  return labels

def Genres_of(genres, duration=30, sr=22050):
  """Map genre names to their integer codes through `inv_genre_mapping`.

  `duration` and `sr` are accepted but unused. Unknown names raise KeyError.
  """
  codes = []
  for name in genres:
    codes.append(inv_genre_mapping[name])
  return codes

In [None]:
# Fichier Pré-entraînés 80-20 F_gtzan
import joblib
# Saved genre train / test data: Xe1 / Ye1 are fitted below, Xt1 / Yt1 are the test counterparts.
Xe1 = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/X_train.pkl')
Ye1 = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/Y_train.pkl')
Xt1 = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/X_test.pkl')
Yt1 = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/Y_test.pkl')
# Sanity check of the loaded labels. Per the original author's note: in case of
# trouble, Artishow/Modèle/Saves holds the correct pkl files.
print(Ye1)

In [None]:
# Entrainement Classifiers
# ML ----------------------------------------
from time import sleep
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler

# Fit the four genre classifiers on the saved training data.
# probability=True is required for the predict_proba call used later.
model_svm = SVC(kernel='linear', probability=True)
model_svm.fit(Xe1, Ye1)
model_lr = LogisticRegression()
model_lr.fit(Xe1, Ye1)
model_rf = RandomForestClassifier(n_estimators=103, random_state=42)
model_rf.fit(Xe1, Ye1)
model_knn = KNeighborsClassifier(n_neighbors=5)
model_knn.fit(Xe1, Ye1)

In [None]:
def models_Genres_probas_of(container, category):
  """Return, per item of `container`, the genre probabilities of each model.

  Args:
    container: sequence of file paths or URLs.
    category: "paths" or "urls", forwarded to Features_of.

  Returns:
    A list as long as `container`; each entry is the list of the four
    probability vectors [SVM, logistic regression, random forest, k-NN].
  """
  feature_scaler = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/scaler.pkl')
  X_scaled = feature_scaler.transform(Features_of(container, category))
  per_model = [
      clf.predict_proba(X_scaled)
      for clf in (model_svm, model_lr, model_rf, model_knn)
  ]
  # Regroup by item: entry i holds the i-th row of every model's output.
  return [[probas[i] for probas in per_model] for i in range(len(container))]

In [None]:
def pos_max(t):
  """Return the index of the largest value of `t` (first one on ties, 0 if empty)."""
  return max(range(len(t)), key=lambda k: t[k], default=0)

def Votes_of(P):
  """Turn per-model probability vectors into per-model votes.

  Args:
    P: for each file, the list of probability vectors (one per model).

  Returns:
    For each file, the list of class indices voted by each model (index of
    the highest probability). An empty `P` gives an empty list instead of
    failing on `P[0]`.
  """
  return [[pos_max(probas) for probas in file_probas] for file_probas in P]

def p_vote_maj(V, n_classes=None): # prediction
  """Majority vote: return the most voted class index for each file.

  Args:
    V: for each file, the list of class indices voted by the models.
    n_classes: number of classes; defaults to len(genre_mapping), the
      previous hard-wired value. Pass it explicitly when voting over another
      label set (e.g. moods).

  Returns:
    One class index per file; ties go to the lowest index. An empty `V`
    gives an empty list instead of failing on `V[0]`.
  """
  if n_classes is None:
    n_classes = len(genre_mapping)
  Y = []
  for votes in V:
    total_par_genre = [0] * n_classes
    for vote in votes:
      total_par_genre[vote] += 1
    Y.append(pos_max(total_par_genre))
  return Y


def genre_decoding(t):
  """Translate a sequence of class codes into genre names via `genre_mapping`."""
  return [genre_mapping[t[k]] for k in range(len(t))]

### Moods

In [None]:
# class code -> mood name, used by mood_decoding().
mood_mapping = {0: 'dark', 1: 'deep', 2: 'dream', 3: 'emotional', 4: 'epic', 5: 'happy', 6: 'motivational', 7: 'relaxing', 8: 'romantic', 9: 'sad'}

In [None]:
# Pre-computed 80-20 train/test split for the mood classifiers
# (original note: "F_gtzan").
import joblib
We1 = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/X_train.pkl')
Me1 = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/Y_train.pkl')
Wt1 = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/X_test.pkl')
Mt1 = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/Y_test.pkl')
# Sanity check on the mood training labels loaded above (Me1), not on the
# genre labels Ye1. In case of a problem, Artishow/Modèle/Saves contains the
# correct pkl files.
print(Me1)

In [None]:
# Classifier training (1)
# ML ----------------------------------------
# The four mood classifiers share the same training split (We1, Me1).
# fit() returns the estimator itself, so construction and training are
# chained into one assignment per model.
mood_svm = SVC(kernel='linear', probability=True).fit(We1, Me1)  # probability=True is required for predict_proba
mood_lr = LogisticRegression().fit(We1, Me1)
mood_rf = RandomForestClassifier(n_estimators=103, random_state=42).fit(We1, Me1)
mood_knn = KNeighborsClassifier(n_neighbors=5).fit(We1, Me1)
mood_knn  # trailing expression: the cell still displays the fitted KNN model

In [None]:
def models_Moods_probas_of(container, category):
  """Collect the mood class probabilities produced by the four mood models.

  Args:
    container: items to classify; passed to ``Features_of``. Its length fixes
      the number of entries in the result.
    category: passed unchanged to ``Features_of`` (generic input selector).

  Returns:
    A list with one entry per item of ``container``. Each entry is the list
    ``[svm, lr, rf, knn]`` of probability rows predicted for that item.
  """
  mood_scaler = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/scaler.pkl')
  features = mood_scaler.transform(Features_of(container, category))
  # Same model order as the rows of each result entry: SVM, LR, RF, KNN.
  per_model = [
      clf.predict_proba(features)
      for clf in (mood_svm, mood_lr, mood_rf, mood_knn)
  ]
  return [[probas[i] for probas in per_model] for i in range(len(container))]

In [None]:
def mood_decoding(t):
  """Translate each encoded mood in ``t`` to its name through ``mood_mapping``."""
  return [mood_mapping[t[k]] for k in range(len(t))]

### Étiquetage - Labelling

In [None]:
import os

def extract_track_id(filepath):
    """Read the numeric track id encoded at the start of a file name.

    The id is taken to be the part of the base name that precedes the first
    '_' and must parse as an integer.

    Args:
        filepath: path to an existing file.

    Returns:
        The id as an ``int``, or ``None`` (after printing a message) when the
        path is not an existing file or the prefix is not an integer.
    """
    if os.path.isfile(filepath):
        filename = os.path.basename(filepath)
        try:
            # partition() yields the same prefix as split('_')[0].
            return int(filename.partition('_')[0])
        except Exception as e:
            print(f"Erreur extraction ID dans '{filename}': {e}")
    else:
        print(f"Attention : '{filepath}' n'est pas un fichier valide.")
    return None


In [None]:
def f_mood_tristan(track_id):
  """Return the decoded majority-vote mood predictions of the mood models.

  NOTE(review): ``track_id`` is never used. The prediction always runs on
  ``audio_files[:2]`` (the first two entries of the global path list), so the
  returned list does not depend on the argument. This looks like a
  placeholder; confirm how a track id should be resolved to an audio path.
  """
  probas = models_Moods_probas_of(audio_files[:2], "paths")
  votes = Votes_of(probas)
  winners = p_vote_maj(votes)
  return mood_decoding(winners)

In [None]:
def mood_for_song(filepath):
  """Predict the mood for the audio file at ``filepath``.

  Returns:
    ``None`` when no numeric track id can be extracted from the file name
    (``extract_track_id`` prints the reason), otherwise the result of
    ``f_mood_tristan`` for that id.
  """
  track_id = extract_track_id(filepath)
  if track_id is None:
    # Never hand an invalid id to the predictor.
    return None
  return f_mood_tristan(track_id)

In [None]:
from google.colab import files

# Download the saved KNN model and the mood scaler from Drive via Colab.
# NOTE(review): the model is taken from the 'ML' folder while the scaler is
# taken from 'ML_moods' — confirm this pairing is intended.
files.download('/content/drive/MyDrive/Artishow/Modèle/ML/knn_model.pkl')
files.download('/content/drive/MyDrive/Artishow/Modèle/ML_moods/scaler.pkl')


In [None]:
# Load the saved KNN classifier and the mood feature scaler as notebook globals.
# NOTE(review): the KNN model is read from the 'ML' folder but is used below to
# predict moods with the 'ML_moods' scaler — confirm it is the mood model.
knn_model = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML/knn_model.pkl')
scaler = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/scaler.pkl')

def pred_musique_mood_knn(file_path):
  """Predict the mood of one audio file with the saved KNN model.

  The return value of ``extract_features`` is not used: the features are read
  from the notebook-global list ``X``, which is emptied again before
  returning. NOTE(review): this presumes ``extract_features`` appends the
  file's feature row to ``X`` — confirm.

  Args:
    file_path: path of the audio file to analyse (first 30 s at 22050 Hz).

  Returns:
    The mood name looked up in ``mood_mapping`` for the first predicted row.
  """
  try:
    extract_features(file_path, duration=30, sr=22050)
    df_test = pd.DataFrame(X)
    # Reloaded on every call, so the function does not depend on whatever the
    # global name `scaler` currently refers to.
    scaler = joblib.load('/content/drive/MyDrive/Artishow/Modèle/ML_moods/scaler.pkl')
    df_test_scaled = scaler.transform(df_test)
    prediction = knn_model.predict(df_test_scaled)
  finally:
    # Always empty the shared buffer, even when a step above fails; otherwise
    # the leftover row would be the one predicted on the next call.
    X.clear()
  mood = mood_mapping[int(prediction[0])]
  print(f"Prédiction (Mood) : {mood}")
  return mood

In [None]:
# Predict a mood for every row from its 'filepath' and store it in the 'mood'
# column. pred_musique_mood_knn is called once per file and prints each result.
df_audio['mood'] = df_audio['filepath'].apply(pred_musique_mood_knn)

In [None]:
# Save the labelled DataFrame to Drive; index=False leaves the pandas index out of the file.
df_audio.to_csv('/content/drive/MyDrive/Artishow/Modèle/df_audio.csv', index=False)

In [None]:
# Display the DataFrame to inspect the new 'mood' column.
df_audio

In [None]:
import concurrent.futures

def mood_for_song(filepath):
    """Mood lookup for one song: extract the track id, then predict.

    Returns ``None`` when ``extract_track_id`` cannot produce an id for
    ``filepath``; otherwise returns the result of ``f_mood_tristan``.
    """
    track_id = extract_track_id(filepath)
    return None if track_id is None else f_mood_tristan(track_id)

# Paths of all songs, in DataFrame row order.
filepaths = df_audio['filepath'].tolist()

# Run mood_for_song on up to 8 worker threads. executor.map yields results in
# input order, so `moods` lines up with `filepaths`.
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
    moods = list(executor.map(mood_for_song, filepaths))

# Replaces any 'mood' column already present in df_audio.
df_audio['mood'] = moods


In [None]:
# Preview the first rows to check the 'mood' column.
df_audio.head()

In [None]:
import pandas as pd
from google.colab import drive



# --- STEP 2: define the destination path and save ---

# File name and destination of the CSV export.
# '/content/drive/MyDrive/' is the root of "My Drive"; the file is written to
# its 'Artishow' sub-folder. Other sub-folders work the same way, e.g.
# '/content/drive/MyDrive/MonProjet/donnees_traitees.csv'.
nom_du_fichier = 'df_audio.csv'
chemin_de_sauvegarde = f'/content/drive/MyDrive/Artishow/{nom_du_fichier}'

print(f"\nSauvegarde du DataFrame vers : {chemin_de_sauvegarde}")

# df_audio is the DataFrame to export.
# index=False keeps the pandas index from being written as an extra,
# useless column.
try:
    df_audio.to_csv(chemin_de_sauvegarde, index=False)
    print("\n✅ Sauvegarde terminée avec succès !")
    # Report the real location: the file lands in the 'Artishow' folder, not
    # at the root of the Drive.
    print(f"Vous pouvez trouver votre fichier '{nom_du_fichier}' dans le dossier 'Artishow' de votre Google Drive.")

except Exception as e:
    # Best-effort save: report the failure instead of stopping the notebook.
    print(f"\n❌ Une erreur est survenue lors de la sauvegarde : {e}")


In [None]:
# Reload the DataFrame from the CSV stored in the Artishow folder on Drive.
df_audio = pd.read_csv('/content/drive/MyDrive/Artishow/df_audio.csv')