In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import sys
import os
# Quiet TensorFlow's native (C++) logging.
# TF_CPP_MIN_LOG_LEVEL is read when the TensorFlow runtime is loaded, so it must
# be set BEFORE `import tensorflow`; setting it afterwards leaves start-up INFO
# messages (e.g. the cpu_feature_guard notice) visible.
# Levels: 0 = show everything, 1 = hide INFO, 2 = hide INFO + WARNING,
#         3 = hide INFO + WARNING + ERROR.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import warnings

import tensorflow as tf

# Python-side TensorFlow logger: only report ERROR and above.
tf.get_logger().setLevel('ERROR')

# Suppress specific Python-level warnings.
# NOTE(review): these filters only affect messages raised through the `warnings`
# module; NUMA notices printed by the native logger are governed by
# TF_CPP_MIN_LOG_LEVEL above instead.
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
warnings.filterwarnings('ignore', message='.*NUMA node.*')
warnings.filterwarnings('ignore', message='.*Could not identify NUMA node.*')

# Make the helper modules in ../scripts importable. The path is resolved against
# the current working directory; the membership check keeps re-running this cell
# from appending duplicate entries.
_scripts_dir = os.path.abspath("../scripts")
if _scripts_dir not in sys.path:
    sys.path.append(_scripts_dir)

2025-09-09 10:48:28.806565: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Run the audio processing pipeline over the songs listed in the database,
# save the resulting metadata to JSON, and print a per-song summary.
import traceback

from audio_pipeline import AudioProcessingPipeline

pipeline = AudioProcessingPipeline(
    db_path="data/sql/clean.db",
    output_dir="data/audio/raw",
    processed_dir="data/processed"
)

try:
    print("Processing songs from database...")
    processed_songs = pipeline.process_songs_from_database(
        table_name="all_awards",  # Adjust table name based on your database schema
        # limit=10,             # Uncomment to process only 10 songs for a quick demo
        force_reprocess=False,  # Use cache to avoid reprocessing
        song_column="song",     # Column name for song titles
        artist_column="artist"  # Column name for artist names
    )

    print(f"\n✅ Successfully processed {len(processed_songs)} unique songs")

    # Save the results to JSON (skipped when nothing was processed)
    if processed_songs:
        output_file = pipeline.save_processed_metadata(
            processed_songs,
            "database_processed_songs.json"
        )
        print(f"📁 Saved metadata to: {output_file}")

        # Show summary of processed songs
        print("\nProcessed songs summary:")
        for i, song in enumerate(processed_songs, 1):
            print(f"  {i}. {song['artist']} - {song['song_title']} ({song['duration_seconds']:.1f}s)")

    # Show processing summary
    pipeline.get_processing_summary()

except Exception as e:
    # Best-effort cell: report the failure instead of aborting the notebook.
    # The exception type and full traceback are included because the failure is
    # not necessarily database-related (e.g. a missing key in the summary loop).
    print(f"❌ Error processing database: {type(e).__name__}: {e}")
    print("Make sure your database exists and has the correct table structure")
    traceback.print_exc()

finally:
    # Always release pipeline resources, whether or not processing succeeded.
    pipeline.cleanup()

[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
INFO:feature_extractor:Feature extractor initialized. Models will be loaded on first use.


Initializing feature extractor...
Audio Processing Pipeline initialized
Database: data/sql/clean.db
Output directory: data/audio/raw
Processed directory: data/processed
Cache file: data/processed/processing_cache.json
Processing songs from database...
Found 457 unique song-artist combinations in database

Processing song 1/457
Artist-song combination 'G-Dragon - HOME SWEET HOME' already processed (file: G-Dragon - HOME SWEET HOME.wav), skipping...
Successfully processed: G-Dragon - HOME SWEET HOME

Processing song 2/457
Artist-song combination 'aespa - Whiplash' already processed (file: aespa - Whiplash.wav), skipping...
Successfully processed: aespa - Whiplash

Processing song 3/457
Artist-song combination 'ROSÉ - toxic till the end' already processed (file: ROSÉ - toxic till the end.wav), skipping...
Successfully processed: ROSÉ - toxic till the end

Processing song 4/457
Artist-song combination 'Taeyeon - Letter To Myself' already processed (file: Taeyeon - Letter To Myself.wav), sk

	This alias will be removed in version 1.0.
  duration = librosa.get_duration(filename=audio_file_path)
INFO:feature_extractor:Loading model: tempocnn
INFO:feature_extractor:Successfully loaded model: tempocnn


Audio file: /home/ccaban/Coding/Projects/kpop-music-winners/data/audio/raw/aespa - Up (KARINA).wav
Sample rate: 16000 Hz
Duration: 166.64 seconds
Channels: 1
Data type: float32
✓ Sample rate is correct for Essentia models
True
Processing audio file: aespa - Up (KARINA).wav
Extracting audio features...
Extracting features from: /home/ccaban/Coding/Projects/kpop-music-winners/data/audio/raw/aespa - Up (KARINA).wav
Audio loaded successfully. Duration: 166.64 seconds
Extracting tempo using TempoCNN...


In [None]:
# Load the processed-song metadata from JSON and flatten it into a DataFrame.
import pandas as pd
import json

# NOTE(review): the processing cell saves "database_processed_songs.json" under
# "data/processed", while this cell reads the "demo_" variant via "../data/processed"
# — confirm this is the intended file and location.
# The encoding is explicit because the metadata contains non-ASCII titles/artists
# and open() otherwise falls back to the platform's locale encoding.
with open("../data/processed/demo_database_processed_songs.json", 'r', encoding="utf-8") as f:
    data = json.load(f)

# Nested keys are flattened into column names joined with "__".
df = pd.json_normalize(data, sep="__")
df.head(5)