### GTZAN資料集處理 
將音訊檔案以3秒為單位切割，處理成26維特徵，以csv方式儲存
### GTZAN Dataset Processing
Split audio files into 3-second segments, process them into 26-dimensional features, and save them as CSV.

In [None]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

class GTZANFeatureExtractor:
    """Extract 26-value feature vectors from GTZAN audio and export them as CSV.

    Each vector holds, in order, the mean over all frames of: chroma_stft,
    rms, spectral_centroid, spectral_bandwidth, spectral_rolloff and
    zero_crossing_rate, followed by the per-coefficient means of 20 MFCCs.
    Features can be computed for a whole file or for fixed-length segments.

    Attributes:
        sample_rate: Sampling rate (Hz) every file is resampled to on load.
        segment_duration: Segment length in seconds.
        segment_samples: Segment length in samples (always an ``int``).
        genres: Genre sub-folder names, also used as the CSV labels.
    """

    # Number of MFCC coefficients and total vector length (6 scalar
    # descriptors + the MFCC means).
    _N_MFCC = 20
    _N_FEATURES = 6 + _N_MFCC
    # Whole-file extraction reads at most this many seconds of audio.
    _FULL_CLIP_SECONDS = 30
    # CSV column names of the feature values, in vector order.
    _FEATURE_COLUMNS = (
        'chroma_stft', 'rmse', 'spectral_centroid',
        'spectral_bandwidth', 'rolloff', 'zero_crossing_rate',
    ) + tuple(f'mfcc{i}' for i in range(1, _N_MFCC + 1))

    def __init__(self, sample_rate=22050, segment_duration=3):
        """Store the sampling parameters and the list of genre labels.

        Args:
            sample_rate: Target sampling rate in Hz.
            segment_duration: Segment length in seconds; may be fractional.
        """
        self.sample_rate = sample_rate
        self.segment_duration = segment_duration
        # Must be an int: it is used as a range() step and as a slice bound.
        # A float duration (e.g. 3.0) would otherwise raise inside
        # split_audio_to_segments and silently yield no segments at all.
        self.segment_samples = int(round(sample_rate * segment_duration))

        # Genre labels (one sub-folder per genre is expected).
        self.genres = ['blues', 'classical', 'country', 'disco', 'hiphop',
                       'jazz', 'metal', 'pop', 'reggae', 'rock']

    def _compute_features(self, y, sr):
        """Return the 26 feature means for the signal ``y`` sampled at ``sr``.

        Any exception raised by librosa propagates to the caller.
        """
        frame_descriptors = (
            librosa.feature.chroma_stft(y=y, sr=sr),
            librosa.feature.rms(y=y),
            librosa.feature.spectral_centroid(y=y, sr=sr),
            librosa.feature.spectral_bandwidth(y=y, sr=sr),
            librosa.feature.spectral_rolloff(y=y, sr=sr),
            librosa.feature.zero_crossing_rate(y),
        )
        features = [np.mean(values) for values in frame_descriptors]

        # One mean per MFCC coefficient (row of the MFCC matrix).
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=self._N_MFCC)
        features.extend(np.mean(coefficient) for coefficient in mfcc)
        return features

    def _features_from_file(self, audio_path):
        """Load a file and return its feature list, or ``None`` on failure."""
        try:
            y, sr = librosa.load(audio_path, sr=self.sample_rate,
                                 duration=self._FULL_CLIP_SECONDS)
            return self._compute_features(y, sr)
        except Exception as e:
            print(f"處理檔案 {audio_path} 時發生錯誤: {e}")
            return None

    def _features_from_segment(self, audio_segment):
        """Return the feature list of a segment, or ``None`` on failure."""
        try:
            return self._compute_features(audio_segment, self.sample_rate)
        except Exception as e:
            print(f"提取片段特徵時發生錯誤: {e}")
            return None

    def extract_features(self, audio_path):
        """Extract the 26 features from (at most) the first 30 s of a file.

        Args:
            audio_path: Path of the audio file to load.

        Returns:
            A list of 26 values. If loading or extraction fails, the error
            is printed and a list of 26 zeros is returned instead.
        """
        features = self._features_from_file(audio_path)
        return [0] * self._N_FEATURES if features is None else features

    def split_audio_to_segments(self, audio_path):
        """Load a file and cut it into consecutive fixed-length segments.

        A trailing piece longer than half a segment is zero-padded to full
        length; a shorter one is dropped.

        Args:
            audio_path: Path of the audio file to load.

        Returns:
            A list of 1-D arrays, each ``segment_samples`` long. If anything
            fails, the error is printed and an empty list is returned.
        """
        try:
            y, _ = librosa.load(audio_path, sr=self.sample_rate)
            step = self.segment_samples

            segments = []
            for start in range(0, len(y), step):
                segment = y[start:start + step]

                if len(segment) == step:
                    segments.append(segment)
                elif len(segment) > step * 0.5:
                    # Keep a tail longer than half a segment, zero-padded.
                    segments.append(
                        np.pad(segment, (0, step - len(segment)), 'constant'))

            return segments

        except Exception as e:
            print(f"分割音頻 {audio_path} 時發生錯誤: {e}")
            return []

    def extract_features_from_segment(self, audio_segment):
        """Extract the 26 features from an in-memory audio segment.

        Args:
            audio_segment: 1-D signal assumed to be sampled at
                ``self.sample_rate``.

        Returns:
            A list of 26 values. If extraction fails, the error is printed
            and a list of 26 zeros is returned instead.
        """
        features = self._features_from_segment(audio_segment)
        return [0] * self._N_FEATURES if features is None else features

    def process_dataset(self, data_path, output_csv='data_features.csv', use_segments=True):
        """Extract features for every ``.wav`` file under the genre folders.

        Files are processed in sorted name order so the CSV is reproducible.
        Files or segments whose extraction fails are reported and skipped
        rather than written as all-zero rows.

        Args:
            data_path: Directory containing one sub-folder per genre.
            output_csv: Path of the CSV file to write.
            use_segments: If true, emit one row per segment of each file;
                otherwise one row per file.

        Returns:
            The DataFrame that was written, with columns ``filename``, the
            26 feature columns and ``label``.
        """
        print("=== GTZAN 資料集特徵提取開始 ===")
        print(f"使用3秒切割: {use_segments}")
        print(f"輸出檔案: {output_csv}")

        rows = []

        for genre in self.genres:
            genre_path = os.path.join(data_path, genre)

            # isdir (not exists) so a stray regular file cannot reach listdir.
            if not os.path.isdir(genre_path):
                print(f"警告: 找不到資料夾 {genre_path}")
                continue

            audio_files = sorted(
                f for f in os.listdir(genre_path) if f.endswith('.wav'))
            print(f"\n處理 {genre} 類型: {len(audio_files)} 個檔案")

            for filename in tqdm(audio_files, desc=f"處理 {genre}"):
                file_path = os.path.join(genre_path, filename)

                if use_segments:
                    segments = self.split_audio_to_segments(file_path)
                    for i, segment in enumerate(segments):
                        features = self._features_from_segment(segment)
                        if features is not None:
                            rows.append(
                                [f"{filename}_segment_{i}"] + features + [genre])
                else:
                    features = self._features_from_file(file_path)
                    if features is not None:
                        rows.append([filename] + features + [genre])

        columns = ['filename', *self._FEATURE_COLUMNS, 'label']
        df = pd.DataFrame(rows, columns=columns)
        df.to_csv(output_csv, index=False)

        print(f"\n=== 特徵提取完成 ===")
        print(f"總樣本數: {len(df)}")
        print(f"特徵維度: {len(df.columns) - 2}")  # excludes filename and label
        print(f"CSV 檔案已儲存: {output_csv}")

        # Per-genre sample counts.
        print(f"\n各類別樣本數:")
        for genre in self.genres:
            count = len(df[df['label'] == genre])
            print(f"  {genre}: {count}")

        return df

In [None]:
def main():
    """Run feature extraction over the local GTZAN folder and preview the result.

    Looks for the dataset in ``genres_wav`` relative to the working
    directory. If that path is missing, an error is printed and the function
    returns without doing anything else. Otherwise the dataset is processed
    in 3-second-segment mode, written to ``data_features.csv``, and the
    resulting DataFrame's shape, columns and first rows are printed.
    """
    dataset_root = "genres_wav"          # location of the GTZAN dataset
    csv_target = "data_features.csv"     # name of the CSV file to write
    split_into_segments = True           # cut each file into 3-second pieces

    # Guard clause: nothing to do without the dataset on disk.
    if not os.path.exists(dataset_root):
        print(f"錯誤: 找不到資料路徑 {dataset_root}")
        print("請確認GTZAN資料集已下載並解壓縮到正確位置")
        return

    # Build the extractor and process the dataset in one go.
    df = GTZANFeatureExtractor(
        sample_rate=22050,
        segment_duration=3,
    ).process_dataset(
        data_path=dataset_root,
        output_csv=csv_target,
        use_segments=split_into_segments,
    )

    # Summarise what was produced.
    print("\n=== 驗證結果 ===")
    print(f"DataFrame 形狀: {df.shape}")
    print(f"欄位名稱: {list(df.columns)}")
    print("\n前5行預覽:")
    print(df.head())

    print("\n✅ 資料處理完成！")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
