In [2]:
import pandas as pd
import numpy as np
import librosa
import os
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import f1_score, confusion_matrix, classification_report
import joblib
import zipfile
from scipy.io import wavfile
import io
import matplotlib.pyplot as plt

from google.colab import drive
# Make Google Drive visible under /content/drive so the dataset can be read.
drive.mount('/content/drive')

# Locations of the label table and the audio archive on Drive.
csv_path = "/content/drive/MyDrive/train.csv"
zip_path = "/content/drive/MyDrive/audio_train.zip"

# Load the label table and add a 'path' column holding each clip's name
# inside the archive (the 'fname' value prefixed with 'train/').
df = pd.read_csv(csv_path).assign(path=lambda table: 'train/' + table['fname'])

# Open the archive read-only; clips are read from it by name later on.
zip_file = zipfile.ZipFile(zip_path, mode='r')

# MFCC feature extraction
def read_wav_from_zip(zip_store, filename):
    """Read one WAV member of an open zip archive into memory.

    Parameters
    ----------
    zip_store : zipfile.ZipFile
        Archive opened for reading.
    filename : str
        Name of the WAV member inside the archive.

    Returns
    -------
    tuple
        ``(sr, wave)`` as produced by ``scipy.io.wavfile.read``. When the
        decoded data has more than one dimension, the channels are averaged
        along axis 1 so that ``wave`` is one-dimensional.
    """
    buffer = io.BytesIO(zip_store.read(filename))
    try:
        sr, wave = wavfile.read(buffer)
    finally:
        buffer.close()

    # Multi-channel audio: collapse the channel axis into a single mono signal.
    is_multichannel = wave.ndim > 1
    if is_multichannel:
        wave = np.mean(wave, axis=1)
    return sr, wave

def extract_mfcc(sr, wave, n_mfcc=13):
    """Summarise a waveform as the per-coefficient mean of its MFCCs.

    Parameters
    ----------
    sr : int
        Sampling rate of ``wave``.
    wave : numpy.ndarray
        Audio samples; they are cast to float32 before being handed to librosa.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 13).

    Returns
    -------
    numpy.ndarray
        The MFCC matrix averaged over its second axis (the time axis per the
        original comment), giving one value per coefficient.
    """
    samples = wave.astype(np.float32)
    coefficients = librosa.feature.mfcc(y=samples, sr=sr, n_mfcc=n_mfcc)
    # Transpose so rows are frames, then average over the frames.
    per_frame = coefficients.T
    return per_frame.mean(axis=0)

# Build one MFCC feature vector per clip listed in df.
X, y = [], []
failed_files = []  # archive paths whose audio could not be read or processed

# Walk the two needed columns directly: iterrows() would construct a full
# Series for every row only to read 'path' and 'label' from it.
for path, label in tqdm(zip(df['path'], df['label']), total=len(df)):
    try:
        sr, wave = read_wav_from_zip(zip_file, path)
        mfcc = extract_mfcc(sr, wave)
    except Exception as e:
        # Best effort: report the clip, remember it, and keep going so that a
        # single unreadable file does not abort the whole extraction.
        print(f"Ошибка с файлом {path}: {e}")
        failed_files.append(path)
        continue
    X.append(mfcc)
    y.append(label)

# Make the amount of skipped data visible instead of leaving it scattered
# through the progress output.
if failed_files:
    print(f"Skipped {len(failed_files)} of {len(df)} files; see failed_files")

# Without any features the later split/fit would fail with an obscure error,
# so stop here with an explicit message.
if not X:
    raise RuntimeError(
        "No MFCC features were extracted; check the archive and the 'path' values in df"
    )

# One row per successfully processed clip, one column per MFCC coefficient,
# plus the class label; persisted to Drive as a pickle.
features_df = pd.DataFrame(X)
features_df['label'] = y
features_df.to_pickle("/content/drive/MyDrive/features_mfcc.pkl")

# NOTE(review): zip_file is never closed in the visible code; call
# zip_file.close() once no later cell needs to read from the archive.

# Split into train and validation sets.
# Every column except 'label' is a feature; 'label' is the target.
X = features_df.drop('label', axis=1)
y = features_df['label']
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,    # hold out 20% of the clips for validation
    random_state=42,  # fixed seed so the split is repeatable
    stratify=y,       # stratify the split on the class labels
)

# Model training: baseline random forest fitted on the training split.
clf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Metrics
def evaluate(model, X, y, name=""):
    """Print classification metrics for ``model`` on one dataset.

    Prints a header with ``name``, the weighted F1 score, the confusion
    matrix and the per-class classification report. Nothing is returned.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Feature matrix passed to ``model.predict``.
    y : array-like
        True labels for the rows of ``X``.
    name : str, optional
        Label printed in the header line (e.g. "Train", "Validation").
    """
    y_pred = model.predict(X)
    print(f"--- {name} ---")
    # zero_division=0: a class that is never predicted has undefined
    # precision. The default ("warn") also scores it as 0 but emits an
    # UndefinedMetricWarning on every call; setting 0 explicitly keeps the
    # numbers and drops the warning noise.
    print("F1 Score:", f1_score(y, y_pred, average='weighted', zero_division=0))
    print("Confusion Matrix:")
    print(confusion_matrix(y, y_pred))
    print("Classification Report:")
    print(classification_report(y, y_pred, zero_division=0))

# Baseline scores: first on the data the model was fitted on, then on the
# held-out validation split.
for split_name, split_X, split_y in (
    ("Train", X_train, y_train),
    ("Validation", X_val, y_val),
):
    evaluate(clf, split_X, split_y, split_name)

# Hyperparameter tuning: 3-fold cross-validated grid search over forest size
# and tree depth, scored by weighted F1, using all available cores.
params = {'n_estimators': [100, 200], 'max_depth': [None, 20, 40]}
grid = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=params,
    scoring='f1_weighted',
    cv=3,
    n_jobs=-1,
    verbose=1,
)
grid.fit(X_train, y_train)

# Report the winning configuration and score it on the validation split.
best_model = grid.best_estimator_
print("Best Params:", grid.best_params_)
evaluate(best_model, X_val, y_val, "Validation After Tuning")


# Persist the tuned model to Drive; as the last expression of the cell, the
# list returned by joblib.dump is shown as the cell result.
joblib.dump(best_model, '/content/drive/MyDrive/rf_model.pkl')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


100%|██████████| 5683/5683 [02:42<00:00, 34.94it/s]


--- Train ---
F1 Score: 1.0
Confusion Matrix:
[[150   0   0 ...   0   0   0]
 [  0 146   0 ...   0   0   0]
 [  0   0 118 ...   0   0   0]
 ...
 [  0   0   0 ... 150   0   0]
 [  0   0   0 ...   0 134   0]
 [  0   0   0 ...   0   0 129]]
Classification Report:
                       precision    recall  f1-score   support

      Acoustic_guitar       1.00      1.00      1.00       150
             Applause       1.00      1.00      1.00       146
                 Bark       1.00      1.00      1.00       118
            Bass_drum       1.00      1.00      1.00       145
Burping_or_eructation       1.00      1.00      1.00        90
                  Bus       1.00      1.00      1.00        51
                Cello       1.00      1.00      1.00       149
                Chime       1.00      1.00      1.00        62
             Clarinet       1.00      1.00      1.00       141
    Computer_keyboard       1.00      1.00      1.00        53
                Cough       1.00      1.00   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best Params: {'max_depth': 20, 'n_estimators': 200}
--- Validation After Tuning ---
F1 Score: 0.4878481930050049
Confusion Matrix:
[[25  0  0 ...  0  1  0]
 [ 0 30  0 ...  0  0  0]
 [ 1  2 20 ...  1  0  0]
 ...
 [ 0  2  1 ... 25  0  0]
 [ 0  0  0 ...  1 19  2]
 [ 3  0  1 ...  0  0 11]]
Classification Report:
                       precision    recall  f1-score   support

      Acoustic_guitar       0.48      0.68      0.56        37
             Applause       0.57      0.81      0.67        37
                 Bark       0.50      0.69      0.58        29
            Bass_drum       0.55      0.64      0.59        36
Burping_or_eructation       0.76      0.57      0.65        23
                  Bus       0.38      0.38      0.38        13
                Cello       0.56      0.65      0.60        37
                Chime       0.78      0.44      0.56        16
             Clarinet       0.57      0.69      0.62        35
    Computer_keyboard       0.33      0.08      0.12       

['/content/drive/MyDrive/rf_model.pkl']