In [1]:
import sklearn

# Print the installed scikit-learn version so the environment used for this run is visible in the output.
print(sklearn.__version__)

1.2.2


In [3]:
import librosa
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier


def extract_features(data, sr):
    """Summarise an audio signal as six mean-pooled librosa descriptors.

    Parameters
    ----------
    data
        Audio time series; handed to each librosa feature function as ``y``.
    sr
        Sampling rate; forwarded to the feature functions that take ``sr``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, 6)`` holding, in this order, the mean of:
        chroma_stft, rms, spectral_centroid, spectral_bandwidth,
        spectral_rolloff and zero_crossing_rate. The single-row 2-D shape
        lets the result be passed straight to scikit-learn ``transform`` /
        ``predict`` calls.
    """
    # Per-frame descriptors, listed in the column order of the returned row.
    frame_level = (
        librosa.feature.chroma_stft(y=data, sr=sr),
        librosa.feature.rms(y=data),
        librosa.feature.spectral_centroid(y=data, sr=sr),
        librosa.feature.spectral_bandwidth(y=data, sr=sr),
        librosa.feature.spectral_rolloff(y=data, sr=sr),
        librosa.feature.zero_crossing_rate(data),
    )

    # Collapse every descriptor to one scalar (mean over all of its values).
    features = np.array([np.mean(values) for values in frame_level])

    return features.reshape(1, -1)


# Sample rate (Hz) passed to every librosa.load call below.
sr = 44000

# Number of decimals kept in the feature table / CSV. The same rounding is
# applied to the user's features so training and inference inputs match.
ROUND_DECIMALS = 2

# Training recordings, one per scale note, in label order
# (do, re, mi, fa, sol, la, si, high do -> labels '0'..'7').
note_files = [
    'ado_1.wav', 'are_1.wav', 'ami_1.wav', 'apa_1.wav',
    'asol_1.wav', 'ala_1.wav', 'asi_1.wav', 'ado_2.wav',
]

# Column names, in the order extract_features() emits its six values.
feature_columns = [
    'chroma_stft_mean', 'rmse_mean', 'spectral_centroid_mean',
    'spectral_bandwidth_mean', 'rolloff_mean', 'zero_crossing_rate_mean',
]

# Load each recording and extract its (1, 6) feature row.
note_audio = []
note_features = []
for note_file in note_files:
    note_signal, sr = librosa.load(note_file, sr=sr)
    note_audio.append(note_signal)
    note_features.append(extract_features(note_signal, sr))

# Keep the per-note names available for any code that refers to them.
(do_data, re_data, mi_data, fa_data,
 sol_data, la_data, si_data, do2_data) = note_audio
(do_features, re_features, mi_features, fa_features,
 sol_features, la_features, si_features, do2_features) = note_features

# Arrange the features as a table: one row per note, rounded for storage.
df = pd.DataFrame(np.round(np.vstack(note_features), ROUND_DECIMALS),
                  columns=feature_columns)
df['label'] = [str(index) for index in range(len(note_files))]
print(df)

df.to_csv('doremi_features.csv', index=False)

# Load the audio file recorded by the user.
user_file = 'are_1.wav'
user_data, sr = librosa.load(user_file, sr=sr)


# Load the training data back from the CSV file.
data = pd.read_csv('doremi_features.csv')

# Split features (all columns but the last) from the label (last column).
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Standardise the features.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

dt = DecisionTreeClassifier(random_state=12)
dt.fit(X_scaled, y)


# Extract the user's features and scale them like the training rows.
user_features = extract_features(user_data, sr)
# The scaler was fitted on a DataFrame, so hand it a DataFrame with the same
# column names (a bare ndarray triggers scikit-learn's feature-name warning),
# and apply the same rounding the stored training rows went through.
user_frame = pd.DataFrame(np.round(user_features, ROUND_DECIMALS),
                          columns=X.columns)
user_feature = scaler.transform(user_frame)

# Print the classification result.
predicted_label = dt.predict(user_feature)
print('Predicted Label:', predicted_label[0])

   chroma_stft_mean  rmse_mean  spectral_centroid_mean  \
0              0.23       0.65                  367.35   
1              0.21       0.65                  391.44   
2              0.20       0.65                  446.28   
3              0.18       0.65                  446.12   
4              0.17       0.65                  498.05   
5              0.17       0.65                  540.57   
6              0.15       0.65                  583.63   
7              0.15       0.65                  625.72   

   spectral_bandwidth_mean  rolloff_mean  zero_crossing_rate_mean label  
0                   308.48        495.47                     0.02     0  
1                   303.11        521.42                     0.02     1  
2                   299.12        559.92                     0.02     2  
3                   301.56        566.80                     0.02     3  
4                   303.58        624.13                     0.02     4  
5                   309.26       

