In [7]:
import pandas as pd
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

y1, sr = librosa.load("a.wav",sr=44000)

data = pd.read_csv("DF.csv")
data = data.fillna(data.mean())

X = data.iloc[:, :-1]
y = data.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# #모델 선택을 위한 비교
# xgb = XGBClassifier(n_estimators=1000, learning_rate=0.02)
# xgb.fit(X_train, y_train)
# xgb_pred = xgb.predict(X_test)
# xgb_acc = accuracy_score(xgb_pred,y_test)
# print("XGBoost Accuracy: {:.2f}%".format(xgb_acc*100))

rfm=RandomForestClassifier(n_estimators=200, max_depth=3)
rfm.fit(X_train, y_train)
y_pred = rfm.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Random Accuracy: {:.2f}%".format(acc*100))

# rfm>xgb 이므로 rfm사용
##############################################################################################


#주파수 데이터
data = pd.read_csv('doremi_fre_boy.csv')

X = data.iloc[:, 0].values.reshape(42, 1)
y = data.iloc[:, 1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

start = (0*sr)
end = (2*sr)
f0, voiced_flag, voiced_probs = librosa.pyin(y1[start:end], fmin=librosa.note_to_hz('C1'), fmax=librosa.note_to_hz('B4'))
valid_pitch = f0[~np.isnan(f0)]

pitch_mean = np.nanmean(valid_pitch)
pitch_mean_reshaped = pitch_mean.reshape(-1, 1)


predicted = dt.predict(pitch_mean_reshaped)  # 분류 결과 출력
print(pitch_mean_reshaped,'-> 예상 최고 옥타브:', predicted)


# MFCC
mfcc = librosa.feature.mfcc(y=y1, sr=sr, n_mfcc=20)

# 스펙트럴 중심주파수
spectral_centroids = librosa.feature.spectral_centroid(y=y1, sr=sr)

# 스펙트럴 대역폭
spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y1, sr=sr)

# 스펙트럴 콘트라스트
spectral_contrast = librosa.feature.spectral_contrast(y=y1, sr=sr)

# 스펙트럴 롤오프
spectral_rolloff = librosa.feature.spectral_rolloff(y=y1, sr=sr)

# 평균 주파수
meanfreq = np.mean(spectral_centroids)

# 최고 주파수
maxfreq = np.max(spectral_centroids)

# 최저 주파수
minfreq = np.min(spectral_centroids)

# 음성이 비는 0값 없애고, mfcc는 3차원 배열이므로 평균값을 통해 2차원으로 변경
mfcc_mean = np.mean(mfcc[:, 1:], axis=1).tolist()
spectral_centroids = np.mean(spectral_centroids[spectral_centroids > 0])
spectral_bandwidth = np.mean(spectral_bandwidth[spectral_bandwidth > 0])
spectral_contrast = np.mean(spectral_contrast[spectral_contrast > 0], axis=0).tolist()
spectral_rolloff = np.mean(spectral_rolloff[spectral_rolloff > 0])
meanfreq = np.mean(meanfreq[meanfreq > 0])
maxfreq = np.mean(maxfreq[maxfreq > 0])
minfreq = np.mean(minfreq[minfreq > 0])

# 입력 변수 형식변환
input_features = np.array([mfcc_mean[0],mfcc_mean[1], mfcc_mean[2],mfcc_mean[3],mfcc_mean[4],mfcc_mean[5],mfcc_mean[6],mfcc_mean[7],mfcc_mean[8],mfcc_mean[9],mfcc_mean[10],mfcc_mean[11],mfcc_mean[12],mfcc_mean[13],mfcc_mean[14],mfcc_mean[15],mfcc_mean[16],mfcc_mean[17],mfcc_mean[18],mfcc_mean[19],spectral_centroids, spectral_bandwidth,spectral_contrast, spectral_rolloff, meanfreq,
                           maxfreq, minfreq])
    
prediction=rfm.predict(input_features.reshape(1,-1))
if prediction==1:
    print("톤==여성적")
else:
    print("톤==남성적")

Random Accuracy: 78.26%
[[40.32692097]] -> 예상 최고 옥타브: ['E']
톤==남성적


