In [1]:
import os
import cv2 as cv
import pandas as pd
import numpy as np
from python_speech_features import mfcc
from tsfresh.feature_extraction import extract_features
import torch
import soundfile as sf
from transformers import Wav2Vec2ForPreTraining, Wav2Vec2FeatureExtractor, Wav2Vec2Model

In [15]:
def get_audio_embeddings(audio_path, model_path):
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
        model = Wav2Vec2Model.from_pretrained(model_path)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
        model.eval()
        y, sample_rate = sf.read(audio_path)

        length = len(y)
        # print(sample_rate)
        if length > 160000:
            y = y[0:160000]
        elif length <= 160000:
            y = np.concatenate((y, np.zeros(160000-length)))
        mfcc_feat = mfcc(y, sample_rate)
        mfcc_feat = mfcc_feat.reshape(1, -1)
        # print(mfcc_feat.shape)
        input_values = feature_extractor(y, return_tensors="pt").input_values
        input_values = input_values.to(device)
        with torch.no_grad():
            
            outputs = model(input_values)
            last_hidden_state = outputs.last_hidden_state
            embed = last_hidden_state.detach().cpu().numpy().reshape(1, -1)
            print(embed.shape)
            return embed, mfcc_feat
        

In [3]:
root = os.path.dirname(os.getcwd())
print("root direction", root)

label_path = root + '/muldataset/label.csv'
print(label_path)

df = pd.read_csv(label_path)
# print(df)
df_folder = df['folder'].values.tolist()
df_dep = df['depression'].values.tolist()
df_inter = df['interpersonal tension'].values.tolist()
df_anx = df['anxiety'].values.tolist()
df_obs = df['obsessive-compulsive'].values.tolist()
df_par = df['paranoia'].values.tolist()
df_hos = df['hostility'].values.tolist()
df_aca = df['academic stress'].values.tolist()
df_mal = df['maladjustment'].values.tolist()
df_emo = df['emotional imbalance'].values.tolist()
df_psy = df['psychological imbalance'].values.tolist()
df_label = df['overall condition'].values.tolist()
df_sui = df['suicidal tendency'].values.tolist()
# print(df_folder)
print(df_folder[409])
# labels = df_label[:410]
# labels = df_label[410:]
labels = df_label
print(np.array(labels).shape)

root direction f:\Project\AMH
f:\Project\AMH/muldataset/label.csv
00002-1333
(1024,)


In [4]:
def get_voice_feature(csv_path):
    df = pd.read_csv(csv_path, usecols=[1])
    df_mfcc = df['MFCC'].values.tolist()
    
    return df_mfcc

def get_wav2vec(csv_path):
    df = pd.read_csv(csv_path, usecols=[1])
    df_wav2vec = df['wav2vec'].values.tolist()
    
    return df_wav2vec

if __name__ == '__main__': 
    mfcc_ft = []
    wav2vec = []

    root = os.path.dirname(os.getcwd())
    print("root direction", root)
    dataset = os.listdir(root + '/muldataset')
    del dataset[-1]
    for sample in dataset:
        # if '00002' in sample:
        #     continue
        sample_path = root + '/muldataset' + '/' + sample #样本文件夹
        # print(sample_path)
        sample_file = os.listdir(sample_path) #样本文件夹内文件列表
        # print(sample_file)
        for detail in sample_file:
                sample_path = root + '/muldataset' + '/' + sample
                if 'mfcc' in detail:
                    print(sample_path + '/' + detail)
                    temp_mfcc = get_voice_feature(sample_path + '/' + detail)
                    mfcc_ft.append(temp_mfcc)
                    print(np.array(mfcc_ft).shape)

                if 'wav2vec' in detail:
                    print(sample_path + '/' + detail)
                    temp_wav2vec = get_wav2vec(sample_path + '/' + detail)
                    wav2vec.append(temp_wav2vec)
                    print(np.array(wav2vec).shape)

         
    # iris = np.array(iris).reshape(-1, 1)
   
    print("MFCC set:{mfcc}".format(mfcc = np.array(mfcc_ft).shape))
    print("Wav2vec set:{wav}".format(wav = np.array(wav2vec).shape))
    print("Label set:{label}".format(label = np.array(labels).shape))

root direction f:\Project\AMH
f:\Project\AMH/muldataset/00002-0101/mfcc.csv
(1, 12987)
f:\Project\AMH/muldataset/00002-0101/wav2vec.csv
(1, 1024)
f:\Project\AMH/muldataset/00002-0102/mfcc.csv
(2, 12987)
f:\Project\AMH/muldataset/00002-0102/wav2vec.csv
(2, 1024)
f:\Project\AMH/muldataset/00002-0103/mfcc.csv
(3, 12987)
f:\Project\AMH/muldataset/00002-0103/wav2vec.csv
(3, 1024)
f:\Project\AMH/muldataset/00002-0104/mfcc.csv
(4, 12987)
f:\Project\AMH/muldataset/00002-0104/wav2vec.csv
(4, 1024)
f:\Project\AMH/muldataset/00002-0105/mfcc.csv
(5, 12987)
f:\Project\AMH/muldataset/00002-0105/wav2vec.csv
(5, 1024)
f:\Project\AMH/muldataset/00002-0106/mfcc.csv
(6, 12987)
f:\Project\AMH/muldataset/00002-0106/wav2vec.csv
(6, 1024)
f:\Project\AMH/muldataset/00002-0107/mfcc.csv
(7, 12987)
f:\Project\AMH/muldataset/00002-0107/wav2vec.csv
(7, 1024)
f:\Project\AMH/muldataset/00002-0108/mfcc.csv
(8, 12987)
f:\Project\AMH/muldataset/00002-0108/wav2vec.csv
(8, 1024)
f:\Project\AMH/muldataset/00002-0109/mfcc.

In [16]:
wav2vec = []
MFCC = []
# voice_length = 160000

model_path = "./"
root = os.path.dirname(os.getcwd())
print("root direction", root)

dataset = os.listdir(root + '/muldataset')
# print(dataset)
del dataset[-1]

for sample in dataset:
    temp_MFCC = []
    temp_wav2vec = []
    judge = 0
    sample_path = root + '/muldataset' + '/' + sample
    print(sample_path)
    sample_file = os.listdir(sample_path)
    # print(sample_file)

    for detail in sample_file:
        # if 'mfcc.csv' in detail:
        #     judge = 1
        #     break
        if '.wav' in detail:
            detail_path = sample_path + '/' + detail
            print(detail_path)
            audio_embeddings, mfcc_fe = get_audio_embeddings(detail_path, model_path)
            temp_MFCC.append(mfcc_fe)
            # temp_LOGfbank.append(temp_2)
            temp_wav2vec.append(audio_embeddings)
            # print(np.array(MFCC).shape)
        else:
            continue

    if judge == 1:
        continue

    avg_MFCC = np.mean(temp_MFCC, axis=0)
    avg_wav2vec = np.mean(temp_wav2vec, axis=0)
    MFCC.append(avg_MFCC)
    wav2vec.append(avg_wav2vec)
    print(np.array(MFCC).shape)
    print(np.array(wav2vec).shape)

    MFCC = np.array(MFCC).reshape(-1, 1)
    wav2vec = np.array(wav2vec).reshape(-1, 1)
            
    # 写入csv\
    name = ['MFCC']
    landmarks = pd.DataFrame(columns=name, data=MFCC)
        # print(landmarks)
        # print(face_x)
    landmarks.to_csv(sample_path + '/mfcc.csv', encoding='gbk')
    name = ['wav2vec']
    landmarks = pd.DataFrame(columns=name, data=wav2vec)
        # print(landmarks)
        # print(face_x)
    landmarks.to_csv(sample_path + '/wav2vec.csv', encoding='gbk')
    MFCC = []
    wav2vec = []


You are using a model of type hubert to instantiate a model of type wav2vec2. This is not supported for all configurations of models and can yield errors.


root direction f:\Project\AMH
f:\Project\AMH/muldataset/00002-0101
f:\Project\AMH/muldataset/00002-0101/wKgIb2CiGLSAWWhkAAQ9AHdR2yQ370.wav


It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.
You are using a model of type hubert to instantiate a model of type wav2vec2. This is not supported for all configurations of models and can yield errors.


(1, 383232)
f:\Project\AMH/muldataset/00002-0101/wKgIb2CiGMaASyAkAAXhAFmuv0Q219.wav


It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.
You are using a model of type hubert to instantiate a model of type wav2vec2. This is not supported for all configurations of models and can yield errors.


(1, 383232)
f:\Project\AMH/muldataset/00002-0101/wKgIb2CiGNuAHMDkAAdnANG2ozo203.wav


It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.
You are using a model of type hubert to instantiate a model of type wav2vec2. This is not supported for all configurations of models and can yield errors.


(1, 383232)
f:\Project\AMH/muldataset/00002-0101/wKgIb2CiGO-AfoNsAAJYAK6PxY8561.wav


It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.
You are using a model of type hubert to instantiate a model of type wav2vec2. This is not supported for all configurations of models and can yield errors.


(1, 383232)
f:\Project\AMH/muldataset/00002-0101/wKgIb2CiGOSAVmDWAAJYAB1O9vs846.wav


KeyboardInterrupt: 

In [9]:
from sklearn import preprocessing
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import ADASYN
from imblearn.under_sampling import RandomUnderSampler 
from imblearn.combine import SMOTEENN
from imblearn.combine import SMOTETomek
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
skf = KFold(n_splits=10)
dataset = np.array(mfcc_ft)
labels = np.array(labels)
sum_score = []
f1 = []
for train_index, test_index in skf.split(dataset, labels):
#     print("TRAIN:", train_index, "TEST:", test_index)
    x_train, x_test = dataset[train_index], dataset[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

    # min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0,10))
    # x_train = min_max_scaler.fit_transform(x_train)
    # x_test = min_max_scaler.fit_transform(x_test)

    # scaler = preprocessing.StandardScaler()
    # x_train = scaler.fit_transform(x_train)
    # x_test = scaler.fit_transform(x_test)

    ros = RandomOverSampler(random_state=0)
    x_train_resampled, y_train_resampled = ros.fit_resample(x_train, y_train)

    # sm = SMOTE(random_state=0)
    # x_train_resampled, y_train_resampled = sm.fit_resample(x_train, y_train)
    # ada = ADASYN(random_state=0)
    # x_train_resampled, y_train_resampled = ada.fit_resample(x_train, y_train)
    # rus = RandomUnderSampler(random_state=0)
    # x_train_resampled, y_train_resampled = rus.fit_resample(x_train, y_train)
#     smote_enn = SMOTEENN(random_state=0)
#     x_train_resampled, y_train_resampled  = smote_enn.fit_resample(x_train, y_train)
    # smote_tomek = SMOTETomek(random_state=0)
    # x_train_resampled, y_train_resampled = smote_tomek.fit_resample(x_train, y_train)
    # random forest
    RF_clf = RandomForestClassifier(random_state=0, class_weight='balanced')
    # RF_clf.fit(x_train_resampled, y_train_resampled)
    RF_clf.fit(x_train, y_train)
    score_RF = RF_clf.score(x_test, y_test)
    
    y_predicted = RF_clf.predict(x_test)
    test_f1 = f1_score(y_test, y_predicted, average='weighted')
    print(y_predicted)
    print("RF accuracy:{acc}, f1-score:{f1}".format(acc=score_RF, f1=test_f1))
    
    sum_score.append(score_RF)
    f1.append(test_f1)
print("Max accuracy:{max_acc}, F1-score={max_f1}".format(max_acc=np.max(sum_score), max_f1=np.max(f1)))
print("Min accuracy:{min_acc}, F1-score={min_f1}".format(min_acc=np.min(sum_score), min_f1=np.min(f1)))
print("Avg accuracy:{avg_acc}, F1-score={avg_f1}".format(avg_acc=np.mean(sum_score), avg_f1=np.mean(f1)))

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
RF accuracy:0.8640776699029126, f1-score:0.8010720064724918
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
RF accuracy:0.7669902912621359, f1-score:0.6658487143924037
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
RF accuracy:0.7087378640776699, f1-score:0.5879302736098853


KeyboardInterrupt: 

In [15]:
from sklearn import preprocessing
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import ADASYN
from imblearn.under_sampling import RandomUnderSampler 
from imblearn.combine import SMOTEENN
from imblearn.combine import SMOTETomek
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
skf = KFold(n_splits=10)
dataset = np.array(wav2vec)
labels = np.array(labels)
sum_score = []
f1 = []
for train_index, test_index in skf.split(dataset, labels):
#     print("TRAIN:", train_index, "TEST:", test_index)
    x_train, x_test = dataset[train_index], dataset[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

    # min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-10,10))
    # x_train = min_max_scaler.fit_transform(x_train)
    # x_test = min_max_scaler.fit_transform(x_test)

    # scaler = preprocessing.StandardScaler()
    # x_train = scaler.fit_transform(x_train)
    # x_test = scaler.fit_transform(x_test)

    # ros = RandomOverSampler(random_state=0)
    # x_train_resampled, y_train_resampled = ros.fit_resample(x_train, y_train)

    # sm = SMOTE(random_state=0)
    # x_train_resampled, y_train_resampled = sm.fit_resample(x_train, y_train)
    # ada = ADASYN(random_state=0)
    # x_train_resampled, y_train_resampled = ada.fit_resample(x_train, y_train)
    # rus = RandomUnderSampler(random_state=0)
    # x_train_resampled, y_train_resampled = rus.fit_resample(x_train, y_train)
#     smote_enn = SMOTEENN(random_state=0)
#     x_train_resampled, y_train_resampled  = smote_enn.fit_resample(x_train, y_train)
    # smote_tomek = SMOTETomek(random_state=0)
    # x_train_resampled, y_train_resampled = smote_tomek.fit_resample(x_train, y_train)
    # random forest
    RF_clf = RandomForestClassifier(random_state=0, class_weight='balanced')
    # RF_clf.fit(x_train_resampled, y_train_resampled)
    RF_clf.fit(x_train, y_train)
    score_RF = RF_clf.score(x_test, y_test)
    
    y_predicted = RF_clf.predict(x_test)
    test_f1 = f1_score(y_test, y_predicted, average='weighted')
    print(y_predicted)
    print("RF accuracy:{acc}, f1-score:{f1}".format(acc=score_RF, f1=test_f1))
    
    sum_score.append(score_RF)
    f1.append(test_f1)
print("Max accuracy:{max_acc}, F1-score={max_f1}".format(max_acc=np.max(sum_score), max_f1=np.max(f1)))
print("Min accuracy:{min_acc}, F1-score={min_f1}".format(min_acc=np.min(sum_score), min_f1=np.min(f1)))
print("Avg accuracy:{avg_acc}, F1-score={avg_f1}".format(avg_acc=np.mean(sum_score), avg_f1=np.mean(f1)))

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
RF accuracy:0.8543689320388349, f1-score:0.796218167030956
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
RF accuracy:0.7669902912621359, f1-score:0.6658487143924037
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
RF accuracy:0.7087378640776699, f1-score:0.5879302736098853
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 