# The Voice of Monetary Policy--Voice part
2024.01.08  石宛青

## 目的：识别美联储FOMC发布会音频的情绪

### 框架：获取音频数据-提取声音特征-训练模型-预测FOMC情绪

#### （1）获取音频数据

 1. Ryerson Audio-Visual Database of Emotional Speech and Song（RAVDESS）  
 该数据集包括24名专业演员（12男，12女），以中性的北美口音说出两个词法匹配的陈述，情绪包括平静、快乐、悲伤、恐惧、愤怒、惊讶、厌恶，每个表情都是在两个层次的情绪强度下产生的。

    文件名标识符（e.g., 03-01-06-01-02-01-12.wav）：

     模态（01 = 全 AV，02 = 纯视频，03 = 纯音频）。  
     声道（01 = 语音，02 = 歌曲）。  
     情绪（01 =中性，02 =平静，03 =快乐，04 =悲伤，05 =愤怒，06 =恐惧，07 =厌恶，08 =惊讶）。  
     情绪强度（01 =正常，02 =强烈）。注意：“中性”情绪没有强烈的强度。  
     语句（01 = “孩子们在门边说话”，02 = “狗坐在门边”）("Kids are talking by the door", "Dogs are sitting by the door")  
     重复（01 = 第一次重复，02 = 第二次重复）。  
     演员（01至24。奇数演员是男性，偶数演员是女性）。   


 2. Toronto emotional speech set（TESS）  
 
     文件名标识符（e.g., 03-01-06-back-26.wav）：

     模态（01 = 全 AV，02 = 纯视频，03 = 纯音频）。    
     声道（01 = 语音，02 = 歌曲）。    
     情绪（01 =中性，03 =快乐，04 =悲伤，05 =愤怒，06 =恐惧，07 =厌恶，08 =惊讶）。    
     词（in total 200 target words)      
     演员（26和28）。  


 3. FOMC audio 
     3.1 从YouTube下载视频，网址：（https://www.youtube.com/watch?v=HdV2VUNh4E&list=PL159CD41EB36CFE86&ab_channel=FederalReserve）
     每场发布会平均约55分钟；使用下载工具（https://www.gihosoft.com/free-youtube-downloader.html）下载，无需YouTube会员，下载格式为mp4。

     3.2 手动切分为开场白，问题、答案。如20190731可切割为一段开场白，24个问题，24段答案。平均1-2min。  
     3.3 将视频转化为音频，MP4→wav (https://www.aconvert.com/audio/)      


#### （2）提取声学特征
 1. 将 .wav   文件转换为单声道、16000Hz格式
 2. 从每一个音频文件中提取大量声学特征
2.1 mfcc（40）音频信号  
2.2 chroma（12）音调  
2.3 mel（128）音频信号的梅尔频谱图  
2.4 contrast（7）描述音频频谱中频段之间对比度的特征。如区分清晰的音乐和嘈杂的环境声音。  
2.5 tonnetz（6）是音频的音高特征，表示音频信号中和声音高有关的信息。  

#### （3）训练模型
 1. 划分训练集、测试集  
将RAVDESS、TESS特征数据中的80%作为训练样本、20%作为测试集
 2. 神经网络模型  
180个声音特征（mfcc 40 + chroma 12 + mel 128；contrast与tonnetz在训练前被剔除）→200节点（线性激活）→200节点→200节点→softmax分类：5种情绪  

#### （4）预测  
  输入FOMC每段回答的音频特征，使用模型预测情感  

### 代码：处理音频数据-模型

### （1）处理音频数据
引入包。版本要求python3.6-3.9（tensorflow目前支持的版本）

In [3]:
import librosa
import soundfile
import os, glob, subprocess
import pandas as pd
import numpy as np
from pandas import DataFrame

首先，定义了一个名为convert_audios的函数，用于将音频文件转换为指定的格式（单声道，采样率为16,000Hz）：

In [4]:
def convert_audios(path, target_path):
    '''
    Convert every .wav file under `path` to mono, 16,000 Hz audio with ffmpeg.

    The tree below `path` is walked recursively. Each result is written flat
    into `target_path` under the source file's base name, so two inputs that
    share a base name map to the same output. Outputs that already exist are
    skipped, which makes repeated runs cheap.

    Args:
        path: root directory searched for files ending in ".wav".
        target_path: directory that receives the converted files; it is
            created when the first conversion is about to run.

    Returns:
        None. Failures are reported on stdout instead of being raised.
    '''
    for dirpath, _, filenames in os.walk(path):
        for filename in filenames:
            if not filename.endswith(".wav"):
                continue
            target_file = target_path + '/' + filename
            if os.path.isfile(target_file):
                # Already converted on an earlier run.
                continue
            source_file = os.path.join(dirpath, filename)
            os.makedirs(target_path, exist_ok=True)
            # Passing an argument list (no shell) keeps paths containing
            # spaces or shell metacharacters intact.
            command = ["ffmpeg", "-i", source_file,
                       "-ac", "1", "-ar", "16000", target_file]
            try:
                exit_code = subprocess.call(command)
            except FileNotFoundError:
                # Every later call would fail the same way, so stop here.
                print('ffmpeg executable not found; install it and add it to PATH')
                return
            if exit_code != 0:
                print(f'ffmpeg failed (exit code {exit_code}) for {source_file}')

接下来，定义了一个名为extract_feature的函数，用于从音频文件中提取声学特征：  
该函数使用librosa库读取音频文件，然后提取MFCC、Chroma、MEL频谱频率、Contrast和Tonnetz等特征。提取的特征被组织成DataFrame，并返回给调用者。

In [44]:
def extract_feature(file_name):
    '''
    Load one audio file and summarise it as five one-row feature tables.

    Each descriptor is computed with librosa and then averaged along the
    last axis of librosa's output, leaving one value per coefficient:
        - MFCC (columns mfccs0.., n_mfcc=40)
        - Chroma (columns chroma0..)
        - MEL Spectrogram Frequency (columns mel0..)
        - Contrast (columns contrast0..)
        - Tonnetz of the harmonic component (columns tonnetz0..)
    Not sure if others are useful: spectral_centroid, flatness, rolloff, etc.

    Args:
        file_name: path of an audio file that soundfile can open.

    Returns:
        Tuple (mfccs_df, chroma_df, mel_df, contrast_df, tonnetz_df) of
        single-row DataFrames.
    '''
    def _averaged_row(per_frame, prefix):
        # Collapse the per-frame matrix to one mean per coefficient and wrap
        # it as a single-row table with columns prefix0, prefix1, ...
        averaged = np.mean(per_frame.T, axis=0)
        table = DataFrame(averaged.reshape(-1, len(averaged)))
        table.columns = [prefix + str(i) for i in range(0, len(averaged))]
        return table

    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # Magnitude of the short-time Fourier transform, shared by MFCC and contrast.
    magnitude = np.abs(librosa.stft(X))

    # NOTE(review): the STFT magnitude is passed as `S` here; librosa's docs
    # describe `S` for mfcc as a log-power Mel spectrogram. Confirm this is
    # intended before changing it: the trained model depends on these values.
    mfccs_df = _averaged_row(
        librosa.feature.mfcc(S=magnitude, sr=sample_rate, n_mfcc=40), 'mfccs')

    # Pitch-class (chroma) energy.
    chroma_df = _averaged_row(
        librosa.feature.chroma_stft(y=X, sr=sample_rate), 'chroma')

    # Mel-scaled spectrogram of the signal.
    mel_df = _averaged_row(
        librosa.feature.melspectrogram(y=X, sr=sample_rate), 'mel')

    # Spectral contrast between frequency bands; may help tell clean tonal
    # audio from noisy background sound.
    contrast_df = _averaged_row(
        librosa.feature.spectral_contrast(S=magnitude, sr=sample_rate), 'contrast')

    # Tonnetz: harmonic pitch information, computed on the harmonic component.
    tonnetz_df = _averaged_row(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate), 'tonnetz')

    return mfccs_df, chroma_df, mel_df, contrast_df, tonnetz_df

在https://blog.csdn.net/m0_47449768/article/details/130102406 下载ffmpeg软件，并配置path环境变量

手动创建一个空的目标文件夹/traget/training_data，将RAVDESS、TESS转化为目标格式

In [6]:
# Convert both labelled corpora (RAVDESS, then TESS) into the shared training
# folder; the original author measured roughly 3 minutes.
# NOTE (from the original author): file names should not contain Chinese
# characters, spaces, etc.
target_path = 'D:/voice/traget/training_data'
for original_path in ('D:/voice/RAVDESS', 'D:/voice/TESS'):
    convert_audios(original_path, target_path)

In [11]:
# Build one feature row per converted clip and combine them in a single concat
# (concatenating inside the loop re-copies the growing table every iteration).
filenames = glob.glob(target_path+'/'+'*.wav')
feature_rows = []
for filename in filenames:
    mfccs, chroma, mel, contrast, tonnetz = extract_feature(filename)
    rows = pd.concat([mfccs, chroma, mel, contrast, tonnetz], axis=1)

    # Parse only the base name, so hyphens or either kind of path separator
    # in the directory part cannot shift the fields.
    clip_name = os.path.splitext(os.path.basename(filename))[0]
    if 'training_data' in str(filename):
        # Third hyphen-separated field of the file name is the emotion code.
        rows['emotion'] = clip_name.split('-')[2]
    elif 'prediction_data' in str(filename):
        # Prediction clips carry no label; keep the clip name as identifier.
        rows['item'] = clip_name
    feature_rows.append(rows)

final_df = pd.concat(feature_rows, ignore_index=True) if feature_rows else DataFrame()

In [10]:
# Persist the training feature table as a tab-separated file without the index column.
final_df.to_csv('D:/voice/feature.csv', sep='\t', index=False)

In [13]:
# Show the assembled feature table as the cell output.
final_df

Unnamed: 0,mfccs0,mfccs1,mfccs2,mfccs3,mfccs4,mfccs5,mfccs6,mfccs7,mfccs8,mfccs9,...,contrast4,contrast5,contrast6,tonnetz0,tonnetz1,tonnetz2,tonnetz3,tonnetz4,tonnetz5,emotion
0,1.022527,0.958868,0.494029,0.307909,0.132615,0.016355,-0.053159,-0.073568,-0.080976,-0.041067,...,17.426861,17.076383,15.520328,-0.042179,0.034978,0.012448,-0.042395,0.004703,-0.001524,1
1,0.288416,0.311845,0.235072,0.202644,0.187299,0.173715,0.143612,0.137624,0.114782,0.088920,...,15.573664,15.925498,15.141866,-0.000143,-0.015157,-0.063598,0.014533,0.012302,0.001000,1
2,0.406408,0.329117,0.183439,0.182976,0.133141,0.107178,0.062055,0.030202,-0.004246,-0.001801,...,17.362159,16.831559,16.501101,-0.055107,0.034173,-0.087559,0.015745,0.035862,0.000829,1
3,1.474276,1.524645,0.974791,0.732221,0.644528,0.569810,0.358410,0.111121,-0.048382,-0.038311,...,16.342105,17.730312,14.985024,-0.021259,0.054719,-0.040669,0.023162,-0.014577,-0.014232,1
4,0.522091,0.463887,0.341587,0.267636,0.180913,0.114630,0.026273,0.023030,-0.018288,-0.055002,...,18.062980,17.388405,16.019585,0.010363,0.023506,-0.028387,0.082039,0.012937,0.037996,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4234,8.490258,7.653400,4.493712,2.629095,1.369422,-0.511180,-1.275612,-0.488112,-0.736113,-0.736116,...,21.225740,22.143100,15.800582,0.046143,-0.004746,-0.030455,-0.014472,0.026051,0.013821,8
4235,6.096761,5.610900,4.078144,2.305208,0.619494,0.451949,0.301742,0.249073,-0.063706,-0.400599,...,20.431165,19.394712,17.667970,0.070486,0.102015,0.087497,0.000688,0.043016,-0.003614,8
4236,9.083913,7.782488,5.288871,3.342988,2.207076,1.257422,0.055670,-0.548027,-0.770778,-0.670887,...,17.811781,18.966005,15.217570,0.012104,-0.000034,-0.039569,0.057701,-0.004201,0.004223,8
4237,4.847407,4.084107,3.695012,2.926731,1.594043,1.437396,0.809804,0.532656,0.077507,-0.040866,...,19.969279,19.172665,16.683653,-0.003896,0.028683,0.056708,0.004761,0.017381,-0.011286,8


In [9]:
# Re-save the training feature table. `df` is not defined until the model
# section reads this very file back, so the table built above (`final_df`)
# is what has to be written here.
final_df.to_csv('D:/voice/feature.csv', sep='\t', index=False)

手动创建一个空的目标文件夹/target/prediction_data（与下方代码中的路径一致），将FOMC转化为目标格式

In [14]:
# Convert the manually segmented FOMC clips to mono, 16,000 Hz WAV files.
# NOTE(review): this cell writes under 'target', while the training cell above
# writes under 'traget' - confirm the folder names that exist on disk.
original_path = r'D:\voice\FOMC'
target_path = r'D:\voice\target\prediction_data'
convert_audios(original_path, target_path)

In [15]:
# Build one feature row per converted FOMC clip and combine them in a single
# concat (concatenating inside the loop re-copies the growing table each time).
filenames = glob.glob(target_path+'/'+'*.wav')
feature_rows = []
for filename in filenames:
    mfccs, chroma, mel, contrast, tonnetz = extract_feature(filename)
    rows = pd.concat([mfccs, chroma, mel, contrast, tonnetz], axis=1)

    # Parse only the base name; os.path handles the platform's separators, so
    # the clip identifier no longer depends on Windows-style backslashes.
    clip_name = os.path.splitext(os.path.basename(filename))[0]
    if 'training_data' in str(filename):
        # Third hyphen-separated field of the file name is the emotion code.
        rows['emotion'] = clip_name.split('-')[2]
    elif 'prediction_data' in str(filename):
        # Prediction clips carry no label; keep the clip name as identifier.
        rows['item'] = clip_name
    feature_rows.append(rows)

final_df = pd.concat(feature_rows, ignore_index=True) if feature_rows else DataFrame()

final_df.to_csv('D:/voice/feature_prediction.csv', sep='\t', index=False)

In [16]:
# Show the prediction feature table as the cell output.
final_df

Unnamed: 0,mfccs0,mfccs1,mfccs2,mfccs3,mfccs4,mfccs5,mfccs6,mfccs7,mfccs8,mfccs9,...,contrast4,contrast5,contrast6,tonnetz0,tonnetz1,tonnetz2,tonnetz3,tonnetz4,tonnetz5,item
0,5.520031,4.823277,2.519659,1.37579,1.250551,1.212689,0.507529,0.363239,0.463409,0.474395,...,16.01896,17.639429,15.605247,-0.04227,-0.014227,-0.020699,-0.038659,-0.022382,-0.003938,20110622_1
1,9.383839,8.365168,4.148173,1.992658,1.836,1.885299,0.91986,0.64828,0.61445,0.432805,...,17.178497,20.117435,15.817274,-0.01143,0.011601,-0.017634,-0.051248,0.004756,-0.004221,20110622_2
2,8.130367,7.06955,3.839147,1.882912,1.59232,1.49445,0.501571,0.352009,0.41723,0.490138,...,17.335025,19.589634,15.808904,-0.025843,-0.006595,0.001303,0.011228,-0.010676,-0.012452,20110622_3
3,10.547326,8.959697,3.60521,1.777991,2.072085,2.201066,0.990699,0.778654,1.064813,1.097703,...,17.349606,21.141986,16.211789,-0.020132,-0.004959,-5.1e-05,-0.000602,-0.008769,-0.009025,20110622_4
4,8.259261,7.962046,4.19398,2.526863,1.938031,1.618798,0.947708,0.770062,0.732871,0.404933,...,17.420084,19.161898,15.782131,-0.018666,0.019875,-0.010611,-0.034765,0.001899,-0.013111,20110622_5
5,3.772644,3.019469,1.969844,1.75669,1.475093,1.313248,0.922565,0.946543,0.689499,0.482501,...,16.633047,17.794529,16.157175,0.003591,0.00742,0.027017,-0.023302,-0.01435,-0.008195,20190731_1
6,5.260175,4.152248,2.736296,2.672591,2.143484,1.869599,1.326102,1.226196,0.776981,0.426033,...,16.435273,17.839591,16.444682,0.011317,-0.002543,0.031038,-0.016555,-0.013801,-0.007346,20190731_2
7,4.875129,4.165122,2.616739,2.305677,1.956817,1.802361,1.360197,1.336472,0.955871,0.582678,...,16.631618,18.058242,16.392727,0.015156,0.004705,0.018532,-0.011342,-0.016616,-0.007998,20190731_3
8,4.667109,4.149654,2.780008,2.448769,1.989738,1.848565,1.424531,1.212157,0.929775,0.670839,...,16.803352,18.052364,16.445027,0.008012,-0.002289,0.019714,-0.034944,-0.020906,-0.006386,20190731_4
9,3.96747,3.745574,2.532434,2.07091,1.697604,1.617044,1.203158,0.934572,0.643697,0.460003,...,16.637221,17.754412,16.293434,-0.003763,0.007541,0.025592,-0.010793,-0.022249,-0.011543,20190731_5


### (2)模型

In [17]:
import math
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import LSTM, Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD, RMSprop, Adam, Adadelta, Adagrad, Adamax, Nadam
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, History, ReduceLROnPlateau, CSVLogger
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, confusion_matrix

保留5种情绪：{'happy': 0, 'ps': 1, 'neutral': 2, 'sad': 3, 'angry': 4}

In [19]:
# Load the training features and keep the five emotions used by the model:
# drop codes 2 (calm), 6 (fearful) and 7 (disgust), then recode the rest to
# happy=0, ps=1, neutral=2, sad=3, angry=4.
df = pd.read_csv('D:/voice/feature.csv', sep='\t')
# .copy() makes the filtered table independent of the original, so the column
# assignment below does not trigger pandas' SettingWithCopyWarning.
df = df[~df['emotion'].isin([2, 6, 7])].copy()
df['emotion'] = df['emotion'].replace({1: 2, 3: 0, 4: 3, 5: 4, 8: 1})
df

Unnamed: 0,mfccs0,mfccs1,mfccs2,mfccs3,mfccs4,mfccs5,mfccs6,mfccs7,mfccs8,mfccs9,...,contrast4,contrast5,contrast6,tonnetz0,tonnetz1,tonnetz2,tonnetz3,tonnetz4,tonnetz5,emotion
0,1.022527,0.958868,0.494029,0.307909,0.132615,0.016355,-0.053159,-0.073568,-0.080976,-0.041067,...,17.426861,17.076383,15.520328,-0.042179,0.034978,0.012448,-0.042395,0.004703,-0.001524,2
1,0.288416,0.311845,0.235072,0.202644,0.187299,0.173715,0.143612,0.137624,0.114782,0.088920,...,15.573664,15.925498,15.141866,-0.000143,-0.015157,-0.063598,0.014533,0.012302,0.001000,2
2,0.406408,0.329117,0.183439,0.182976,0.133141,0.107178,0.062055,0.030202,-0.004246,-0.001801,...,17.362159,16.831559,16.501101,-0.055107,0.034173,-0.087559,0.015745,0.035862,0.000829,2
3,1.474276,1.524645,0.974791,0.732221,0.644528,0.569810,0.358410,0.111121,-0.048382,-0.038311,...,16.342105,17.730312,14.985024,-0.021259,0.054719,-0.040669,0.023162,-0.014577,-0.014232,2
4,0.522091,0.463887,0.341587,0.267636,0.180913,0.114630,0.026273,0.023030,-0.018288,-0.055002,...,18.062980,17.388405,16.019585,0.010363,0.023506,-0.028387,0.082039,0.012937,0.037996,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4234,8.490258,7.653400,4.493712,2.629095,1.369422,-0.511180,-1.275612,-0.488112,-0.736113,-0.736116,...,21.225740,22.143100,15.800582,0.046143,-0.004746,-0.030455,-0.014472,0.026051,0.013821,1
4235,6.096761,5.610900,4.078144,2.305208,0.619494,0.451949,0.301742,0.249073,-0.063706,-0.400599,...,20.431165,19.394712,17.667970,0.070486,0.102015,0.087497,0.000688,0.043016,-0.003614,1
4236,9.083913,7.782488,5.288871,3.342988,2.207076,1.257422,0.055670,-0.548027,-0.770778,-0.670887,...,17.811781,18.966005,15.217570,0.012104,-0.000034,-0.039569,0.057701,-0.004201,0.004223,1
4237,4.847407,4.084107,3.695012,2.926731,1.594043,1.437396,0.809804,0.532656,0.077507,-0.040866,...,19.969279,19.172665,16.683653,-0.003896,0.028683,0.056708,0.004761,0.017381,-0.011286,1


In [20]:
def split_data(df, train_set):
    '''
    Split the feature table into a class-balanced training set and a test set.

    Every emotion contributes the same number of training rows, namely
    floor(size of the smallest class * train_set), drawn with a fixed random
    seed. All remaining rows of each class go to the test set, so the test
    set is generally not balanced. Columns whose name contains "contrast" or
    "tonnetz" are removed before splitting.

    Args:
        df (DataFrame): feature columns plus an 'emotion' label column.
        train_set (float): share of the smallest class used for training.

    Returns:
        x_train (numpy array): training features, one row per clip.
        y_train (numpy array): training labels, shape (rows, 1).
        x_test (numpy array): test features.
        y_test (numpy array): test labels, shape (rows, 1).

    Raises:
        ValueError: if df has no rows.
    '''
    if df.empty:
        raise ValueError('split_data() needs at least one labelled row')

    # Feature groups to leave out; add "mfccs", "chroma" or "mel" to drop more.
    # NOTE: it is unclear why exactly these two groups are dropped; the choice
    # appears to have been made by trial and error.
    excluded_tags = ("contrast", "tonnetz")
    kept = [col for col in df.columns
            if not any(tag in col for tag in excluded_tags)]
    # A fresh 0..n-1 index guarantees unique labels for the index-based
    # train/test separation below.
    data = df[kept].reset_index(drop=True)

    groups = [data.loc[data['emotion'] == emotion]
              for emotion in data['emotion'].unique()]
    # Same training size for every class, bounded by the smallest class.
    min_count = math.floor(min(len(group) for group in groups) * train_set)

    train_parts, test_parts = [], []
    for group in groups:
        train_rows = group.sample(n=min_count, random_state=100)
        train_parts.append(train_rows)
        # Everything that was not sampled for training becomes test data.
        test_parts.append(group.drop(index=train_rows.index))

    train_df = pd.concat(train_parts)
    test_df = pd.concat(test_parts)
    x_train = train_df.drop(columns=['emotion'])
    y_train = train_df[['emotion']]
    x_test = test_df.drop(columns=['emotion'])
    y_test = test_df[['emotion']]

    print('Training features:{}; Training output:{}; Testing features:{}; Testing output:{}'.format(x_train.shape, y_train.shape, x_test.shape, y_test.shape))
    return x_train.to_numpy(), y_train.to_numpy(), x_test.to_numpy(), y_test.to_numpy()

def test_score(y_test, y_pred):
    '''
    Compute the model's accuracy on the test set.

    Args:
        y_test (numpy array): true labels, either one-hot rows or a 1-D
            sequence of class indices.
        y_pred (numpy array): model output, either per-class score rows or a
            1-D sequence of class indices.

    Returns:
        accuracy (float): share of samples whose predicted class equals the
        true class.
    '''
    def _class_indices(values):
        # Reduce each row to its highest-scoring class. The axis matters:
        # argmax without an axis returns one index into the flattened array
        # instead of one label per sample. 1-D input is taken to be labels.
        values = np.asarray(values)
        return values if values.ndim < 2 else np.argmax(values, axis=-1)

    accuracy = accuracy_score(y_true=_class_indices(y_test),
                              y_pred=_class_indices(y_pred))
    return accuracy

def conf_matrix(y_test, y_pred):
    '''
    Compute the confusion matrix of the test-set predictions.

    Args:
        y_test (numpy array): true labels, either one-hot rows or a 1-D
            sequence of class indices.
        y_pred (numpy array): model output, either per-class score rows or a
            1-D sequence of class indices.

    Returns:
        matrix (numpy array): confusion matrix as returned by
        sklearn's confusion_matrix(true labels, predicted labels).
    '''
    def _class_indices(values):
        # Reduce each row to its highest-scoring class. The axis matters:
        # argmax without an axis returns one index into the flattened array
        # instead of one label per sample. 1-D input is taken to be labels.
        values = np.asarray(values)
        return values if values.ndim < 2 else np.argmax(values, axis=-1)

    matrix = confusion_matrix(_class_indices(y_test), _class_indices(y_pred))
    return matrix

In [36]:
# Balanced split: with train_set=0.8 each emotion contributes
# floor(0.8 * smallest class size) training rows; the rest become test rows.
x_train, y_train, x_test, y_test = split_data(df, train_set=0.8)

# One-hot encode the integer emotion labels.
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
y_train



Training features:(1980, 180); Training output:(1980, 1); Testing features:(882, 180); Testing output:(882, 1)


array([[0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0.],
       ...,
       [0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.]], dtype=float32)

In [37]:
# Build the neural network.

# Number of target classes and length of one input feature vector.
target_class = len(df['emotion'].unique())
input_length = x_train.shape[1]

# Tunable hyper-parameters.
dense_units = 200
dropout = 0.3
loss = 'categorical_crossentropy'
optimizer = 'adam'

# Three fully connected layers with no activation argument, each followed by
# dropout against over-fitting, then a softmax layer with one unit per class.
model = Sequential([
    Dense(dense_units, input_dim=input_length),
    Dropout(dropout),
    Dense(dense_units),
    Dropout(dropout),
    Dense(dense_units),
    Dropout(dropout),
    Dense(target_class, activation='softmax'),
])
model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=[
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ],
)

In [38]:
# Training.
# The checkpoint callback writes the model to voice_model1.h5 whenever the
# validation loss improves; the learning rate is multiplied by 0.9 after 20
# epochs without improvement, down to a floor of 1e-6.
# NOTE(review): the test split is also passed as validation data, so the
# checkpoint choice and learning-rate schedule are tuned on the same rows that
# are later used to report accuracy - confirm this is acceptable.
model_path = r'D:\voice\model'

checkpointer = ModelCheckpoint(
    f"{model_path}/voice_model1.h5",
    monitor='val_loss',
    save_best_only=True,
)
lr_reduce = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.9,
    patience=20,
    min_lr=0.000001,
)
model_training = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    callbacks=[checkpointer, lr_reduce],
    epochs=200,
    batch_size=64,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200


Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200


Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200


Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200


Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


In [39]:
# Check the accuracy score and the confusion matrix on the test split.
# NOTE(review): `model` here is the network as left by the last training
# epoch, not the best checkpoint saved to disk - confirm which one should be
# reported.
y_pred = np.argmax(model.predict(x_test), axis=-1)
# Keep the one-hot `y_test` untouched and derive the integer labels into a
# separate name, so this cell and the training cell stay safe to re-run.
y_true = np.argmax(y_test, axis=-1)

print(accuracy_score(y_true=y_true, y_pred=y_pred))

emotions = ['happy', 'ps', 'neutral', 'sad', 'angry']
emotions2int={'happy': 0, 'ps': 1, 'neutral': 2, 'sad': 3, 'angry': 4}
# Rows are true classes (t_*), columns are predicted classes (p_*).
matrix = confusion_matrix(y_true, y_pred,
                          labels=[emotions2int[e] for e in emotions])
matrix = pd.DataFrame(matrix, index=[f"t_{e}" for e in emotions],columns=[f"p_{e}" for e in emotions])
print(matrix)

0.7947845804988662
           p_happy  p_ps  p_neutral  p_sad  p_angry
t_happy        130    34         11     13        7
t_ps            11   159         11     14        1
t_neutral        0     1         88     10        0
t_sad            5    11         11    166        3
t_angry         14    17          3      4      158


预测FOMC

In [40]:
def processing_data(df):
    '''
    Turn the prediction feature table into the plain array fed to the model.

    Columns whose name contains "item" (the clip identifier), "contrast" or
    "tonnetz" are left out; to exclude further feature groups add "mfccs",
    "chroma" or "mel" to the tuple below. The input table is not modified.

    Args:
        df (DataFrame): one row of extracted features per clip.

    Returns:
        x_pred (numpy array): the remaining columns, in their original order.
    '''
    unwanted_tags = ("item", "contrast", "tonnetz")
    # Boolean mask over the columns: True for every column that is kept.
    keep_mask = [not any(tag in name for tag in unwanted_tags)
                 for name in df.columns]
    x_pred = df.loc[:, keep_mask].to_numpy()
    return x_pred

In [41]:
# Predict an emotion for every FOMC clip and save the result table.
infile = r'D:\voice\feature_prediction.csv'
outfile = r'D:\voice\result.csv'
emotions = ['happy', 'ps', 'neutral', 'sad', 'angry']
# Class index -> emotion name, following the order of `emotions`.
dictionary = dict(enumerate(emotions))

df_pred = pd.read_csv(infile, sep='\t')
x_pred = processing_data(df_pred)

# NOTE(review): this loads voice_model.h5, whereas the training cell saves its
# checkpoint as voice_model1.h5 - confirm which model file is intended.
model1 = keras.models.load_model(r'D:\voice\model\voice_model.h5')
y_pred = np.argmax(model1.predict(x_pred), axis=-1)

# Columns: predicted class index, its name, and the clip identifier.
output_data = pd.DataFrame({'emotion': y_pred})
output_data = output_data.assign(
    emotion_n=output_data['emotion'].map(dictionary),
    item=df_pred['item'],
)
output_data.to_csv(outfile, sep='\t', index=False)
output_data



Unnamed: 0,emotion,emotion_n,item
0,1,ps,20110622_1
1,1,ps,20110622_2
2,1,ps,20110622_3
3,1,ps,20110622_4
4,1,ps,20110622_5
5,4,angry,20190731_1
6,4,angry,20190731_2
7,4,angry,20190731_3
8,4,angry,20190731_4
9,4,angry,20190731_5


原文：

| Press Conference Date | Speaker   | Positive Responses | Neutral Responses | Negative Responses | Tone |
|------------------------|-----------|---------------------|-------------------|--------------------|------|
| June 22, 2011          | Bernanke  | 19                  | 0                 | 0                  | 1.00 |
| July 31, 2019          | Powell    | 0                   | 0                 | 24                 | -1.00|