In [2]:
import os
import numpy as np
from time import time
import datetime
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from math import sqrt
import pandas as pd

In [3]:
filepath ='/home/dell/Xinda/SVM/server/Audio/data_opensmile/eGeMAPs_Pleasure.csv'
data = pd.read_csv(filepath)
x = data.iloc[:, :-1]  # 数据特征
y = data.iloc[:,-1]  # 标签

# 将数据划分为训练集和测试集，test_size=.3表示30%的测试集, 随机数种子, 保证可复现性
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.3, random_state=420)

# 修正测试集和训练集的索引
for i in [x_train, x_test, y_train, y_test ]:
    i.index  = range(i.shape[0])

# 标准化
scaler_x = StandardScaler()
# scaler_y = StandardScaler()
x_train_std = scaler_x.fit_transform(x_train)
x_test_std = scaler_x.fit_transform(x_test)
# y_train = scaler_y.fit_transform(np.array(y_train).reshape([-1,1])).reshape(-1)
# y_test = scaler_y.fit_transform(np.array(y_test).reshape([-1,1])).reshape(-1)
print(x_train_std.shape)
print(y_train.shape)

(10244, 88)
(10244,)


In [5]:
y_test

0       0.00
1       0.00
2       0.75
3       0.00
4       1.00
        ... 
4386    0.00
4387    0.50
4388    0.00
4389    0.00
4390   -0.25
Name: Pleasure_average, Length: 4391, dtype: float64

In [3]:
def getPvar(vals, mean):
    N = len(vals)
    su = 0
    for i in range(len(vals)):
        su = su + ((vals[i]-mean)*(vals[i]-mean))
    pvar = (1/N) * su
    return pvar

def getMean(vals):
    su = 0
    for i in range(len(vals)):
        su = su + vals[i]
    mean = su/(len(vals))
    return mean

def getMeanofDiffs(xvals, yvals):
    su = 0
    for i in range(len(xvals)):
        su = su + ((xvals[i] - yvals[i])*(xvals[i] - yvals[i]))
    meanodiffs = su/(len(xvals))
    return meanodiffs

def getCCC(pvarfe,pvarexp,meanofdiff,meanfe,meanexp):
    bottom = pvarfe + pvarexp + ((meanfe - meanexp)*(meanfe - meanexp))
    answer = 1 - (meanofdiff / bottom)
    return answer

In [4]:
RMSE_list = []
Spearman_list = []
CCC_list = []
clf = SVR(kernel = 'rbf', gamma = 0.028933584758977834, cache_size=5000)
clf.fit(x_train_std, y_train)
result = clf.predict(x_test_std)

In [5]:
# accuracy
rmse = sqrt(mean_squared_error(y_test, result))
print("(1) Evaluation - RMSE = ", rmse)

# Spearman
data = {'result':result, 'y_test':y_test}
df = pd.DataFrame(data, columns=['result','y_test'])
spearman = df.corr(method="spearman" )
print("(2) Evaluation - Spearmman = \n", spearman)

# CCC
prediction = result
ground = y_test
meanfe = getMean(ground)
meanexp = getMean(prediction)
meanofdiff = getMeanofDiffs(ground,prediction)
pvarfe = getPvar(ground, meanfe)
pvarexp = getPvar(prediction, meanexp)
ccc = getCCC(pvarfe,pvarexp,meanofdiff,meanfe,meanexp)
print('(3) Evaluation - CCC =  ' + str(ccc))


(1) Evaluation - RMSE =  0.32220720886484866
(2) Evaluation - Spearmman = 
          result   y_test
result  1.00000  0.26278
y_test  0.26278  1.00000
(3) Evaluation - CCC =  0.21828095967824146


In [11]:
df = pd.DataFrame(data={"opensmile_prediction_p": prediction, "opensmile_groundtruth_p": y_test.values.tolist()})
df.to_csv("eval_opensmile_pleasure.csv")
print("save success!")

### Gamma-1

In [4]:
times_all = time()
# 调试两个参数 gamma & C  ，默认情况下C为1，通常来说这都是一个合理的参数。
RMSE_list = []
Spearman_list = []
CCC_list = []

gamma_range = np.logspace(-10, 1, 10, base=2) # 返回13个数字，底是2
print("gamma_rang:", gamma_range)

for gamma_item in gamma_range:
    count=1
    time0 = time()
    print("Start-{0}, gamma={1}".format(count, gamma_item))
    count = count+1
    clf = SVR(kernel = 'rbf', gamma = gamma_item, cache_size=5000)
    clf.fit(x_train_std, y_train)
    
    result = clf.predict(x_test_std)
    # accuracy
    rmse = sqrt(mean_squared_error(y_test, result))
    RMSE_list.append(rmse)
    print("(1) Evaluation - RMSE = ", rmse)
    
    # Spearman
    data = {'result':result, 'y_test':y_test}
    df = pd.DataFrame(data, columns=['result','y_test'])
    spearman = df.corr(method="spearman" )
    print("(2) Evaluation - Spearmman = \n", spearman)
    Spearman_list.append(spearman)
    
    # CCC
    prediction = result
    ground = y_test
    meanfe = getMean(ground)
    meanexp = getMean(prediction)
    meanofdiff = getMeanofDiffs(ground,prediction)
    pvarfe = getPvar(ground, meanfe)
    pvarexp = getPvar(prediction, meanexp)
    ccc = getCCC(pvarfe,pvarexp,meanofdiff,meanfe,meanexp)
    CCC_list.append(ccc)
    print('(3) Evaluation - CCC =  ' + str(ccc))
    print(datetime.datetime.fromtimestamp(time()-time0).strftime("%M:%S:%f"))
    print()
    print()

print("Gamma = ", gamma_range[RMSE_list.index(min(RMSE_list))])
print("RMSE = ", min(RMSE_list))
print("Spearman = ", Spearman_list[RMSE_list.index(min(RMSE_list))])
print("CCC = ", CCC_list[RMSE_list.index(min(RMSE_list))]) 
print(datetime.datetime.fromtimestamp(time()-times_all).strftime("%M:%S:%f"))

gamma_rang: [9.76562500e-04 2.27837703e-03 5.31558594e-03 1.24015707e-02
 2.89335848e-02 6.75037337e-02 1.57490131e-01 3.67433623e-01
 8.57243983e-01 2.00000000e+00]
Start-1, gamma=0.0009765625
(1) Evaluation - RMSE =  0.3326840274220979
(2) Evaluation - Spearmman = 
           result    y_test
result  1.000000  0.203489
y_test  0.203489  1.000000
(3) Evaluation - CCC =  0.019967913185556574
00:48:591567


Start-1, gamma=0.0022783770304221013
(1) Evaluation - RMSE =  0.3299412519361893
(2) Evaluation - Spearmman = 
           result    y_test
result  1.000000  0.218459
y_test  0.218459  1.000000
(3) Evaluation - CCC =  0.062265102614950996
00:47:585686


Start-1, gamma=0.005315585938181161
(1) Evaluation - RMSE =  0.3275639447424996
(2) Evaluation - Spearmman = 
           result    y_test
result  1.000000  0.244212
y_test  0.244212  1.000000
(3) Evaluation - CCC =  0.11607412212162804
00:49:770868


Start-1, gamma=0.012401570718501561
(1) Evaluation - RMSE =  0.32295546065857345
(2) E

### Gamma-2

In [5]:
times_all = time()
# 调试两个参数 gamma & C  ，默认情况下C为1，通常来说这都是一个合理的参数。
RMSE_list = []
Spearman_list = []
CCC_list = []

gamma_range = np.linspace(0.012,0.06,10)
print("gamma_rang:", gamma_range)

for gamma_item in gamma_range:
    count=1
    time0 = time()
    print("Start-{0}, gamma={1}".format(count, gamma_item))
    count = count+1
    clf = SVR(kernel = 'rbf', gamma = gamma_item, cache_size=5000)
    clf.fit(x_train_std, y_train)
    
    result = clf.predict(x_test_std)
    # accuracy
    rmse = sqrt(mean_squared_error(y_test, result))
    RMSE_list.append(rmse)
    print("(1) Evaluation - RMSE = ", rmse)
    
    # Spearman
    data = {'result':result, 'y_test':y_test}
    df = pd.DataFrame(data, columns=['result','y_test'])
    spearman = df.corr(method="spearman" )
    print("(2) Evaluation - Spearmman = \n", spearman)
    Spearman_list.append(spearman)
    
    # CCC
    prediction = result
    ground = y_test
    meanfe = getMean(ground)
    meanexp = getMean(prediction)
    meanofdiff = getMeanofDiffs(ground,prediction)
    pvarfe = getPvar(ground, meanfe)
    pvarexp = getPvar(prediction, meanexp)
    ccc = getCCC(pvarfe,pvarexp,meanofdiff,meanfe,meanexp)
    CCC_list.append(ccc)
    print('(3) Evaluation - CCC =  ' + str(ccc))
    print(datetime.datetime.fromtimestamp(time()-time0).strftime("%M:%S:%f"))
    print()
    print()

print("Gamma = ", gamma_range[RMSE_list.index(min(RMSE_list))])
print("RMSE = ", min(RMSE_list))
print("Spearman = ", Spearman_list[RMSE_list.index(min(RMSE_list))])
print("CCC = ", CCC_list[RMSE_list.index(min(RMSE_list))]) 
print(datetime.datetime.fromtimestamp(time()-times_all).strftime("%M:%S:%f"))

gamma_rang: [0.012      0.01733333 0.02266667 0.028      0.03333333 0.03866667
 0.044      0.04933333 0.05466667 0.06      ]
Start-1, gamma=0.012
(1) Evaluation - RMSE =  0.323153576183512
(2) Evaluation - Spearmman = 
           result    y_test
result  1.000000  0.269516
y_test  0.269516  1.000000
(3) Evaluation - CCC =  0.18030722490496254
00:57:922748


Start-1, gamma=0.017333333333333333
(1) Evaluation - RMSE =  0.32146228765538243
(2) Evaluation - Spearmman = 
           result    y_test
result  1.000000  0.272248
y_test  0.272248  1.000000
(3) Evaluation - CCC =  0.20753538670008898
01:03:050701


Start-1, gamma=0.02266666666666667
(1) Evaluation - RMSE =  0.32152370465346325
(2) Evaluation - Spearmman = 
           result    y_test
result  1.000000  0.267279
y_test  0.267279  1.000000
(3) Evaluation - CCC =  0.2172979572524858
01:06:195050


Start-1, gamma=0.028
(1) Evaluation - RMSE =  0.32207263614700327
(2) Evaluation - Spearmman = 
           result    y_test
result  1.0000

### C

In [None]:
times_all = time()
# 调试两个参数 gamma & C  ，默认情况下C为1，通常来说这都是一个合理的参数。
RMSE_list = []
Spearman_list = []
CCC_list = []

c_range = np.logspace(-10, 1, 10, base=2) # 返回13个数字，底是2
print("c_rang:", c_range)

for c_item in c_range:
    count=1
    time0 = time()
    print("Start-{0}, C={1}".format(count, c_item))
    count = count+1
    clf = SVR(kernel = 'rbf', C = c_item, cache_size=5000)
    clf.fit(x_train_std, y_train)
    
    result = clf.predict(x_test_std)
    # accuracy
    rmse = sqrt(mean_squared_error(y_test, result))
    RMSE_list.append(rmse)
    print("(1) Evaluation - RMSE = ", rmse)
    
    # Spearman
    data = {'result':result, 'y_test':y_test}
    df = pd.DataFrame(data, columns=['result','y_test'])
    spearman = df.corr(method="spearman" )
    print("(2) Evaluation - Spearmman = \n", spearman)
    Spearman_list.append(spearman)
    
    # CCC
    prediction = result
    ground = y_test
    meanfe = getMean(ground)
    meanexp = getMean(prediction)
    meanofdiff = getMeanofDiffs(ground,prediction)
    pvarfe = getPvar(ground, meanfe)
    pvarexp = getPvar(prediction, meanexp)
    ccc = getCCC(pvarfe,pvarexp,meanofdiff,meanfe,meanexp)
    CCC_list.append(ccc)
    print('(3) Evaluation - CCC =  ' + str(ccc))
    print(datetime.datetime.fromtimestamp(time()-time0).strftime("%M:%S:%f"))
    print()
    print()

print("C = ", c_range[RMSE_list.index(min(RMSE_list))])
print("RMSE = ", min(RMSE_list))
print("Spearman = ", Spearman_list[RMSE_list.index(min(RMSE_list))])
print("CCC = ", CCC_list[RMSE_list.index(min(RMSE_list))]) 
print(datetime.datetime.fromtimestamp(time()-times_all).strftime("%M:%S:%f"))

c_rang: [9.76562500e-04 2.27837703e-03 5.31558594e-03 1.24015707e-02
 2.89335848e-02 6.75037337e-02 1.57490131e-01 3.67433623e-01
 8.57243983e-01 2.00000000e+00]
Start-1, gamma=0.0009765625
(1) Evaluation - RMSE =  1.0238375965083288
(2) Evaluation - Spearmman = 
           result    y_test
result  1.000000  0.230636
y_test  0.230636  1.000000
(3) Evaluation - CCC =  0.0003081731487445394
00:46:579211


Start-1, gamma=0.0022783770304221013
(1) Evaluation - RMSE =  1.0237341352444385
(2) Evaluation - Spearmman = 
           result    y_test
result  1.000000  0.228611
y_test  0.228611  1.000000
(3) Evaluation - CCC =  0.0006247198414203181
00:48:352726


Start-1, gamma=0.005315585938181161
(1) Evaluation - RMSE =  1.0235042989450813
(2) Evaluation - Spearmman = 
           result    y_test
result  1.000000  0.231148
y_test  0.231148  1.000000
(3) Evaluation - CCC =  0.0014680425369976158
00:49:491305


Start-1, gamma=0.012401570718501561
(1) Evaluation - RMSE =  1.022957664785456
(2) Eva