In [1]:
#coding=utf-8
import sys 
import tensorflow as tf
from sklearn.datasets import load_digits
import numpy as np
import pandas as pd
from sklearn.metrics import  accuracy_score
from sklearn.metrics import mean_squared_error

raw_datapath = 'E:\\code\\python\\qoe_model\\raw_data\\3w_data.csv'
data = pd.read_csv(raw_datapath)

column_names = ['InitialBufferTime', 'VideoPlayDuration','StallingRatio', 'VIDEO_BITRATE', 'VIDEO_CLARITY', 'VIDEO_ALL_PEAK_RATE', 
                'VIDEO_AVERAGE_RATE', 'USERBUFFERTIME', 'VIDEOSIZE', 'SCREEN_RESOLUTION_LONG', 'VIDEO_BUFFERING_PEAK_RATE', 
                'EVMOS', 'ELOADING', 'ESTALLING', 'USER_SCORE']
#########################################################
############ 将 name 列的离散数据进行编号 ###############
#########################################################
def class_normalization(name, X):
    
    # name不是list,是str
    a = X[name]
    b = a.value_counts()
    c = b.index

    list1 = []
    list2 = []
    for i in range(len(c)):
        list1.append(i)
        list2.append(c[i])
        
    b = a.replace(list2, list1)
    
    data1 = X.drop([name], axis=1)
    data1.insert(2, name, b)
    
    return data1

##########################################################
#################### 移除 name 列 ########################
##########################################################
def remove_col(name, all_name):
    
    list = []
    for i in range(len(column_names)):
        if column_names[i] != name:
            list.append(column_names[i])
    return list

# 生成每一个batch
def generatebatch(X,Y,n_examples, batch_size): 
    for batch_i in range(n_examples // batch_size): 
        start = batch_i * batch_size 
        end = start + batch_size 
        batch_xs = X[start:end] 
        batch_ys = Y[start:end] 
        yield batch_xs, batch_ys 

from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

name = 'VIDEO_CLARITY'
data1 = class_normalization(name, data)
data1 = shuffle(data1)
data1 = data1.reset_index(drop = True)

X1 = data1[remove_col(name, column_names)]
X2 = data1[[name]]

# 选取第i个分数
Y1 = data1[column_names[11]]
Y2 = data1[column_names[12]]
Y3 = data1[column_names[13]]
Y4 = data1[column_names[14]]

scaler = MinMaxScaler()
X1_data = scaler.fit_transform(X1)
X2_data = OneHotEncoder().fit_transform(X2.values.reshape(-1, 1)).todense()
X_data = np.hstack((X1_data, X2_data))

[raw, col] = X_data.shape
#shape = 8 * 8
shape = 5 * 5
X3_data = np.zeros([raw, shape - col])
X_data = np.hstack((X1_data, X2_data, X3_data)).getA()

# 随机采样25%的数据用于测试，剩下的75%用于构建训练集合。
X1_train, X1_test, y1_train, y1_test = train_test_split(X_data, Y1, test_size=0.25, random_state = 33)
X2_train, X2_test, y2_train, y2_test = train_test_split(X_data, Y2, test_size=0.25, random_state = 33)
X3_train, X3_test, y3_train, y3_test = train_test_split(X_data, Y3, test_size=0.25, random_state = 33)
X4_train, X4_test, y4_train, y4_test = train_test_split(X_data, Y4, test_size=0.25, random_state = 33)

#X_train = X_train.reshape(-1,8,8,1)
#X_test = X_test.reshape(-1,8,8,1)

X1_train = X1_train.reshape(-1,5,5,1)
X1_test = X1_test.reshape(-1,5,5,1)
y1_train = OneHotEncoder().fit_transform(y1_train.values.reshape(-1, 1)).todense().getA()
y1_test_code = OneHotEncoder().fit_transform(y1_test.values.reshape(-1, 1)).todense().getA()

X2_train = X2_train.reshape(-1,5,5,1)
X2_test = X2_test.reshape(-1,5,5,1)
y2_train = OneHotEncoder().fit_transform(y2_train.values.reshape(-1, 1)).todense().getA()
y2_test_code = OneHotEncoder().fit_transform(y2_test.values.reshape(-1, 1)).todense().getA()

X3_train = X3_train.reshape(-1,5,5,1)
X3_test = X3_test.reshape(-1,5,5,1)
y3_train = OneHotEncoder().fit_transform(y3_train.values.reshape(-1, 1)).todense().getA()
y3_test_code = OneHotEncoder().fit_transform(y3_test.values.reshape(-1, 1)).todense().getA()

X4_train = X4_train.reshape(-1,5,5,1)
X4_test = X4_test.reshape(-1,5,5,1)
y4_train = OneHotEncoder().fit_transform(y4_train.values.reshape(-1, 1)).todense().getA()
y4_test_code = OneHotEncoder().fit_transform(y4_test.values.reshape(-1, 1)).todense().getA()

# 使用MBGD算法，设定batch_size为8
batch_size = 4096
tf.reset_default_graph()

# 输入层
keep_prob = tf.placeholder(tf.float32)
#tf_X = tf.placeholder(tf.float32,[None,8,8,1])
tf_X = tf.placeholder(tf.float32,[None,5,5,1])
tf_Y = tf.placeholder(tf.float32,[None,5])

# 卷积层+激活层 
conv_filter_w1 = tf.Variable(tf.random_normal([3, 3, 1, 5])) 
conv_filter_b1 = tf.Variable(tf.random_normal([5])) 
relu_feature_maps1 = tf.nn.relu(tf.nn.conv2d(tf_X, conv_filter_w1, strides=[1, 1, 1, 1], padding='SAME') + conv_filter_b1)

# 池化层
max_pool1 = tf.nn.max_pool(relu_feature_maps1, ksize = [1,3,3,1], strides = [1,2,2,1], padding = 'SAME')

# 卷积层 
conv_filter_w2 = tf.Variable(tf.random_normal([3, 3, 5, 5])) 
conv_filter_b2 = tf.Variable(tf.random_normal([5])) 
conv_out2 = tf.nn.conv2d(relu_feature_maps1, conv_filter_w2, strides = [1, 2, 2, 1], padding = 'SAME') + conv_filter_b2 

# BN归一化层+激活层 
batch_mean, batch_var = tf.nn.moments(conv_out2, [0, 1, 2], keep_dims = True) 
shift = tf.Variable(tf.zeros([5])) 
scale = tf.Variable(tf.ones([5])) 
epsilon = 1e-3 
BN_out = tf.nn.batch_normalization(conv_out2, batch_mean, batch_var, shift, scale, epsilon) 
relu_BN_maps2 = tf.nn.relu(BN_out)

# 池化层
max_pool2 = tf.nn.max_pool(relu_BN_maps2, ksize = [1,3,3,1], strides = [1,2,2,1], padding = 'SAME')

# 将特征图进行展开
max_pool2_flat = tf.reshape(max_pool2, [-1, 2*2*5])

# 全连接层 
fc_w1 = tf.Variable(tf.random_normal([2*2*5,50])) 
fc_b1 = tf.Variable(tf.random_normal([50])) 
fc_out1 = tf.nn.relu(tf.matmul(max_pool2_flat, fc_w1) + fc_b1)

# 输出层 
out_w1 = tf.Variable(tf.random_normal([50,5])) 
out_b1 = tf.Variable(tf.random_normal([5])) 
pred = tf.nn.softmax(tf.matmul(fc_out1, out_w1)+out_b1)

loss = -1*tf.reduce_mean(tf_Y*tf.log(tf.clip_by_value(pred, 1e-11,1.0)))

train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)

y_pred = tf.argmax(pred, 1)
bool_pred = tf.equal(tf.argmax(tf_Y,1), y_pred)
# 准确率
accuracy = tf.reduce_mean(tf.cast(bool_pred, tf.float32))
###################################################################
############################## SCORE 1 ############################
###################################################################
print("######### The train & test process of SCORE1 ##########")
with tf.Session() as sess: 
    sess.run(tf.global_variables_initializer()) 
    for epoch in range(1000): 
        # 迭代1000个周期 
        for batch_xs,batch_ys in generatebatch(X1_train, y1_train, y1_train.shape[0], batch_size): 
            # 每个周期进行MBGD算法 
            sess.run(train_step, feed_dict={tf_X:batch_xs, tf_Y:batch_ys, keep_prob: 0.1}) 
        if(epoch%100==0): 
            res = sess.run(accuracy,feed_dict={tf_X:X1_train, tf_Y:y1_train, keep_prob: 0.1}) 
            #print((epoch,res))
            print("The %d epoch, the accuracy is %f " %(epoch, res))
    res_ypred = y_pred.eval(feed_dict={tf_X:X1_test, tf_Y:y1_test_code, keep_prob: 0.1}).flatten() 
    # 只能预测一批样本，不能预测一个样本 
    print('The result of the test bath is  ', res_ypred)
print("The accurate score of the test bath is  ", accuracy_score(y1_test, res_ypred.reshape(-1, 1)))
print("The MSE of the test bach is  ",  mean_squared_error(y1_test, res_ypred.reshape(-1, 1)))
####################################################################
############################## SCORE 2 #############################
####################################################################
print("######### The train & test process of SCORE2 ##########")
with tf.Session() as sess: 
    sess.run(tf.global_variables_initializer()) 
    for epoch in range(1000): 
        # 迭代1000个周期 
        for batch_xs,batch_ys in generatebatch(X2_train, y2_train, y2_train.shape[0], batch_size): 
            # 每个周期进行MBGD算法 
            sess.run(train_step, feed_dict={tf_X:batch_xs, tf_Y:batch_ys, keep_prob: 0.5}) 
        if(epoch%100==0): 
            res = sess.run(accuracy,feed_dict={tf_X:X2_train, tf_Y:y2_train, keep_prob: 1}) 
            #print((epoch,res))
            print("The %d epoch, the accuracy is %f " %(epoch, res))
    res_ypred = y_pred.eval(feed_dict={tf_X:X2_test, tf_Y:y2_test_code, keep_prob: 1}).flatten() 
    # 只能预测一批样本，不能预测一个样本 
    print('The result of the test bath is  ', res_ypred)
print("The accurate score of the test bath is  ", accuracy_score(y2_test, res_ypred.reshape(-1, 1)))
print("The MSE of the test bach is  ",  mean_squared_error(y2_test, res_ypred.reshape(-1, 1)))
#####################################################################
############################## SCORE 3 ##############################
#####################################################################
print("######### The train & test process of SCORE3 ##########")
with tf.Session() as sess: 
    sess.run(tf.global_variables_initializer()) 
    for epoch in range(1000): 
        # 迭代1000个周期 
        for batch_xs,batch_ys in generatebatch(X3_train, y3_train, y3_train.shape[0], batch_size): 
            # 每个周期进行MBGD算法 
            sess.run(train_step, feed_dict={tf_X:batch_xs, tf_Y:batch_ys, keep_prob: 0.5}) 
        if(epoch%100==0): 
            res = sess.run(accuracy,feed_dict={tf_X:X3_train, tf_Y:y3_train, keep_prob: 1}) 
            #print((epoch,res))
            print("The %d epoch, the accuracy is %f " %(epoch, res))
    res_ypred = y_pred.eval(feed_dict={tf_X:X3_test, tf_Y:y3_test_code, keep_prob: 1}).flatten() 
    # 只能预测一批样本，不能预测一个样本 
    print('The accurate of the train bath is  ', res_ypred)
print("The accurate score of the test bath is  ", accuracy_score(y3_test, res_ypred.reshape(-1, 1)))
print("The MSE of the test bach is  ",  mean_squared_error(y3_test, res_ypred.reshape(-1, 1)))
#####################################################################
############################## SCORE 4 ##############################
#####################################################################
print("######### The train & test process of SCORE4 ##########")
with tf.Session() as sess: 
    sess.run(tf.global_variables_initializer()) 
    for epoch in range(1000): 
        # 迭代1000个周期 
        for batch_xs,batch_ys in generatebatch(X4_train, y4_train, y4_train.shape[0], batch_size): 
            # 每个周期进行MBGD算法 
            sess.run(train_step, feed_dict={tf_X:batch_xs, tf_Y:batch_ys, keep_prob: 0.5}) 
        if(epoch%100==0): 
            res = sess.run(accuracy,feed_dict={tf_X:X4_train, tf_Y:y4_train, keep_prob: 1}) 
            #print((epoch,res))
            print("The %d epoch, the accuracy is %f " %(epoch, res))
    res_ypred = y_pred.eval(feed_dict={tf_X:X4_test, tf_Y:y4_test_code, keep_prob: 1}).flatten() 
    # 只能预测一批样本，不能预测一个样本 
    print('The result of the test bath is  ', res_ypred)
print("The accurate score of the test bath is  ", accuracy_score(y4_test, res_ypred.reshape(-1, 1)))
print("The MSE of the test bach is  ",  mean_squared_error(y4_test, res_ypred.reshape(-1, 1)))

######### The train & test process of SCORE1 ##########
The 0 epoch, the accuracy is 0.033570 
The 100 epoch, the accuracy is 0.852573 
The 200 epoch, the accuracy is 0.936242 
The 300 epoch, the accuracy is 0.972125 
The 400 epoch, the accuracy is 0.986298 
The 500 epoch, the accuracy is 0.992507 
The 600 epoch, the accuracy is 0.995547 
The 700 epoch, the accuracy is 0.997474 
The 800 epoch, the accuracy is 0.998673 
The 900 epoch, the accuracy is 0.999229 
The result of the test bath is   [3 3 2 ..., 4 4 3]
The accurate score of the test bath is   0.000642260757868
The MSE of the test bach is   1.00166987797
######### The train & test process of SCORE2 ##########
The 0 epoch, the accuracy is 0.105549 
The 100 epoch, the accuracy is 0.883831 
The 200 epoch, the accuracy is 0.959536 
The 300 epoch, the accuracy is 0.987325 
The 400 epoch, the accuracy is 0.994905 
The 500 epoch, the accuracy is 0.998416 
The 600 epoch, the accuracy is 0.999229 
The 700 epoch, the accuracy is 0.999443 