In [1]:
#模拟退火算法
import numpy as np
import gym
import time
import random

def get_action(weights, observation):# 根据权值对当前状态做出决策
    wxb = np.dot(weights[:4], observation) + weights[4] # 计算加权和
    if wxb >= 0:# 加权和大于0时选取动作1，否则选取0
        return 1
    else:
        return 0

def get_sum_reward_by_weights(env, weights):
# 测试不同权值的控制模型有效控制的持续时间（或奖励）
    observation = env.reset() # 重置初始状态
    sum_reward = 0 # 记录总的奖励
    for t in range(200):
        # time.sleep(0.01)
        # env.render()
        action = get_action(weights, observation) # 获取当前权值下的决策动作
        observation, reward, done, info = env.step(action)# 执行动作并获取这一动作下的下一时间步长状态
        sum_reward += reward
        # print(sum_reward, action, observation, reward, done, info)
        if done:# 如若游戏结束，返回
            #print("Episode finished after {} timesteps".format(t+1))
            break
    return sum_reward


def get_weights_by_random_guess():
# 选取随机猜测的5个随机权值
    return np.random.rand(5)

def get_weights_by_hill_climbing(best_weights):
# 通过爬山算法选取权值（在当前最好权值上加入随机值）
    return best_weights + np.random.normal(0, 0.1, 5)

def get_best_result(algo="hill_climbing"):
    env = gym.make("CartPole-v0")
    np.random.seed(10)
    best_reward = 0 # 初始最佳奖励
    best_weights = np.random.rand(5) # 初始权值为随机取值
    max_reward = best_reward
    max_weights = best_weights

    for iter in range(10000):# 迭代10000次
        cur_weights = None

        if algo == "hill_climbing": # 选取动作决策的算法 
            # print(best_weights)
            cur_weights = get_weights_by_hill_climbing(best_weights)
        else: # 若为随机猜测算法，则选取随机权值
            cur_weights = get_weights_by_random_guess()
        # 获取当前权值的模型控制的奖励和
        cur_sum_reward = get_sum_reward_by_weights(env, cur_weights)

        # print(cur_sum_reward, cur_weights)
        # 更新当前最优权值
        if cur_sum_reward > best_reward:
            best_reward = cur_sum_reward
            best_weights = cur_weights
            if max_reward < best_reward:
                max_reward = best_reward
                max_weights = best_weights
        # 达到最佳奖励阈值后结束
        elif random.randint(0,10000)/10000<np.exp(-iter/10000) or cur_sum_reward < max_reward*0.90:
            #print(iter,random.randint(0,10000)/10000,np.exp(-iter/10000)-np.exp(-1))
            best_reward = cur_sum_reward
            best_weights = cur_weights
        elif max_reward >= 200:
            break

    print(iter, max_reward, max_weights)
    return max_reward, max_weights

# 程序从这里开始执行
print(get_best_result()) # 调用爬山算法寻优并输出结果 

3531 200.0 [ 0.85072654 -0.79839842  2.28695231  2.7484223   0.08529538]
(200.0, array([ 0.85072654, -0.79839842,  2.28695231,  2.7484223 ,  0.08529538]))


In [2]:
#数据准备,通过cartpole随机决策输入样本数据
import gym
max_reward, max_weights=get_best_result()
X=[]     #记录observation
Y=[]     #记录正确的摆动动作
env = gym.make('CartPole-v0')
for i_episode in range(1000):
    observation = env.reset()
    for step in range(200):
        #env.render()
        #print(type(observation.tolist()))
        action = env.action_space.sample()
        action = get_action(max_weights, observation)
        #print (action,' ',action^1)
        observation, reward, done, info = env.step(action)
        X.append(observation.tolist())
        if reward:
            Y.append(action)
        else :
            Y.append(action^1)
        if done:
            #print("Episode finished after {} timesteps".format(step+1))
            break

1486 200.0 [ 0.85072654 -0.79839842  2.28695231  2.7484223   0.08529538]


In [3]:
#将80%数据作为训练集，剩下20%数据作为验证集
from sklearn.model_selection import train_test_split 
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.2) 

In [4]:
#对数据进行归一化处理
from sklearn.preprocessing import StandardScaler 

scaler = StandardScaler()     
scaler.fit(x_train)     
x_train = scaler.fit_transform(x_train)       
x_test = scaler.fit_transform(x_test)


In [5]:
#SGD 二元分类器
from sklearn.linear_model import SGDClassifier

sgd_clf = SGDClassifier(max_iter=5, tol=-np.infty, random_state=42)
sgd_clf.fit(x_train, y_train)

predict = sgd_clf.predict(x_test)    
right = sum(predict == y_test) 
print ('测试集准确率：%f%%'%(right*100.0/predict.shape[0]))       #计算在测试集上的准确度

测试集准确率：98.781152%


In [6]:
#线性回归
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(x_train, y_train)

predict = lin_reg.predict(x_test)
right=0
for i in range(len(predict)):
    if predict[i] - y_test[i] < 0.5 and predict[i] - y_test[i] > -0.5:
        right=right+1
print ('测试集准确率：%f%%'%(right*100.0/predict.shape[0]))       #计算在测试集上的准确度

测试集准确率：95.962376%


In [7]:
#岭回归
from sklearn.linear_model import Ridge
ridge_reg = Ridge(alpha=1, solver="cholesky", random_state=42)
ridge_reg.fit(x_train, y_train)

predict = ridge_reg.predict(x_test)    
right=0
for i in range(len(predict)):
    if predict[i] - y_test[i] < 0.5 and predict[i] - y_test[i] > -0.5:
        right=right+1
print ('测试集准确率：%f%%'%(right*100.0/predict.shape[0]))       #计算在测试集上的准确度

测试集准确率：95.965400%


In [8]:
#逻辑斯蒂回归
from sklearn.linear_model import LogisticRegression 
import numpy as np
model = LogisticRegression()     
model.fit(x_train,y_train)
predict = model.predict(x_test)    
right = sum(predict == y_test)          
predict= np.array(predict)
y_test = np.array(y_test)
predict = np.hstack((predict.reshape(-1,1),y_test.reshape(-1,1)))   # 将预测值和真实值放在一块，好观察  
print (predict)     
print ('测试集准确率：%f%%'%(right*100.0/predict.shape[0]))       #计算在测试集上的准确度



[[0 0]
 [0 0]
 [0 0]
 ...
 [1 1]
 [1 1]
 [0 0]]
测试集准确率：99.319502%


In [9]:
#SVM 向量机
import numpy as npy
from sklearn import svm
# 线性核函数
linear = svm.SVC(kernel='linear').fit(x_train, y_train)
predict = linear.predict(x_test)    
right = sum(predict == y_test) 
print('测试集准确率：%f%%'%(right*100.0/predict.shape[0]))

测试集准确率：99.558432%


In [17]:
# 多项式核函数
poly = svm.SVC(kernel='poly', degree=2).fit(x_train, y_train)
predict = poly.predict(x_test)    
right = sum(predict == y_test) 
print('测试集准确率：%f%%'%(right*100.0/predict.shape[0]))

测试集准确率：54.458021%


In [15]:
# 径向基核函数
rbf = svm.SVC().fit(x_train, y_train)
predict = rbf.predict(x_test)    
right = sum(predict == y_test) 
print('测试集准确率：%f%%'%(right*100.0/predict.shape[0]))

测试集准确率：99.476772%


In [16]:
#sigmoid函数
sigmoid = svm.SVC(kernel='sigmoid').fit(x_train, y_train)
predict = sigmoid.predict(x_test)    
right = sum(predict == y_test) 
print('测试集准确率：%f%%'%(right*100.0/predict.shape[0]))

测试集准确率：84.415074%


In [10]:
#k近邻算法
from sklearn.datasets import load_iris  
from sklearn import neighbors  
import sklearn

knn=knn = neighbors.KNeighborsClassifier()
#训练数据集  
knn.fit(x_train,y_train)
#训练准确率
score = knn.score(x_train, y_train)
predict = knn.predict(x_test)    
right = sum(predict == y_test) 
print('测试集准确率：%f%%'%(right*100.0/predict.shape[0]))

测试集准确率：99.491895%


In [11]:
#决策树算法
from sklearn.tree import DecisionTreeClassifier

tree_clf = DecisionTreeClassifier()
tree_clf.fit(x_train,y_train)

predict = tree_clf.predict(x_test)    
right = sum(predict == y_test) 
print('测试集准确率：%f%%'%(right*100.0/predict.shape[0]))

测试集准确率：99.068473%


In [12]:
#多分类器
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

log_clf = LogisticRegression(solver="liblinear", random_state=42)
rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)
svm_clf = SVC(gamma="auto", random_state=42)

voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='hard')

voting_clf.fit(x_train, y_train)
predict = voting_clf.predict(x_test)    
right = sum(predict == y_test) 
print('测试集准确率：%f%%'%(right*100.0/predict.shape[0]))

测试集准确率：99.497943%


In [13]:
#bagging
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42), n_estimators=500,
    bootstrap=True, n_jobs=-1, random_state=42)
bag_clf.fit(x_train, y_train)
y_pred = bag_clf.predict(x_test)
right = sum(y_pred == y_test) 
print('测试集准确率：%f%%'%(right*100.0/y_pred.shape[0]))

测试集准确率：99.488870%


In [14]:
#随机森林
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)
rnd_clf.fit(x_train, y_train)

y_pred_rf = rnd_clf.predict(x_test)
right = sum(y_pred_rf == y_test) 
print('测试集准确率：%f%%'%(right*100.0/y_pred_rf.shape[0]))

测试集准确率：95.832325%
