/
index.html
1079 lines (403 loc) · 32.9 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html class="theme-next mist use-motion" lang="zh-Hans">
<head><meta name="generator" content="Hexo 3.9.0">
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<link rel="stylesheet" href="/lib/font-awesome/css/font-awesome.min.css?v=4.7.0">
<link rel="stylesheet" href="/css/main.css?v=7.2.0">
<link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png?v=7.2.0">
<link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png?v=7.2.0">
<link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png?v=7.2.0">
<link rel="mask-icon" href="/images/logo.svg?v=7.2.0" color="#222">
<script id="hexo.configurations">
// Global theme state: reuse an existing window.NexT if present, otherwise start empty.
var NexT = window.NexT || {};

// Site-wide settings emitted for theme version 7.2.0 (scheme "Mist").
var CONFIG = {
  root: '/',
  scheme: 'Mist',
  version: '7.2.0',
  sidebar: {
    position: 'left',
    display: 'post',
    offset: 12,
    onmobile: false,
    dimmer: false
  },
  back2top: true,
  back2top_sidebar: false,
  fancybox: false,
  fastclick: false,
  lazyload: false,
  tabs: true,
  motion: {
    enable: true,
    async: false,
    transition: {
      post_block: 'fadeIn',
      post_header: 'slideDownIn',
      post_body: 'slideDownIn',
      coll_header: 'slideLeftIn',
      sidebar: 'slideUpIn'
    }
  },
  // Algolia search settings; the credentials are empty strings on this page.
  algolia: {
    applicationID: '',
    apiKey: '',
    indexName: '',
    hits: { per_page: 10 },
    labels: {
      input_placeholder: 'Search for Posts',
      hits_empty: "We didn't find any results for the search: ${query}",
      hits_stats: '${hits} results found in ${time} ms'
    }
  }
};
</script>
<meta name="description" content="TL;DR 图形游戏(比如坦克大战)如果要实现智能Agent(AKA 电脑玩家)的话,目前最佳的方案就是Reinforcement Learning (简称 RL ;中文:增强学习)。 本文记录了我和Reinforcement Learning的第一次交手,将带你了解这位名扬四海却又神秘莫测的对手。:)">
<meta name="keywords" content="机器学习,Reinforcement_Learning,坦克大战">
<meta property="og:type" content="article">
<meta property="og:title" content="Reinforcement Learning: 初次交手,多多指教">
<meta property="og:url" content="https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/index.html">
<meta property="og:site_name" content="Howl's">
<meta property="og:description" content="TL;DR 图形游戏(比如坦克大战)如果要实现智能Agent(AKA 电脑玩家)的话,目前最佳的方案就是Reinforcement Learning (简称 RL ;中文:增强学习)。 本文记录了我和Reinforcement Learning的第一次交手,将带你了解这位名扬四海却又神秘莫测的对手。:)">
<meta property="og:locale" content="zh-Hans">
<meta property="og:image" content="https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/RL_loop.svg">
<meta property="og:image" content="https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/house_environment.gif">
<meta property="og:image" content="https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/node_environment.gif">
<meta property="og:image" content="https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/node_environment_with_reward.gif">
<meta property="og:image" content="https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/agent_at_room_2.gif">
<meta property="og:image" content="https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/agent_at_state_2.gif">
<meta property="og:image" content="https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/matrix_r_init.gif">
<meta property="og:updated_time" content="2018-08-13T11:08:02.489Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Reinforcement Learning: 初次交手,多多指教">
<meta name="twitter:description" content="TL;DR 图形游戏(比如坦克大战)如果要实现智能Agent(AKA 电脑玩家)的话,目前最佳的方案就是Reinforcement Learning (简称 RL ;中文:增强学习)。 本文记录了我和Reinforcement Learning的第一次交手,将带你了解这位名扬四海却又神秘莫测的对手。:)">
<meta name="twitter:image" content="https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/RL_loop.svg">
<link rel="alternate" href="/atom.xml" title="Howl's" type="application/atom+xml">
<link rel="canonical" href="https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/">
<script id="page.configurations">
// Per-page settings attached to the global CONFIG object; sidebar is an empty string here.
CONFIG.page = { sidebar: "" };
</script>
<title>Reinforcement Learning: 初次交手,多多指教 | Howl's</title>
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-105150423-2"></script>
<script>
// Google Analytics bootstrap: queues the initial 'js' and 'config' commands
// on window.dataLayer. Nothing is queued when the hostname is "localhost".
var host = window.location.hostname;
var gtag;
if (host !== "localhost") {
  window.dataLayer = window.dataLayer || [];
  // Pushes the call's `arguments` object onto the dataLayer queue.
  gtag = function gtag() { dataLayer.push(arguments); };
  gtag('js', new Date());
  gtag('config', 'UA-105150423-2');
}
</script>
<noscript>
<style>
/*
 * These rules sit inside <noscript>, so they apply only when scripting is
 * disabled. They reset opacity (and top/left/right offsets for the logo
 * parts) of the .use-motion elements to `initial`.
 * NOTE(review): presumably main.css hides/offsets these elements until the
 * motion scripts animate them in — confirm against the stylesheet.
 */
.use-motion .motion-element,
.use-motion .brand,
.use-motion .menu-item,
.sidebar-inner,
.use-motion .post-block,
.use-motion .pagination,
.use-motion .comments,
.use-motion .post-header,
.use-motion .post-body,
.use-motion .collection-title { opacity: initial; }
.use-motion .logo,
.use-motion .site-title,
.use-motion .site-subtitle {
opacity: initial;
top: initial;
}
.use-motion .logo-line-before i { left: initial; }
.use-motion .logo-line-after i { right: initial; }
</style>
</noscript>
</head>
<body itemscope itemtype="http://schema.org/WebPage" lang="zh-Hans">
<div class="container sidebar-position-left page-post-detail">
<div class="headband"></div>
<header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
<div class="header-inner"><div class="site-brand-wrapper">
<div class="site-meta">
<div class="custom-logo-site-title">
<a href="/" class="brand" rel="start">
<span class="logo-line-before"><i></i></span>
<span class="site-title">Howl's</span>
<span class="logo-line-after"><i></i></span>
</a>
</div>
</div>
<div class="site-nav-toggle">
<button aria-label="Toggle navigation bar">
<span class="btn-bar"></span>
<span class="btn-bar"></span>
<span class="btn-bar"></span>
</button>
</div>
</div>
<nav class="site-nav">
<ul id="menu" class="menu">
<li class="menu-item menu-item-home">
<a href="/" rel="section"><i class="menu-item-icon fa fa-fw fa-home"></i> <br>Home</a>
</li>
<li class="menu-item menu-item-about">
<a href="/about/" rel="section"><i class="menu-item-icon fa fa-fw fa-user"></i> <br>About</a>
</li>
<li class="menu-item menu-item-tags">
<a href="/tags/" rel="section"><i class="menu-item-icon fa fa-fw fa-tags"></i> <br>Tags</a>
</li>
<li class="menu-item menu-item-categories">
<a href="/categories/" rel="section"><i class="menu-item-icon fa fa-fw fa-th"></i> <br>Categories</a>
</li>
<li class="menu-item menu-item-archives">
<a href="/archives/" rel="section"><i class="menu-item-icon fa fa-fw fa-archive"></i> <br>Archives</a>
</li>
</ul>
</nav>
</div>
</header>
<main id="main" class="main">
<div class="main-inner">
<div class="content-wrap">
<div id="content" class="content">
<div id="posts" class="posts-expand">
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Xiaoquan Kong">
<meta itemprop="description" content>
<meta itemprop="image" content="/images/avatar.gif">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Howl's">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">Reinforcement Learning: 初次交手,多多指教
</h1>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">Posted on</span>
<time title="Created: 2017-11-24 14:46:15" itemprop="dateCreated datePublished" datetime="2017-11-24T14:46:15+08:00">2017-11-24</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-check-o"></i>
</span>
<span class="post-meta-item-text">Edited on</span>
<time title="Modified: 2018-08-13 19:08:02" itemprop="dateModified" datetime="2018-08-13T19:08:02+08:00">2018-08-13</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="fa fa-comment-o"></i>
</span>
<span class="post-meta-item-text">Comments: </span>
<a href="/Reinforcement-Learning-初次交手,多多指教/#comments" itemprop="discussionUrl">
<span class="post-comments-count disqus-comment-count" data-disqus-identifier="Reinforcement-Learning-初次交手,多多指教/" itemprop="commentCount"></span>
</a>
</span>
<br>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<p><strong>TL;DR</strong> 图形游戏(比如坦克大战)如果要实现智能Agent(AKA 电脑玩家)的话,目前最佳的方案就是Reinforcement Learning (简称 RL ;中文:增强学习)。 本文记录了我和Reinforcement Learning的第一次交手,将带你了解这位名扬四海却又神秘莫测的对手。:)<br><a id="more"></a></p>
<h1 id="背景"><a href="#背景" class="headerlink" title="背景"></a>背景</h1><p>公司举办Hackathon(中文:黑客松 &lt;- 来自维基百科的翻译),其中有一道题是实现坦克大战的玩家程序。在别人普遍使用人工策略的情况下,考虑到我们团队人少(精确的讲只有我一人会写程序,其他团队起码三个程序员),所以在人工策略这条路上肯定是非常的吃亏,所以不如另辟蹊径,拼搏一把(反正也基本出不了初赛了)。于是我们选择了让机器学习的方案。于是 Reinforcement Learning 就成了目标。在此之前,我也只是听过RL的大名,现在要使用RL,内心是既兴奋又紧张。</p>
<h1 id="Reinforcement-Learning"><a href="#Reinforcement-Learning" class="headerlink" title="Reinforcement Learning"></a>Reinforcement Learning</h1><h2 id="概念"><a href="#概念" class="headerlink" title="概念"></a>概念</h2><p>为了更加容易理解,我们将引入坦克大战的例子来辅助讲解。总的来说RL模型将现实世界的问题抽象成两类对象的交互:<code>Environment</code>和<code>Agent</code>,对坦克大战而言,游戏就是<code>Environment</code>,而游戏玩家就是<code>Agent</code>。</p>
<p><code>Environment</code>提供<code>observation</code>:这是<code>Agent</code>对外界环境的观察,坦克大战中,游戏的图形界面就是<code>Agent</code>(游戏玩家)对<code>Environment</code>(游戏)的<code>observation</code>。<code>Agent</code>使用自己的逻辑,根据对<code>observation</code>的理解,给出一个<code>action</code>:这表示对外界环境的一个操作或者反馈,坦克大战中游戏玩家的操作就是<code>action</code>。<code>action</code>在每个step中会被发送给<code>Environment</code>,<code>Environment</code>则会返回新的<code>observation</code>和<code>reward</code>。<code>reward</code>表示的是当前情况下<code>Environment</code>对<code>action</code>的反馈:数值可能正数也可能负数也可能是零,坦克大战中坦克被击毁、击毁敌方坦克、获取装备或者旗帜等直接得分或者失分都算是<code>reward</code>。然后<code>Agent</code>根据新的<code>observation</code>给出新的<code>action</code>,如此循环往复。聪明的算法能在<code>observation</code>、<code>action</code>和<code>reward</code>中发现关系,使得每一次给出的<code>action</code>都能得到最大期望的<code>reward</code>。</p>
<p><img src="RL_loop.svg" alt="Reinforcement Learning Loop"></p>
<h2 id="特点"><a href="#特点" class="headerlink" title="特点"></a>特点</h2><p>与Supervised Learning不同的是,Reinforcement Learning要解决的问题是存在<code>reward delay</code>现象的,也就是说Reinforcement Learning会考虑全局最优,而不是当前这一步是最优的,避免“赢了战役,输了战争”这种现象。坦克大战中的例子就是高级玩家可能会选择一种策略:即使不停的被攻击,他的坦克不选择躲避炮弹而是选择承受炮火的同时持续不断的攻击你的基地的外墙。只看一步操作而言,这样的行动是失败的,因为己方的某一辆坦克被击毁,但从长远的角度来看,你能在最后一辆己方坦克被击毁前成功的击毁对方的基地。</p>
<h2 id="Q-learning-算法"><a href="#Q-learning-算法" class="headerlink" title="Q-learning 算法"></a>Q-learning 算法</h2><p>我们将介绍Reinforcement Learning中比较容易理解的算法:Q-learning</p>
<h3 id="Q-learning-简介"><a href="#Q-learning-简介" class="headerlink" title="Q-learning 简介"></a>Q-learning 简介</h3><p><strong>TODO</strong></p>
<h3 id="实验环境"><a href="#实验环境" class="headerlink" title="实验环境"></a>实验环境</h3><p>我们将引入一个简单的环境:一个房子。这个房子由5个房间构成(编号:0 - 4),连上房子外的空间(编号:5),共六个状态。房间之间与房间和户外空间之间可能存在门,也就是相互联通。如下图所示:</p>
<p><img src="house_environment.gif" alt="House environment"></p>
<p>我们根据房间的联通状态,将上述的物理房间图抽象,将每个房间抽象成一个节点(Node)或者状态(state),房子之间存在联通关系的则用一个有向边表示(因为房门是双向联通,所以每个房门对应两个相向的有向边),如下图所示:</p>
<p><img src="node_environment.gif" alt="Node environment"></p>
<p>在这个例子中,我们的目标是将 agent (抽象概念) 从房间里移动到户外空间。Q-learning 的目标是到达 reward 最高的状态,而在本例中,状态 5 就是我们想要的目标状态(也称最终状态),Q-learning 到达目标状态后就会永久留在目标状态,因此我们给状态 5 增加一个指向自己的有向边(如上图示)。这种目标或者状态称之为 absorbing goal 或者 absorbing state。</p>
<p>为了让状态 5 成为目标状态,我们将所有指向状态 5 的有向边全部赋值 reward=100 ,除此之外的边全部赋值 reward=0 。如下图所示:</p>
<p><img src="node_environment_with_reward.gif" alt="Node environment with reward"></p>
<p>假设本例子中的 agent 是一个笨笨的虚拟机器人,它会从以前的经验中学习知识,它能够从一个房间到另一个房间,但它不知道房间的情况也不知道从房间到外面空间的路径。</p>
<p>假设本例子的目标是建立一个模型帮助 agent 从房子中的 <strong>任意一个房间</strong> 出发到达户外空间。现在我们假设 agent 在房间 2 ,我们想要让它学习如何到达户外空间。如下图所示:</p>
<p><img src="agent_at_room_2.gif" alt="Agent at room 2"></p>
<p>为了和 Reinforcement Learning 保持一致,我们特别将每个房间 Node 称之为状态(state)或者叫做 observation ,而将 agent 的每一次移动称之为 action。observation 在这里有点难以理解,所以这里使用 状态(state)这个 Q-learning 术语。在本例子中,action 使用有向边来表示。如下图所示:</p>
<p><img src="agent_at_state_2.gif" alt="agent at state 2"></p>
<p>从状态 2 agent 可以到达状态 3,这是因为状态 2 存在到状态 3 的有向边,也就是存在联通的门。状态 2 不能直接到达状态 1,这是因为状态 2 不存在到达状态 1 的有向边,也就是这两个房间之间不存在门。状态 3 可以到达状态 1 、状态 4 和回到状态 2。agent 在状态 1 和状态 0 的可能到达状态,读者可以自行观察,不再赘述。我们将上述所有可能状态、 action 和 reward 编制成一张表:得到 matrix R。如下图所示。</p>
<p> <img src="matrix_r_init.gif" alt="matrix r init"></p>
<p> <strong>NOTE</strong> 表中的 -1 表示无效值,也就是这个 action 不存在,比如不存在从状态 0 到状态 1 的 action 或者说门。</p>
<p> 我们将增加一个相似的矩阵 matrix Q ,用于表示 agent 从经验中学到的知识。matrix Q 的每一行代表当前状态,每一列代表下一个状态。刚开始时,agent 并没有学习到任何知识,所以 matrix Q 中的值初始化为 0. 在本例子中,我们已经知道所有的状态数为 6, 在现实例子中,这个数可能是未知的,所以初始化的时候可能只有一行一列,当发现新的状态时,Matrix Q 可以增加新的行和列。</p>
<p> Q-learning 的状态转移规则如下:</p>
<p> $$ Q(state, action) = R(state, action) + \gamma * Max[Q(next state, all actions)] $$</p>
<p> 按照这个公式,matrix Q 中一个元素的值等于 matrix R 中相应的值,加上 $\gamma$(学习参数)乘以下一个状态所有 action 中最大的 Q 值。</p>
<p> 我们的 agent 不需要老师就能从经验中学习,因此这个是 非监督学习。每一次 agent 从一个状态转到另一个状态,最终达到目标状态。这样的一次探索过程我们称之为 episode。每一个 episode 包含了 agent 从初始状态到目标状态的所有的 action。每当 agent 到达目标状态时,我们就开始下一个 episode。</p>
<p>Q-Learning 算法大体运行如下:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><span class="line">Set the gamma parameter, and environment rewards in matrix R.</span><br><span class="line">Initialize matrix Q to zero.</span><br><span class="line">For each episode {</span><br><span class="line"> Select a random initial state.</span><br><span class="line"> Do While the goal state hasn't been reached. {</span><br><span class="line"> Select one among all possible actions for the current state.</span><br><span class="line"> Using this possible action, consider going to the next state.</span><br><span class="line"> Get maximum Q value for this next state based on all possible actions.</span><br><span class="line"> Compute: Q(state, action) = R(state, action) + Gamma * Max[Q(next state, all actions)]</span><br><span class="line"> Set the next state as the current state.</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure></p>
</div>
<footer class="post-footer">
<div class="post-tags">
<a href="/tags/机器学习/" rel="tag"># 机器学习</a>
<a href="/tags/Reinforcement-Learning/" rel="tag"># Reinforcement_Learning</a>
<a href="/tags/坦克大战/" rel="tag"># 坦克大战</a>
</div>
<div class="post-nav">
<div class="post-nav-next post-nav-item">
<a href="/Netgear-WNDR4300固件刷openwrt-LEDE/" rel="next" title="Netgear WNDR4300固件刷openwrt/LEDE">
<i class="fa fa-chevron-left"></i> Netgear WNDR4300固件刷openwrt/LEDE
</a>
</div>
<span class="post-nav-divider"></span>
<div class="post-nav-prev post-nav-item">
<a href="/Netgear-WNDR4300路由器LEDE环境自动路由切换技术/" rel="prev" title="Netgear WNDR4300路由器LEDE环境自动路由切换技术">
Netgear WNDR4300路由器LEDE环境自动路由切换技术 <i class="fa fa-chevron-right"></i>
</a>
</div>
</div>
</footer>
</div>
</article>
</div>
</div>
<div class="comments" id="comments">
<div id="disqus_thread">
<noscript>Please enable JavaScript to view the comments powered by Disqus.</noscript>
</div>
</div>
</div>
<div class="sidebar-toggle">
<div class="sidebar-toggle-line-wrap">
<span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
<span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
<span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
</div>
</div>
<aside id="sidebar" class="sidebar">
<div class="sidebar-inner">
<ul class="sidebar-nav motion-element">
<li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap">
Table of Contents
</li>
<li class="sidebar-nav-overview" data-target="site-overview-wrap">
Overview
</li>
</ul>
<div class="site-overview-wrap sidebar-panel">
<div class="site-overview">
<div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
<p class="site-author-name" itemprop="name">Xiaoquan Kong</p>
<div class="site-description motion-element" itemprop="description"></div>
</div>
<nav class="site-state motion-element">
<div class="site-state-item site-state-posts">
<a href="/archives/">
<span class="site-state-item-count">40</span>
<span class="site-state-item-name">posts</span>
</a>
</div>
<div class="site-state-item site-state-categories">
<a href="/categories/">
<span class="site-state-item-count">7</span>
<span class="site-state-item-name">categories</span>
</a>
</div>
<div class="site-state-item site-state-tags">
<a href="/tags/">
<span class="site-state-item-count">27</span>
<span class="site-state-item-name">tags</span>
</a>
</div>
</nav>
<div class="feed-link motion-element">
<a href="/atom.xml" rel="alternate">
<i class="fa fa-rss"></i>
RSS
</a>
</div>
<div class="links-of-author motion-element">
<span class="links-of-author-item">
<a href="https://github.com/howl-anderson" title="GitHub → https://github.com/howl-anderson" rel="noopener" target="_blank"><i class="fa fa-fw fa-github"></i>GitHub</a>
</span>
<span class="links-of-author-item">
<a href="https://www.linkedin.com/in/xiaoquankong/" title="Linkedin → https://www.linkedin.com/in/xiaoquankong/" rel="noopener" target="_blank"><i class="fa fa-fw fa-linkedin"></i>Linkedin</a>
</span>
</div>
</div>
</div>
<!--noindex-->
<div class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
<div class="post-toc">
<div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-1"><a class="nav-link" href="#背景"><span class="nav-number">1.</span> <span class="nav-text">背景</span></a></li><li class="nav-item nav-level-1"><a class="nav-link" href="#Reinforcement-Learning"><span class="nav-number">2.</span> <span class="nav-text">Reinforcement Learning</span></a><ol class="nav-child"><li class="nav-item nav-level-2"><a class="nav-link" href="#概念"><span class="nav-number">2.1.</span> <span class="nav-text">概念</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#特点"><span class="nav-number">2.2.</span> <span class="nav-text">特点</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#Q-learning-算法"><span class="nav-number">2.3.</span> <span class="nav-text">Q-learning 算法</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#Q-learning-简介"><span class="nav-number">2.3.1.</span> <span class="nav-text">Q-learning 简介</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#实验环境"><span class="nav-number">2.3.2.</span> <span class="nav-text">实验环境</span></a></li></ol></li></ol></li></ol></div>
</div>
</div>
<!--/noindex-->
</div>
</aside>
</div>
</main>
<footer id="footer" class="footer">
<div class="footer-inner">
<div class="copyright">© 2015 – <span itemprop="copyrightYear">2020</span>
<span class="with-love" id="animate">
<i class="fa fa-user"></i>
</span>
<span class="author" itemprop="copyrightHolder">Xiaoquan Kong</span>
</div>
</div>
</footer>
<div class="back-to-top">
<i class="fa fa-arrow-up"></i>
</div>
</div>
<script>
// Null out window.Promise unless Object.prototype.toString reports it as
// '[object Function]'.
// NOTE(review): presumably lets later scripts detect a missing or
// non-standard Promise — confirm against the scripts that read it.
(function () {
  var promiseTag = Object.prototype.toString.call(window.Promise);
  if (promiseTag !== '[object Function]') {
    window.Promise = null;
  }
}());
</script>
<script src="/lib/jquery/index.js?v=3.4.1"></script>
<script src="/lib/velocity/velocity.min.js?v=1.2.1"></script>
<script src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
<script src="/js/utils.js?v=7.2.0"></script>
<script src="/js/motion.js?v=7.2.0"></script>
<script src="/js/schemes/muse.js?v=7.2.0"></script>
<script src="/js/scrollspy.js?v=7.2.0"></script>
<script src="/js/post-details.js?v=7.2.0"></script>
<script src="/js/next-boot.js?v=7.2.0"></script>
<script>
/**
 * Creates a <script id="dsq-count-scr"> element pointing at the Disqus
 * count.js URL and appends it to <head>, or to <body> when head is falsy.
 */
function loadCount() {
  var countScript = document.createElement('script');
  countScript.id = 'dsq-count-scr';
  countScript.src = 'https://blog-xiaoquankong-ai.disqus.com/count.js';
  var parent = document.head || document.body;
  parent.appendChild(countScript);
}
// defer loading until the whole page loading is completed
window.addEventListener('load', loadCount, false);
</script>
<script>
// Callback that writes this post's url, identifier and title onto `this.page`.
// NOTE(review): presumably invoked by the Disqus embed script — confirm.
var disqus_config = function () {
  var page = this.page;
  page.url = "https://blog.xiaoquankong.ai/Reinforcement-Learning-初次交手,多多指教/";
  page.identifier = "Reinforcement-Learning-初次交手,多多指教/";
  page.title = 'Reinforcement Learning: 初次交手,多多指教';
};
/**
 * Creates a <script> element pointing at the Disqus embed.js URL, stamps it
 * with the current time in a data-timestamp attribute, and appends it to
 * <head>, or to <body> when head is falsy.
 */
function loadComments() {
  var embedScript = document.createElement('script');
  embedScript.src = 'https://blog-xiaoquankong-ai.disqus.com/embed.js';
  embedScript.setAttribute('data-timestamp', String(+new Date()));
  var parent = document.head || document.body;
  parent.appendChild(embedScript);
}
// Run once the window 'load' event fires.
window.addEventListener('load', loadComments, false);
</script>