% acml19.bib — Proceedings of The Eleventh Asian Conference on Machine
% Learning (ACML 2019), PMLR volume 101 (forked from mlresearch/v101).
% Crossref parent inherited by every @InProceedings entry below.
% NOTE(review): classic BibTeX requires a crossref parent to appear AFTER all
% entries that reference it; here it comes first. PMLR's own tooling tolerates
% this order — confirm before compiling this file with plain bibtex.
@Proceedings{acml19,
  booktitle = {Proceedings of The Eleventh Asian Conference on
    Machine Learning},
  name      = {Asian Conference on Machine Learning},
  shortname = {ACML},
  year      = {2019},
  editor    = {Lee, Wee Sun and Suzuki, Taiji},
  volume    = {101},
  start     = {2019-11-17},
  end       = {2019-11-19},
  published = {2019-10-15},
  address   = {Nagoya, Japan},
  sections  = {Preface|Accepted Papers},
  url       = {http://www.acml-conf.org/2019/}
}
@InProceedings{lee19,
  title    = {Asian Conference on Machine Learning: Preface},
  author   = {Lee, Wee Sun and Suzuki, Taiji},
  pages    = {i--xvi},
  crossref = {acml19},
  section  = {Preface},
  abstract = {Preface to ACML 2019.}
}
@InProceedings{nishio19,
  title    = {Random Projection in Neural Episodic Control},
  author   = {Nishio, Daichi and Yamane, Satoshi},
  pages    = {1--15},
  crossref = {acml19},
  abstract = {End-to-end deep reinforcement learning has enabled
    agents to learn with little preprocessing by
    humans. However, it is still difficult to learn
    stably and efficiently because the learning method
    usually uses a nonlinear function
    approximation. Neural Episodic Control (NEC), which
    has been proposed in order to improve sample
    efficiency, is able to learn stably by estimating
    action values using a non-parametric method. In this
    paper, we propose an architecture that incorporates
    random projection into NEC to train with more
    stability. In addition, we verify the effectiveness
    of our architecture by Atari's five games. The main
    idea is to reduce the number of parameters that have
    to learn by replacing neural networks with random
    projection in order to reduce dimensions while
    keeping the learning end-to-end.}
}
@InProceedings{ji19,
  title    = {Differentially Private Community Detection in
    Attributed Social Networks},
  author   = {Ji, Tianxi and Luo, Changqing and Guo, Yifan and Ji,
    Jinlong and Liao, Weixian and Li, Pan},
  pages    = {16--31},
  crossref = {acml19},
  abstract = {Community detection is an effective approach to
    unveil social dynamics among individuals in social
    networks. In the literature, quite a few algorithms
    have been proposed to conduct community detection by
    exploiting the topology of social networks and the
    attributes of social actors. In practice, community
    detection is usually conducted by third parties like
    advertisement companies, hospitals, with access to
    social networks for different purposes, which can
    easily lead to privacy breaches. In this paper, we
    investigate community detection in social networks
    aiming to protect the privacy of both the network
    topologies and the users' attributes. In particular,
    we propose a new scheme called differentially
    private community detection (DPCD). DPCD detects
    communities in social networks via a probabilistic
    generative model, which can be decomposed into
    subproblems solved by individual users. The private
    social relationships and attributes of each user are
    protected by objective perturbation with
    differential privacy guarantees. Through both
    theoretical analysis and experimental validation
    using synthetic and real world social networks, we
    demonstrate that the proposed DPCD scheme detects
    social communities under modest privacy budget.}
}
@InProceedings{yang19a,
  title    = {Towards Governing Agent's Efficacy:
    Action-Conditional $\beta$-VAE for Deep Transparent
    Reinforcement Learning},
  author   = {Yang, John and Lee, Gyuejeong and Chang, Simyung and
    Kwak, Nojun},
  pages    = {32--47},
  crossref = {acml19},
  abstract = {We tackle the blackbox issue of deep neural networks
    in the settings of reinforcement learning (RL) where
    neural agents learn towards maximizing reward gains
    in an uncontrollable way. Such learning approach is
    risky when the interacting environment includes an
    expanse of state space because it is then almost
    impossible to foresee all unwanted outcomes and
    penalize them with negative rewards beforehand. We
    propose Action-conditional $\beta$-VAE
    (AC-$\beta$-VAE) that allows succinct mappings of
    action-dependent factors in desirable dimensions of
    latent representations while disentangling
    environmental factors. Our proposed method tackles
    the blackbox issue by encouraging an RL policy
    network to learn interpretable latent features by
    distinguishing its influences from uncontrollable
    environmental factors, which closely resembles the
    way humans understand their scenes. Our experimental
    results show that the learned latent factors not
    only are interpretable, but also enable modeling the
    distribution of entire visited state-action
    space. We have experimented that this characteristic
    of the proposed structure can lead to ex post facto
    governance for desired behaviors of RL agents.}
}
@InProceedings{tang19,
  title    = {An Articulated Structure-aware Network for 3D Human
    Pose Estimation},
  author   = {Tang, Zhenhua and Zhang, Xiaoyan and Hou, Junhui},
  pages    = {48--63},
  crossref = {acml19},
  abstract = {In this paper, we propose a new end-to-end
    articulated structure-aware network to regress 3D
    joint coordinates from the given 2D joint
    detections. The proposed method is capable of
    dealing with hard joints well that usually fail
    existing methods. Specifically, our framework
    cascades a refinement network with a basic network
    for two types of joints, and employs a attention
    module to simulate a camera projection model. In
    addition, we propose to use a random enhancement
    module to intensify the constraints between
    joints. Experimental results on the Human3.6M and
    HumanEva databases demonstrate the effectiveness and
    flexibility of the proposed network, and errors of
    hard joints and bone lengths are significantly
    reduced, compared with state-of-the-art approaches.}
}
@InProceedings{wang19a,
  title    = {A Continuous Actor-Critic Reinforcement Learning
    Approach to Flocking with Fixed-Wing UAVs},
  author   = {Wang, Chang and Yan, Chao and Xiang, Xiaojia and
    Zhou, Han},
  pages    = {64--79},
  crossref = {acml19},
  abstract = {Controlling a squad of fixed-wing UAVs is
    challenging due to the kinematics complexity and the
    environmental dynamics. In this paper, we develop a
    novel actor-critic reinforcement learning approach
    to solve the leader-follower flocking problem in
    continuous state and action spaces. Specifically, we
    propose a CACER algorithm that uses multilayer
    perceptron to represent both the actor and the
    critic, which has a deeper structure and provides a
    better function approximator than the original
    continuous actor-critic learning automation (CACLA)
    algorithm. Besides, we propose a double prioritized
    experience replay (DPER) mechanism to further
    improve the training efficiency. Specifically, the
    state transition samples are saved into two
    different experience replay buffers for updating the
    actor and the critic separately, based on the
    calculation of sample priority using the temporal
    difference errors. We have not only compared CACER
    with CACLA and a benchmark deep reinforcement
    learning algorithm DDPG in numerical simulation, but
    also demonstrated the performance of CACER in
    semi-physical simulation by transferring the learned
    policy in the numerical simulation without parameter
    tuning.}
}
@InProceedings{wang19b,
  title    = {Multiple Empirical Kernel Learning with Discriminant
    Locality Preservation},
  author   = {Wang, Bolu and Li, Dongdong and Wang, Zhe},
  pages    = {80--93},
  crossref = {acml19},
  abstract = {Multiple Kernel Learning (MKL) algorithm effectively
    combines different kernels to improve the
    performance of classification. Most MKL algorithms
    implicitly map samples into feature space by the
    form of inner-product. In contrast, Multiple
    Empirical Kernel Learning (MEKL) can explicitly map
    the input spaces into feature spaces so that the
    mapped feature vectors are explicitly represented,
    which is easy to process and analyze the
    adaptability of kernels for input space. Meanwhile,
    in order to pay attention to the structure and
    discriminant information of samples in empirical
    feature space, inspired by discriminant locality
    preserving projections, we introduce the
    discriminant locality preservation regularization
    into MEKL framework to propose the Multiple
    Empirical Kernel Learning with Discriminant Locality
    Preservation (MEKL-DLP). Experiments conducted on
    real-world datasets validate the effectiveness of
    the proposed MEKL-DLP compared with the classical
    kernel-based algorithms and state-of-art MKL
    algorithms.}
}
@InProceedings{furusho19,
  title    = {ResNet and Batch-normalization Improve Data
    Separability},
  author   = {Furusho, Yasutaka and Ikeda, Kazushi},
  pages    = {94--108},
  crossref = {acml19},
  abstract = {The skip-connection and the batch-normalization (BN)
    in ResNet enable an extreme deep neural network to
    be trained with high performance. However, the
    reasons for its high performance are still
    unclear. To clear that, we study the effects of the
    skip-connection and the BN on the class-related
    signal propagation through hidden layers because a
    large ratio of the between-class distance to the
    within-class distance of feature vectors at the last
    hidden layer induces high performance. Our result
    shows that the between-class distance and the
    within-class distance change differently through
    layers: the deep multilayer perceptron with randomly
    initialized weights degrades the ratio of the
    between-class distance to the within-class distance
    and the skip-connection and the BN relax this
    degradation. Moreover, our analysis implies that the
    skip-connection and the BN encourage training to
    improve this distance ratio. These results imply
    that the skip-connection and the BN induce high
    performance.}
}
@InProceedings{hu19,
  title    = {Variational Conditional GAN for Fine-grained
    Controllable Image Generation},
  author   = {Hu, Mingqi and Zhou, Deyu and He, Yulan},
  pages    = {109--124},
  crossref = {acml19},
  abstract = {In this paper, we propose a novel variational
    generator framework for conditional GANs to catch
    semantic details for improving the generation
    quality and diversity. Traditional generators in
    conditional GANs simply concatenate the conditional
    vector with the noise as the input representation,
    which is directly employed for upsampling
    operations. However, the hidden condition
    information is not fully exploited, especially when
    the input is a class label. Therefore, we introduce
    a variational inference into the generator to infer
    the posterior of latent variable only from the
    conditional input, which helps achieve a variable
    augmented representation for image
    generation. Qualitative and quantitative
    experimental results show that the proposed method
    outperforms the state-of-the-art approaches and
    achieves the realistic controllable images.}
}
@InProceedings{yang19b,
  title    = {Deep Learning with a Rethinking Structure for
    Multi-label Classification},
  author   = {Yang, Yao-Yuan and Lin, Yi-An and Chu, Hong-Min and
    Lin, Hsuan-Tien},
  pages    = {125--140},
  crossref = {acml19},
  abstract = {Multi-label classification (MLC) is an important
    class of machine learning problems that come with a
    wide spectrum of applications, each demanding a
    possibly different evaluation criterion. When
    solving the MLC problems, we generally expect the
    learning algorithm to take the hidden correlation of
    the labels into account to improve the prediction
    performance. Extracting the hidden correlation is
    generally a challenging task. In this work, we
    propose a novel deep learning framework to better
    extract the hidden correlation with the help of the
    memory structure within recurrent neural
    networks. The memory stores the temporary guesses on
    the labels and effectively allows the framework to
    rethink about the goodness and correlation of the
    guesses before making the final
    prediction. Furthermore, the rethinking process
    makes it easy to adapt to different evaluation
    criteria to match real-world application needs. In
    particular, the framework can be trained in an
    end-to-end style with respect to any given MLC
    evaluation criteria. The end-to-end design can be
    seamlessly combined with other deep learning
    techniques to conquer challenging MLC problems like
    image tagging. Experimental results across many
    real-world data sets justify that the rethinking
    framework indeed improves MLC performance across
    different evaluation criteria and leads to superior
    performance over state-of-the-art MLC algorithms.}
}
@InProceedings{konagayoshi19,
  title    = {Minimax Online Prediction of Varying Bernoulli
    Process under Variational Approximation},
  author   = {Konagayoshi, Kenta and Watanabe, Kazuho},
  pages    = {141--156},
  crossref = {acml19},
  internal-note = {FIXME(review): the abstract below is a verbatim
    duplicate of entry yang19b's abstract (multi-label
    classification) and does not match this paper's title;
    replace it with the correct abstract from the published
    proceedings.},
  abstract = {Multi-label classification (MLC) is an important
    class of machine learning problems that come with a
    wide spectrum of applications, each demanding a
    possibly different evaluation criterion. When
    solving the MLC problems, we generally expect the
    learning algorithm to take the hidden correlation of
    the labels into account to improve the prediction
    performance. Extracting the hidden correlation is
    generally a challenging task. In this work, we
    propose a novel deep learning framework to better
    extract the hidden correlation with the help of the
    memory structure within recurrent neural
    networks. The memory stores the temporary guesses on
    the labels and effectively allows the framework to
    rethink about the goodness and correlation of the
    guesses before making the final
    prediction. Furthermore, the rethinking process
    makes it easy to adapt to different evaluation
    criteria to match real-world application needs. In
    particular, the framework can be trained in an
    end-to-end style with respect to any given MLC
    evaluation criteria. The end-to-end design can be
    seamlessly combined with other deep learning
    techniques to conquer challenging MLC problems like
    image tagging. Experimental results across many
    real-world data sets justify that the rethinking
    framework indeed improves MLC performance across
    different evaluation criteria and leads to superior
    performance over state-of-the-art MLC algorithms.}
}
@InProceedings{wang19c,
  title    = {Multivariate Time Series Prediction Based on
    Optimized Temporal Convolutional Networks with
    Stacked Auto-encoders},
  author   = {Wang, Yunxiao and Liu, Zheng and Hu, Di and Zhang,
    Mian},
  pages    = {157--172},
  crossref = {acml19},
  abstract = {Multivariate time series prediction has recently
    attracted extensive research attention due to its
    wide applications in the area of financial
    investment, energy consumption, environmental
    pollution and so on. Because of the temporal
    complexity and nonlinearity existing in multivariate
    time series, few existing models could provide
    satisfactory prediction results. In this paper, we
    proposed a novel prediction approach based on
    optimized temporal convolutional networks with
    stacked auto-encoders, which can achieve better
    prediction performance as demonstrated in the
    experiments. Stacked auto-encoders are employed to
    extract effective features from complex multivariate
    time series. A temporal convolutional network is
    then constructed serving as the prediction model,
    which has a flexible receptive field and enjoys
    faster training speed with parallel computing
    ability than recurrent neural networks. The optimal
    hyperparameters in these models are discovered by
    Bayesian optimization. We performed extensive
    experiments by comparing the proposed algorithms and
    other popular algorithms on three different
    datasets, where the proposed approach obtain the
    best prediction results in various prediction
    horizons. In addition, we carefully analyze the
    search process of Bayesian optimization and provide
    further insights into hyperparametric tuning
    processes combining the exploration strategy with
    the exploitation strategy.}
}
@InProceedings{mollaysa19,
  title    = {Learning to Augment with Feature Side-information},
  author   = {Mollaysa, Amina and Kalousis, Alexandros and Bruno,
    Eric and Diephuis, Maurits},
  pages    = {173--187},
  crossref = {acml19},
  internal-note = {FIXME(review): the abstract below is a verbatim
    duplicate of entry wang19c's abstract (multivariate time
    series prediction) and does not match this paper's title;
    replace it with the correct abstract from the published
    proceedings.},
  abstract = {Multivariate time series prediction has recently
    attracted extensive research attention due to its
    wide applications in the area of financial
    investment, energy consumption, environmental
    pollution and so on. Because of the temporal
    complexity and nonlinearity existing in multivariate
    time series, few existing models could provide
    satisfactory prediction results. In this paper, we
    proposed a novel prediction approach based on
    optimized temporal convolutional networks with
    stacked auto-encoders, which can achieve better
    prediction performance as demonstrated in the
    experiments. Stacked auto-encoders are employed to
    extract effective features from complex multivariate
    time series. A temporal convolutional network is
    then constructed serving as the prediction model,
    which has a flexible receptive field and enjoys
    faster training speed with parallel computing
    ability than recurrent neural networks. The optimal
    hyperparameters in these models are discovered by
    Bayesian optimization. We performed extensive
    experiments by comparing the proposed algorithms and
    other popular algorithms on three different
    datasets, where the proposed approach obtain the
    best prediction results in various prediction
    horizons. In addition, we carefully analyze the
    search process of Bayesian optimization and provide
    further insights into hyperparametric tuning
    processes combining the exploration strategy with
    the exploitation strategy.}
}
@InProceedings{gherbi19,
  title    = {An Encoding Adversarial Network for Anomaly
    Detection},
  author   = {Gherbi, Elies and Hanczar, Blaise and Janodet,
    Jean-Christophe and Klaudel, Witold},
  pages    = {188--203},
  crossref = {acml19},
  abstract = {Anomaly detection is a standard problem in Machine
    Learning with various applications such as
    health-care, predictive maintenance, and
    cyber-security. In such applications, the data is
    unbalanced: the rate of regular examples is much
    higher than the anomalous examples. The emergence of
    the Generative Adversarial Networks (GANs) has
    recently brought new algorithms for anomaly
    detection. Most of them use the generator as a proxy
    for the reconstruction loss. The idea is that the
    generator cannot reconstruct an anomaly. We develop
    an alternative approach for anomaly detection, based
    on an Encoding Adversarial Network (AnoEAN), which
    maps the data to a latent space (decision space),
    where the detection of anomalies is done directly by
    calculating a score. Our encoder is learned by
    adversarial learning, using two loss functions, the
    first constraining the encoder to project regular
    data into a Gaussian distribution and the second, to
    project anomalous data outside this distribution. We
    conduct a series of experiments on several standard
    bases and show that our approach outperforms the
    state of the art when using 10\% anomalies during
    the learning stage, and detects unseen anomalies.}
}
@InProceedings{asadi19,
  title    = {Model-Based Reinforcement Learning Exploiting
    State-Action Equivalence},
  author   = {Asadi, Mahsa and Talebi, Mohammad Sadegh and Bourel,
    Hippolyte and Maillard, Odalric-Ambrym},
  pages    = {204--219},
  crossref = {acml19},
  abstract = {Leveraging an equivalence property in the
    state-space of a Markov Decision Process (MDP) has
    been investigated in several studies. This paper
    studies equivalence structure in the reinforcement
    learning (RL) setup, where transition distributions
    are no longer assumed to be known. We present a
    notion of similarity between transition
    probabilities of various state-action pairs of an
    MDP, which naturally defines an equivalence
    structure in the state-action space. We present
    equivalence-aware confidence sets for the case where
    the learner knows the underlying structure in
    advance. These sets are provably smaller than their
    corresponding equivalence-oblivious counterparts. In
    the more challenging case of an unknown equivalence
    structure, we present an algorithm called
    ApproxEquivalence that seeks to find an
    (approximate) equivalence structure, and define
    confidence sets using the approximate
    equivalence. To illustrate the efficacy of the
    presented confidence sets, we present C-UCRL, as a
    natural modification of UCRL2 for RL in undiscounted
    MDPs. In the case of a known equivalence structure,
    we show that C-UCRL\ improves over UCRL2 in terms of
    \emph{regret} by a factor of $\sqrt{SA/C}$, in any
    communicating MDP with $S$ states, $A$ actions, and
    $C$ classes, which corresponds to a massive
    improvement when $C\ll SA$. To the best of our
    knowledge, this is the first work providing regret
    bounds for RL when an equivalence structure in the
    MDP is efficiently exploited. In the case of an
    unknown equivalence structure, we show through
    numerical experiments that C-UCRL\ combined with
    ApproxEquivalence outperforms UCRL2 in ergodic
    MDPs.}
}
@InProceedings{wang19d,
  title    = {A Model of Text-Enhanced Knowledge Graph
    Representation Learning with Collaborative
    Attention},
  author   = {Wang, Yashen and Zhang, Huanhuan and Xie, Haiyong},
  pages    = {220--235},
  crossref = {acml19},
  abstract = {This paper proposes a novel collaborative attention
    mechanism, to fully utilize the mutually reinforcing
    relationship among the knowledge graph
    representation learning procedure (i.e., structure
    representation) and textual relation representation
    learning procedure (i.e., text
    representation). Based on this collaborative
    attention mechanism, a text-enhanced knowledge graph
    (KG) representation model is proposed, which could
    utilize textual information to enhance the knowledge
    representations and make the multi-direction signals
    to be fully integrated to learn more accurate
    textual representations for further improving
    structure representation and vice
    versa. Experimental results demonstrate the
    efficiency of the proposed model on both link
    prediction task and triple classification task.}
}
@InProceedings{wang19e,
  title    = {SPCDet: Enhancing Object Detection with Combined
    Feature Fusing},
  author   = {Wang, Haixin and Wu, Lintao and Wu, Qiongzhi},
  pages    = {236--251},
  crossref = {acml19},
  abstract = {Feature pyramid and feature fusing are widely used
    in object detection. Using feature pyramid can
    confront the challenge of scale variation across
    different objects. Feature fusing imports context
    information to improve detection
    performance. Although detecting with feature pyramid
    and feature fusing has achieved some encouraging
    results, there are still some limitations owing to
    the features' level variance among different
    layers. In this paper, we exploit that
    serial-parallel combined feature fusing can enhance
    object detection. Instead of detecting on the
    feature pyramid of backbone directly, we fuse
    different layers from backbone as base
    features. Then the base features are fed into a
    U-shape module to build local-global feature
    pyramid. At last, we use the pyramid to do the
    multi-scale detection with our combined feature
    fusing method. We call this one-stage detector
    SPCDet. It keeps real time speed and outperforms
    other detectors in trade-off between accuracy and
    speed.}
}
@InProceedings{torossian19,
  title    = {$\mathcal{X}$-Armed Bandits: Optimizing Quantiles,
    CVaR and Other Risks},
  author   = {Torossian, L\'eonard and Garivier, Aur\'elien and
    Picheny, Victor},
  pages    = {252--267},
  crossref = {acml19},
  abstract = {We propose and analyze StoROO, an algorithm for risk
    optimization on stochastic black-box functions
    derived from StoOO. Motivated by risk-averse
    decision making fields like agriculture, medicine,
    biology or finance, we do not focus on the mean
    payoff but on generic functionals of the return
    distribution. We provide a generic regret analysis
    of StoROO and illustrate its applicability with two
    examples: the optimization of quantiles and
    CVaR. Inspired by the bandit literature and
    black-box mean optimizers, StoROO relies on the
    possibility to construct confidence intervals for
    the targeted functional based on random-size
    samples. We detail their construction in the case of
    quantiles, providing tight bounds based on
    Kullback-Leibler divergence. We finally present
    numerical experiments that show a dramatic impact of
    tight bounds for the optimization of quantiles and
    CVaR.}
}
@InProceedings{sahu19,
  title    = {Optimal PAC-Bayesian Posteriors for Stochastic
    Classifiers and their use for Choice of SVM
    Regularization Parameter},
  author   = {Sahu, Puja and Hemachandra, Nandyala},
  pages    = {268--283},
  crossref = {acml19},
  abstract = {PAC-Bayesian set up involves a stochastic classifier
    characterized by a posterior distribution on a
    classifier set, offers a high probability bound on
    its averaged true risk and is robust to the training
    sample used. For a given posterior, this bound
    captures the trade off between averaged empirical
    risk and KL-divergence based model complexity
    term. Our goal is to identify an optimal posterior
    with the least PAC-Bayesian bound. We consider a
    finite classifier set and 5 distance functions:
    KL-divergence, its Pinsker's and a sixth degree
    polynomial approximations; linear and squared
    distances. Linear distance based model results in a
    convex optimization problem and we obtain a closed
    form expression for its optimal posterior. For
    uniform prior, this posterior has full support with
    weights negative-exponentially proportional to
    number of misclassifications. Squared distance and
    Pinsker's approximation bounds are possibly
    quasi-convex and are observed to have single local
    minimum. We derive fixed point equations (FPEs)
    using partial KKT system with strict positivity
    constraints. This obviates the combinatorial search
    for subset support of the optimal posterior. For
    uniform prior, exponential search on a
    full-dimensional simplex can be limited to an
    ordered subset of classifiers with increasing
    empirical risk values. These FPEs converge rapidly
    to a stationary point, even for a large classifier
    set when a solver fails. We apply these approaches
    to SVMs generated using a finite set of SVM
    regularization parameter values on 9 UCI
    datasets. The resulting optimal posteriors (on the
    set of regularization parameters) yield stochastic
    SVM classifiers with tight bounds. KL-divergence
    based bound is the tightest, but is computationally
    expensive due to its non-convex nature and multiple
    calls to a root finding algorithm. Optimal
    posteriors for all 5 distance functions have lowest
    10\% test error values on most datasets, with that
    of linear distance being the easiest to obtain.}
}
@InProceedings{huang19a,
  title    = {Realistic Image Generation using Region-phrase
    Attention},
  author   = {Huang, Wanming and Xu, Richard Yi Da and Oppermann,
    Ian},
  pages    = {284--299},
  crossref = {acml19},
  abstract = {The Generative Adversarial Network (GAN) has
    achieved remarkable progress in generating synthetic
    images from text, especially since the use of the
    attention mechanism. The current state-of-the-art
    algorithm applies attentions between individual
    regular-grid regions of an image and words of a
    sentence. These approaches are sufficient to
    generate images that contain a single object in its
    foreground. However, natural languages often involve
    complex foreground objects and the background may
    also constitute a variable portion of the generated
    image. In this case, the regular-grid region based
    image attention weights may not necessarily
    concentrate on the intended foreground region(s),
    which in turn, results in an unnatural looking
    image. Additionally, individual words such as ``a'',
    ``blue'' and ``shirt'' do not necessarily provide a
    full visual context unless they are applied
    together. For this reason, in our paper, we proposed
    a novel method in which we introduced an additional
    set of natural attentions between object-grid
    regions and word phrases. The object-grid region is
    defined by a set of auxiliary bounding boxes. They
    serve as superior location indicators to where the
    alignment and attention should be drawn with the
    word phrases. We perform experiments on the
    Microsoft Common Objects in Context (MSCOCO) dataset
    and prove that our proposed approach is capable of
    generating more realistic images compared with the
    current state-of-the-art algorithms.}
}
@InProceedings{huang19b,
title = {Efficient Diversified Mini-Batch Selection using
Variable High-layer Features},
author = {Huang, Wanming and Xu, Richard Yi Da and Oppermann,
Ian},
pages = {300--315},
crossref = {acml19},
abstract = {Stochastic Gradient Descent (SGD) has been widely
adopted in training Deep Neural networks of various
structures. Instead of using a full dataset, a
so-called {\itshape mini-batch} is selected during
each gradient descent iteration. This aims to speed
up the learning when a large number of training data
is present. Without the knowledge of its true
underlying distribution, one often samples the data
indices uniformly. Recently, researchers applied a
diversified mini-batch selection scheme through the
use of Determinantal Point Process (DPP), in order
to avoid having highly correlated samples in one
batch ({{Zhang et al.}} ({2017})). Despite its
success, the attempts were restrictive in the sense
that they used fixed features to construct the
Gram-matrix for DPP; using the raw or fixed
higher-layer features limited the amount of
potential improvement over the convergence rate. In
this paper, we instead proposed to use variable
higher-layer features which are updated at each
iteration when the parameter changes. To avoid the
high computation cost, several contributions have
been made to speed up the computation of DPP
sampling, including: (1) using hierarchical sampling
to break down a single DPP sampling with large
Gram-matrix into many DPP samplings of much smaller
Gram-matrix and (2) using Markov k-DPP to encourage
diversity across iterations. Empirical results show
a much more diversified mini batch in each iteration
in addition to a much improved convergence compared
with the previous approach.}
}
@InProceedings{schueler19,
title = {Gradient-based Training of Slow Feature Analysis by
Differentiable Approximate Whitening},
author = {Sch{\"u}ler, Merlin and Hlynsson, Hlynur Dav{\'\i}{\dh} and
Wiskott, Laurenz},
pages = {316--331},
crossref = {acml19},
abstract = {We propose Power Slow Feature Analysis, a
gradient-based method to extract temporally slow
features from a high-dimensional input stream that
varies on a faster time-scale, as a variant of Slow
Feature Analysis (SFA) that allows end-to-end
training of arbitrary differentiable architectures
and thereby significantly extends the class of
models that can effectively be used for slow feature
extraction. We provide experimental evidence that
PowerSFA is able to extract meaningful and
informative low-dimensional features in the case of
(a) synthetic low-dimensional data, (b) ego-visual
data, and also for (c) a general dataset for which
symmetric non-temporal similarities between points
can be defined. }
}
@InProceedings{staerman19,
title = {Functional Isolation Forest},
author = {Staerman, Guillaume and Mozharovskyi, Pavlo and
Cl\'emen\c{c}on, Stephan and d'Alch\'e-Buc, Florence},
pages = {332--347},
crossref = {acml19},
abstract = {For the purpose of monitoring the behavior of
complex infrastructures (\textit{e.g.} aircrafts,
transport or energy networks), high-rate sensors are
deployed to capture multivariate data, generally
unlabeled, in quasi continuous-time to detect
quickly the occurrence of anomalies that may
jeopardize the smooth operation of the system of
interest. The statistical analysis of such massive
data of functional nature raises many challenging
methodological questions. The primary goal of this
paper is to extend the popular {\scshape Isolation
Forest} (IF) approach to Anomaly Detection,
originally dedicated to finite dimensional
observations, to functional data. The major
difficulty lies in the wide variety of topological
structures that may equip a space of functions and
the great variety of patterns that may characterize
abnormal curves. We address the issue of (randomly)
splitting the functional space in a flexible manner
in order to isolate progressively any trajectory
from the others, a key ingredient to the efficiency
of the algorithm. Beyond a detailed description of
the algorithm, computational complexity and
stability issues are investigated at length. From
the scoring function measuring the degree of
abnormality of an observation provided by the
proposed variant of the IF algorithm, a
\textit{Functional Statistical Depth} function is
defined and discussed, as well as a multivariate
functional extension. Numerical experiments provide
strong empirical evidence of the accuracy of the
extension proposed.}
}
@InProceedings{bo19,
title = {Latent Multi-view Semi-Supervised Classification},
author = {Bo, Xiaofan and Kang, Zhao and Zhao, Zhitong and Su,
Yuanzhang and Chen, Wenyu},
pages = {348--362},
crossref = {acml19},
abstract = {To explore underlying complementary information from
multiple views, in this paper, we propose a novel
Latent Multi-view Semi-Supervised Classification
(LMSSC) method. Unlike most existing multi-view
semi-supervised classification methods that learn
the graph using original features, our method seeks
an underlying latent representation and performs
graph learning and label propagation based on the
learned latent representation. With the
complementarity of multiple views, the latent
representation could depict the data more
comprehensively than every single view individually,
accordingly making the graph more accurate and
robust as well. Finally, LMSSC integrates latent
representation learning, graph construction, and
label propagation into a unified framework, which
makes each subtask optimized. Experimental results
on real-world benchmark datasets validate the
effectiveness of our proposed method.}
}
@InProceedings{zhang19a,
title = {Cascaded and Dual: Discrimination Oriented Network
for Brain Tumor Classification},
author = {Zhang, Wenxuan and Zhang, Dong and Xiang, Xinguang},
pages = {363--378},
crossref = {acml19},
abstract = {Medical image classification is one of the
fundamental research topics in the domain of
computer-aided diagnosis. Although existing
classification models of the natural image can
produce promising results using deep convolutional
neural networks in some cases, it is difficult to
guarantee that these models can generate promising
performance for medical images. To bridge such a
gap, we propose a novel medical image classification
method for brain tumors in this paper, termed as
Discrimination Oriented Network (DONet). Inspired by
the attention learning mechanism of the human brain,
we first propose two categories of attention
learning modules, i.e., the Cascaded Attention
Learning (CAL) and the Dual Attention Learning
(DAL), which can learn the discrimination
information in both the spatial-wise and the
channel-wise dimensions in a fine-grained manner. By
the CAL and the DAL, the attention information of
different dimensions is calculated in a series
manner (for cascaded) and a parallel manner (for
dual), respectively. To demonstrate the superiority
of our proposed modules, we implement the CAL and
the DAL on the Deep Residual Network (ResNet) for
brain tumor classification. Compared with the
ResNet, experimental results show that the DONet has
a significant improvement in accuracy. Moreover,
compared with state-of-the-art classification
methods, the DONet can also achieve better
performance.}
}
@InProceedings{bondu19,
title = {\textsc{fears}: a \textsc{fe}ature \textsc{a}nd
\textsc{r}epresentation \textsc{s}election approach
for time series classification},
author = {Bondu, Alexis and Gay, Dominique and Lemaire,
Vincent and Boull\'e, Marc and Cervenka, Eole},
pages = {379--394},
crossref = {acml19},
abstract = {This paper presents a method which extracts
informative features while selecting simultaneously
adequate representations for Time Series
Classification. This method simultaneously (i)
selects alternative representations, such as
derivatives, cumulative integrals, power spectrum \ldots
(ii) and extracts informative features (via
automatic variable construction) from the selected
set of representations. The suggested approach is
decomposed in three steps: (i) the original time
series are transformed into several representations
which are stored as relational data; (ii) then, a
{regularized} propositionalisation method is applied
in order to generate informative aggregate features;
(iii) finally, a selective Naive Bayes classifier is
learned from the outcoming feature-value data
table. The previous steps are repeated by a forward
backward selection algorithm in order to select the
most informative subset of representations. The
suggested approach proves to be highly competitive
when compared with state-of-the-art methods while
extracting interpretable features. Furthermore, the
suggested approach is almost parameter free and only
requires few hardware resources.}
}
@InProceedings{lin19,
title = {Unified Policy Optimization for Robust Reinforcement
Learning},
author = {Lin, Zichuan and Zhao, Li and Bian, Jiang and Qin,
Tao and Yang, Guangwen},
pages = {395--410},
crossref = {acml19},
abstract = {Recent years have witnessed significant progress in
solving challenging problems across various domains
using deep reinforcement learning (RL). Despite the
success, the weak robustness has risen as a big
obstacle for applying existing RL algorithms into
real problems. In this paper, we propose unified
policy optimization (UPO), a sample-efficient shared
policy framework that allows a policy to update
itself by considering different gradients generated
by different policy gradient (PG)
methods. Specifically, we propose two algorithms
called UPO-MAB and UPO-ES, to leverage these
different gradients by adopting the idea of
multi-arm bandit (MAB) and evolution strategies
(ES), with the purpose of finding the gradient
direction leading to more performance gain with less
extra data cost. Extensive experiments show that our
approach can lead to stronger robustness and better
performance than baselines.}
}
@InProceedings{chen19,
title = {Multi-Label Learning with Regularization Enriched
Label-Specific Features},
author = {Chen, Ze-Sen and Zhang, Min-Ling},
pages = {411--424},
crossref = {acml19},
abstract = {Multi-label learning learns from examples each
associated with multiple class labels
simultaneously, and the goal is to induce a
predictive model which can assign a set of relevant
labels for the unseen instance. Label-specific
features serve as an effective strategy towards
inducing multi-label predictive model, where the
relevancy of each class label is determined by
employing tailored features encoding inherent and
distinct characteristics of the class label its
own. In this paper, a regularization based approach
named {\textsc{Reel}} is proposed for label-specific
features generation, which works by enriching
label-specific feature representation for each class
label via synergizing informative label-specific
features from other class labels with sparse
regularization. Specifically, full-order label
correlations are considered by {\textsc{Reel}} while
the number of classifiers induced for multi-label
prediction is linear to the number of class
labels. Extensive experiments on fifteen benchmark
multi-label data sets clearly show the favorable
performance of {\textsc{Reel}} against other
state-of-the-art multi-label learning approaches
with label-specific features.}
}
@InProceedings{zhang19b,
title = {An Attentive Memory Network Integrated with Aspect
Dependency for Document-Level Multi-Aspect Sentiment
Classification},
author = {Zhang, Qingxuan and Shi, Chongyang},
pages = {425--440},
crossref = {acml19},
abstract = {Document-level multi-aspect sentiment classification
is one of the foundational tasks in natural language
processing (NLP) and neural network methods have
achieved great success in reviews sentiment
classification. Most of recent works ignore the
relation between different aspects and do not take
into account the contexting dependent importance of
sentences and aspect keywords. In this paper, we
propose an attentive memory network for
document-level multi-aspect sentiment
classification. Unlike recent proposed models which
average word embeddings of aspect keywords to
represent aspect and utilize hierarchical
architectures to encode review documents, we adopt
attention-based memory networks to construct aspect
and sentence memories. The recurrent attention
operation is employed to capture long-distance
dependency across sentences and obtain aspect-aware
document representations over aspect and sentence
memories. Then, incorporating the neighboring
aspects related information into the final aspect
rating predictions by using multi-hop attention
memory networks. Experimental results on two
real-world datasets TripAdvisor and BeerAdvocate
show that our model achieves state-of-the-art
performance.}
}
@InProceedings{li19a,
title = {Multi-modal Representation Learning for Successive
{POI} Recommendation},
author = {Li, Lishan and Liu, Ying and Wu, Jianping and He,
Lin and Ren, Gang},
pages = {441--456},
crossref = {acml19},
abstract = {Successive POI recommendation is a fundamental
problem for location-based social networks
(LBSNs). POI recommendation takes a variety of POI
context information (e.g. spatial location and
textual comment) and user preference into
consideration. Existing POI recommendation systems
mainly focus on part of the POI context and user
preference with a specific modeling, which loses
valuable information from other aspects. In this
paper, we propose to construct a multi-modal
check-in graph, a heterogeneous graph that combines
five check-in aspects in a unified way. We further
propose a multi-modal representation learning model
based on the graph to jointly learn POI and user
representations. Finally, we employ an attentional
recurrent neural network based on the
representations for successive POI
recommendation. Experiments on a public dataset
studies the effects of modeling different aspects of
check-in records and demonstrates the effectiveness
of the method in improving POI recommendation
performance.}
}
@InProceedings{wang19f,
title = {Forward and Backward Knowledge Transfer for
Sentiment Classification},
author = {Wang, Hao and Liu, Bing and Wang, Shuai and Ma,
Nianzu and Yang, Yan},
pages = {457--472},
crossref = {acml19},
abstract = {This paper studies the problem of learning a