From dec03924b4aefa0c5caba57faf72a88c688d308c Mon Sep 17 00:00:00 2001 From: Dong Wang Date: Sun, 14 Oct 2012 01:18:23 +0800 Subject: [PATCH] add old blogs --- _config.yml | 8 +- ...03-10-careful-with-scala-for-loop.markdown | 169 ++++++++++++++++++ ...3-11-scala-annotation-specialized.markdown | 90 ++++++++++ ...licit-importing-implicit-in-scala.markdown | 41 +++++ ...3-13-scala-structual-type-is-slow.markdown | 35 ++++ 5 files changed, 339 insertions(+), 4 deletions(-) create mode 100644 source/_posts/2011-03-10-careful-with-scala-for-loop.markdown create mode 100644 source/_posts/2011-03-11-scala-annotation-specialized.markdown create mode 100644 source/_posts/2011-03-12-avoid-explicit-importing-implicit-in-scala.markdown create mode 100644 source/_posts/2012-03-13-scala-structual-type-is-slow.markdown diff --git a/_config.yml b/_config.yml index df18bbb..cc8b660 100644 --- a/_config.yml +++ b/_config.yml @@ -3,9 +3,9 @@ # ----------------------- # url: http://dongw.github.com -title: My Octopress Blog -subtitle: A blogging framework for hackers. -author: Your Name +title: Homerun +subtitle: My name is Dong Wang, a X-Googler and Scala enthusiast. +author: Dong Wang simple_search: http://google.com/search description: @@ -13,7 +13,7 @@ description: # You can customize the format as defined in # http://www.ruby-doc.org/core-1.9.2/Time.html#method-i-strftime # Additionally, %o will give you the ordinal representation of the day -date_format: "ordinal" +date_format: "%D" # RSS / Email (optional) subscription links (change if using something like Feedburner) subscribe_rss: /atom.xml diff --git a/source/_posts/2011-03-10-careful-with-scala-for-loop.markdown b/source/_posts/2011-03-10-careful-with-scala-for-loop.markdown new file mode 100644 index 0000000..fd1c0a2 --- /dev/null +++ b/source/_posts/2011-03-10-careful-with-scala-for-loop.markdown @@ -0,0 +1,169 @@ +--- +layout: post +title: "Scala的循环可能很慢!" 
+date: 2011-03-10 10:39 +comments: true +categories: [Scala, performance] +--- + + +下面这段Java代码只要90纳秒[^1]: + +``` java +int s = 0; +for (int i = 1; i <= 2000; i++) + for (int j = 1; j <= 2000; j++) + for (int k = 1; k <= 2000; k++) + s += 1; +``` +而相对应的Scala代码则需要1.3秒: + +``` scala +var s = 0 +for (i <- 1 to 2000; + j <- 1 to 2000; + k <- 1 to 2000) + s += 1 +``` +同样,下面的代码和上面的完全等价: + +``` scala +(1 to 2000).foreach( + i => (1 to 2000).foreach( + j => (1 to 2000).foreach(k => s += 1) + ) +) +``` + +改为简单的Scala while循环就可以快到60纳秒: + +``` scala + var s = 0 + var i = 1; var j = 1; var k = 1 + + while (i <= 2000) { j = 1 + while (j <= 2000) { k = 1 + while (k <= 2000) { s += 1; k += 1 } + j += 1 } + i += 1 } +``` + +想看看Scala的for循环可能有多可怕?如果我们编译: +``` scala +object Test { + def test() {} +} +``` +我们得到以下编译器输出(cleanup阶段之后的语法树,并非真正的bytecode): +``` scala + [[syntax trees at end of cleanup]]// Scala source: test.scala + package { + final object Test extends java.lang.Object with ScalaObject { + def test(): Unit = (); + def this(): object Test = { + Test.super.this(); + () + } + } + } +``` + +但是如果我们把前面那段Scala for循环代码加到`test`方法中,我们就可以发现编译的结果中overhead是多么大: +``` scala + [[syntax trees at end of cleanup]]// Scala source: test.scala + package { + final object Test extends java.lang.Object with ScalaObject { + def test(): Unit = { + var s$1: scala.runtime.IntRef = new scala.runtime.IntRef(0); + scala.this.Predef.intWrapper(1).to(2000).foreach$mVc$sp({ + (new anonymous class Test$$anonfun$test$1(s$1): Function1) + }) + }; + def this(): object Test = { + Test.super.this(); + () + } + }; + @SerialVersionUID(0) final class Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1$$anonfun$apply$mcVI$sp$2 extends scala.runtime.AbstractFunction1$mcVI$sp with Serializable { + final def apply(k: Int): Unit = Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1$$anonfun$apply$mcVI$sp$2.this.apply$mcVI$sp(k); + def apply$mcVI$sp(v1: Int): Unit = Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1$$anonfun$apply$mcVI$sp$2.this.$outer .Test$$anonfun$$anonfun$$$outer().s$1.elem 
= Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1$$anonfun$apply$mcVI$sp$2.this.$outer .Test$$anonfun$$anonfun$$$outer().s$1.elem.+(1); + private[this] val $outer: anonymous class Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1 = _; + final def apply(v1: java.lang.Object): java.lang.Object = { + Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1$$anonfun$apply$mcVI$sp$2.this.apply(scala.Int.unbox(v1)); + scala.runtime.BoxedUnit.UNIT + }; + def this($outer: anonymous class Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1): anonymous class Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1$$anonfun$apply$mcVI$sp$2 = { + if ($outer.eq(null)) + throw new java.lang.NullPointerException() + else + Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1$$anonfun$apply$mcVI$sp$2.this.$outer = $outer; + Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1$$anonfun$apply$mcVI$sp$2.super.this(); + () + } + }; + @SerialVersionUID(0) final class Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1 extends scala.runtime.AbstractFunction1$mcVI$sp with Serializable { + final def apply(j: Int): Unit = Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1.this.apply$mcVI$sp(j); + def apply$mcVI$sp(v1: Int): Unit = scala.this.Predef.intWrapper(1).to(2000).foreach$mVc$sp({ + (new anonymous class Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1$$anonfun$apply$mcVI$sp$2(Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1.this): Function1) + }); + private[this] val $outer: anonymous class Test$$anonfun$test$1 = _; + def Test$$anonfun$$anonfun$$$outer(): anonymous class Test$$anonfun$test$1 = Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1.this.$outer; + final def apply(v1: java.lang.Object): java.lang.Object = { + Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1.this.apply(scala.Int.unbox(v1)); + scala.runtime.BoxedUnit.UNIT + }; + def this($outer: anonymous class Test$$anonfun$test$1): anonymous class Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1 = { + if ($outer.eq(null)) + throw new java.lang.NullPointerException() + else + 
+ Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1.this.$outer = $outer; + Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1.super.this(); + () + } + }; + @SerialVersionUID(0) final class Test$$anonfun$test$1 extends scala.runtime.AbstractFunction1$mcVI$sp with Serializable { + final def apply(i: Int): Unit = Test$$anonfun$test$1.this.apply$mcVI$sp(i); + def apply$mcVI$sp(v1: Int): Unit = scala.this.Predef.intWrapper(1).to(2000).foreach$mVc$sp({ + (new anonymous class Test$$anonfun$test$1$$anonfun$apply$mcVI$sp$1(Test$$anonfun$test$1.this): Function1) + }); + final def apply(v1: java.lang.Object): java.lang.Object = { + Test$$anonfun$test$1.this.apply(scala.Int.unbox(v1)); + scala.runtime.BoxedUnit.UNIT + }; + val s$1: scala.runtime.IntRef = _; + def this(s$1: scala.runtime.IntRef): anonymous class Test$$anonfun$test$1 = { + Test$$anonfun$test$1.this.s$1 = s$1; + Test$$anonfun$test$1.super.this(); + () + } + } + } + +``` +## 结论 +在scala没有解决[相关的bug #1338](https://issues.scala-lang.org/browse/SI-1338)前,还是小心用它的for循环,包括foreach。 + + +## 一点更新: +我在自己的工作站上测试了下面两段代码的运行时间: + +三层循环,耗时7570毫秒: + +``` scala +var s = 0 +for (i <- 1 to 2000; + j <- 1 to 2000; + k <- 1 to 2000) + s += 1 +``` + +一层循环,耗时1毫秒: + +``` scala +var s = 0 +for (i <- 1 to 2000*2000*2000) + s += 1 +``` +看来对于单层循环,scala的效率还可以接受。(注意:`2000*2000*2000`等于80亿,超出了Int的范围,会溢出成负数,所以`1 to 2000*2000*2000`实际上是一个空区间,循环体一次都没有执行;这个1毫秒的结果并不能说明单层循环的真实开销。) + + diff --git a/source/_posts/2011-03-11-scala-annotation-specialized.markdown b/source/_posts/2011-03-11-scala-annotation-specialized.markdown new file mode 100644 index 0000000..63befc9 --- /dev/null +++ b/source/_posts/2011-03-11-scala-annotation-specialized.markdown @@ -0,0 +1,90 @@ +--- +layout: post +title: "有关Scala的 @specialized Annotation" +date: 2011-03-11 11:00 +comments: true +categories: [Scala, specialized] +--- + + +@specialized 主要用在Scala的泛型上,使得Scala编译器能够在编译的时候,为某些primitive类型提供一个更加高效的实现。这种高效源于避免了对指定primitive类型的boxing和unboxing(int -> Int -> int)[^1]。根据一个相关研究[^2],boxing和unboxing还是很花时间的。比如: + +``` scala + class My [A] { + def iden(x: A): A = x + } + val a = new My[Int] + for (i <- 1 to 
1000000) a.iden(i) +``` + +需要花40纳秒。改为: +``` scala +class My [@specialized(Int) A] { + def iden(x: A): A = x +} +val a = new My[Int] +for (i <- 1 to 1000000) a.iden(i) +``` +就只需要17纳秒。 + +下面看一个简单的用`@specialized`的例子: +``` scala +class Example[@specialized(Int) T](value: T) { + def get(): T = value +} +``` +用命令`scalac -print test.scala`编译后变为: +``` scala + [[syntax trees at end of cleanup]]// Scala source: test.scala + package { + class Example extends java.lang.Object with ScalaObject { + protected[this] val value: java.lang.Object = _; + def get(): java.lang.Object = Example.this.value; + def get$mcI$sp(): Int = scala.Int.unbox(Example.this.get()); + def this(value: java.lang.Object): Example = { + Example.this.value = value; + Example.super.this(); + () + } + }; + class Example$mcI$sp extends Example { + protected[this] val value$mcI$sp: Int = _; + override def get(): Int = Example$mcI$sp.this.get$mcI$sp(); + override def get$mcI$sp(): Int = Example$mcI$sp.this.value$mcI$sp; + override def get(): java.lang.Object = scala.Int.box(Example$mcI$sp.this.get()); + def this(value$mcI$sp: Int): Example$mcI$sp = { + Example$mcI$sp.this.value$mcI$sp = value$mcI$sp; + Example$mcI$sp.super.this(scala.Int.box(value$mcI$sp)); + () + } + } + } +``` + +可见有两个类被生成了。如果把源文件变为: +``` scala +class Example[@specialized(Int, Long) T](value: T) { + def get(): T = value +} +``` +则有第三个类被生成: +``` scala + [[syntax trees at end of cleanup]]// Scala source: test.scala + package { + ... + ... 
+ class Example$mcJ$sp extends Example { + protected[this] val value$mcJ$sp: Long = _; + override def get(): Long = Example$mcJ$sp.this.get$mcJ$sp(); + override def get$mcJ$sp(): Long = Example$mcJ$sp.this.value$mcJ$sp; + override def get(): java.lang.Object = scala.Long.box(Example$mcJ$sp.this.get()); + def this(value$mcJ$sp: Long): Example$mcJ$sp = { + Example$mcJ$sp.this.value$mcJ$sp = value$mcJ$sp; + Example$mcJ$sp.super.this(scala.Long.box(value$mcJ$sp)); + () + } + } + } +``` +理想情况是很多泛型都对所有primitive类型做specialized,不过这样编译后的代码就会成倍地增加了。Scala 2.8.1中只有下面的类有用到@specialized[^3]: +`Function0, Function1, Function2, Tuple1, Tuple2, Product1, Product2, AbstractFunction0, AbstractFunction1, AbstractFunction2`. \ No newline at end of file diff --git a/source/_posts/2011-03-12-avoid-explicit-importing-implicit-in-scala.markdown b/source/_posts/2011-03-12-avoid-explicit-importing-implicit-in-scala.markdown new file mode 100644 index 0000000..2218379 --- /dev/null +++ b/source/_posts/2011-03-12-avoid-explicit-importing-implicit-in-scala.markdown @@ -0,0 +1,41 @@ +--- +layout: post +title: "在Scala中如何避免导入implicit相关的定义" +date: 2011-03-11 13:06 +comments: true +categories: [Scala, implicit] +--- +Scala中的implicit能够让代码变得简洁很多。很多时候我们倾向于把这些implicit相关定义放到一个统一的地方,然后在各个package中应用。但每次用的时候,都需要做类似这样的import: + +``` scala + import com.readventure.MyImplicits._ +``` +下面我告诉大家一个简单的方式,可以避免这种没有必要的imports。 + +## 方法 + +我们可以在com/readventure/下面定义下面这个trait: +``` scala +package com.readventure + +trait MyImplicits { + implicit def str2opt(s: String) = Option(s) + ... 
+} +``` +然后在每个要用到这些implicit的包下面(比如`com.readventure.test1`)放置一个这样的文件,比如叫`package.scala`(文件名不重要): +``` scala +package com.readventure // not 'package com.readventure.test1' +package object test1 extends MyImplicits { /* your other stuff goes here */} +``` + +注意,这里的package的名字,以及package object的名字很重要,必须和相对应的路径对应。这样在`MyImplicits`里面的所有东西在`com.readventure.test1`的任何一个类里面都可以用了,不用显式import任何东西。比如: +``` scala +package com.readventure.test1 + +class SomeClass { + def testImplicit(str: String): Option[String] = str +} +``` + +`Package object`是Scala2.8的新特性,如果有兴趣,可以看看[这里](http://www.artima.com/scalazine/articles/package_objects.html)。 diff --git a/source/_posts/2012-03-13-scala-structual-type-is-slow.markdown b/source/_posts/2012-03-13-scala-structual-type-is-slow.markdown new file mode 100644 index 0000000..6ee4ecd --- /dev/null +++ b/source/_posts/2012-03-13-scala-structual-type-is-slow.markdown @@ -0,0 +1,35 @@ +--- +layout: post +title: "Scala的structural type的确慢不少" +date: 2012-03-13 01:12 +comments: true +categories: [Scala, Structural type, Performance] +--- + +Scala的Structural Type是用反射机制实现的。所以会慢一些。但我关心的是我经常用到的一个情况会慢多少。比如: +``` scala +type HasId = { + def id: Long +} +case class Foo(val id: Long) +``` +我经常会这样用: +``` scala +def loop1(foos: Seq[HasId]) = { + var i = 0L + foos.foreach(f => i += f.id) + println(i) +} +``` +这样所有带有`id`方法的类型我都可以用这个`loop1`方法了。如果不用Structural Type,可以这样做: +``` scala +def loop2(foos: Seq[Foo]) = { + var i = 0L + foos.foreach(f => i += f.id) + println(i) +} +``` +在我的工作站上,如果给定一个10000000这么大的Seq[Foo],`loop1`用了550ms,`loop2`用了262ms。 + +## 结论 +在这种情况下,用Structural Type的效率降低一半左右。 \ No newline at end of file