Skip to content

Commit

Permalink
Merge pull request #133 from eed3si9n/wip/content_hash
Browse files Browse the repository at this point in the history
Include content hash into File hash value
  • Loading branch information
eed3si9n committed Aug 12, 2023
2 parents 35a1249 + fc0d93d commit 66f05ac
Show file tree
Hide file tree
Showing 14 changed files with 350 additions and 93 deletions.
Expand Up @@ -3,14 +3,13 @@ package sjsonnew.benchmark
import java.io.File

import org.openjdk.jmh.annotations.Benchmark
import sjsonnew.IsoString
import sjsonnew.IsoStringLong

class FileIsoStringBenchmark {
class FileIsoStringLongBenchmark {

@Benchmark
def fileB: String = {
import sjsonnew.BasicJsonProtocol._
val isoFile = implicitly[IsoString[File]]
val isoFile = implicitly[IsoStringLong[File]]
val f = new File("/tmp")
isoFile.to(f)
}
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Expand Up @@ -39,7 +39,7 @@ lazy val core = (projectMatrix in file("core"))
)
)
.jvmPlatform(scalaVersions = allScalaVersions, settings = Seq(
libraryDependencies ++= testDependencies.value,
libraryDependencies ++= testDependencies.value ++ Seq(zeroAllocationHashing),
))

def support(n: String) =
Expand Down
2 changes: 2 additions & 0 deletions core/src/main/scala/sjsonnew/BasicJsonProtocol.scala
Expand Up @@ -29,12 +29,14 @@ trait BasicJsonProtocol
with AdditionalFormats
with UnionFormats
with FlatUnionFormats
with IsoStringLongFormats
with IsoFormats
with JavaPrimitiveFormats
with JavaExtraFormats
with CalendarFormats
with ImplicitHashWriters
with CaseClassFormats
with FileIsoStringLongs
with ThrowableFormats

object BasicJsonProtocol extends BasicJsonProtocol
31 changes: 31 additions & 0 deletions core/src/main/scala/sjsonnew/FileIsoStringLongs.scala
@@ -0,0 +1,31 @@
/*
* Copyright (C) 2023 Eugene Yokota
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package sjsonnew

import java.io.File
import java.net.URI
import java.nio.file.{ Path, Paths }

/**
 * Supplies [[IsoStringLong]] instances for `java.io.File` and `java.nio.file.Path`.
 *
 * In both instances the `String` half carries the location and the `Long` half is
 * whatever `HashUtil.farmHash` returns for the corresponding path.
 */
trait FileIsoStringLongs {
  /** `File` is rendered through `IsoStringLong.fileToString`; only the string half is used when reading back. */
  implicit lazy val fileStringLongIso: IsoStringLong[File] = {
    val encode: File => (String, Long) =
      f => (IsoStringLong.fileToString(f), HashUtil.farmHash(f.toPath()))
    val decode: ((String, Long)) => File =
      pair => IsoStringLong.uriToFile(new URI(pair._1))
    IsoStringLong.iso[File](encode, decode)
  }

  /** `Path` is rendered with `toString`; only the string half is used when reading back. */
  implicit lazy val pathStringLongIso: IsoStringLong[Path] = {
    val encode: Path => (String, Long) =
      path => (path.toString, HashUtil.farmHash(path))
    val decode: ((String, Long)) => Path =
      pair => Paths.get(pair._1)
    IsoStringLong.iso[Path](encode, decode)
  }
}
37 changes: 37 additions & 0 deletions core/src/main/scala/sjsonnew/HashUtil.scala
@@ -1,5 +1,9 @@
package sjsonnew

import java.io.{ BufferedInputStream, File, FileInputStream, FileNotFoundException, InputStream }
import java.nio.file.{ Files, Path }
import net.openhft.hashing.LongHashFunction

object HashUtil {
// https://github.com/addthis/stream-lib/blob/master/src/main/java/com/clearspring/analytics/hash/MurmurHash.java
def hashLong(data: Long): Int =
Expand All @@ -19,4 +23,37 @@ object HashUtil {
h ^= h >>> 15
h
}

/** Hashes `bytes` with the `farmNa` function of `LongHashFunction`. */
private[sjsonnew] def farmHash(bytes: Array[Byte]): Long = {
  val hasher = LongHashFunction.farmNa()
  hasher.hashBytes(bytes)
}

/**
 * Hashes the content of the file at `path`.
 *
 * Returns `0L` when the path does not exist or is a directory. Otherwise the
 * bytes fed to the byte-array `farmHash` are either the whole file content
 * (files under the size limit) or the file's SHA-256 digest (larger files).
 */
private[sjsonnew] def farmHash(path: Path): Long = {
  // Reading a whole file into one array is the efficient route for small files,
  // but for large files it risks an OutOfMemoryError, hence the digest fallback.
  val largeFileLimit = 10 * 1024 * 1024
  val hashable = Files.exists(path) && !Files.isDirectory(path)
  if (!hashable) 0L
  else {
    val input =
      if (Files.size(path) < largeFileLimit) Files.readAllBytes(path)
      else sha256(path.toFile)
    farmHash(input)
  }
}

/**
 * Calculates the SHA-256 digest of the given file's content.
 *
 * @param file the file to read
 * @return the digest bytes, or an empty array when opening the file fails
 *         with a `FileNotFoundException`
 */
def sha256(file: File): Array[Byte] =
  // the stream overload closes the stream once it has been consumed
  try sha256(new BufferedInputStream(new FileInputStream(file)))
  catch { case _: FileNotFoundException => Array.emptyByteArray }

/**
 * Calculates the SHA-256 digest of everything readable from `stream`,
 * closing the stream when finished (also when reading fails).
 *
 * @param stream the input to consume until end of stream
 * @return the digest bytes
 */
def sha256(stream: InputStream): Array[Byte] = {
  import java.security.{ DigestInputStream, MessageDigest }
  val BufferSize = 8192
  val digest = MessageDigest.getInstance("SHA-256")
  // closing the DigestInputStream also closes the wrapped `stream`,
  // so a single close in `finally` is enough
  val dis = new DigestInputStream(stream, digest)
  try {
    val buffer = new Array[Byte](BufferSize)
    // every read through `dis` feeds the bytes into `digest`; the data itself is discarded
    while (dis.read(buffer) >= 0) {}
    digest.digest()
  } finally dis.close()
}
}
85 changes: 85 additions & 0 deletions core/src/main/scala/sjsonnew/IsoStringLong.scala
@@ -0,0 +1,85 @@
/*
* Copyright (C) 2023 Eugene Yokota
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package sjsonnew

import java.io.File
import java.net.{ URI, URL }
import java.util.Locale

/**
 * Type class describing a two-way conversion between `A` and a `(String, Long)` pair.
 *
 * @tparam A the type being converted
 */
trait IsoStringLong[A] {
/** Converts `a` into its `(String, Long)` representation. */
def to(a: A): (String, Long)
/** Rebuilds an `A` from a `(String, Long)` pair. */
def from(p: (String, Long)): A
}

/** Factory for [[IsoStringLong]] instances plus the File/URI string helpers they share. */
object IsoStringLong {
  /** Builds an [[IsoStringLong]] that delegates to the two given functions. */
  def iso[A](to0: A => (String, Long), from0: ((String, Long)) => A): IsoStringLong[A] =
    new IsoStringLong[A] {
      def to(value: A): (String, Long) = to0.apply(value)
      def from(pair: (String, Long)): A = from0.apply(pair)
    }

  /** True when the `os.name` system property contains "windows" (case-insensitive). */
  private[sjsonnew] lazy val isWindows: Boolean = {
    val osName = System.getProperty("os.name")
    osName.toLowerCase(Locale.ENGLISH).contains("windows")
  }

  private[sjsonnew] final val FileScheme = "file"

  /**
   * Renders `file` as an ASCII URI string.
   *
   * Absolute files become `file://...` URIs; relative files become relative
   * URI references without a scheme.
   */
  private[sjsonnew] def fileToString(file: File): String = {
    val rawPath = file.getPath
    // an empty string as host forces "file://" instead of just "file:"
    def hostlessFileUri(path: String): URI =
      new URI(FileScheme, "", normalizeName(path), null)

    val uri: URI =
      if (rawPath.startsWith(File.separatorChar.toString) && isWindows) {
        // supports \\laptop\My Documents\Some.doc on Windows
        if (rawPath.startsWith("""\\""")) new URI(FileScheme, normalizeName(rawPath), null)
        // supports /tmp on Windows
        else hostlessFileUri(rawPath)
      }
      // file.toURI is deliberately not used, to avoid filesystem syscalls
      else if (file.isAbsolute) hostlessFileUri(ensureHeadSlash(file.getAbsolutePath))
      else new URI(null, normalizeName(rawPath), null)

    uri.toASCIIString
  }

  /** Prefixes a non-empty `name` with the platform separator unless it already starts with it. */
  private[this] def ensureHeadSlash(name: String) =
    if (name.isEmpty || name.head == File.separatorChar) name
    else s"${File.separatorChar}$name"

  /** Replaces the platform separator with '/' when the two differ. */
  private[this] def normalizeName(name: String) =
    File.separatorChar match {
      case '/' => name
      case sep => name.replace(sep, '/')
    }

  /**
   * Converts a URI (as produced by `fileToString`) back into a `File`.
   *
   * Asserts that the scheme is either absent or "file".
   */
  private[sjsonnew] def uriToFile(uri: URI): File = {
    val ssp = uri.getSchemeSpecificPart
    // scheme might be omitted for relative URI reference.
    val scheme = uri.getScheme
    assert(
      scheme == null || scheme == FileScheme,
      s"Expected protocol to be '$FileScheme' or empty in URI $uri"
    )
    val rooted = ssp.startsWith("/")
    if (uri.getAuthority == null && rooted) new File(uri)
    else if (!rooted && ssp.contains(":")) new File("//" + ssp)
    else new File(ssp)
  }
}
42 changes: 42 additions & 0 deletions core/src/main/scala/sjsonnew/IsoStringLongFormats.scala
@@ -0,0 +1,42 @@
/*
* Copyright (C) 2023 Eugene Yokota
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package sjsonnew

/** Derives a [[JsonFormat]] for any type that has an [[IsoStringLong]] instance. */
trait IsoStringLongFormats {
  /**
   * Writes `A` as an object with a string field "first" and a long field "second",
   * and reads it back from the same two fields.
   */
  implicit def isoStringLongFormat[A: IsoStringLong]: JsonFormat[A] = new JsonFormat[A] {
    val iso = implicitly[IsoStringLong[A]]

    def write[J](a: A, builder: Builder[J]): Unit = {
      val encoded = iso.to(a)
      builder.beginObject()
      builder.addFieldName("first")
      builder.writeString(encoded._1)
      builder.addFieldName("second")
      builder.writeLong(encoded._2)
      builder.endObject()
    }

    def read[J](jsOpt: Option[J], unbuilder: Unbuilder[J]): A =
      jsOpt match {
        case None =>
          deserializationError(s"Expected JsObject but got None")
        case Some(js) =>
          unbuilder.beginObject(js)
          val text = unbuilder.readField[String]("first")
          val number = unbuilder.readField[Long]("second")
          unbuilder.endObject()
          iso.from((text, number))
      }
  }
}
59 changes: 2 additions & 57 deletions core/src/main/scala/sjsonnew/JavaExtraFormats.scala
Expand Up @@ -19,11 +19,10 @@ package sjsonnew
import java.net.{ URI, URL }
import java.io.File
import java.math.{ BigInteger, BigDecimal => JBigDecimal }
import java.util.{ Locale, Optional, UUID }
import java.util.{ Optional, UUID }

trait JavaExtraFormats {
this: PrimitiveFormats with AdditionalFormats with IsoFormats =>
import JavaExtraFormats._

private[this] type JF[A] = JsonFormat[A] // simple alias for reduced verbosity

Expand All @@ -40,57 +39,6 @@ trait JavaExtraFormats {
implicit val urlStringIso: IsoString[URL] = IsoString.iso[URL](
_.toURI.toASCIIString, (s: String) => (new URI(s)).toURL)

private[this] final val FileScheme = "file"

implicit val fileStringIso: IsoString[File] = IsoString.iso[File](
(f: File) => {
val p = f.getPath
if (p.startsWith(File.separatorChar.toString) && isWindows) {
if (p.startsWith("""\\""")) {
// supports \\laptop\My Documents\Some.doc on Windows
new URI(FileScheme, normalizeName(p), null).toASCIIString
}
else {
// supports /tmp on Windows
new URI(FileScheme, "", normalizeName(p), null).toASCIIString
}
} else if (f.isAbsolute) {
//not using f.toURI to avoid filesystem syscalls
//we use empty string as host to force file:// instead of just file:
new URI(FileScheme, "", normalizeName(ensureHeadSlash(f.getAbsolutePath)), null).toASCIIString
} else {
new URI(null, normalizeName(f.getPath), null).toASCIIString
}
},
(s: String) => uriToFile(new URI(s)))

private[this] def ensureHeadSlash(name: String) = {
if(name.nonEmpty && name.head != File.separatorChar) s"${File.separatorChar}$name"
else name
}
private[this] def normalizeName(name: String) = {
val sep = File.separatorChar
if (sep == '/') name else name.replace(sep, '/')
}

private[this] def uriToFile(uri: URI): File = {
val part = uri.getSchemeSpecificPart
// scheme might be omitted for relative URI reference.
assert(
Option(uri.getScheme) match {
case None | Some(FileScheme) => true
case _ => false
},
s"Expected protocol to be '$FileScheme' or empty in URI $uri"
)
Option(uri.getAuthority) match {
case None if part startsWith "/" => new File(uri)
case _ =>
if (!(part startsWith "/") && (part contains ":")) new File("//" + part)
else new File(part)
}
}

implicit def optionalFormat[A :JF]: JF[Optional[A]] = new OptionalFormat[A]
final class OptionalFormat[A :JF] extends JF[Optional[A]] {
lazy val elemFormat = implicitly[JF[A]]
Expand All @@ -112,7 +60,4 @@ trait JavaExtraFormats {
}
}

object JavaExtraFormats {
private[sjsonnew] lazy val isWindows: Boolean =
System.getProperty("os.name").toLowerCase(Locale.ENGLISH).contains("windows")
}
object JavaExtraFormats
45 changes: 45 additions & 0 deletions core/src/main/scala/sjsonnew/PathOnlyFormats.scala
@@ -0,0 +1,45 @@
/*
* Copyright (C) 2023 Eugene Yokota
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package sjsonnew

import java.io.File
import java.net.URI
import java.nio.file.{ Path, Paths }

/**
 * [[JsonFormat]] instances for `File` and `Path` that write only the location
 * as a single string value.
 */
trait PathOnlyFormats {
  /** Writes a `File` via `IsoStringLong.fileToString` and reads it back via `IsoStringLong.uriToFile`. */
  implicit val pathOnlyFileFormat: JsonFormat[File] = new JsonFormat[File] {
    def write[J](file: File, builder: Builder[J]): Unit = {
      val rendered = IsoStringLong.fileToString(file)
      builder.writeString(rendered)
    }
    def read[J](jsOpt: Option[J], unbuilder: Unbuilder[J]): File =
      jsOpt.fold[File](deserializationError(s"Expected JsString but got None")) { js =>
        val uri = new URI(unbuilder.readString(js))
        IsoStringLong.uriToFile(uri)
      }
  }

  /** Writes a `Path` via `toString` and reads it back via `Paths.get`. */
  implicit val pathOnlyPathFormat: JsonFormat[Path] = new JsonFormat[Path] {
    def write[J](file: Path, builder: Builder[J]): Unit = {
      val rendered = file.toString
      builder.writeString(rendered)
    }
    def read[J](jsOpt: Option[J], unbuilder: Unbuilder[J]): Path =
      jsOpt.fold[Path](deserializationError(s"Expected JsString but got None")) { js =>
        Paths.get(unbuilder.readString(js))
      }
  }
}

/** Import `PathOnlyFormats._` to bring the path-only formats into scope. */
object PathOnlyFormats extends PathOnlyFormats
2 changes: 2 additions & 0 deletions core/src/main/scala/sjsonnew/package.scala
Expand Up @@ -24,9 +24,11 @@ package object sjsonnew
with AdditionalFormats
with UnionFormats
with FlatUnionFormats
with IsoStringLongFormats
with IsoFormats
with JavaPrimitiveFormats
with ThrowableFormats
with FileIsoStringLongs
with ImplicitHashWriters
{
def deserializationError(msg: String, cause: Throwable = null, fieldNames: List[String] = Nil) = throw new DeserializationException(msg, cause, fieldNames)
Expand Down
1 change: 1 addition & 0 deletions project/Dependencies.scala
Expand Up @@ -20,4 +20,5 @@ object Dependencies {
lazy val jawnSpray = "org.typelevel" %% "jawn-spray" % jawnVersion
lazy val shadedJawnParser = "com.eed3si9n" %% "shaded-jawn-parser" % "1.3.2"
lazy val lmIvy = "org.scala-sbt" %% "librarymanagement-ivy" % "1.2.4"
lazy val zeroAllocationHashing = "net.openhft" % "zero-allocation-hashing" % "0.10.1"
}

0 comments on commit 66f05ac

Please sign in to comment.