From 131b42d02806579cd66935510274ee436834d805 Mon Sep 17 00:00:00 2001
From: ayman <aymanalbaz98@gmail.com>
Date: Wed, 9 Jun 2021 18:32:11 -0400
Subject: [PATCH] First commit

---
 CHANGELOG.md             |  12 +++
 LICENSE.txt              | 202 +++++++++++++++++++++++++++++++++++++++
 README.md                | 129 +++++++++++++++++++++++++
 distances.nimble         |  14 +++
 src/distances/seq.nim    | 159 ++++++++++++++++++++++++++++++
 src/distances/tensor.nim | 159 ++++++++++++++++++++++++++++++
 src/distances/vector.nim | 159 ++++++++++++++++++++++++++++++
 tests/config.nims        |   1 +
 tests/test_seq.nim       |  91 ++++++++++++++++++
 tests/test_tensor.nim    |  91 ++++++++++++++++++
 tests/test_vector.nim    |  90 +++++++++++++++++
 11 files changed, 1107 insertions(+)
 create mode 100644 CHANGELOG.md
 create mode 100644 LICENSE.txt
 create mode 100644 README.md
 create mode 100644 distances.nimble
 create mode 100644 src/distances/seq.nim
 create mode 100644 src/distances/tensor.nim
 create mode 100644 src/distances/vector.nim
 create mode 100644 tests/config.nims
 create mode 100644 tests/test_seq.nim
 create mode 100644 tests/test_tensor.nim
 create mode 100644 tests/test_vector.nim

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..8a19316
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,12 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+
+## [0.1.0] - 2021-06-09
+### Added
+- Created distances library
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..692c6f0
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2021 Ayman Albaz
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..041aa8a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,129 @@
+# Distances
+
+Distances is a high performance Nim library for calculating distances.
+
+This library is designed to allow users to calculate common distance metrics across all the popular sequence based libraries in Nim.
+
+## Supported Libraries
+Current supported sequence based libraries include:
+1. [sequtils](https://github.com/nim-lang/Nim/blob/devel/lib/pure/collections/sequtils.nim)
+2. [arraymancer](https://github.com/mratsim/Arraymancer)
+3. [neo](https://github.com/andreaferretti/neo)
+
+
+## Supported Distance Metrics
+Current supported distance metrics include:
+
+| Distance          | Command                         |
+|-------------------|---------------------------------|
+| Hamming           | hamming_distance(x1, x2)        |
+| Euclidean         | euclidean_distance(x1, x2)      |
+| Squared Euclidean | sqeuclidean_distance(x1, x2)    |
+| City Block        | cityblock_distance(x1, x2)      |
+| Total Variation   | totalvariation_distance(x1, x2) |
+| Jaccard           | jaccard_distance(x1, x2)        |
+| Cosine            | cosine_distance(x1, x2)         |
+| KL Divergence     | kldivergence_distance(x1, x2)   |
+
+## Examples 
+
+### Calculating Cosine Distance - sequtils
+Note: All computations are done row-wise.
+```Nim
+import sequtils
+import distances/seq
+
+let 
+    num_rows = 100
+    num_cols = 100
+    input_seq_int = newSeq[int](num_cols)
+    input_seq_seq_int = newSeqWith(num_rows, newSeq[int](num_cols))
+
+# 1D distance
+echo cosine_distance(input_seq_int, input_seq_int)
+
+# 2D distance (Pairwise)
+echo pairwise(input_seq_seq_int, cosine_distance)
+```
+
+
+### Calculating Cosine Distance - arraymancer
+Note: Only 2D Tensors are supported. All computations are done column wise.
+```Nim
+import arraymancer
+import distances/tensor
+
+let 
+    num_rows = 100
+    num_cols = 100
+    input_tensor_1d_int = zeros[int](1, num_cols)
+    input_tensor_2d_int = zeros[int](num_rows, num_cols)
+
+# 1D distance
+echo cosine_distance(input_tensor_1d_int, input_tensor_1d_int)
+
+# 2D distance (Pairwise)
+echo pairwise(input_tensor_2d_int, cosine_distance)
+```
+
+
+### Calculating Cosine Distance - neo
+Note: All computations are done column wise.
+Warning: Neo matrices seem to run 1-2 order of magnitudes slower than sequtils and arraymancer. Please contact me or submit a PR if you know why.
+```Nim
+import neo
+import distances/vector
+
+let 
+    num_rows = 100
+    num_cols = 100
+    input_vector_int = makeVector(num_cols, proc(i: int): int = 0)
+    input_matrix_int = makeMatrix(num_rows, num_cols, proc(i, j: int): int = 0)
+
+# 1D distance
+echo cosine_distance(input_vector_int, input_vector_int)
+
+# 2D distance (Pairwise)
+echo pairwise(input_matrix_int, cosine_distance)
+```
+
+### Normalization
+All distance metrics support the optional `normalize` (defaults to `false`) parameter. This normalizes distance outputs (between -1 and 1). Note, while all distance metrics have this parameter only, it will do nothing for jaccard, cosine, and KL divergence distances.
+
+E.g.
+```Nim
+discard cosine_distance(input_vector_int, input_vector_int, normalize=true)
+discard pairwise(input_matrix_int, cosine_distance, normalize=true)
+```
+
+### Symmetry
+The `pairwise` procs always compute the lower left triangle of the 2D sequence to save time. To get a full matrix, use the `symmetrize(X, how: string = "l=>u")` proc.
+
+E.g.
+```Nim
+discard symmetrize(X, "l=>u")	# Copy lower left triangle to upper right triangle
+discard symmetrize(X, "u=>l")	# Copy upper right triangle to lower left triangle
+``` 
+
+## Performance
+
+To get optimal performance, here are the recommended compiler flags:
+`nim --cc:gcc --passC:"-fopenmp -ffast-math" --passL:"-fopenmp -ffast-math" --d:release -t:-mavx2 -t:-mfma c -r myScript.nim`.
+- openmp -> Multiprocessing for the `pairwise` procs. Number of threads is equal to ENV variable `OMP_NUM_THREADS`.
+- ffast-math -> ~2x float multiplication speedups
+- d:release -> ~100x pairwise speedup
+- d:danger -> ~120x pairwise speedup
+- t:-mavx2 and -t:-mfma -> ~0.20x pairwise speedup
+
+
+## TODO
+- Neo matrices seem to run 1-2 orders of magnitudes slower than sequtils and arraymancer. The reason is unknown to me.
+- Add more distance metrics
+- Add support for distance metrics with more than 2 arguments
+
+Performance, feature, and documentation PR's are always welcome.
+
+
+## Contact
+I can be reached at aymanalbaz98@gmail.com
+
diff --git a/distances.nimble b/distances.nimble
new file mode 100644
index 0000000..c661f97
--- /dev/null
+++ b/distances.nimble
@@ -0,0 +1,14 @@
+# Package
+
+version       = "0.1.0"
+author        = "ayman albaz"
+description   = "Distances is a high performance Nim library for calculating distances."
+license       = "Apache License 2.0"
+srcDir        = "src"
+
+
+# Dependencies
+
+requires "nim >= 1.4.6"
+requires "arraymancer >= 0.6.2"
+requires "neo >= 0.3.1"
\ No newline at end of file
diff --git a/src/distances/seq.nim b/src/distances/seq.nim
new file mode 100644
index 0000000..87121b6
--- /dev/null
+++ b/src/distances/seq.nim
@@ -0,0 +1,159 @@
+type
+    Submodule* = object
+        name*: string
+
+proc initSubmodule*(): Submodule =
+    Submodule(name: "Anonymous")
+
+
+import math
+import sequtils
+
+{.nanChecks: on, infChecks: on.}
+proc hamming_distance*[T: SomeNumber](x1, x2: seq[T], normalize: bool = false): float =
+    let num_cols = len(x1)
+    var total: int32 = 0
+    for k in 0..num_cols-1:
+        total += (x1[k] != x2[k]).int32
+    if normalize:
+        return total.float / num_cols.float
+    else:
+        return total.float
+
+
+proc euclidean_distance*[T: SomeNumber](x1, x2: seq[T], normalize: bool = false): float =
+    let num_cols = len(x1)
+    var total: typeof(x1[0]) = 0
+    var total_intermediate: typeof(x1[0]) = 0
+    for k in 0..num_cols-1:
+        total_intermediate = (x1[k] - x2[k])
+        total += total_intermediate * total_intermediate
+    if normalize:
+        return sqrt(total.float) / num_cols.float
+    else:
+        return sqrt(total.float)
+
+
+proc sqeuclidean_distance*[T: SomeNumber](x1, x2: seq[T], normalize: bool = false): float =
+    let num_cols = len(x1)
+    var total: typeof(x1[0]) = 0
+    var total_intermediate: typeof(x1[0]) = 0
+    for k in 0..num_cols-1:
+        total_intermediate = (x1[k] - x2[k])
+        total += total_intermediate * total_intermediate
+    if normalize:
+        return total.float / num_cols.float
+    else:
+        return total.float
+
+
+proc cityblock_distance*[T: SomeNumber](x1, x2: seq[T], normalize: bool = false): float =
+    let num_cols = len(x1)
+    var total: typeof(x1[0]) = 0
+    for k in 0..num_cols-1:
+        total += abs(x1[k] - x2[k])
+    if normalize:
+        return total.float / num_cols.float
+    else:
+        return total.float
+
+
+proc totalvariation_distance*[T: SomeNumber](x1, x2: seq[T], normalize: bool = false): float =
+    let num_cols = len(x1)
+    var total: typeof(x1[0]) = 0
+    for k in 0..num_cols-1:
+        total += abs(x1[k] - x2[k])
+    if normalize:
+        return total.float / 2.0.float / num_cols.float
+    else:
+        return total.float / 2.0.float
+
+
+proc jaccard_distance*[T: SomeNumber](x1, x2: seq[T], normalize: bool = false): float =
+    let num_cols = len(x1)
+    var 
+        total_min: typeof(x1[0]) = 0
+        total_max: typeof(x1[0]) = 0
+    for k in 0..num_cols-1:
+        total_min += min(x1[k], x2[k])
+        total_max += max(x1[k], x2[k])
+    if total_max == 0:
+        return 0.0.float
+    else:
+        return 1.0.float - (total_min / total_max).float
+
+
+proc cosine_distance*[T: SomeNumber](x1, x2: seq[T], normalize: bool = false): float =
+    let num_cols = len(x1)
+    var 
+        total_x12: typeof(x1[0]) = 0
+        total_x11: typeof(x1[0]) = 0
+        total_x22: typeof(x1[0]) = 0
+    for k in 0..num_cols-1:
+        total_x12 += x1[k] * x2[k]
+        total_x11 += x1[k] * x1[k]
+        total_x22 += x2[k] * x2[k]
+    if total_x11 == 0 or total_x22 == 0:
+        return 0.0.float
+    else:
+        return 1.0.float - total_x12.float / (sqrt(total_x11.float) * sqrt(total_x22.float))
+
+
+proc kldivergence_distance*[T: SomeNumber](x1, x2: seq[T], normalize: bool = false): float =
+    let num_cols = len(x1)
+    var total: float
+    for k in 0..num_cols-1:
+        if x1[k] == 0 and x2[k] == 0:
+            continue
+        else:
+            total += x1[k].float * ln(x1[k] / x2[k]).float
+    return total.float
+
+
+proc pairwise*[T: SomeNumber](X: seq[seq[T]], distance: (proc(x1, x2: seq[T], normalize: bool): float), normalize: bool = false): seq[seq[float]] =
+    # Computes the pairwise distance of a 2D matrix across the selected distance
+    let num_rows = len(X)
+    var dist = newSeqWith(num_rows, newSeq[float](num_rows))
+    for i in 0||(num_rows-1):
+        for j in 0..i:
+            dist[i][j] = distance(X[i], X[j], normalize)
+    return dist
+
+
+proc symmetrize*(X: seq[seq[SomeNumber]], how: string = "l=>u"): seq[seq[SomeNumber]] = 
+    # Mutates X symmetrical by copying upper triangle to lower triangle ("u=>l") or lower triangle to upper triangle ("l=>r")
+    let num_rows = len(X)
+    let num_cols = len(X[0])
+    var X_copy = X
+    if num_rows != num_cols:
+        raise newException(ValueError, "Tensor must be symmetrical")
+    if how != "l=>u" and how != "u=>l":
+        raise newException(ValueError, "'how' must be one of 'l=>u' or 'u=>l'")
+    if how == "l=>u":
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X_copy[j][i] = X[i][j]
+    else:
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X_copy[i][j] = X[j][i]
+    return X_copy
+
+
+proc symmetrize*(X: var seq[seq[SomeNumber]], how: string = "l=>u"): seq[seq[SomeNumber]] = 
+    # Mutates X symmetrical by copying upper triangle to lower triangle ("u=>l") or lower triangle to upper triangle ("l=>r")
+    let num_rows = len(X)
+    let num_cols = len(X[0])
+    if num_rows != num_cols:
+        raise newException(ValueError, "Tensor must be symmetrical")
+    if how != "l=>u" and how != "u=>l":
+        raise newException(ValueError, "'how' must be one of 'l=>u' or 'u=>l'")
+    if how == "l=>u":
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X[j][i] = X[i][j]
+    else:
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X[i][j] = X[j][i]
+    return X
diff --git a/src/distances/tensor.nim b/src/distances/tensor.nim
new file mode 100644
index 0000000..4ea1a32
--- /dev/null
+++ b/src/distances/tensor.nim
@@ -0,0 +1,159 @@
+type
+    Submodule* = object
+        name*: string
+
+proc initSubmodule*(): Submodule =
+    Submodule(name: "Anonymous")
+
+
+import Arraymancer
+import math
+
+{.nanChecks: on, infChecks: on.}
+proc hamming_distance*[T: SomeNumber](x1, x2: Tensor[T], normalize: bool = false): float =
+    let num_rows = x1.shape[0]
+    var total: int32 = 0
+    for k in 0..num_rows-1:
+        total += (x1[k, 0] != x2[k, 0]).int32
+    if normalize:
+        return total.float / num_rows.float
+    else:
+        return total.float
+
+
+proc euclidean_distance*[T: SomeNumber](x1, x2: Tensor[T], normalize: bool = false): float =
+    let num_rows = x1.shape[0]
+    var total: typeof(x1[0]) = 0
+    var total_intermediate: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total_intermediate = (x1[k, 0] - x2[k, 0])
+        total += total_intermediate * total_intermediate
+    if normalize:
+        return sqrt(total.float) / num_rows.float
+    else:
+        return sqrt(total.float)
+
+
+proc sqeuclidean_distance*[T: SomeNumber](x1, x2: Tensor[T], normalize: bool = false): float =
+    let num_rows = x1.shape[0]
+    var total: typeof(x1[0]) = 0
+    var total_intermediate: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total_intermediate = (x1[k, 0] - x2[k, 0])
+        total += total_intermediate * total_intermediate
+    if normalize:
+        return total.float / num_rows.float
+    else:
+        return total.float
+
+
+proc cityblock_distance*[T: SomeNumber](x1, x2: Tensor[T], normalize: bool = false): float =
+    let num_rows = x1.shape[0]
+    var total: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total += abs(x1[k, 0] - x2[k, 0])
+    if normalize:
+        return total.float / num_rows.float
+    else:
+        return total.float
+
+
+proc totalvariation_distance*[T: SomeNumber](x1, x2: Tensor[T], normalize: bool = false): float =
+    let num_rows = x1.shape[0]
+    var total: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total += abs(x1[k, 0] - x2[k, 0])
+    if normalize:
+        return total.float / 2.0.float / num_rows.float
+    else:
+        return total.float / 2.0.float
+
+
+proc jaccard_distance*[T: SomeNumber](x1, x2: Tensor[T], normalize: bool = false): float =
+    let num_rows = x1.shape[0]
+    var 
+        total_min: typeof(x1[0]) = 0
+        total_max: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total_min += min(x1[k, 0], x2[k, 0])
+        total_max += max(x1[k, 0], x2[k, 0])
+    if total_max == 0:
+        return 0.0.float
+    else:
+        return 1.0.float - (total_min / total_max).float
+
+
+proc cosine_distance*[T: SomeNumber](x1, x2: Tensor[T], normalize: bool = false): float =
+    let num_rows = x1.shape[0]
+    var 
+        total_x12: typeof(x1[0]) = 0
+        total_x11: typeof(x1[0]) = 0
+        total_x22: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total_x12 += x1[k, 0] * x2[k, 0]
+        total_x11 += x1[k, 0] * x1[k, 0]
+        total_x22 += x2[k, 0] * x2[k, 0]
+    if total_x11 == 0 or total_x22 == 0:
+        return 0.0.float
+    else:
+        return 1.0.float - total_x12.float / (sqrt(total_x11.float) * sqrt(total_x22.float))
+
+
+proc kldivergence_distance*[T: SomeNumber](x1, x2: Tensor[T], normalize: bool = false): float =
+    let num_rows = x1.shape[0]
+    var total: float
+    for k in 0..num_rows-1:
+        if x1[k, 0] == 0 and x2[k, 0] == 0:
+            continue
+        else:
+            total += x1[k, 0].float * ln(x1[k, 0] / x2[k, 0]).float
+    return total.float
+
+
+proc pairwise*[T: SomeNumber](X: Tensor[T], distance: (proc(x1, x2: Tensor[T], normalize: bool): float), normalize: bool = false): Tensor[float] =
+    # Computes the pairwise distance of a 2D matrix across the selected distance
+    let num_cols = X.shape[1]
+    var dist = zeros[float](num_cols, num_cols)
+    for i in 0||(num_cols-1):
+        for j in 0..i:
+            dist[i, j] = distance(X[0.._, i], X[0.._, j], normalize)
+    return dist
+
+
+proc symmetrize*(X: Tensor[SomeNumber], how: string = "l=>u"): Tensor[SomeNumber] = 
+    # Mutates X symmetrical by copying upper triangle to lower triangle ("u=>l") or lower triangle to upper triangle ("l=>r")
+    let num_rows = X.shape[0]
+    let num_cols = X.shape[1]
+    var X_copy = X
+    if num_rows != num_cols:
+        raise newException(ValueError, "Matrix must be symmetrical")
+    if how != "l=>u" and how != "u=>l":
+        raise newException(ValueError, "'how' must be one of 'l=>u' or 'u=>l'")
+    if how == "l=>u":
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X_copy[j, i] = X[i, j]
+    else:
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X_copy[i, j] = X[j, i]
+    return X_copy
+
+
+proc symmetrize*(X: var Tensor[SomeNumber], how: string = "l=>u"): Tensor[SomeNumber] = 
+    # Mutates X symmetrical by copying upper triangle to lower triangle ("u=>l") or lower triangle to upper triangle ("l=>r")
+    let num_rows = X.shape[0]
+    let num_cols = X.shape[1]
+    if num_rows != num_cols:
+        raise newException(ValueError, "Matrix must be symmetrical")
+    if how != "l=>u" and how != "u=>l":
+        raise newException(ValueError, "'how' must be one of 'l=>u' or 'u=>l'")
+    if how == "l=>u":
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X[j, i] = X[i, j]
+    else:
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X[i, j] = X[j, i]
+    return X
\ No newline at end of file
diff --git a/src/distances/vector.nim b/src/distances/vector.nim
new file mode 100644
index 0000000..03a54ec
--- /dev/null
+++ b/src/distances/vector.nim
@@ -0,0 +1,159 @@
+type
+    Submodule* = object
+        name*: string
+
+proc initSubmodule*(): Submodule =
+    Submodule(name: "Anonymous")
+
+
+import math
+import neo
+
+{.nanChecks: on, infChecks: on.}
+proc hamming_distance*[T: SomeNumber](x1, x2: Vector[T], normalize: bool = false): float =
+    let num_rows = x1.len
+    var total: int32 = 0
+    for k in 0..num_rows-1:
+        total += (x1[k] != x2[k]).int32
+    if normalize:
+        return total.float / num_rows.float
+    else:
+        return total.float
+
+
+proc euclidean_distance*[T: SomeNumber](x1, x2: Vector[T], normalize: bool = false): float =
+    let num_rows = x1.len
+    var total: typeof(x1[0]) = 0
+    var total_intermediate: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total_intermediate = (x1[k] - x2[k])
+        total += total_intermediate * total_intermediate
+    if normalize:
+        return sqrt(total.float) / num_rows.float
+    else:
+        return sqrt(total.float)
+
+
+proc sqeuclidean_distance*[T: SomeNumber](x1, x2: Vector[T], normalize: bool = false): float =
+    let num_rows = x1.len
+    var total: typeof(x1[0]) = 0
+    var total_intermediate: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total_intermediate = (x1[k] - x2[k])
+        total += total_intermediate * total_intermediate
+    if normalize:
+        return total.float / num_rows.float
+    else:
+        return total.float
+
+
+proc cityblock_distance*[T: SomeNumber](x1, x2: Vector[T], normalize: bool = false): float =
+    let num_rows = x1.len
+    var total: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total += abs(x1[k] - x2[k])
+    if normalize:
+        return total.float / num_rows.float
+    else:
+        return total.float
+
+
+proc totalvariation_distance*[T: SomeNumber](x1, x2: Vector[T], normalize: bool = false): float =
+    let num_rows = x1.len
+    var total: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total += abs(x1[k] - x2[k])
+    if normalize:
+        return total.float / 2.0.float / num_rows.float
+    else:
+        return total.float / 2.0.float
+
+
+proc jaccard_distance*[T: SomeNumber](x1, x2: Vector[T], normalize: bool = false): float =
+    let num_rows = x1.len
+    var 
+        total_min: typeof(x1[0]) = 0
+        total_max: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total_min += min(x1[k], x2[k])
+        total_max += max(x1[k], x2[k])
+    if total_max == 0:
+        return 0.0.float
+    else:
+        return 1.0.float - (total_min / total_max).float
+
+
+proc cosine_distance*[T: SomeNumber](x1, x2: Vector[T], normalize: bool = false): float =
+    let num_rows = x1.len
+    var 
+        total_x12: typeof(x1[0]) = 0
+        total_x11: typeof(x1[0]) = 0
+        total_x22: typeof(x1[0]) = 0
+    for k in 0..num_rows-1:
+        total_x12 += x1[k] * x2[k]
+        total_x11 += x1[k] * x1[k]
+        total_x22 += x2[k] * x2[k]
+    if total_x11 == 0 or total_x22 == 0:
+        return 0.0.float
+    else:
+        return 1.0.float - total_x12.float / (sqrt(total_x11.float) * sqrt(total_x22.float))
+
+
+proc kldivergence_distance*[T: SomeNumber](x1, x2: Vector[T], normalize: bool = false): float =
+    let num_rows = x1.len
+    var total: float
+    for k in 0..num_rows-1:
+        if x1[k] == 0 and x2[k] == 0:
+            continue
+        else:
+            total += x1[k].float * ln(x1[k] / x2[k]).float
+    return total.float
+
+
+proc pairwise*[T](X: Matrix[T], distance: (proc(x1, x2: Vector[T], normalize: bool): float), normalize: bool = false): Matrix[float] =
+    # Computes the pairwise distance of a 2D matrix (col-major-wise) across the selected distance
+    let num_cols = X.N
+    var dist = makeMatrix(num_cols, num_cols, proc(i, j: int): float = 0.float)
+    for i in 0..(num_cols-1):
+        for j in 0..i:
+            dist[i, j] = distance(X.column(i), X.column(j), normalize)
+    return dist
+
+
+proc symmetrize*(X: Matrix[SomeNumber], how: string = "l=>u"): Matrix[SomeNumber] = 
+    # Mutates X symmetrical by copying upper triangle to lower triangle ("u=>l") or lower triangle to upper triangle ("l=>r")
+    let num_rows = X.M
+    let num_cols = X.N
+    var X_copy = X
+    if num_rows != num_cols:
+        raise newException(ValueError, "Matrix must be symmetrical")
+    if how != "l=>u" and how != "u=>l":
+        raise newException(ValueError, "'how' must be one of 'l=>u' or 'u=>l'")
+    if how == "l=>u":
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X_copy[j, i] = X[i, j]
+    else:
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X_copy[i, j] = X[j, i]
+    return X_copy
+
+
+proc symmetrize*(X: var Matrix[SomeNumber], how: string = "l=>u"): Matrix[SomeNumber] = 
+    # Mutates X symmetrical by copying upper triangle to lower triangle ("u=>l") or lower triangle to upper triangle ("l=>r")
+    let num_rows = X.M
+    let num_cols = X.N
+    if num_rows != num_cols:
+        raise newException(ValueError, "Matrix must be symmetrical")
+    if how != "l=>u" and how != "u=>l":
+        raise newException(ValueError, "'how' must be one of 'l=>u' or 'u=>l'")
+    if how == "l=>u":
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X[j, i] = X[i, j]
+    else:
+        for i in 0..(num_cols-1):
+            for j in 0..i:
+                X[i, j] = X[j, i]
+    return X
diff --git a/tests/config.nims b/tests/config.nims
new file mode 100644
index 0000000..3bb69f8
--- /dev/null
+++ b/tests/config.nims
@@ -0,0 +1 @@
+switch("path", "$projectDir/../src")
\ No newline at end of file
diff --git a/tests/test_seq.nim b/tests/test_seq.nim
new file mode 100644
index 0000000..ce4bac6
--- /dev/null
+++ b/tests/test_seq.nim
@@ -0,0 +1,91 @@
+import sequtils
+import times
+import unittest
+
+import distances/seq
+
+
+# Globals
+let
+    num_rows = 10
+    num_cols = 10
+    input_seq_seq_int = newSeqWith(num_rows, newSeq[int](num_cols))
+    input_seq_seq_float = newSeqWith(num_rows, newSeq[float](num_cols))
+    output_seq_float = newSeqWith(num_rows, newSeq[float](num_rows))
+    normalize = true
+
+suite "Seq":
+    
+    setup:
+        let t0 = getTime()
+
+    teardown:
+        echo "\n  RUNTIME: ", getTime() - t0
+
+    test "pairwise int hamming":
+        check pairwise(input_seq_seq_int, hamming_distance, normalize) == output_seq_float
+
+    test "pairwise int euclidean":
+        check pairwise(input_seq_seq_int, euclidean_distance, normalize) == output_seq_float
+
+    test "pairwise int sqeuclidean":
+        check pairwise(input_seq_seq_int, sqeuclidean_distance, normalize) == output_seq_float
+
+    test "pairwise int cityblock":
+        check pairwise(input_seq_seq_int, cityblock_distance, normalize) == output_seq_float
+
+    test "pairwise int totalvariation":
+        check pairwise(input_seq_seq_int, totalvariation_distance, normalize) == output_seq_float
+
+    test "pairwise int jaccard":
+        check pairwise(input_seq_seq_int, jaccard_distance, normalize) == output_seq_float
+
+    test "pairwise int cosine":
+        check pairwise(input_seq_seq_int, cosine_distance, normalize) == output_seq_float
+
+    test "pairwise int kldivergence":
+        check pairwise(input_seq_seq_int, kldivergence_distance, normalize) == output_seq_float
+
+    test "pairwise float hamming":
+        check pairwise(input_seq_seq_float, hamming_distance, normalize) == output_seq_float
+
+    test "pairwise float euclidean":
+        check pairwise(input_seq_seq_float, euclidean_distance, normalize) == output_seq_float
+
+    test "pairwise float sqeuclidean":
+        check pairwise(input_seq_seq_float, sqeuclidean_distance, normalize) == output_seq_float
+
+    test "pairwise float cityblock":
+        check pairwise(input_seq_seq_float, cityblock_distance, normalize) == output_seq_float
+
+    test "pairwise float totalvariation":
+        check pairwise(input_seq_seq_float, totalvariation_distance, normalize) == output_seq_float
+
+    test "pairwise float jaccard":
+        check pairwise(input_seq_seq_float, jaccard_distance, normalize) == output_seq_float
+
+    test "pairwise float cosine":
+        check pairwise(input_seq_seq_float, cosine_distance, normalize) == output_seq_float
+
+    test "pairwise float kldivergence":
+        check pairwise(input_seq_seq_float, kldivergence_distance, normalize) == output_seq_float
+
+    test "symmetrize l=>u":
+        let input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]]
+        let output = @[@[1, 11, 111], @[11, 22, 222], @[111, 222, 333]]
+        check symmetrize(input) == output
+
+    test "symmetrize seq sesq u=>l":
+        let input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]]
+        let output = @[@[1, 2, 3], @[2, 22, 33], @[3, 33, 333]]
+        check symmetrize(input, "u=>l") == output
+
+    test "symmetrize mut l=>u":
+        var input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]]
+        var output = @[@[1, 11, 111], @[11, 22, 222], @[111, 222, 333]]
+        check symmetrize(input) == output
+
+    test "symmetrize mut u=>l":
+        var input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]]
+        var output = @[@[1, 2, 3], @[2, 22, 33], @[3, 33, 333]]
+        check symmetrize(input, "u=>l") == output
diff --git a/tests/test_tensor.nim b/tests/test_tensor.nim
new file mode 100644
index 0000000..6ee8ee9
--- /dev/null
+++ b/tests/test_tensor.nim
@@ -0,0 +1,91 @@
+import arraymancer
+import times
+import unittest
+
+import distances/tensor
+
+# Globals
+let
+    num_rows = 10
+    num_cols = 10
+    input_tensor_int = zeros[int](num_rows, num_cols)
+    input_tensor_float = zeros[float](num_rows, num_cols)
+    output_tensor_float = zeros[float](num_cols, num_cols)
+    normalize = true
+
+suite "Tensor":
+    
+    setup:
+        let t0 = getTime()
+
+    teardown:
+        echo "\n  RUNTIME: ", getTime() - t0
+
+    test "pairwise int hamming":
+        check pairwise(input_tensor_int, hamming_distance, normalize) == output_tensor_float
+
+    test "pairwise int euclidean":
+        check pairwise(input_tensor_int, euclidean_distance, normalize) == output_tensor_float
+
+    test "pairwise int sqeuclidean":
+        check pairwise(input_tensor_int, sqeuclidean_distance, normalize) == output_tensor_float
+
+    test "pairwise int cityblock":
+        check pairwise(input_tensor_int, cityblock_distance, normalize) == output_tensor_float
+
+    test "pairwise int totalvariation":
+        check pairwise(input_tensor_int, totalvariation_distance, normalize) == output_tensor_float
+
+    test "pairwise int jaccard":
+        check pairwise(input_tensor_int, jaccard_distance, normalize) == output_tensor_float
+
+    test "pairwise int cosine":
+        check pairwise(input_tensor_int, cosine_distance, normalize) == output_tensor_float
+
+    test "pairwise int kldivergence":
+        check pairwise(input_tensor_int, kldivergence_distance, normalize) == output_tensor_float
+
+    test "pairwise float hamming":
+        check pairwise(input_tensor_float, hamming_distance, normalize) == output_tensor_float
+
+    test "pairwise float euclidean":
+        check pairwise(input_tensor_float, euclidean_distance, normalize) == output_tensor_float
+
+    test "pairwise float sqeuclidean":
+        check pairwise(input_tensor_float, sqeuclidean_distance, normalize) == output_tensor_float
+
+    test "pairwise float cityblock":
+        check pairwise(input_tensor_float, cityblock_distance, normalize) == output_tensor_float
+
+    test "pairwise float totalvariation":
+        check pairwise(input_tensor_float, totalvariation_distance, normalize) == output_tensor_float
+
+    test "pairwise float jaccard":
+        check pairwise(input_tensor_float, jaccard_distance, normalize) == output_tensor_float
+
+    test "pairwise float cosine":
+        check pairwise(input_tensor_float, cosine_distance, normalize) == output_tensor_float
+
+    test "pairwise float kldivergence":
+        check pairwise(input_tensor_float, kldivergence_distance, normalize) == output_tensor_float
+
+    test "symmetrize tensor l=>u":
+        let input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]].toTensor()
+        let output = @[@[1, 11, 111], @[11, 22, 222], @[111, 222, 333]].toTensor()
+        check symmetrize(input) == output
+
+    test "symmetrize tensor u=>l":
+        let input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]].toTensor()
+        let output = @[@[1, 2, 3], @[2, 22, 33], @[3, 33, 333]].toTensor()
+        check symmetrize(input, "u=>l") == output
+
+    test "symmetrize mut tensor l=>u":
+        var input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]].toTensor()
+        var output = @[@[1, 11, 111], @[11, 22, 222], @[111, 222, 333]].toTensor()
+        check symmetrize(input) == output
+
+    test "symmetrize mut tensor u=>l":
+        var input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]].toTensor()
+        var output = @[@[1, 2, 3], @[2, 22, 33], @[3, 33, 333]].toTensor()
+        check symmetrize(input, "u=>l") == output
+
diff --git a/tests/test_vector.nim b/tests/test_vector.nim
new file mode 100644
index 0000000..57bf57c
--- /dev/null
+++ b/tests/test_vector.nim
@@ -0,0 +1,90 @@
+import neo
+import times
+import unittest
+
+import distances/vector
+
+# Globals
+let
+    num_rows = 10
+    num_cols = 10
+    input_matrix_int = makeMatrix(num_rows, num_cols, proc(i, j: int): int = 0.int)
+    input_matrix_float = makeMatrix(num_rows, num_cols, proc(i, j: int): float = 0.float)
+    output_matrix_float = makeMatrix(num_rows, num_rows, proc(i, j: int): float = 0.float)
+    normalize = true
+
+suite "Vector":
+    
+    setup:
+        let t0 = getTime()
+
+    teardown:
+        echo "\n  RUNTIME: ", getTime() - t0
+
+    test "pairwise int hamming":
+        check pairwise(input_matrix_int, hamming_distance, normalize) == output_matrix_float
+
+    test "pairwise int euclidean":
+        check pairwise(input_matrix_int, euclidean_distance, normalize) == output_matrix_float
+
+    test "pairwise int sqeuclidean":
+        check pairwise(input_matrix_int, sqeuclidean_distance, normalize) == output_matrix_float
+
+    test "pairwise int cityblock":
+        check pairwise(input_matrix_int, cityblock_distance, normalize) == output_matrix_float
+
+    test "pairwise int totalvariation":
+        check pairwise(input_matrix_int, totalvariation_distance, normalize) == output_matrix_float
+
+    test "pairwise int jaccard":
+        check pairwise(input_matrix_int, jaccard_distance, normalize) == output_matrix_float
+
+    test "pairwise int cosine":
+        check pairwise(input_matrix_int, cosine_distance, normalize) == output_matrix_float
+
+    test "pairwise int kldivergence":
+        check pairwise(input_matrix_int, kldivergence_distance, normalize) == output_matrix_float
+
+    test "pairwise float hamming":
+        check pairwise(input_matrix_float, hamming_distance, normalize) == output_matrix_float
+
+    test "pairwise float euclidean":
+        check pairwise(input_matrix_float, euclidean_distance, normalize) == output_matrix_float
+
+    test "pairwise float sqeuclidean":
+        check pairwise(input_matrix_float, sqeuclidean_distance, normalize) == output_matrix_float
+
+    test "pairwise float cityblock":
+        check pairwise(input_matrix_float, cityblock_distance, normalize) == output_matrix_float
+
+    test "pairwise float totalvariation":
+        check pairwise(input_matrix_float, totalvariation_distance, normalize) == output_matrix_float
+
+    test "pairwise float jaccard":
+        check pairwise(input_matrix_float, jaccard_distance, normalize) == output_matrix_float
+
+    test "pairwise float cosine":
+        check pairwise(input_matrix_float, cosine_distance, normalize) == output_matrix_float
+
+    test "pairwise float kldivergence":
+        check pairwise(input_matrix_float, kldivergence_distance, normalize) == output_matrix_float
+
+    test "symmetrize l=>u":
+        let input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]].matrix()
+        let output = @[@[1, 11, 111], @[11, 22, 222], @[111, 222, 333]].matrix()
+        check symmetrize(input) == output
+
+    test "symmetrize u=>l":
+        let input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]].matrix()
+        let output = @[@[1, 2, 3], @[2, 22, 33], @[3, 33, 333]].matrix()
+        check symmetrize(input, "u=>l") == output
+
+    test "symmetrize mut l=>u":
+        var input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]].matrix()
+        var output = @[@[1, 11, 111], @[11, 22, 222], @[111, 222, 333]].matrix()
+        check symmetrize(input) == output
+
+    test "symmetrize mut u=>l":
+        var input = @[@[1, 2, 3], @[11, 22, 33], @[111, 222, 333]].matrix()
+        var output = @[@[1, 2, 3], @[2, 22, 33], @[3, 33, 333]].matrix()
+        check symmetrize(input, "u=>l") == output