Skip to content

Commit

Permalink
Adding/fixing docs.
Browse files Browse the repository at this point in the history
  • Loading branch information
fnothaft committed Nov 3, 2016
1 parent aa776c0 commit 1ed9c31
Showing 1 changed file with 37 additions and 11 deletions.
Expand Up @@ -155,7 +155,7 @@ class ForestSerializer[T: ClassTag, TS <: Serializer[T]](
}

/**
* Implements a shuffle free broadcast region join.
* Implements a shuffle free (broadcast) region join.
*
* The broadcast values are stored in a sorted array. It was going to be an
* ensemble of interval trees, but that didn't work out.
Expand Down Expand Up @@ -199,10 +199,13 @@ trait TreeRegionJoin[T, U] {
}
}

/**
* Implements an inner region join where the left side of the join is broadcast.
*/
case class InnerTreeRegionJoin[T, U]() extends RegionJoin[T, U, T, U] with TreeRegionJoin[T, U] {

/**
* Performs a region join between two RDDs.
* Performs an inner region join between two RDDs.
*
* @param baseRDD The 'left' side of the join
* @param joinedRDD The 'right' side of the join
Expand All @@ -225,19 +228,25 @@ case class InnerTreeRegionJoin[T, U]() extends RegionJoin[T, U, T, U] with TreeR
}
}

/**
* Implements a right outer region join where the left side of the join is
* broadcast.
*/
case class RightOuterTreeRegionJoin[T, U]() extends RegionJoin[T, U, Option[T], U] with TreeRegionJoin[T, U] {

/**
* Performs a region join between two RDDs.
* Performs a right outer region join between two RDDs.
*
* @param baseRDD The 'left' side of the join
* @param joinedRDD The 'right' side of the join
* @param tManifest implicit type of baseRDD
* @param uManifest implicit type of joinedRDD
* @tparam T type of baseRDD
* @tparam U type of joinedRDD
* @return An RDD of pairs (x, y), where x is from baseRDD, y is from joinedRDD, and the region
* corresponding to x overlaps the region corresponding to y.
* @return An RDD of pairs (Option[x], y), where the optional x value is from
* baseRDD, y is from joinedRDD, and the region corresponding to x overlaps
* the region corresponding to y. If there are no keys in the baseRDD that
* overlap a given key (y) from the joinedRDD, x will be None.
*/
def partitionAndJoin(
baseRDD: RDD[(ReferenceRegion, T)],
Expand All @@ -256,19 +265,27 @@ case class RightOuterTreeRegionJoin[T, U]() extends RegionJoin[T, U, Option[T],
}
}

/**
* Performs an inner region join, followed logically by grouping by the right
* value. This is implemented without any shuffling; the join naturally returns
* values on the left grouped by the right value.
*/
case class InnerTreeRegionJoinAndGroupByRight[T, U]() extends RegionJoin[T, U, Iterable[T], U] with TreeRegionJoin[T, U] {

/**
* Performs a region join between two RDDs.
* Performs an inner region join between two RDDs, followed by a groupBy on
* the right object.
*
* @param baseRDD The 'left' side of the join
* @param joinedRDD The 'right' side of the join
* @param tManifest implicit type of baseRDD
* @param uManifest implicit type of joinedRDD
* @tparam T type of baseRDD
* @tparam U type of joinedRDD
* @return An RDD of pairs (x, y), where x is from baseRDD, y is from joinedRDD, and the region
* corresponding to x overlaps the region corresponding to y.
* @return An RDD of pairs (Iterable[x], y), where the Iterable[x] is from
* baseRDD, y is from joinedRDD, and all values in the Iterable[x] are
* aligned at regions that overlap the region corresponding to y. If the
* iterable is empty, the key-value pair is filtered out.
*/
def partitionAndJoin(
baseRDD: RDD[(ReferenceRegion, T)],
Expand All @@ -279,19 +296,28 @@ case class InnerTreeRegionJoinAndGroupByRight[T, U]() extends RegionJoin[T, U, I
}
}

/**
* Performs a right outer region join, followed logically by grouping by the right
* value. This is implemented without any shuffling; the join naturally returns
* values on the left grouped by the right value. In this implementation, empty
* collections on the left side of the join are kept.
*/
case class RightOuterTreeRegionJoinAndGroupByRight[T, U]() extends RegionJoin[T, U, Iterable[T], U] with TreeRegionJoin[T, U] {

/**
* Performs a region join between two RDDs.
* Performs a right outer region join between two RDDs, followed by a groupBy
* on the right object.
*
* @param baseRDD The 'left' side of the join
* @param joinedRDD The 'right' side of the join
* @param tManifest implicit type of baseRDD
* @param uManifest implicit type of joinedRDD
* @tparam T type of baseRDD
* @tparam U type of joinedRDD
* @return An RDD of pairs (x, y), where x is from baseRDD, y is from joinedRDD, and the region
* corresponding to x overlaps the region corresponding to y.
* @return An RDD of pairs (Iterable[x], y), where the Iterable[x] is from
* baseRDD, y is from joinedRDD, and all values in the Iterable[x] are
* aligned at regions that overlap the region corresponding to y. If the
* iterable is empty, the key-value pair is NOT filtered out.
*/
def partitionAndJoin(
baseRDD: RDD[(ReferenceRegion, T)],
Expand Down

0 comments on commit 1ed9c31

Please sign in to comment.