### Map Reduce

MapReduce is a stateless programming model that became popular as a convenient way to process data in a distributed fashion.

It has two primary operations:
 - Map : Transform each element in a collection
 - Reduce : Go from a collection of elements to a value.
 
A third ingredient is needed as well: grouping elements by a key.

In [1]:
// Ex: Map -- parse every string of the list into an Int
val a: List[String] = List("1", "2", "3", "4", "5", "6")
val integers = a.map(_.toInt)

[36ma[39m: [32mList[39m[[32mString[39m] = [33mList[39m([32m"1"[39m, [32m"2"[39m, [32m"3"[39m, [32m"4"[39m, [32m"5"[39m, [32m"6"[39m)
[36mintegers[39m: [32mList[39m[[32mInt[39m] = [33mList[39m([32m1[39m, [32m2[39m, [32m3[39m, [32m4[39m, [32m5[39m, [32m6[39m)

In [2]:
// Ex: Reduce -- collapse the list into a single value by adding elements pairwise
val reduction = integers.reduce(_ + _)

[36mreduction[39m: [32mInt[39m = [32m21[39m

In [3]:
// Grouping -- bucket the numbers under the key `true` (even) or `false` (odd)
val evenOdd = integers.groupBy(_ % 2 == 0)

[36mevenOdd[39m: [32mMap[39m[[32mBoolean[39m, [32mList[39m[[32mInt[39m]] = [33mMap[39m(
  false -> [33mList[39m([32m1[39m, [32m3[39m, [32m5[39m),
  true -> [33mList[39m([32m2[39m, [32m4[39m, [32m6[39m)
)

### Exercise

In [4]:
// A user id paired with a country code.
case class User(id: Int, country: String)
// Sample data for the exercise; the same user can appear several times
// (user 1 is listed three times, user 2 twice).
val plays: List[User] = List(
  User(1, "US"),
  User(1, "US"),
  User(2, "SE"),
  User(2, "SE"),
  User(1, "US"),
  User(3, "NL"),
  User(4, "US"),
  User(5, "DE"),
  User(6, "DE")
)

defined [32mclass[39m [36mUser[39m
[36mplays[39m: [32mList[39m[[32mUser[39m] = [33mList[39m(
  [33mUser[39m([32m1[39m, [32m"US"[39m),
  [33mUser[39m([32m1[39m, [32m"US"[39m),
  [33mUser[39m([32m2[39m, [32m"SE"[39m),
  [33mUser[39m([32m2[39m, [32m"SE"[39m),
  [33mUser[39m([32m1[39m, [32m"US"[39m),
  [33mUser[39m([32m3[39m, [32m"NL"[39m),
  [33mUser[39m([32m4[39m, [32m"US"[39m),
  [33mUser[39m([32m5[39m, [32m"DE"[39m),
  [33mUser[39m([32m6[39m, [32m"DE"[39m)
)

In [4]:
// Find the number of unique users per country
// TODO val usersByCountry = 

### Test

In [7]:
// Expected number of distinct users per country for the `plays` sample data.
val expected = Map("SE" -> 1, "DE" -> 2, "NL" -> 1, "US" -> 2)
// `.toMap` forces a strict Map first, so a solution that yields a lazy view
// (e.g. the result of `mapValues`) is still compared by content. The message
// shows both sides when the check fails.
assert(
  usersByCountry.toMap == expected,
  s"usersByCountry was ${usersByCountry.toMap}, expected $expected"
)

[36mexpected[39m: [32mMap[39m[[32mString[39m, [32mInt[39m] = [33mMap[39m([32m"SE"[39m -> [32m1[39m, [32m"DE"[39m -> [32m2[39m, [32m"NL"[39m -> [32m1[39m, [32m"US"[39m -> [32m2[39m)

In [1]:
// Notes:
// `reduce` fails on an empty collection, whereas `reduceOption` yields None,
// which lets us fall back to a default value instead.
// mrSum adds up all elements of `xs`; an empty list sums to 0.
def mrSum(xs: List[Int]): Int =
  xs.reduceOption(_ + _) match {
    case Some(total) => total
    case None        => 0
  }
assert(mrSum(Nil) == 0)

defined [32mfunction[39m [36mmrSum[39m

## Solutions

In [5]:
// Just map and reduce
// Group the plays by country, then count each group's distinct users by mapping
// every user to 1 and summing the ones.
// `map` is used rather than `mapValues` so the result is a strict Map instead of
// a lazy view that re-runs the counting function on every lookup.
// `reduce` is safe here because `groupBy` never produces an empty group.
val usersByCountry: Map[String, Int] =
  plays
    .groupBy(p => p.country)
    .map { case (country, users) =>
      val distinctUserCount = users.distinct
        .map((_: User) => 1)
        .reduce((a: Int, b: Int) => a + b)
      country -> distinctUserCount
    }

[36musersByCountry[39m: [32mMap[39m[[32mString[39m, [32mInt[39m] = [33mMap[39m(
  [32m"SE"[39m -> [32m1[39m,
  [32m"DE"[39m -> [32m2[39m,
  [32m"NL"[39m -> [32m1[39m,
  [32m"US"[39m -> [32m2[39m
)

In [6]:
// With some nice Scala sugar
// Mapping every user to 1 and summing was just computing the size of a list,
// which `.size` gives directly.
// `map` is used rather than `mapValues` so the result is a strict Map instead of
// a lazy view that re-runs the counting function on every lookup.
val usersByCountry: Map[String, Int] =
  plays
    .groupBy(_.country)
    .map { case (country, users) => country -> users.distinct.size }

[36musersByCountry[39m: [32mMap[39m[[32mString[39m, [32mInt[39m] = [33mMap[39m(
  [32m"SE"[39m -> [32m1[39m,
  [32m"DE"[39m -> [32m2[39m,
  [32m"NL"[39m -> [32m1[39m,
  [32m"US"[39m -> [32m2[39m
)