In [50]:
//Obtain the SQLContext through SparkSession: the SQLContext constructor is
//deprecated since Spark 2.0, and getOrCreate() reuses the running context.
val sqlC = org.apache.spark.sql.SparkSession.builder().getOrCreate().sqlContext
//Brings the toDF conversion used below into scope
import sqlC.implicits._

//Create the players DataFrame
//Columns: id, name, team_id (joined against the teams "id" column further down)
//and position (a list of position ids; presumably these refer to the
//positions DataFrame "id" column -- confirm, no join on it is shown here)
val players = Seq(
    (0, "Kyrie Irving", 7, Seq(1)),
    (1, "DeMar DeRozan", 4, Seq(2)),
    (2, "LeBron James", 2, Seq(3, 4, 5)),
    (3, "G. Antetokounmpo", 0, Seq(3, 4, 5)),
    (4, "Jimmy Butler", 1, Seq(3, 4, 5)),
    (5, "Stephen Curry", 6, Seq(1)),
    (6, "James Harden", 10, Seq(2)),
    (7, "Kevin Durant", 6, Seq(3)),
    (8, "Kawhi Leonard", 3, Seq(4)),
    (9, "Anthony Davis", 8, Seq(5))).
  toDF("id", "name", "team_id", "position")

players.show()

+---+----------------+-------+---------+
| id|            name|team_id| position|
+---+----------------+-------+---------+
|  0|    Kyrie Irving|      7|      [1]|
|  1|   DeMar DeRozan|      4|      [2]|
|  2|    LeBron James|      2|[3, 4, 5]|
|  3|G. Antetokounmpo|      0|[3, 4, 5]|
|  4|    Jimmy Butler|      1|[3, 4, 5]|
|  5|   Stephen Curry|      6|      [1]|
|  6|    James Harden|     10|      [2]|
|  7|    Kevin Durant|      6|      [3]|
|  8|   Kawhi Leonard|      3|      [4]|
|  9|   Anthony Davis|      8|      [5]|
+---+----------------+-------+---------+



In [51]:
//Build the teams DataFrame: one row per team holding its id, name and conference
val teams = {
  val teamRows = Seq(
    (0, "Bucks", "Eastern"),
    (1, "Bulls", "Eastern"),
    (2, "Cavaliers", "Eastern"),
    (3, "Spurs", "Western"),
    (4, "Raptors", "Eastern"),
    (5, "Magic", "Eastern"),
    (6, "Warriors", "Western"),
    (7, "Celtics", "Eastern"),
    (8, "Pelicans", "Western"),
    (9, "Lakers", "Western"),
    (10, "Rockets", "Western")
  )
  //Name the tuple fields as DataFrame columns
  teamRows.toDF("id", "team", "conference")
}

teams.show()

+---+---------+----------+
| id|     team|conference|
+---+---------+----------+
|  0|    Bucks|   Eastern|
|  1|    Bulls|   Eastern|
|  2|Cavaliers|   Eastern|
|  3|    Spurs|   Western|
|  4|  Raptors|   Eastern|
|  5|    Magic|   Eastern|
|  6| Warriors|   Western|
|  7|  Celtics|   Eastern|
|  8| Pelicans|   Western|
|  9|   Lakers|   Western|
| 10|  Rockets|   Western|
+---+---------+----------+



In [32]:
//Create the positions DataFrame
//Maps each position id to its display name
val positions = Seq(
    (1, "Point Guard"),
    (2, "Shooting Guard"),
    (3, "Small Forward"),
    (4, "Power Forward"),
    (5, "Center")).
  toDF("id", "position")

positions.show()

+---+--------------+
| id|      position|
+---+--------------+
|  1|   Point Guard|
|  2|Shooting Guard|
|  3| Small Forward|
|  4| Power Forward|
|  5|        Center|
+---+--------------+



In [52]:
//Inner-join the players DataFrame with the teams DataFrame so every player
//row also carries its team name and conference

//Join condition: a player's team_id must equal the team's id
val joinExpr = players("team_id") === teams("id")

//Drop the teams "id" column after joining; it duplicates team_id
players.
    join(teams, joinExpr, "inner").
    drop(teams("id")).
    show()

+---+----------------+-------+---------+---------+----------+
| id|            name|team_id| position|     team|conference|
+---+----------------+-------+---------+---------+----------+
|  0|    Kyrie Irving|      7|      [1]|  Celtics|   Eastern|
|  1|   DeMar DeRozan|      4|      [2]|  Raptors|   Eastern|
|  2|    LeBron James|      2|[3, 4, 5]|Cavaliers|   Eastern|
|  3|G. Antetokounmpo|      0|[3, 4, 5]|    Bucks|   Eastern|
|  4|    Jimmy Butler|      1|[3, 4, 5]|    Bulls|   Eastern|
|  5|   Stephen Curry|      6|      [1]| Warriors|   Western|
|  6|    James Harden|     10|      [2]|  Rockets|   Western|
|  7|    Kevin Durant|      6|      [3]| Warriors|   Western|
|  8|   Kawhi Leonard|      3|      [4]|    Spurs|   Western|
|  9|   Anthony Davis|      8|      [5]| Pelicans|   Western|
+---+----------------+-------+---------+---------+----------+



In [58]:
//Left outer join with teams on the left and players on the right

//Every team is kept in the result, even teams with no player who
//started the All-Star game; the player columns are null for those teams

teams.
    join(players, joinExpr, "left_outer").
    drop(teams("id")).
    show()

+---------+----------+----+----------------+-------+---------+
|     team|conference|  id|            name|team_id| position|
+---------+----------+----+----------------+-------+---------+
|    Bucks|   Eastern|   3|G. Antetokounmpo|      0|[3, 4, 5]|
|    Bulls|   Eastern|   4|    Jimmy Butler|      1|[3, 4, 5]|
|Cavaliers|   Eastern|   2|    LeBron James|      2|[3, 4, 5]|
|    Spurs|   Western|   8|   Kawhi Leonard|      3|      [4]|
|  Raptors|   Eastern|   1|   DeMar DeRozan|      4|      [2]|
|    Magic|   Eastern|null|            null|   null|     null|
| Warriors|   Western|   7|    Kevin Durant|      6|      [3]|
| Warriors|   Western|   5|   Stephen Curry|      6|      [1]|
|  Celtics|   Eastern|   0|    Kyrie Irving|      7|      [1]|
| Pelicans|   Western|   9|   Anthony Davis|      8|      [5]|
|   Lakers|   Western|null|            null|   null|     null|
|  Rockets|   Western|   6|    James Harden|     10|      [2]|
+---------+----------+----+----------------+-------+---------+

In [60]:
//Anti-Join (left_anti) the teams DataFrame against the players DataFrame

//Keeps only the teams with no matching player row, i.e. the teams
//that did not have any starters in the All-Star game

teams.
    join(players, joinExpr, "left_anti").
    show()

+---+------+----------+
| id|  team|conference|
+---+------+----------+
|  5| Magic|   Eastern|
|  9|Lakers|   Western|
+---+------+----------+

