# Rows
* Rows represent data that fits into the columns and types of the `DataFrame`
* Rows themselves do not have schemas
* If you create a Row manually, you must specify the values in the same order as the schema of the DataFrame


In [9]:
import org.apache.spark.sql.types._
import org.apache.spark.sql.Row

val structType = new StructType(Array(
    StructField("id", IntegerType, false),
    StructField("firstName", StringType, false),
    StructField("lastName", StringType, false),
    StructField("department", StringType, false)))

val data = Seq(
     Row(1, "James", "Gosling", "Coffee and Accessories"),
     Row(2, "Guido", "Van Rossum", "Snakes, Lizards, Spiders"),
     Row(3, "Bjarne", "Stroustrup", "Optometry"),
     Row(4, "John", "McCarthy", "Speech Therapy")
   )

val dataFrame = spark.createDataFrame(spark.sparkContext.parallelize(data), structType)
dataFrame.show()

+---+---------+----------+--------------------+
| id|firstName|  lastName|          department|
+---+---------+----------+--------------------+
|  1|    James|   Gosling|Coffee and Access...|
|  2|    Guido|Van Rossum|Snakes, Lizards, ...|
|  3|   Bjarne|Stroustrup|           Optometry|
|  4|     John|  McCarthy|      Speech Therapy|
+---+---------+----------+--------------------+



import org.apache.spark.sql.types._
import org.apache.spark.sql.Row
structType: org.apache.spark.sql.types.StructType = StructType(StructField(id,IntegerType,false), StructField(firstName,StringType,false), StructField(lastName,StringType,false), StructField(department,StringType,false))
data: Seq[org.apache.spark.sql.Row] = List([1,James,Gosling,Coffee and Accessories], [2,Guido,Van Rossum,Snakes, Lizards, Spiders], [3,Bjarne,Stroustrup,Optometry], [4,John,McCarthy,Speech Therapy])
dataFrame: org.apache.spark.sql.DataFrame = [id: int, firstName: string ... 2 more fields]


## Adding a row or rows

* `union` can add one or more rows together
* `union` requires a `DataSet` of `Row`

In [20]:
import spark.implicits._
val matz = (5, "Yukihiro", "Matsumoto", "Jewelry")
val wall = (6, "Larry", "Wall", "Jewelry")
val jewelers = Seq(matz, wall).toDF("id", "firstName", "lastName", "department")

import spark.implicits._
matz: (Int, String, String, String) = (5,Yukihiro,Matsumoto,Jewelry)
wall: (Int, String, String, String) = (6,Larry,Wall,Jewelry)
jewelers: org.apache.spark.sql.DataFrame = [id: int, firstName: string ... 2 more fields]


In [22]:
val combinedDF = dataFrame.union(jewelers)
combinedDF.show()

+---+---------+----------+--------------------+
| id|firstName|  lastName|          department|
+---+---------+----------+--------------------+
|  1|    James|   Gosling|Coffee and Access...|
|  2|    Guido|Van Rossum|Snakes, Lizards, ...|
|  3|   Bjarne|Stroustrup|           Optometry|
|  4|     John|  McCarthy|      Speech Therapy|
|  5| Yukihiro| Matsumoto|             Jewelry|
|  6|    Larry|      Wall|             Jewelry|
+---+---------+----------+--------------------+



combinedDF: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [id: int, firstName: string ... 2 more fields]


## Get the first row

In [24]:
val row = combinedDF.head()

row: org.apache.spark.sql.Row = [1,James,Gosling,Coffee and Accessories]


## Take the first few rows

In [30]:
combinedDF.take(3)

res8: Array[org.apache.spark.sql.Row] = Array([1,James,Gosling,Coffee and Accessories], [2,Guido,Van Rossum,Snakes, Lizards, Spiders], [3,Bjarne,Stroustrup,Optometry])


## Get Elements of a row

In [32]:
val odersky = Row(7, "Martin", "Odersky", "Stair Repair")
odersky.getString(3)

odersky: org.apache.spark.sql.Row = [7,Martin,Odersky,Stair Repair]
res10: String = Stair Repair


In [33]:
val kernighan = Row(7, "Brian", "Kernighan", "Oceanic Travel")
kernighan.getAs[Int](0)

kernighan: org.apache.spark.sql.Row = [7,Brian,Kernighan,Oceanic Travel]
res11: Int = 7


## Adding the last two for a larger `DataFrame`

In [None]:
case class Employee(id:Int, )
val jewelers = Seq().toDF("id", "firstName", "lastName", "department")

## Get all elements in an `Array`

## Getting all the departments using functional programming

In [39]:
combinedDF.map(row => row.getAs[String](3))

<console>: 48: error: value toList is not a member of org.apache.spark.sql.Dataset[String]