### Import Required modules and initialize SparkSession

In [7]:
from __future__ import print_function
import findspark
findspark.init()
import pyspark
from pyspark.sql import SparkSession

In [8]:
# Reuse the active session if there is one; otherwise start one named "PySpark Row API".
spark = (
    SparkSession.builder
    .appName("PySpark Row API")
    .getOrCreate()
)

In [9]:
spark

### Row

A row in a DataFrame.

- Create Row:
    - Row can be used to create a row object by using named arguments. In Spark 3.0 and later the fields keep the order in which they are given (earlier versions sorted them by name).
    - A named argument cannot be omitted to indicate that a value is None or missing; in that case it must be set to None explicitly.

- Accessing fields in Row:
  - Fields in a Row are accessed by their key (field name), either
      - like attributes (row.key)
      - like dictionary values (row[key])   

### Import PySpark Row API

In [12]:
from pyspark.sql import Row

### Create Row Object

In [14]:
# Build a Row from keyword arguments; each keyword becomes a field name.
row = Row(name="Bhavishya", age=6)
row

Row(name='Bhavishya', age=6)

#### Row can also be used to create another Row-like class, which can then be used to create Row objects, for example:

In [15]:
# Passing only field names (no values) yields a reusable Row template
# with the fields 'name' and 'age'.
Person = Row("name", "age")
Person

<Row('name', 'age')>

In [28]:
type(Person)

pyspark.sql.types.Row

In [17]:
# Call the template with positional values; they are matched to the
# fields in the order the template declared them (name, age).
row1 = Person("Bhavishya", 6)
row1

Row(name='Bhavishya', age=6)

In [29]:
type(row1)

pyspark.sql.types.Row

### Accessing Elements in Row
- Fields in a Row are accessed by their key (field name), either
        - like attributes (row.key)
        - like dictionary values (row[key])   

In [19]:
row

Row(name='Bhavishya', age=6)

In [20]:
row.name

'Bhavishya'

In [21]:
row['age']

6

In [24]:
# Two attribute lookups packed into a plain Python tuple.
data = (row.name, row.age)
data

('Bhavishya', 6)

In [25]:
type(data)

tuple

#### Check whether a key/column is in a Row

In [26]:
# `in` on a Row built with named fields tests the field names, not the stored values.
'name' in row

True

In [27]:
'city' in row

False

### Get Row as Python Dict

In [31]:
row.asDict()

{'name': 'Bhavishya', 'age': 6}

In [34]:
# A Row may hold another Row as a field value (here under `value`).
r = Row(
    emp_id=1,
    value=Row(name='Bhivishya', age=6),
)
r

Row(emp_id=1, value=Row(name='Bhivishya', age=6))

In [37]:
r.asDict()

{'emp_id': 1, 'value': Row(name='Bhivishya', age=6)}

In [38]:
# recursive=True also converts the nested Row stored in `value` into a dict.
r.asDict(recursive=True)

{'emp_id': 1, 'value': {'name': 'Bhivishya', 'age': 6}}

Note: `recursive=True` converts nested Row objects into dicts as well (default: `False`, which leaves nested Rows unchanged).

## Create a DataFrame from a list of Rows

In [39]:
# Each Row becomes one record; the column names come from the Row fields.
df = spark.createDataFrame(
    data=[Row(name='Bhavishya', age=6), Row(name='Teja', age=9)]
)

In [40]:
df.show()

+---------+---+
|     name|age|
+---------+---+
|Bhavishya|  6|
|     Teja|  9|
+---------+---+

