#### Step 1: Upload the movies.avro file to HDFS

In [None]:
!hdfs dfs -put ~/training_materials/data/movies.avro /user/cloudera

#### Step 2: Create SQL Context

In [1]:
# Build the SQL entry point on top of the notebook's existing SparkContext (`sc`).
# NOTE: the instance is deliberately bound to the name `SQLContext` (the same name as
# the class), because the later cells reach the instance through that name.
import pyspark.sql

SQLContext = pyspark.sql.SQLContext(sc)

#### Step 3: Create a dataframe from the file

In [2]:
# Read movies.avro into a DataFrame through the spark-avro data source.
# The path is relative; presumably it resolves to the HDFS directory the file was
# uploaded to in Step 1 -- confirm against the cluster's default filesystem/home dir.
avro_reader = SQLContext.read.format("com.databricks.spark.avro")
movies_df = avro_reader.load("movies.avro")


#### Step 4: Verify the column headers of the file

In [3]:
# Display the DataFrame's column names as a list (left as the cell's last expression
# so the notebook renders it).
movies_df.columns

['id', 'name', 'genre']

#### Step 5: Print the first 5 rows of the dataframe

In [4]:
# Print the first 5 rows as a text table; long cell values are shown truncated.
movies_df.show(n=5)

+---+--------------------+--------------------+
| id|                name|               genre|
+---+--------------------+--------------------+
|  1|    Toy Story (1995)|Animation|Childre...|
|  2|      Jumanji (1995)|Adventure|Childre...|
|  3|Grumpier Old Men ...|      Comedy|Romance|
|  4|Waiting to Exhale...|        Comedy|Drama|
|  5|Father of the Bri...|              Comedy|
+---+--------------------+--------------------+
only showing top 5 rows



#### Step 6: Sort the movies alphabetically by name

In [5]:
# Order the rows by the `name` column (ascending) and print the first 5,
# with truncation disabled so full titles and genre strings are visible.
movies_sorted = movies_df.orderBy("name")
movies_sorted.show(5, truncate=False)

+----+-----------------------------+-----------------+
|id  |name                         |genre            |
+----+-----------------------------+-----------------+
|2031|$1000000 Duck (1971)         |Children's|Comedy|
|3112|'Night Mother (1986)         |Drama            |
|779 |'Til There Was You (1997)    |Drama|Romance    |
|2072|'burbs The (1989)            |Comedy           |
|3420|...And Justice for All (1979)|Drama|Thriller   |
+----+-----------------------------+-----------------+
only showing top 5 rows



#### Step 7: Write the output in parquet format

In [6]:
# Persist the DataFrame as Parquet under the relative path "movies.parquet".
# The format is named explicitly instead of relying on Spark's default data source,
# and overwrite mode replaces any earlier output so this cell can be re-run without
# a "path already exists" error.
movies_df.write.format("parquet").mode("overwrite").save("movies.parquet")

#### Step 8: Review the content of the new parquet file

In [9]:
# Read the saved output back with the default data source and preview it,
# to confirm the written file round-trips to the same columns and rows.
movies_df_parquet = SQLContext.read.load(path="movies.parquet")
movies_df_parquet.show(n=5)

+---+--------------------+--------------------+
| id|                name|               genre|
+---+--------------------+--------------------+
|  1|    Toy Story (1995)|Animation|Childre...|
|  2|      Jumanji (1995)|Adventure|Childre...|
|  3|Grumpier Old Men ...|      Comedy|Romance|
|  4|Waiting to Exhale...|        Comedy|Drama|
|  5|Father of the Bri...|              Comedy|
+---+--------------------+--------------------+
only showing top 5 rows

