In [68]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
import pyspark.sql.functions as func 

In [69]:
# Obtain the SparkSession used by every later cell, registered under the
# application name "FirstApp"; getOrCreate() reuses an active session if any.
spark = (
    SparkSession.builder
    .appName("FirstApp")
    .getOrCreate()
)

In [70]:
# Explicit schema for the friends CSV: four nullable columns, in file order.
myschema = (
    StructType()
    .add("userID", IntegerType(), True)
    .add("name", StringType(), True)
    .add("age", IntegerType(), True)
    .add("friends", IntegerType(), True)
)

In [71]:
# Build the `people` DataFrame from the CSV file, applying the explicit schema
# instead of letting Spark infer column types.
people = (
    spark.read
    .format("csv")
    .schema(myschema)
    .option("path", "fakefriends.csv")
    .load()
)

In [72]:
# Print each column's name, type and nullability to confirm that the explicit
# schema was applied to the loaded DataFrame.
people.printSchema()

root
 |-- userID: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- age: integer (nullable = true)
 |-- friends: integer (nullable = true)



In [73]:

# Apply all transformations in one chain: keep the four source columns,
# retain only people younger than 30, stamp every row with the current
# timestamp in `insert_ts`, and sort the result by user ID.
output = (
    people
    .select(people.userID, people.name, people.age, people.friends)
    .where(people.age < 30)
    .withColumn('insert_ts', func.current_timestamp())
    .orderBy(people.userID)
)

In [74]:

# Count the rows of the transformed `output` DataFrame; as the last expression
# in the cell, the count is shown as the cell's result.
output.count()

112

In [75]:
# Register `output` as the temporary view "peoples" so it can be queried with
# Spark SQL; any existing view with the same name is replaced.
output.createOrReplaceTempView("peoples")

In [76]:
# Run a simple Spark SQL query against the "peoples" temp view, then print
# the leading rows of the result.
peoples_names = spark.sql("select userID, name from peoples")
peoples_names.show()

+------+--------+
|userID|    name|
+------+--------+
|     1|Jean-Luc|
|     9|    Hugh|
|    16|  Weyoun|
|    21|   Miles|
|    24|  Julian|
|    25|     Ben|
|    26|  Julian|
|    32|     Nog|
|    35| Beverly|
|    46|    Morn|
|    47|   Brunt|
|    48|     Nog|
|    52| Beverly|
|    54|   Brunt|
|    60|  Geordi|
|    66|  Geordi|
|    72|  Kasidy|
|    73|   Brunt|
|    84|     Ben|
|    89|    Worf|
+------+--------+
only showing top 20 rows

