# An introduction to Apache Spark.

In [3]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
import pyspark.sql.functions as func

In [4]:
# Build the SparkSession for this notebook, reusing an existing one if present.
spark = (
    SparkSession.builder
    .appName("FirstApp")
    .getOrCreate()
)

In [6]:
# Explicit DataFrame schema: four nullable columns, listed in column order.
myschema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("userID", IntegerType()),
        ("name", StringType()),
        ("age", IntegerType()),
        ("friends", IntegerType()),
    )
])

In [7]:
# Read fakefriends.csv into a DataFrame, applying the explicit schema
# instead of letting Spark infer column types.
people = (
    spark.read
    .format("csv")
    .schema(myschema)
    .option("path", "fakefriends.csv")
    .load()
)

In [8]:
# Transformation pipeline: keep the four source columns, retain only rows
# with age below 30, add an insert_ts column holding the current timestamp,
# and sort by userID. These are lazy transformations; nothing executes until
# an action (e.g. count or show) is called on the result.
output = (
    people
    .select("userID", "name", "age", "friends")
    .filter(people["age"] < 30)
    .withColumn("insert_ts", func.current_timestamp())
    .orderBy(people["userID"])
)

In [9]:
# Count the rows of the output DataFrame. count() is an action, so this
# triggers execution of the transformations defined on `output`.
output.count()

112

In [10]:
# Register the output DataFrame as a temporary view named "peoples"
# (replacing any existing view of that name) so it can be queried with SQL.
output.createOrReplaceTempView("peoples")

In [11]:
# Query the "peoples" temp view with Spark SQL and print the result table.
(
    spark
    .sql("select name, age, friends, insert_ts from peoples")
    .show()
)

+--------+---+-------+--------------------+
|    name|age|friends|           insert_ts|
+--------+---+-------+--------------------+
|Jean-Luc| 26|      2|2023-12-01 17:48:...|
|    Hugh| 27|    181|2023-12-01 17:48:...|
|  Weyoun| 22|    323|2023-12-01 17:48:...|
|   Miles| 19|    268|2023-12-01 17:48:...|
|  Julian| 25|      1|2023-12-01 17:48:...|
|     Ben| 21|    445|2023-12-01 17:48:...|
|  Julian| 22|    100|2023-12-01 17:48:...|
|     Nog| 26|    281|2023-12-01 17:48:...|
| Beverly| 27|    305|2023-12-01 17:48:...|
|    Morn| 25|     96|2023-12-01 17:48:...|
|   Brunt| 24|     49|2023-12-01 17:48:...|
|     Nog| 20|      1|2023-12-01 17:48:...|
| Beverly| 19|    269|2023-12-01 17:48:...|
|   Brunt| 19|      5|2023-12-01 17:48:...|
|  Geordi| 20|    100|2023-12-01 17:48:...|
|  Geordi| 21|    477|2023-12-01 17:48:...|
|  Kasidy| 22|    179|2023-12-01 17:48:...|
|   Brunt| 20|    384|2023-12-01 17:48:...|
|     Ben| 28|    311|2023-12-01 17:48:...|
|    Worf| 24|    492|2023-12-01