# Example PySpark Notebook

In [1]:
from pyspark.sql import SparkSession

# Start a SparkSession named "Spark Example", or reuse one that already exists
spark = (
    SparkSession.builder
    .appName("Spark Example")
    .getOrCreate()
)

23/12/13 11:00:09 WARN Utils: Your hostname, codespaces-d5d71e resolves to a loopback address: 127.0.0.1; using 172.16.5.4 instead (on interface eth0)
23/12/13 11:00:09 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/12/13 11:00:10 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [2]:

# Column names, in the same order as the fields of each row tuple below
columns = ["Firstname", "Lastname", "Country", "Age"]

# Sample customer rows: (first name, last name, country, age)
data = [
    ("James", "Smith", "USA", 30),
    ("Michael", "Rose", "USA", 45),
    ("Robert", "Williams", "USA", 25),
    ("Maria", "Jones", "USA", 22),
]
# Build the DataFrame from the row tuples, taking the column names from `columns`
df = spark.createDataFrame(data, schema=columns)
# Bare expression as the last line: the notebook displays the DataFrame's repr
df

DataFrame[Firstname: string, Lastname: string, Country: string, Age: bigint]

In [3]:
# Print the DataFrame's rows as a plain-text table
df.show()

                                                                                

+---------+--------+-------+---+
|Firstname|Lastname|Country|Age|
+---------+--------+-------+---+
|    James|   Smith|    USA| 30|
|  Michael|    Rose|    USA| 45|
|   Robert|Williams|    USA| 25|
|    Maria|   Jones|    USA| 22|
+---------+--------+-------+---+



In [4]:
# Expose df to Spark SQL as a temporary view called tmp_customer
df.createOrReplaceTempView("tmp_customer")

# Query the view with plain SQL, then print the resulting rows
first_names = spark.sql("select FirstName from tmp_customer")
first_names.show()

+---------+
|FirstName|
+---------+
|    James|
|  Michael|
|   Robert|
|    Maria|
+---------+



### Load the sparksql_magic extension to enable the %%sparksql cell magic


In [5]:
# Load the sparksql_magic IPython extension so that %%sparksql cells can be used
%load_ext sparksql_magic

In [6]:
%%sparksql
-- FirstName is selected explicitly and again through *, so it appears twice in the result
SELECT
    FirstName,
    *
FROM
    tmp_customer

0,1,2,3,4
FirstName,Firstname,Lastname,Country,Age
James,James,Smith,USA,30
Michael,Michael,Rose,USA,45
Robert,Robert,Williams,USA,25
Maria,Maria,Jones,USA,22


In [7]:
%%sparksql
-- Define the temporary view tmp_country as the Country and LastName columns of tmp_customer
CREATE OR REPLACE TEMPORARY VIEW tmp_country AS
SELECT Country, LastName
FROM tmp_customer;

In [8]:
%%sparksql
-- Return every column and row of the tmp_country view
SELECT *
FROM tmp_country

0,1
Country,LastName
USA,Smith
USA,Rose
USA,Williams
USA,Jones
