In [0]:
# List of essential Databricks commands with descriptions and demos

# 1. %fs: File System commands
# Description: Used to interact with the Databricks File System (DBFS).
# Demo:
# %fs ls /mnt/data

# 2. %sql: SQL commands
# Description: Used to run SQL queries.
# Demo:
# %sql
# SELECT * FROM table_name

# 3. %md: Markdown
# Description: Used to render Markdown in a notebook.
# Demo:
# %md
# # This is a Markdown cell

# 4. %sh: Shell commands
# Description: Used to run shell commands.
# Demo:
# %sh
# ls -l

# 5. %pip: Install Python packages
# Description: Installs Python packages as notebook-scoped libraries, available only to the current notebook session.
# Demo:
# %pip install pandas

# 6. display(): Display DataFrames
# Description: Used to display DataFrames in a rich format.
# Demo:
# df = spark.read.csv("/mnt/data/file.csv")
# display(df)

# 7. dbutils: Databricks Utilities
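# Description: Helper modules for common tasks, e.g. file system access (dbutils.fs),
# secrets (dbutils.secrets), input widgets (dbutils.widgets), and notebook workflows (dbutils.notebook).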
# Demo:
# dbutils.fs.help()  # Shows help for file system utilities

In [0]:
# Basic PySpark Commands Demo
#
# Walks through a read -> inspect -> transform -> write round trip on a sample
# CSV. Relies on the `spark` session and the `display()` helper supplied by the
# notebook runtime; "column1" and "column2" are placeholder column names.

# Input and output locations, named once so they are easy to retarget.
SAMPLE_CSV_PATH = "/mnt/data/sample.csv"
OUTPUT_PARQUET_PATH = "/mnt/data/output.parquet"

# 1. Reading a CSV file into a DataFrame
# header=True takes the column names from the first row; inferSchema=True asks
# Spark to derive column types from the data, at the cost of an extra pass.
df = spark.read.csv(SAMPLE_CSV_PATH, header=True, inferSchema=True)

# 2. Displaying the DataFrame
display(df)

# 3. Showing the schema of the DataFrame
df.printSchema()

# 4. Selecting specific columns
selected_df = df.select("column1", "column2")
display(selected_df)

# 5. Filtering rows based on a condition
# Keeps only the rows whose column1 value is strictly greater than 100.
filtered_df = df.filter(df["column1"] > 100)
display(filtered_df)

# 6. Grouping by a column and performing aggregation
# The dict form maps a column name to an aggregate function name: here the sum
# of column2 within each distinct column1 value.
grouped_df = df.groupBy("column1").agg({"column2": "sum"})
display(grouped_df)

# 7. Writing the DataFrame to a Parquet file
# The writer's default save mode raises an error when the target path already
# exists, so re-running this cell would fail. Overwrite mode keeps the cell
# re-runnable; note that it replaces whatever is already at the output path.
df.write.mode("overwrite").parquet(OUTPUT_PARQUET_PATH)