# Getting started

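- Create the required Azure resources: the cell below uses the Azure CLI to create an Azure Cosmos DB account and to look up its endpoint and primary key.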

In [None]:
%%bash

# Abort the cell as soon as any command fails.
set -e

# Set your resource group and Cosmos DB account name
RESGROUP=""
ACCTNAME=""

# Stop with a clear message while the placeholders above are still empty,
# instead of passing empty arguments to the Azure CLI.
: "${RESGROUP:?Set RESGROUP to your resource group name}"
: "${ACCTNAME:?Set ACCTNAME to your Cosmos DB account name}"

az cosmosdb create --resource-group "$RESGROUP" --name "$ACCTNAME"

# Assign first and export afterwards: `export VAR=$(cmd)` reports the exit
# status of `export`, so `set -e` would not notice a failing `az` call and
# the variable would silently be exported empty.
ACCOUNT_URI=$(az cosmosdb show --resource-group "$RESGROUP" --name "$ACCTNAME" --query documentEndpoint --output tsv)
ACCOUNT_KEY=$(az cosmosdb list-keys --resource-group "$RESGROUP" --name "$ACCTNAME" --query primaryMasterKey --output tsv)

# NOTE: these exports are visible only to this cell's shell process and its
# children. The Python kernel does not inherit them, so ACCOUNT_URI and
# ACCOUNT_KEY must also be present in the kernel's own environment.
export ACCOUNT_URI ACCOUNT_KEY

In [None]:
# Set configuration settings
import os

# Both values must be present (and non-empty) in the environment of this
# Python kernel. Variables exported inside a %%bash cell exist only in that
# cell's shell subprocess, so they have to be set in the environment the
# kernel was started from.
_missing = [name for name in ("ACCOUNT_URI", "ACCOUNT_KEY") if not os.environ.get(name)]
if _missing:
    # Keep raising KeyError, as a plain os.environ[...] lookup would, but say
    # what is wrong and how to fix it.
    raise KeyError(
        "Missing or empty environment variable(s): "
        + ", ".join(_missing)
        + ". Set them in the environment of the notebook kernel before running this cell."
    )

URL = os.environ["ACCOUNT_URI"]
KEY = os.environ["ACCOUNT_KEY"]

# Options shared by every Cosmos DB read and write in this notebook.
config = {
    "spark.cosmos.accountEndpoint": URL,
    "spark.cosmos.accountKey": KEY,
    "spark.cosmos.database": "cosmicworks",
    "spark.cosmos.container": "products",
}

In [None]:
# Configure the Catalog API: bind the `cosmosCatalog` Spark SQL catalog to the
# Cosmos DB catalog class, then hand it the account endpoint and key that are
# already stored in `config`.
catalog_key = "spark.sql.catalog.cosmosCatalog"
spark.conf.set(catalog_key, "com.azure.cosmos.spark.CosmosCatalog")

# The catalog-scoped option names are the plain connector option names
# prefixed with the catalog key.
for setting in ("spark.cosmos.accountEndpoint", "spark.cosmos.accountKey"):
    spark.conf.set(f"{catalog_key}.{setting}", config[setting])

In [None]:
# Create the cosmicworks database unless it already exists
spark.sql("CREATE DATABASE IF NOT EXISTS cosmosCatalog.cosmicworks;")

# Create the products container through the Catalog API:
# partitioned on /category, autoscale throughput capped at 1000.
spark.sql(
    "CREATE TABLE IF NOT EXISTS cosmosCatalog.cosmicworks.products "
    "USING cosmos.oltp "
    "TBLPROPERTIES(partitionKeyPath = '/category', autoScaleMaxThroughput = '1000')"
)

# Create the employees container: three partition key paths
# (/organization, /department, /team) and manual throughput of 400.
spark.sql(
    "CREATE TABLE IF NOT EXISTS cosmosCatalog.cosmicworks.employees "
    "USING cosmos.oltp "
    "TBLPROPERTIES(partitionKeyPath = '/organization,/department,/team', manualThroughput = '400')"
)

## Ingest sample data

In [None]:
# Sample rows; the fields of each tuple line up with the column names passed
# to toDF() below: id, category, name, quantity, price, clearance.
products = (
    ("68719518391", "gear-surf-surfboards", "Yamba Surfboard", 12, 850.00, False),
    ("68719518371", "gear-surf-surfboards", "Kiama Classic Surfboard", 25, 790.00, True),
)

# Build a DataFrame from the rows, name its columns, and append it to the
# container selected by `config`.
(
    spark.createDataFrame(products)
    .toDF("id", "category", "name", "quantity", "price", "clearance")
    .write
    .format("cosmos.oltp")
    .options(**config)
    .mode("APPEND")
    .save()
)

## Query data

In [None]:
# Load the container selected by `config`, with schema inference switched on.
df = (
    spark.read.format("cosmos.oltp")
    .options(**config)
    .option("spark.cosmos.read.inferSchema.enabled", "true")
    .load()
)

# Render schema
df.printSchema()

# Render filtered data: first every row with quantity below 20, then at most
# one row flagged as clearance.
df.where("quantity < 20").show()
# `== True` is kept as written so the comparison stays a column expression.
df.filter(df.clearance == True).show(1)

# Render five rows of unfiltered and untruncated data
df.show(5, False)

# Render results of a raw SQL query issued through the catalog
rawQuery = "SELECT * FROM cosmosCatalog.cosmicworks.products WHERE price > 800"
rawDf = spark.sql(rawQuery)
rawDf.show()