Databricks SQL also provides built-in GenAI functions that call specialized LLMs with a custom prompt and return fast answers.
These functions are:

    ai_analyze_sentiment
    ai_classify
    ai_extract
    ai_fix_grammar
    ai_gen
    ai_mask
    ai_similarity
    ai_summarize
    ai_translate


In [0]:
%python
%pip install dbdemos
# dbdemos is the package the following cells import to list and install demos.

Collecting dbdemos
  Downloading dbdemos-0.6.26-py3-none-any.whl.metadata (9.4 kB)
Downloading dbdemos-0.6.26-py3-none-any.whl (30.1 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/30.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m29.9/30.1 MB[0m [31m156.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.1/30.1 MB[0m [31m113.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dbdemos
Successfully installed dbdemos-0.6.26
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
%python
# Import the dbdemos package (installed with %pip earlier) and list the available demos.
# The module name `dbdemos` stays bound for the cell that calls dbdemos.install(...).
import dbdemos
dbdemos.list_demos()

In [0]:
%python
# Install the 'sql-ai-functions' demo; requires `dbdemos` to have been imported in an earlier cell.
dbdemos.install('sql-ai-functions') 

Installing demo sql-ai-functions under /Users/amit.dixit@impetus.com/dbxsql-feature-set/sql/ai, please wait...
Help us improving dbdemos, share your feedback or create an issue if something isn't working: https://github.com/databricks-demos/dbdemos


In [0]:
-- Guard: this notebook must run on a SQL Warehouse or Serverless endpoint (not a classic cluster).
-- assert_true fails with the message below when the check does not hold; otherwise it returns an untyped NULL.
SELECT
  assert_true(
    current_version().dbsql_version IS NOT NULL,
    'YOU MUST USE A SQL WAREHOUSE OR SERVERLESS, not a classic cluster'
  );

-- ai_gen: send a free-form prompt and get the generated text back.
SELECT
  ai_gen('Generate a concise, cheerful email title for a summer bike sale with 20% discount');

"ai_gen('Generate a concise, cheerful email title for a summer bike sale with 20% discount')"
"Here are a few options: 1. ""Pedal into Savings: 20% Off Summer Bike Sale!"" 2. ""Summer Cycle Sale: 20% Discount Inside!"" 3. ""Ride into Summer with 20% Off Bikes!"" 4. ""Sunny Savings: 20% Off Bikes This Summer!"" 5. ""Gear Up for Summer: 20% Off Bike Sale!"" Choose the one that best fits your tone and style!"


In [0]:
-- ai_fix_grammar: returns the input sentence with its grammar corrected.
SELECT
  ai_fix_grammar('This sentence have some mistake');

ai_fix_grammar('This sentence have some mistake')
This sentence has some mistakes


In [0]:
-- ai_classify: assigns the text to one of the candidate labels supplied in the array.
SELECT
  ai_classify("My password is leaked.", ARRAY("urgent", "not urgent"));

"ai_classify(""My password is leaked."",ARRAY(""urgent"",""not urgent""))"
urgent


In [0]:
-- ai_translate: translates the text into the language given by the second argument ("hi" here).
SELECT
  ai_translate("This function is so amazing!", "hi");

"ai_translate(""This function is so amazing!"",""hi"")"
यह फंक्शन इतना अद्भुत है!


In [0]:
-- ai_similarity: returns a numeric similarity score for a pair of strings.
-- The first pair differs in the last name; the second pair is identical.
SELECT
  ai_similarity('Amit Sharma', 'Amit Dixit'),
  ai_similarity('Amit Dixit', 'Amit Dixit');

"ai_similarity('Amit Sharma','Amit Dixit')","ai_similarity('Amit Dixit','Amit Dixit')"
0.72022814,1.0


In [0]:
-- Same guard as before: this notebook must run on a SQL Warehouse or Serverless endpoint (not a classic cluster).
-- assert_true fails with the message below when the check does not hold; otherwise it returns an untyped NULL.
SELECT assert_true(
  current_version().dbsql_version IS NOT NULL,
  'YOU MUST USE A SQL WAREHOUSE OR SERVERLESS, not a classic cluster'
);

-- Work in the dbdemos_ai_query schema of the main catalog, creating the schema on first run.
USE CATALOG main;
CREATE SCHEMA IF NOT EXISTS dbdemos_ai_query;
USE SCHEMA dbdemos_ai_query;

In [0]:
-- ai_query: send a prompt directly to the databricks-meta-llama-3-3-70b-instruct serving endpoint.
SELECT
  ai_query(
    'databricks-meta-llama-3-3-70b-instruct',
    'Generate a short product review for a red dress. The customer is very happy with the article.'
  ) AS product_review;

product_review
"""I am absolutely thrilled with this stunning red dress! The moment I put it on, I felt like a million bucks. The vibrant color is even more gorgeous in person, and the fit is perfect. The material is comfortable and flows beautifully, making me feel confident and elegant. I've received countless compliments every time I've worn it, and I couldn't be happier with my purchase. The quality is exceptional, and I can tell it will be a favorite in my wardrobe for years to come. Five stars isn't enough - I'd give it ten stars if I could!"""


In [0]:
-- ASK_LLM_MODEL: thin wrapper around ai_query, pinned to the
-- databricks-meta-llama-3-3-70b-instruct serving endpoint.
--   prompt          : text sent to the model.
--   response_format : forwarded unchanged as the third argument of ai_query;
--                     defaults to '{"type": "string"}'.
-- Returns the model answer as a STRING.
-- NOTE(review): response_format is passed positionally. Confirm against the ai_query reference
-- that the third positional argument is interpreted as the response format and not as a return type.
CREATE OR REPLACE FUNCTION ASK_LLM_MODEL(
  prompt STRING,
  response_format STRING DEFAULT '{"type": "string"}'
)
RETURNS STRING
RETURN ai_query('databricks-meta-llama-3-3-70b-instruct', prompt, response_format);

-- Optional, for the demo only: change the owner so other users can access your function.
-- ALTER FUNCTION ASK_LLM_MODEL OWNER TO `your_principal`;

In [0]:
-- Call the ASK_LLM_MODEL wrapper with a prompt only, so response_format takes its default value.
SELECT
  ASK_LLM_MODEL("Generate a short product review for a red dress. The customer is very happy with the article.");

"ASK_LLM_MODEL(""Generate a short product review for a red dress. The customer is very happy with the article."")"
"""I am absolutely thrilled with this stunning red dress! The moment I put it on, I felt like a million bucks. The vibrant color is even more gorgeous in person, and the fit is perfect. The material is comfortable and flows beautifully, making me feel confident and elegant. I've received countless compliments every time I've worn it, and I couldn't be happier with my purchase. The quality is exceptional, and I can tell it will be a favorite in my wardrobe for years to come. Five stars isn't enough - I'd give it ten stars if I could!"""


In [0]:
-- Ask ASK_LLM_MODEL for a 2-row sample dataset of grocery product reviews; the answer is returned
-- as a single string column aliased fake_reviews. The second argument overrides the wrapper's
-- default response_format.
-- NOTE(review): "{'type': 'json_object'}" uses single quotes inside the JSON text, unlike the
-- wrapper's default '{"type": "string"}' — confirm the endpoint accepts this form.
-- The prompt is one multi-line string literal: its line breaks and indentation are sent to the model.
SELECT ASK_LLM_MODEL(
      'Generate a sample dataset of 2 rows that contains the following columns: "review_date" (random dates in 2022), 
      "review_id" (random id), "customer_id" (random long from 1 to 100)  and "review". 
      Reviews should mimic useful product reviews from popular grocery brands product left on an e-commerce marketplace website. The review must include the product name.

      The reviews should vary in length (shortest: one sentence, longest: 2 paragraphs), sentiment, and complexity. A very complex review 
      would talk about multiple topics (entities) about the product with varying sentiment per topic. Provide a mix of positive, negative, 
      and neutral reviews.

      Give me JSON only. No text outside JSON. No explanations or notes
      [{"review_date":<date>, "review_id":<long>, "customer_id":<long>, "review":<string>}]', "{'type': 'json_object'}") as fake_reviews;

fake_reviews
"[  {  ""review_date"": ""2022-07-15"",  ""review_id"": 814,  ""customer_id"": 43,  ""review"": ""I recently purchased the Kellogg's Frosted Flakes cereal and I must say it's a classic, the taste is still the same as I remembered from my childhood, but the box size has decreased over the years which is a bit disappointing.""  },  {  ""review_date"": ""2022-03-20"",  ""review_id"": 275,  ""customer_id"": 91,  ""review"": ""I've been buying the Nestle Pure Life water from this website for a while now and I have mixed feelings about it. On one hand, the water tastes great and the price is reasonable, but on the other hand, the packaging is not very eco-friendly and the delivery service has been inconsistent. The Haagen-Dazs ice cream I also purchased is amazing though, the cookies and cream flavor is my favorite. Overall, I would recommend the ice cream but not the water.""  } ]"


In [0]:
-- GENERATE_FAKE_REVIEWS: builds a prompt asking for fake grocery product reviews, sends it through
-- ASK_LLM_MODEL, and parses the JSON answer with FROM_JSON.
--   num_reviews : number of rows requested in the prompt (default 5); it is also used as the
--                 upper bound of the requested customer_id range.
-- Returns array<struct<review_date:date, review_id:long, customer_id:long, review:string>>.
-- NOTE(review): the row count is only requested in the prompt text; nothing in this function
-- enforces it, so callers should not assume the array has exactly num_reviews elements.
-- The prompt is a multi-line string literal: its line breaks and indentation are sent to the model.
CREATE OR REPLACE FUNCTION GENERATE_FAKE_REVIEWS(num_reviews INT DEFAULT 5)
  RETURNS array<struct<review_date:date, review_id:long, customer_id:long, review:string>>
  RETURN 
  SELECT FROM_JSON(
      ASK_LLM_MODEL(
        CONCAT('Generate a sample dataset of ', num_reviews, ' rows that contains the following columns: "review_date" (random dates in 2022), 
        "review_id" (random long), "customer_id" (random long from 1 to ', num_reviews, '), and "review". 
        Reviews should mimic useful product reviews from popular grocery brands product left on an e-commerce marketplace website. The review must include the product name.
        
        The reviews should vary in length (shortest: one sentence, longest: 2 paragraphs), sentiment, and complexity. A very complex review 
        would talk about multiple topics (entities) about the product with varying sentiment per topic. Provide a mix of positive, negative, 
        and neutral reviews.

        Give me JSON only. No text outside JSON. No explanations or notes
        [{"review_date":<date>, "review_id":<long>, "customer_id":<long>, "review":<string>}]'), "{'type': 'json_object'}"), 
        "array<struct<review_date:date, review_id:long, customer_id:long, review:string>>")

-- ALTER FUNCTION GENERATE_FAKE_REVIEWS OWNER TO `your_principal`; -- for the demo only, make sure other users can access your function

In [0]:
-- Preview: request 10 reviews from GENERATE_FAKE_REVIEWS, then turn the returned array into
-- one row per review with one column per struct field.
WITH generated AS (
  SELECT GENERATE_FAKE_REVIEWS(10) AS reviews
),
exploded AS (
  SELECT explode(reviews) AS review
  FROM generated
)
SELECT review.*
FROM exploded;

review_date,review_id,customer_id,review


In [0]:
-- Persist generated reviews: request 50 reviews from GENERATE_FAKE_REVIEWS and store them as
-- one row per review, one column per struct field.
CREATE OR REPLACE TABLE fake_reviews
COMMENT 'Raw Review Data'
AS
WITH generated AS (
  SELECT GENERATE_FAKE_REVIEWS(50) AS reviews
),
exploded AS (
  SELECT explode(reviews) AS review
  FROM generated
)
SELECT review.*
FROM exploded;

num_affected_rows,num_inserted_rows


In [0]:
-- GENERATE_FAKE_CUSTOMERS: builds a prompt asking for a fake customer dataset, sends it through
-- ASK_LLM_MODEL, and parses the JSON answer with FROM_JSON.
--   num_reviews : despite its name, the number of customers requested in the prompt (default 10);
--                 it is also used as the upper bound of the requested customer_id range.
-- Returns array<struct<customer_id:long, firstname:string, lastname:string, order_count:int>>.
-- NOTE(review): the customer count is only requested in the prompt text; nothing in this function
-- enforces it, so callers should not assume the array has exactly num_reviews elements.
-- The prompt is a multi-line string literal: its line breaks and indentation are sent to the model.
CREATE OR REPLACE FUNCTION GENERATE_FAKE_CUSTOMERS(num_reviews INT DEFAULT 10)
  RETURNS array<struct<customer_id:long, firstname:string, lastname:string, order_count:int>>
  RETURN 
  SELECT FROM_JSON(
      ASK_LLM_MODEL(
        CONCAT('Generate a sample dataset of ', num_reviews, ' customers containing the following columns: 
        "customer_id" (long from 1 to ', num_reviews, '), "firstname", "lastname" and order_count (random positive number, smaller than 200)

        Give me JSON only. No text outside JSON. No explanations or notes
        [{"customer_id":<long>, "firstname":<string>, "lastname":<string>, "order_count":<int>}]'), "{'type': 'json_object'}"), 
        "array<struct<customer_id:long, firstname:string, lastname:string, order_count:int>>")
        
-- ALTER FUNCTION GENERATE_FAKE_CUSTOMERS OWNER TO `your_principal`; -- for the demo only, make sure other users can access your function

In [0]:
-- Persist generated customers: request 50 customers from GENERATE_FAKE_CUSTOMERS and store them as
-- one row per customer, one column per struct field.
CREATE OR REPLACE TABLE fake_customers
COMMENT 'Raw customers'
AS
WITH generated AS (
  SELECT GENERATE_FAKE_CUSTOMERS(50) AS customers
),
exploded AS (
  SELECT explode(customers) AS customer
  FROM generated
)
SELECT customer.*
FROM exploded;

num_affected_rows,num_inserted_rows


In [0]:
-- Inspect the generated reviews.
-- Columns are listed explicitly instead of SELECT * so the result shape stays stable
-- if the table is later recreated with a different schema.
SELECT
  review_date,
  review_id,
  customer_id,
  review
FROM
  fake_reviews;

review_date,review_id,customer_id,review
2022-01-01,1234567890,1,"I loved the new Kellogg's cereal, it's crunchy and delicious!"
2022-01-05,2345678901,2,"The Coca-Cola soda I received was flat, very disappointing."
2022-01-10,3456789012,3,"The Nestle coffee is okay, but the price is a bit high for the quality."
2022-01-15,4567890123,4,I've been buying the Pepsi soda for years and it never fails to satisfy my thirst.
2022-01-20,5678901234,5,"The General Mills yogurt is a great snack, but the packaging could be more eco-friendly."
2022-01-25,6789012345,6,"I was really looking forward to trying the new Lay's chips, but they were too spicy for my taste."
2022-02-01,7890123456,7,"The Pringles can is very convenient, but the flavor is not as good as the original."
2022-02-05,8901234567,8,"I've tried many different types of coffee, but the Starbucks coffee is still my favorite."
2022-02-10,9012345678,9,"The Kraft mac and cheese is a classic, but it's not as healthy as I would like."
2022-02-15,1023456789,10,"I was surprised by how much I enjoyed the Dove chocolate, it's rich and creamy."


In [0]:
-- Inspect the generated customers.
-- Columns are listed explicitly instead of SELECT * so the result shape stays stable
-- if the table is later recreated with a different schema.
SELECT
  customer_id,
  firstname,
  lastname,
  order_count
FROM
  fake_customers;

customer_id,firstname,lastname,order_count
1,Eleanor,Wiggins,143
2,Lucas,Brooks,67
3,May,Parker,118
4,Oliver,Russell,191
5,Ava,Lee,52
6,Isabella,Hall,82
7,Mason,Walker,129
8,Charlotte,Young,46
9,Logan,Allen,165
10,Abigail,King,97
