# Test Notebook (local)

In [0]:
%sql
-- Return at most 10 rows, all columns, from samples.nyctaxi.trips.
SELECT *
FROM samples.nyctaxi.trips
LIMIT 10

In [0]:
%sh
# Print the version reported by the `databricks` command found on the shell PATH.
databricks --version

In [0]:
# Trivial arithmetic expression; as the last expression of the cell its value is displayed.
1 + 1

In [0]:
import sklearn as sk

# Show which scikit-learn version this environment provides (displayed as the cell result).
sk.__version__

In [0]:
%fs ls

In [0]:
# Alternative location, currently disabled:
#dbutils.fs.ls('dbfs:/mnt/data/export_models/')
# List the entries under 'dbfs:/mnt/Users'; the cell displays the returned listing.
dbutils.fs.ls('dbfs:/mnt/Users')

In [0]:
# Approximate pi with the alternating series 4/1 - 4/3 + 4/5 - 4/7 + ...
# truncated after `num_terms` terms.
num_terms = 1_000_000
pi = 0
for i in range(num_terms):
    denominator = 2 * i + 1          # odd denominators 1, 3, 5, ...
    sign = -1 if i % 2 else 1        # even-indexed terms are added, odd-indexed subtracted
    term = sign * 4 / denominator
    pi += term

print(pi)

In [0]:
import math

# Store math.pi rounded to 10 decimal places in `pi` and print it.
pi = round(math.pi, ndigits=10)
print(pi)

In [0]:
# Generate a random string of 10 hexadecimal characters:
# take the first 10 characters of the hex form of a freshly generated version-4 UUID.
import uuid
uuid.uuid4().hex[:10]


In [0]:
# NOTE(review): "spark.securitymanager.enabled" is set to False for this session; its effect
# cannot be determined from this notebook — confirm the key is recognized and still needed.
spark.conf.set("spark.securitymanager.enabled", False)
# Show up to 10 tables whose name contains the letter "g"
# (the name is upper-cased before matching, so the match ignores case).
display(spark.sql("show tables").filter("upper(tableName) like '%G%'").limit(10))

## header 2

In [0]:
%sql
-- List the catalogs whose name matches the pattern 'guido*'.
SHOW CATALOGS LIKE 'guido*';

In [0]:
%sql
-- Make `guido` the current catalog.
USE CATALOG guido;

## Another headline comes here...

In [0]:
%sql
-- Report which catalog is currently selected.
SELECT current_catalog();

In [0]:
%sql
-- List the available databases.
SHOW DATABASES;

In [0]:
%sql
-- Return at most 10 rows, all columns, from samples.nyctaxi.trips.
SELECT *
FROM samples.nyctaxi.trips
LIMIT 10;

In [0]:
# Read samples.nyctaxi.trips through the DataFrame API and display at most 10 rows.
trips_table = spark.table("samples.nyctaxi.trips")
df = trips_table.limit(10)
display(df)

In [0]:
%pip install -U sentence-transformers

In [0]:
from sentence_transformers import SentenceTransformer

In [0]:
# Load the pretrained sentence-embedding model identified by 'all-MiniLM-L6-v2'.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Input texts to embed.
sentences = ["This is a sentence", "This is another sentence", "This is a third sentence"]

# Compute the embeddings by calling model.encode, then print the shape of the result.
embeddings = model.encode(sentences)
print(embeddings.shape)

In [0]:
from faker import Faker
import pandas as pd

# Initialize Faker
# No seed is set in this cell. NOTE(review): generated values will presumably differ
# between runs — seed the generator if reproducible data is needed.
fake = Faker()

# Generate fake data for a CRM database
def generate_customers(n=1000):
    """Build a pandas DataFrame of ``n`` fake customer records.

    Each record has a sequential ``customer_id`` (1..n) plus ``first_name``,
    ``last_name``, ``email`` and ``phone`` values drawn from the module-level
    ``fake`` generator.

    Args:
        n: Number of customer rows to generate.

    Returns:
        A ``pd.DataFrame`` built from the list of generated records.
    """
    records = []
    for customer_id in range(1, n + 1):
        records.append({
            "customer_id": customer_id,
            "first_name": fake.first_name(),
            "last_name": fake.last_name(),
            "email": fake.email(),
            "phone": fake.phone_number(),
        })
    return pd.DataFrame(records)

def generate_orders(n=300, customer_count=1000):
    """Build a pandas DataFrame of ``n`` fake order records.

    Each record has a sequential ``order_id`` (1..n), a ``customer_id`` drawn
    from 1..``customer_count``, an ``order_date`` and an ``amount``, all taken
    from the module-level ``fake`` generator.

    Args:
        n: Number of order rows to generate.
        customer_count: Upper bound passed as ``max`` when drawing ``customer_id``.

    Returns:
        A ``pd.DataFrame`` built from the list of generated records.
    """
    records = []
    for order_id in range(1, n + 1):
        customer_id = fake.random_int(min=1, max=customer_count)
        order_date = fake.date_this_year()
        # NOTE(review): random_number presumably returns an int, which would make
        # round(..., 2) a no-op — confirm the intended format of `amount`.
        amount = round(fake.random_number(digits=5), 2)
        records.append({
            "order_id": order_id,
            "customer_id": customer_id,
            "order_date": order_date,
            "amount": amount,
        })
    return pd.DataFrame(records)

def generate_interactions(n=500, customer_count=1000):
    """Build a pandas DataFrame of ``n`` fake customer-interaction records.

    Each record has a sequential ``interaction_id`` (1..n), a ``customer_id``
    drawn from 1..``customer_count``, an ``interaction_date``, a ``channel``
    picked from a fixed set of four values, and free-text ``notes`` requested
    with ``max_nb_chars=200``. All values come from the module-level ``fake``
    generator.

    Args:
        n: Number of interaction rows to generate.
        customer_count: Upper bound passed as ``max`` when drawing ``customer_id``.

    Returns:
        A ``pd.DataFrame`` built from the list of generated records.
    """
    channels = ("Email", "Phone", "In Person", "Social Media")
    records = []
    for interaction_id in range(1, n + 1):
        customer_id = fake.random_int(min=1, max=customer_count)
        interaction_date = fake.date_this_year()
        channel = fake.random_element(elements=channels)
        notes = fake.text(max_nb_chars=200)
        records.append({
            "interaction_id": interaction_id,
            "customer_id": customer_id,
            "interaction_date": interaction_date,
            "channel": channel,
            "notes": notes,
        })
    return pd.DataFrame(records)

# Build the three pandas DataFrames using each generator's default sizes.
customers_df = generate_customers()
orders_df = generate_orders()
interactions_df = generate_interactions()

# Mirror each pandas DataFrame as a Spark DataFrame.
customers_sdf = spark.createDataFrame(customers_df)
orders_sdf = spark.createDataFrame(orders_df)
interactions_sdf = spark.createDataFrame(interactions_df)

# Render the Spark DataFrames in the notebook, in the order customers, orders, interactions.
for sdf in (customers_sdf, orders_sdf, interactions_sdf):
    display(sdf)

# Persist each Spark DataFrame as a Delta table in guido.default,
# replacing any existing table contents (mode "overwrite").
for sdf, table_name in (
    (customers_sdf, "guido.default.customers"),
    (orders_sdf, "guido.default.orders"),
    (interactions_sdf, "guido.default.interactions"),
):
    sdf.write.format("delta").mode("overwrite").saveAsTable(table_name)

In [0]:
%pip install mlflow databricks-agents
# Restart the notebook's Python process after the install above.
# NOTE(review): presumably needed so the newly installed packages are importable — confirm.
dbutils.library.restartPython()

In [0]:
import mlflow
import pandas as pd

# URI of the registered MLflow model (version 1) that is evaluated below.
MODEL_URI = "models:/basic_rag_bot_Content_discovery/1"

# Load your model from MLflow.
# `mlflow.pyfunc.load_model` returns a PyFuncModel; that object has no `model_uri`
# attribute (`model_uri` belongs to the ModelInfo returned by `log_model`), so the
# URI is kept in MODEL_URI and passed to `mlflow.evaluate` directly.
logged_model = mlflow.pyfunc.load_model(model_uri=MODEL_URI)

# Evaluation set: one entry per column for each of the two example requests.
examples = {
    "request": [
      {
      # Recommended `messages` format
        "messages": [{
          "role": "user",
          "content": "Spark is a data analytics framework."
        }],
      },
      # Primitive string format
      # Note: Using a primitive string is discouraged. The string will be wrapped in the
      # OpenAI messages format before being passed to your agent.
      "How do I convert a Spark DataFrame to Pandas?"
    ],
    "response": [
        "Spark is a data analytics framework.",
        "This is not possible as Spark is not a panda.",
    ],
    "retrieved_context": [ # Optional, needed for judging groundedness.
        [{"doc_uri": "doc1.txt", "content": "In 2013, Spark, a data analytics framework, was open sourced by UC Berkeley's AMPLab."}],
        [{"doc_uri": "doc2.txt", "content": "To convert a Spark DataFrame to Pandas, you can use toPandas()"}],
    ],
    "expected_response": [ # Optional, needed for judging correctness.
        "Spark is a data analytics framework.",
        "To convert a Spark DataFrame to Pandas, you can use the toPandas() method.",
    ]
}

result = mlflow.evaluate(
    data=pd.DataFrame(examples),    # Your evaluation set
    model=MODEL_URI,                # If you have an MLflow model. `retrieved_context` and `response` will be obtained from calling the model.
    model_type="databricks-agent",  # Enable Mosaic AI Agent Evaluation
)

# Review the evaluation results in the MLflow UI (see console output), or access them in place:
display(result.tables['eval_results'])