In [1]:
import mlflow
import openai
import os
import pandas as pd

# This notebook needs the OPENAI_API_KEY environment variable; stop right away
# with a clear message if it has not been set.
assert (
    "OPENAI_API_KEY" in os.environ
), "Please set the OPENAI_API_KEY environment variable."

# End any MLflow run that is still active (for example, one left open by an
# earlier, interrupted execution of this notebook) so the next cell starts clean.
mlflow.end_run()


* 'schema_extra' has been renamed to 'json_schema_extra'


In [None]:

# Point MLflow at the tracking server *before* selecting the experiment:
# set_experiment() looks the experiment up through whichever tracking URI is
# active at the time of the call, so the URI has to be configured first.
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# NOTE(review): this experiment ID presumably exists only on the server above;
# replace it (or use experiment_name=...) when targeting another MLflow instance.
mlflow.set_experiment(experiment_id="874674834457735198")

# System prompt shared by the chat template and the logged run parameter.
# It is defined exactly once so the value recorded in MLflow can never drift
# from the text that is actually sent to the model.
system_prompt = (
    "You are a world-class Python developer with an eagle eye for unintended "
    "bugs and edge cases. You carefully explain code with great detail and "
    "accuracy. You organize your explanations in markdown-formatted, bulleted "
    "lists."
)

# Chat message that establishes the assistant's persona.
explain_system_message = {
    "role": "system",
    "content": system_prompt,
}

# Chat message carrying the instruction for the code-explanation task.
explain_user_message = {
    "role": "user",
    "content": (
        "Please explain the following Python function. Review what each element "
        "of the function is doing precisely and what the author's intentions may "
        "have been. Organize your explanation as a markdown-formatted, bulleted "
        "list."
    ),
}

# Ordered message template handed to the model: system message, then user message.
explain_messages = [explain_system_message, explain_user_message]


# Sampling settings, defined once so that the values logged as run parameters
# are guaranteed to be the same ones stored with the model.
TEMPERATURE = 0.4
TOP_P = 0.8

# Example inputs: Python source snippets the model is asked to explain.
example_function1 = """def compute_tax(income):
    if income <= 18200:
        tax = 0
    elif income <= 37000:
        tax = (income - 18200) * 0.19
    elif income <= 90000:
        tax = (income - 37000) * 0.235 + 3572
    elif income <= 180000:
        tax = (income - 90000) * 0.37 + 20797
    else:
        tax = (income - 180000) * 0.45 + 54097

    return tax
"""
example_function2 = """from pyspark.sql import SparkSession
spark = SparkSession.builder.master("local[1]").appName("SparkByExamples.com").getOrCreate()
address = [(1,"14851 Jeffrey Rd","DE"),
    (2,"43421 Margarita St","NY"),
    (3,"13111 Siemon Ave","CA")]
df =spark.createDataFrame(address,["id","address","state"])
from pyspark.sql.functions import regexp_replace
df.withColumn('address', regexp_replace('address', 'Rd', 'Road')) \
  .show(truncate=False)
from pyspark.sql.functions import when
df.withColumn('address',
    when(df.address.endswith('Rd'),regexp_replace(df.address,'Rd','Road')) \
   .when(df.address.endswith('St'),regexp_replace(df.address,'St','Street')) \
   .when(df.address.endswith('Ave'),regexp_replace(df.address,'Ave','Avenue')) \
   .otherwise(df.address)) \
   .show(truncate=False)
stateDic={'CA':'California','NY':'New York','DE':'Delaware'}
df2=df.rdd.map(lambda x:
    (x.id,x.address,stateDic[x.state])
    ).toDF(["id","address","state"])
"""

# Evaluation data: one snippet per row, in a single "function" column.
functions = pd.DataFrame({"function": [example_function1, example_function2]})

# Run everything inside a context manager so the MLflow run is always closed,
# even when logging the model or evaluating it raises an exception.
with mlflow.start_run():
    mlflow.log_param("system_prompt", system_prompt)
    mlflow.log_param("temperature", TEMPERATURE)
    mlflow.log_param("top_p", TOP_P)

    # Log a prompt-engineered OpenAI chat model (a code explainer) to MLflow Tracking.
    # NOTE(review): explain_messages contains no {placeholder}; the MLflow OpenAI
    # flavor is expected to append each input row as an extra user message —
    # confirm for the installed MLflow version.
    logged_model = mlflow.openai.log_model(
        model="gpt-3.5-turbo",
        task=openai.ChatCompletion,
        artifact_path="model",
        temperature=TEMPERATURE,
        top_p=TOP_P,
        messages=explain_messages,
    )

    # Evaluate the logged model on the example snippets with the default evaluator.
    mlflow.evaluate(
        model=logged_model.model_uri,
        model_type="question-answering",
        data=functions,
    )

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2023/10/20 20:58:47 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
