In [None]:
import urllib as urllib
import gradio as gr
import json
import openai
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
import configparser
import time
from astral import sun,Observer
import pickle
import datetime as dt
import pandas as pd

from datasets import load_dataset




In [None]:
# Load local settings; the OpenAI key is kept in config.ini rather than in the notebook.
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files it
# actually parsed, so check the result instead of failing later with an opaque KeyError.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "config.ini was not found or could not be read; "
        "it must define openai_api_key under [DEFAULT]"
    )


openai.api_key =config['DEFAULT']['openai_api_key']


In [None]:

# Load the auto-mpg dataset via the `datasets` library and copy its 'train'
# split into a pandas DataFrame. `df` is the module-level frame that the
# aggGroupBy and minOrMaxCase functions read.
dataset = load_dataset("scikit-learn/auto-mpg")
df=pd.DataFrame(dataset['train'])


# Short description of the dataset; it is concatenated into the system prompt.
dsInfo = "car mpg statistics"

In [None]:


def _asNameList(value):
    """Normalize None, a comma-separated string, a list, or a single name into a list of names."""
    if value is None:
        return []
    if isinstance(value, list):
        items = value
    elif isinstance(value, str):
        items = value.split(',')
    else:
        items = [value]
    names = []
    for item in items:
        if isinstance(item, str):
            # Tolerate "mean, max" style input and drop empty fragments.
            item = item.strip()
            if not item:
                continue
        names.append(item)
    return names


def aggGroupBy(target,groupList=None,functionList=['mean'],filterConditions=None,precision=2):
    """Build an aggregated report of one column of the module-level ``df``.

    Parameters
    ----------
    target : str
        Column to aggregate.
    groupList : str | list | None
        Column(s) to group by. May be a list, a single name, or a
        comma-separated string. When omitted, the whole frame is aggregated.
    functionList : str | list
        Aggregation function name(s); a comma-separated string is accepted.
        The default list is never mutated.
    filterConditions : str | None
        Optional row filter, evaluated with ``DataFrame.query`` before
        aggregating.
    precision : int
        Number of decimals the report is rounded to.

    Returns
    -------
    str
        CSV text of (at most) the first 20 rows of the report.

    Raises
    ------
    ValueError
        If ``filterConditions`` cannot be evaluated against ``df``.
    """
    groupKeys = _asNameList(groupList)
    aggNames = _asNameList(functionList) or ['mean']

    data = df
    if filterConditions:
        # NOTE(security): the expression is evaluated by pandas and, in this app,
        # is produced by the language model. Do not expose this to untrusted
        # callers without validating the expression first.
        try:
            data = df.query(filterConditions)
        except Exception as exc:
            raise ValueError(
                "could not apply filterConditions {!r}: {}".format(filterConditions, exc)
            ) from exc

    if groupKeys:
        report = data.groupby(groupKeys).agg({target: aggNames})
    else:
        # No grouping requested: aggregate the whole (possibly filtered) frame.
        report = data.agg({target: aggNames})
    return report.round(precision).head(20).to_csv()


def minOrMaxCase(target,aggFunction):
    """Return, as a JSON string, the single row of ``df`` at the extreme of ``target``.

    ``aggFunction == 'max'`` sorts descending; any other value sorts ascending.
    For the 'acceleration' column the direction is inverted, so 'max' picks the
    smallest value and anything else picks the largest (presumably because the
    column stores a time, where lower means quicker -- confirm against the data).
    """
    ascending = aggFunction != 'max'
    if target == 'acceleration':
        ascending = not ascending
    topRow = df.sort_values(target, ascending=ascending).iloc[:1]
    firstRecord = topRow.to_dict(orient='records')[0]
    return json.dumps(firstRecord)


# Function specifications sent to the chat model. Each entry's "parameters" is a
# JSON-Schema object: argument definitions go under "properties" and the list of
# mandatory arguments goes under "required" *inside* "parameters".
functions = [
    {
        "name": "aggGroupBy",
        "description": '''Makes an aggregated report from the dataset.
          Lets the user specify a variable to be analyzed, one or more grouping variables, 
          and one or more aggregation functions from (min,max,mean,std).  You can also use pandas to filter the dataframe
          by specifying filterConditions.''',
        "parameters": {
            "type": "object",
            "properties": {
                "groupList": {
                    "type": "string",
                    "description": "List of variable names to group by",
                },
                "target": {"type": "string","description":"the variable name to be analyzed and aggregated"},
                "functionList": {"type": "string","description":"list of pandas aggregation functions, chosen from min, max, mean, std"},
                "filterConditions": {"type": "string","description":"pandas where conditions to filter the dataset with"},
            },
            "required": ["target", "functionList"],
        },
    },
    {
        "name": "minOrMaxCase",
        "description": '''Finds one case from the dataframe with a max or min value on the specified target variable.''',
        "parameters": {
            "type": "object",
            # Only the arguments the Python function minOrMaxCase(target, aggFunction)
            # accepts are declared; anything else would raise TypeError when dispatched.
            "properties": {
                "aggFunction": {
                    "type": "string",
                    "description": "Specifies if the user wants the argmax or argmin.  return either 'min' or 'max'",
                },
                "target": {"type": "string","description":"the variable name to be analyzed and aggregated"},
            },
            # Both are positional arguments without defaults in the Python function.
            "required": ["target", "aggFunction"],
        },
    },
]



In [None]:
# Comma-separated column names of df, advertised to the model in the system prompt.
varnames = ','.join(list(df.columns))


# Module-level conversation history shared by the cells below and by the Gradio
# callbacks. It starts with the system prompt plus one sample user request.
# Note the space before "for the user": dsInfo is concatenated directly into the
# sentence, so without it the description runs into the following word.
messages = [
        {  
        "role": "system",
        "content": """You are a helpful assistant that analyzes a dataset about """+dsInfo+""" for the user. You can use
        function calls to get data, or respond in sentences when proovided the data.
        If you don't know how to apply to a function, apoligize and say you don't know how to do that.
          You can answer questions about these variable names :"""+varnames+"""
        All parameter values must be one of the specified variable names.
        Do not make up information, and keep your responses concise.
        If you identify the need to reply with a function call, but some required parameter are missing,
          reply with a follow up question asking for missing parameters.
          You can also answer questions about the dataset metadata, and explain statistical analyses."""
    },
    {'role':"user",
     'content':"Make a report of mean and max displacement by number of cylinders. Aggregatte functions average, min and max mileage per number of cylinders."}
]


In [None]:
# Manual smoke test: send the current conversation to the model and show how the
# reply is classified ('content' or 'function_call').
# NOTE: callLLM and classifyReponse are defined in a cell further down, so that
# cell has to be executed before this one on a fresh kernel.
#messages.append({'role':'user','content':'what is the average mileage by year?'})
response = callLLM(messages,functions)
cr = classifyReponse(response)
cr

In [None]:
#Run a function to check if it's got the right info, and send it back if the function call doesn't make sense.

In [None]:
# Manually dispatch the function call requested in `response`.
# This reads the 'function_call' entry of the first choice, so it only works
# when the model answered with a function call rather than plain content.
f = response['choices'][0]['message']['function_call']['name']
# The arguments arrive as a JSON-encoded string; decode them into keyword arguments.
kwargs = response['choices'][0]['message']['function_call']['arguments']
kwargs = json.loads(kwargs)
print(kwargs,f)
# Look the function up by name among the notebook globals and call it.
function_response = globals()[f](**kwargs)

In [None]:
# Inspect the decoded arguments and the raw output of the dispatched function;
# the last expression displays the output's Python type.
print(kwargs)
print(function_response)
type(function_response)


In [None]:
# Hand the function output back to the model as an extra system message and ask
# for the final answer. function_response must be a string here because it is
# concatenated onto the instruction text.
# NOTE: callLLM and classifyReponse are defined in a cell further down.
messages.append({'role':'system','content':'''use the following data to concisely answer the user's question above accurately.
                 Provide one or two other interesting facts from the JSON data.
                 Do not use a function call, but response with the data included here in JSON:JSON:'''+function_response})
response = callLLM(messages,functions)
cr = classifyReponse(response)

# Reference value: the row with the lowest mpg as a JSON string.
# 'records' is the supported orient name (the same one minOrMaxCase uses);
# 'rows' is not an accepted value of DataFrame.to_dict.
json.dumps(df.sort_values('mpg',ascending=True).head(1).to_dict(orient='records')[0])

In [None]:
def clearItOut():
    """Reset the module-level ``messages`` history to just the system prompt.

    The prompt is rebuilt from the globals ``dsInfo`` and ``varnames``. The
    function returns None; it is also wired as the Clear button handler with
    the chatbot as its output.
    """
    global messages
    # The space before "for the user" keeps dsInfo from running into the next word.
    messages = [
        {  
        "role": "system",
        "content": """You are a helpful assistant that analyzes a dataset about """+dsInfo+""" for the user. 
        If the user's question is not directly related to the dataset, politely reject it.
        You can use function calls to get data, or respond in sentences when proovided the data.
        If you don't know how to apply to a function, apoligize and say you don't know how to do that.
          You can answer questions about these variable names :"""+varnames+"""
        All parameter values must be one of the specified variable names.
        Do not make up information, and keep your responses concise.
        If you identify the need to reply with a function call, but some required parameter are missing,
          reply with a follow up question asking for missing parameters.
          You can also answer questions about the dataset metadata, and explain statistical analyses."""
    },
    ]

In [None]:


def classifyReponse(response):
    """Tell whether the first choice of a chat completion carries text or a function call.

    Returns 'content' when the message's content is not None, otherwise
    'function_call' when the message has a 'function_call' entry, otherwise None.
    Text takes precedence if both are present.
    """
    reply = response['choices'][0]['message']
    if reply['content'] is not None:
        return 'content'
    if 'function_call' in reply:
        return 'function_call'
    return None
    
def prepResponse(response):
    """Turn a chat completion into the next history message and the next action.

    For a function call, the named function is looked up among the module
    globals, called with the JSON-decoded arguments, and its output is wrapped
    in a ``role: function`` message; the action is 'llm' (call the model again)
    and there is no text for the UI yet. For plain content, the text is wrapped
    in a ``role: assistant`` message and the action is 'ui'.

    Returns
    -------
    tuple
        ``(bot_message, message, action)`` where ``bot_message`` is the text to
        show (or None), ``message`` is the dict to append to the history and
        ``action`` is 'llm' or 'ui'.

    Raises
    ------
    ValueError
        If the response holds neither content nor a function call.
    """
    cr = classifyReponse(response)
    print("classified as ",cr)
    reply = response['choices'][0]['message']
    if cr=='function_call':
        f = reply['function_call']['name']
        kwargs = json.loads(reply['function_call']['arguments'])
        function_response = globals()[f](**kwargs)
        # The tool output is passed through unchanged. Joining it with ','
        # would iterate a string character by character and corrupt the payload.
        if not isinstance(function_response, str):
            function_response = str(function_response)
        message = {
            "role": "function",
            "name": f,
            "content": function_response,
        }
        return None, message, 'llm'
    if cr=='content':
        newContent = reply['content']
        message = {
            "role": "assistant",
            "content": newContent,
        }
        return newContent, message, 'ui'
    # Neither text nor a function call: fail with a clear error rather than an
    # UnboundLocalError on the return values.
    raise ValueError("response contains neither content nor a function_call")

def callLLM(messages,functions):
    """Send the conversation and the function specifications to the chat model.

    Uses the "gpt-3.5-turbo" model with temperature 0 and returns the raw
    completion object from ``openai.ChatCompletion.create`` unchanged.
    """
    request = {
        "model": "gpt-3.5-turbo",
        "messages": messages,
        "functions": functions,
        "temperature": 0.,
    }
    return openai.ChatCompletion.create(**request)





In [None]:
import datetime

# Start of the current conversation window; reset whenever the history is cleared.
startUpTime = datetime.datetime.now()
# Minutes after which the shared conversation history is wiped on the next message.
SESSION_TIMEOUT_MINUTES = 10
# Upper bound on consecutive function-call round trips for one user message.
MAX_FUNCTION_ROUNDS = 3


with gr.Blocks(title="Conversational BI Demo",theme='YenLai/Superhuman') as demo:
    gr.Markdown("Use the textbox below to ask questions about the dataset.")
    chatbot = gr.Chatbot()
    msg = gr.Textbox("What is the car with the best mileage?")
    clear = gr.Button("Clear")
    gr.Markdown("To learn about this demo, go to my Github at https://github.com/jonathanmanly/runGearGuruconvBI")

    def user(user_message, history):
        """Queue the user's text in the chat history and empty the textbox.

        If the current conversation window is older than the timeout, the
        model-side history is reset first and a new window starts.
        """
        global startUpTime
        elapsed = datetime.datetime.now() - startUpTime
        # total_seconds() covers the whole interval; .seconds would ignore full days.
        if elapsed.total_seconds()/60 > SESSION_TIMEOUT_MINUTES:
            clearItOut()
            # Restart the window so the history is not wiped again on every later message.
            startUpTime = datetime.datetime.now()
        return "", history + [[user_message, None]]

    def bot(history):
        """Answer the latest user turn, resolving function calls, and stream the reply."""
        messages.append({'role':'user',"content":history[-1][0]})
        raw_message = callLLM(messages,functions)
        print(raw_message)
        bot_message, history_message,action = prepResponse(raw_message)
        messages.append(history_message)

        # After a function call the model must be asked again to phrase the answer.
        # It may request another function, so repeat up to a fixed bound.
        rounds = 0
        while action =='llm' and rounds < MAX_FUNCTION_ROUNDS:
            raw_message = callLLM(messages,functions)
            bot_message, history_message,action = prepResponse(raw_message)
            messages.append(history_message)
            rounds += 1

        if bot_message is None:
            # Still no text after the allowed rounds: show a fallback instead of
            # failing while iterating over None below.
            bot_message = "Sorry, I could not produce an answer for that request."
            messages.append({"role": "assistant", "content": bot_message})

        # Stream the reply one character at a time for a typing effect.
        history[-1][1] = ""
        for character in bot_message:
            history[-1][1] += character
            time.sleep(0.001)
            yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    # clearItOut returns None, which is sent to the chatbot output.
    clear.click(clearItOut, None, chatbot, queue=False)

demo.queue()
demo.launch()