In [1]:
from openai import OpenAI
import json
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from typing import List, Dict

In [2]:
# Do not paste an API key into the notebook: it would be saved (and shared)
# with the file. With no `api_key` argument the OpenAI client reads the key
# from the OPENAI_API_KEY environment variable, so export it before starting
# the kernel (e.g. `export OPENAI_API_KEY=...`).
client = OpenAI()

In [3]:
def run_regression(data: List[Dict[str, float]]) -> Dict[str, float]:
    """
    Fit a simple (one-feature) linear regression of 'y' on 'x'.

    Parameters:
    - data: A non-empty list of dictionaries, each containing numeric
      'x' and 'y' values.

    Returns:
    - A dictionary with keys "slope", "intercept" and "r_squared", all
      built-in floats. "r_squared" is scored on the same points the model
      was fitted on.

    Raises:
    - ValueError: if `data` is empty or the records do not provide both
      an 'x' and a 'y' field.
    """
    # Fail early with an actionable message instead of letting pandas raise
    # an opaque KeyError on the column lookups below.
    if not data:
        raise ValueError("run_regression requires at least one data point.")

    df = pd.DataFrame(data)

    missing = [column for column in ("x", "y") if column not in df.columns]
    if missing:
        raise ValueError(
            f"Each data point must contain 'x' and 'y'; missing: {missing}"
        )

    # scikit-learn expects a 2-D feature matrix, hence the double brackets.
    X = df[['x']].values
    y = df['y'].values

    model = LinearRegression()
    model.fit(X, y)

    # Cast to built-in floats so the result matches the annotated return type
    # and does not carry numpy scalar types into downstream serialization.
    return {
        "slope": float(model.coef_[0]),
        "intercept": float(model.intercept_),
        "r_squared": float(model.score(X, y))
    }

In [5]:
# JSON Schema for one observation: an object with exactly the numeric
# fields "x" and "y".
data_point_schema = {
    "type": "object",
    "properties": {
        "x": {"type": "number"},
        "y": {"type": "number"},
    },
    "required": ["x", "y"],
    "additionalProperties": False,
}

# JSON Schema for the arguments of `run_regression`: a single required
# "data" array whose items follow `data_point_schema`.
run_regression_parameters = {
    "type": "object",
    "properties": {
        "data": {
            "type": "array",
            "items": data_point_schema,
            "description": "List of data points with x and y values.",
        },
    },
    "required": ["data"],
    "additionalProperties": False,
}

# Tool list handed to the chat completion request; it advertises one
# callable function named "run_regression".
tools = [
    {
        "type": "function",
        "function": {
            "name": "run_regression",
            "description": "Run a simple linear regression on x-y data points.",
            "parameters": run_regression_parameters,
            "strict": True,
        },
    },
]

# Opening conversation: a generic system prompt followed by the user's
# request, which embeds the data points as JSON text.
system_prompt = "You are a helpful assistant."
user_prompt = (
    "Run a regression on the following data: "
    '[{"x": 1, "y": 2}, {"x": 2, "y": 2.5}, {"x": 3, "y": 3.7}].'
)

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
]

In [6]:
# Model used for both the tool-selection request and the follow-up request.
MODEL = "gpt-4o-mini"

# First request: let the model decide whether to call `run_regression`.
completion = client.chat.completions.create(
    model=MODEL,
    messages=messages,
    tools=tools
)
assistant_message = completion.choices[0].message

if assistant_message.tool_calls:
    # Build the follow-up conversation on a copy so that re-running this cell
    # does not keep appending to the shared `messages` list. The assistant
    # message that carries the tool calls must come before the tool results.
    followup_messages = [*messages, assistant_message]

    for tool_call in assistant_message.tool_calls:
        if tool_call.function.name == "run_regression":
            function_args = json.loads(tool_call.function.arguments)
            result = run_regression(**function_args)
            tool_output = json.dumps(result)
        else:
            # Every tool call needs a reply; report unknown tools back to the
            # model instead of silently dropping them.
            tool_output = json.dumps(
                {"error": f"Unknown tool: {tool_call.function.name}"}
            )

        # Tool results use role "tool" and are matched to the request via
        # `tool_call_id` (role "function" is the deprecated legacy form).
        followup_messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": tool_output
        })

    # One follow-up request after all tool calls are answered, rather than
    # one request per tool call.
    final_response = client.chat.completions.create(
        model=MODEL,
        messages=followup_messages
    )

    print(final_response.choices[0].message.content)
else:
    # The model answered directly without requesting a tool.
    print(assistant_message.content)


Based on the data provided, the results of the regression analysis are as follows:

- **Slope**: 0.85
- **Intercept**: 1.0333
- **R-squared**: 0.9465

This indicates that the regression line has a positive slope of 0.85, suggesting that for every unit increase in \( x \), \( y \) increases by 0.85 units. The intercept of approximately 1.0333 suggests that when \( x \) is 0, the predicted value of \( y \) is about 1.0333.

The R-squared value of approximately 0.9465 indicates a strong fit of the model to the data, meaning that about 94.65% of the variability in \( y \) can be explained by the variability in \( x \).
