In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from openai import OpenAI
import re

# ---------------------------------------------------------------------------
# Simulation settings
# ---------------------------------------------------------------------------

# Pool of RNG seeds; the entry at index 4 (86) seeds this run.
seeds = (42, 53, 64, 75, 86)
np.random.seed(seeds[4])

sample_size = 100  # number of simulated observations
n_repl = 10        # not used in this cell

# True data-generating parameters of y = const + slope * x + noise
const = 1.0   # intercept
slope = 1.0   # coefficient on x
sigma = 0.8   # standard deviation of the noise term

# Draw the regressor first, then the noise, so the order in which random
# numbers are consumed from the seeded global generator stays fixed.
x = np.random.normal(loc=0, scale=1, size=sample_size)
noise = np.random.normal(loc=0, scale=sigma, size=sample_size)
y = const + slope * x + noise

# Collect the simulated sample in a DataFrame (columns: y, then x)
df = pd.DataFrame(dict(y=y, x=x))

# Write the sample to disk for the manual ChatGPT estimates
df.to_csv('y_x_test.csv', index=False)

# Reference fit: OLS of y on x, with a constant column added for the intercept
X = sm.add_constant(df[['x']])
model = sm.OLS(endog=df['y'], exog=X).fit()
coefficients = model.params
print("Estimated Coefficients:", coefficients, sep="\n")


In [None]:
# Ask ChatGPT to run the same regression from a text prompt

# Credentials are no longer hardcoded in the notebook: with no arguments the
# OpenAI client reads the API key from the OPENAI_API_KEY environment
# variable and the organization from OPENAI_ORG_ID (optional).
client = OpenAI()

# Render the simulated data as plain text so it can be embedded in the prompt
data_str = df.to_string(index=False)

# Build the prompt from adjacent string literals. Each sentence ends with an
# explicit space or newline; the earlier version glued two sentences together
# ("variable.Give") and, through a backslash line continuation inside the
# literal, embedded a stray backslash plus source indentation in the prompt.
prompt = (
    "Perform a linear regression on the following dataset:\n\n"
    f"{data_str}\n\n"
    "Use 'y' as the dependent variable and 'x' as independent variable. "
    "Give the actual values of the parameter estimates in an output table "
    "and use the name const for the intercept and slope for the slope "
    "coefficient."
)

messages = [
    {"role": "system", "content": "You are an econometrician."},
    {"role": "user", "content": prompt},
]

# Send the conversation to the chat completions endpoint
completion = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
)

# Text of the first (and, with default settings, only) returned choice
assistant_completion = completion.choices[0].message.content
print(assistant_completion)