In [1]:
import sys
from pathlib import Path
import os
from dotenv import load_dotenv

# 📁 Ensure /src is in the path so imports work.
# Guarded so that re-running this cell does not stack duplicate sys.path entries.
src_dir = str(Path.cwd().parent / "src")
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

# 🌍 Load your environment variables from .env
load_dotenv()

# 🔑 Grab the API key
openai_key = os.getenv("OPENAI_API_KEY")

# ✅ Fail fast when the key is absent *or* blank (e.g. `OPENAI_API_KEY=` in .env).
# An explicit raise is used because `assert` statements are skipped under `python -O`.
if not openai_key or not openai_key.strip():
    raise RuntimeError("OPENAI_API_KEY is missing!")


from langchain_openai import ChatOpenAI


# Chat model used by the later cells; temperature=0 requests the least random sampling.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=openai_key,
)




In [2]:
from langchain.prompts import ChatPromptTemplate

# 🧠 Load context summary from previous step
from context import generate_context_summary

# 🔄 Re-load and prepare the dataset
from cleaning import load_dataset, prepare_dataset

# Load and prepare the dataset, then build the summary text the model will explain.
dataset_name = "FloridaBikeRentals.csv"
df = load_dataset(dataset_name)
df = prepare_dataset(df)
context_summary = generate_context_summary(df)

# 💬 Two-message prompt: a fixed system persona plus a human turn that carries
# the summary through the {summary} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant that explains data to beginners in plain English."),
        ("human", "Here is a summary of a dataset:\n\n{summary}\n\nExplain this clearly."),
    ]
)

# 🗣️ Pipe the prompt into the chat model and run it once
chain = prompt | llm
response = chain.invoke({"summary": context_summary})

# 🧾 Show the reply under a header line
print("🤖 Databot says:\n", response.content, sep="\n")


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
from context import generate_context_summary
from cleaning import load_dataset, prepare_dataset

# Load and prepare the dataset, then build the context summary passed to the chatbot.
dataset_name = "FloridaBikeRentals.csv"
df = load_dataset(dataset_name)
df = prepare_dataset(df)
context = generate_context_summary(df)


In [None]:
def ask_databot(user_question, context_summary):
    """Ask the chat model a question about the dataset and return its reply text.

    Args:
        user_question: The question sent as the human turn.
        context_summary: Dataset summary text embedded in the system message.

    Returns:
        The ``content`` attribute of the model's response.
    """
    # Role/content tuples are accepted by LangChain chat models as message
    # shorthand, so no message classes (which this notebook never imports)
    # are needed here.
    messages = [
        ("system", f"You are a helpful data assistant. Here is the dataset context:\n{context_summary}"),
        ("human", user_question),
    ]
    # .invoke() is the supported entry point; calling the model object directly
    # (llm(messages)) is deprecated in current LangChain releases.
    return llm.invoke(messages).content

# Example query — ask_databot requires the dataset summary as its second
# argument; `context` is the summary built in the preceding cell.
ask_databot("Are there any missing values?", context)


In [None]:
# 03_user_questions.ipynb

import sys
from pathlib import Path

# 🔧 Add src/ folder to import cleaning + context utils.
# Guarded so that re-running this cell never stacks duplicate sys.path entries.
src_dir = str(Path.cwd().parent / "src")
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

# 📦 Import functions
from cleaning import load_dataset, prepare_dataset, get_missing_value_summary
from context import generate_context_summary

# 🔧 Dataset selector (same as previous notebook)
dataset_name = "FloridaBikeRentals.csv"

# 📄 Load the file and run it through preparation in one pass
df = prepare_dataset(load_dataset(dataset_name))

# 🧠 Text summary that gives the chatbot its knowledge of the data
context_summary = generate_context_summary(df)


In [None]:
# Example simulated user question
user_question = "Are there any missing values?"

# Basic logic for now, later this will be handled by the chatbot.
# The answer is the text that follows the first "🕳️" marker in the context
# summary (up to the next marker, if there is one).
if "missing" in user_question.lower():
    summary_parts = context_summary.split("🕳️")
    print("🤖 Databot says:\n")
    if len(summary_parts) > 1:
        print(summary_parts[1].strip())
    else:
        # Marker absent from the summary: answer gracefully instead of
        # raising IndexError on the [1] lookup.
        print("I couldn't find a missing-values section in the dataset summary.")
