# Explore DSPy (Declarative Self-improving Python)

In [28]:
import dspy
# Language model handle: the gemma3:12b model addressed through the
# `ollama_chat` provider prefix, served from an Ollama endpoint on localhost.
# The API key is deliberately the empty string (presumably the local server
# does not check it -- confirm if the endpoint changes).
lm = dspy.LM(
    'ollama_chat/gemma3:12b',
    api_base='http://localhost:11434',
    api_key='',
)

# Hand the model to DSPy's global configuration so later cells can use it.
dspy.configure(lm=lm)

In [29]:
# Two ways to call the LM directly: a plain prompt string, or chat-style messages.
# Only the value of the last expression is rendered as the cell output, so the
# result of the first call is not displayed.
lm("Say this is a test!", temperature=0.7)  # e.g. ['This is a test!'] -- actual wording depends on the model
lm(messages=[{"role": "user", "content": "Say this is a test!"}])  # returns a list with the completion text (see output below)

['Okay! Consider this a test. 😊 \n\nWhat\'s the question? Let\'s see how you do! 😉\n\n\n\nLet me know what you need me to do for this "test."']

In [30]:
def search_wikipedia(query: str, k: int = 3) -> list[str]:
    """Search Wikipedia abstracts through a hosted ColBERTv2 retriever.

    Args:
        query: Search text sent to the retriever.
        k: Number of passages to request. Defaults to 3, the value that was
            previously hard-coded, so existing calls behave the same.

    Returns:
        The ``text`` field of every result, in the order the retriever
        returned them.
    """
    # NOTE(review): plain-HTTP endpoint addressed by raw IP; the path suggests
    # an index of 2017 Wikipedia abstracts -- confirm it is still reachable.
    retriever = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
    passages = retriever(query, k=k)
    return [passage['text'] for passage in passages]

# Chain-of-thought module whose signature takes `context` and `question`
# and produces `response`.
rag = dspy.ChainOfThought('context, question -> response')

question = "What's the name of the castle that David Gregory inherited?"

# Retrieve passages for the question, then answer from them; the returned
# Prediction is the last expression and therefore the cell's output.
rag(
    context=search_wikipedia(question),
    question=question,
)


Prediction(
    reasoning='The question asks for the name of the castle that David Gregory inherited. According to the provided context [1], David Gregory inherited Kinnairdy Castle in 1664.',
    response='Kinnairdy Castle.'
)

In [31]:
import os
import chromadb

# The Chroma token comes from the environment so it is never stored in the notebook.
CHROMADB_API_TOKEN = os.getenv('CHROMA_API_KEY')
if not CHROMADB_API_TOKEN:
    # Fail fast: without this check a missing variable would pass `None` as the
    # auth header and only surface later as an obscure client/HTTP error.
    raise RuntimeError(
        "CHROMA_API_KEY environment variable is not set; "
        "it is required to authenticate against api.trychroma.com."
    )

# HTTPS client for the hosted Chroma instance, scoped to one tenant/database.
# The token is sent on every request via the `x-chroma-token` header.
chroma_client = chromadb.HttpClient(
    ssl=True,
    host='api.trychroma.com',
    tenant='c74d6ead-7a1a-4e7d-afbb-3dd8d548c5ed',
    database='rag-0a14d70f',
    headers={
        'x-chroma-token': CHROMADB_API_TOKEN
    }
)



In [32]:
def rag_test(query: str, n_results: int = 2) -> list[str]:
    """Retrieve documents matching ``query`` from the notebook's Chroma collection.

    Args:
        query: Text to search the collection with.
        n_results: Number of documents to request. Defaults to 2, the value
            that was previously hard-coded, so existing calls behave the same.

    Returns:
        The list of document texts Chroma returned for ``query`` (a list of
        strings, not a single string as the old annotation claimed).
    """
    collection = chroma_client.get_collection(
        name="doc_262ab1b3-0de0-4435-8858-3f538e030d24_1743956562_c92fa3f7"
    )
    result = collection.query(query_texts=[query], n_results=n_results)
    # Exactly one query text is sent, so its documents are the first entry.
    return result["documents"][0]

In [39]:
# Check retrieval on its own before feeding it to the RAG module.
# NOTE(review): the query contains typos (likely meant "What are the steps that
# need to be taken to be a good data scientist"); it is left verbatim because
# the recorded output below was produced with this exact text.
rag_test("What are the that that next to be taken to me a good data scienctist")

['slide credit pedro domingos 47 ml in practice understand domain, prior knowledge, and goals data integration, selection, cleaning, preprocessing, etc.',
 'ml is used when human expertise does not exist navigating on mars humans cant explain their expertise speech recognition models must be customized personalized medicine models are based on huge amounts of data genomics learning isnt always useful there is no need to learn to calculate payroll based on slide by e.']

In [40]:
# Chain-of-thought module: takes `context` and `question`, produces `response`.
# NOTE(review): this repeats the identical `rag` definition from the Wikipedia
# cell above; presumably re-created so this section can run on its own.
rag = dspy.ChainOfThought('context, question -> response')

In [42]:
# Answer a question using documents retrieved from Chroma as the context.
# NOTE(review): the question text contains typos (likely meant "What are the
# steps that need to be taken to be a good data scientist"); it is kept
# verbatim because the recorded outputs were generated from it.
question = "What are the that that next to be taken to me a good data scienctist"
# rag_test supplies the retrieved documents; the module's Prediction is kept for inspection.
response = rag(context=rag_test(question), question=question)

In [43]:
# Show only the generated answer (the `response` field of the prediction).
response["response"]

"To become a good data scientist, you need a combination of technical skills and domain understanding. Here's a breakdown of key areas to focus on:\n\n*   **Domain Expertise:** Understand the specific field you're working in. Data science isn't just about algorithms; it's about applying them to solve real-world problems.\n*   **Data Handling:** Master data integration, selection, cleaning, and preprocessing techniques. This is a significant portion of a data scientist's work.\n*   **Machine Learning Fundamentals:** Develop a strong understanding of machine learning algorithms and when to apply them, especially in situations where human expertise is difficult to articulate or when dealing with large datasets.\n*   **Problem Solving:** Be able to define problems, formulate hypotheses, and evaluate solutions using data.\n*   **Communication:** Effectively communicate your findings and insights to both technical and non-technical audiences."

In [44]:
# Show the whole Prediction, which also carries the model's `reasoning`.
response

Prediction(
    reasoning='The question asks what steps are needed to become a good data scientist. The context provides clues about the skills and knowledge involved. It highlights the importance of understanding the domain, prior knowledge, and goals, as well as data integration, cleaning, and preprocessing. It also mentions situations where machine learning is useful (when human expertise is lacking or when dealing with large datasets like in personalized medicine). Therefore, a good response should focus on these aspects.',
    response="To become a good data scientist, you need a combination of technical skills and domain understanding. Here's a breakdown of key areas to focus on:\n\n*   **Domain Expertise:** Understand the specific field you're working in. Data science isn't just about algorithms; it's about applying them to solve real-world problems.\n*   **Data Handling:** Master data integration, selection, cleaning, and preprocessing techniques. This is a significant portion 