# Imports

In [1]:
import pandas as pd
from think_reason_learn.policy_induction import PolicyInduction, WeightTrainerConfig
from think_reason_learn.core.llms import GoogleChoice, OpenAIChoice

# Log level

In [2]:
import logging
import sys

# Route log records to stdout so they show up in the cell output.
# force=True replaces any handlers already attached to the root logger,
# which keeps this cell safe to re-run.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    stream=sys.stdout,
    level=logging.INFO,  # switch to logging.DEBUG for more detail
    force=True,
)

# Only let errors through from the "google_genai.models" logger, and stop its
# records from propagating to the root handler configured above.
_genai_logger = logging.getLogger("google_genai.models")
_genai_logger.setLevel(logging.ERROR)
_genai_logger.propagate = False

# Data

In [3]:
# Toy dataset: eight free-text founder profiles, one string per person.
# Each literal opens with a backslash right after the triple quote and ends
# most lines with a backslash, so the source line breaks are not part of the
# string; only the newline before the closing quotes is kept.
person1 = """\
A is a 30-year-old woman living in San Francisco. She studied computer science at \
Stanford and worked for six years as a senior engineer at Google on \
large-scale distributed systems. She recently left to start an AI-powered healthcare \
analytics company and has already raised a $2M seed round from \
well-known Bay Area investors.
"""

person2 = """\
B is a 25-year-old man based in New York City. He graduated with a degree in \
marketing from NYU and has been working as a marketing manager at Apple for \
the past three years. He is trying to launch a social media app. Before apple, \
he was a product manager at Facebook.
"""

# This profile states no age.
# NOTE(review): "C is stay in" reads like a typo, but the text is model input,
# so it is left exactly as written -- confirm whether the noise is intentional.
person3 = """\
C is stay in Los Angeles. He is a practicing medical doctor at UCLA \
and is working on a remote patient monitoring platform. He has limited \
technical knowledge and no startup experience, relying heavily on contractors \
for development. He is also a big fan of the Lakers.
"""

person4 = """\
D is a 40-year-old man living in Chicago. He studied law at the University of \
Chicago and has built a career as a corporate lawyer specializing in \
mergers and acquisitions. He is exploring a legal-tech startup idea but is \
still working full-time at his law firm and has no technical or entrepreneurial \
background.
"""

person5 = """\
E is a 28-year-old woman in San Francisco. She studied computer engineering at \
UC Berkeley and worked as a software engineer at a YC-backed fintech startup \
that scaled rapidly. She is now building her own fintech product for underbanked \
communities and has early traction with pilot customers in Latin America.
"""

person6 = """\
F is a 32-year-old man based in New York City. He earned his MBA from Columbia \
Business School after working in marketing roles at Apple and Spotify. He is \
now working on a consumer subscription box startup, but customer acquisition costs \
have been high, and he is struggling to attract investors without stronger traction.
"""

person7 = """\
G is a 27-year-old woman living in Austin, Texas. She studied industrial engineering \
at MIT and later worked as a product manager at Amazon, focusing on supply chain \
logistics. She has teamed up with two cofounders from her professional network to \
launch a logistics automation startup and recently joined a prominent accelerator.
"""

# This profile states neither an age nor a location.
person8 = """\
H has worked in 7 companies, in 3 different industries. He is currently a product \
manager at a startup in the fintech industry. He is looking to launch a new \
product in the edutech industry.
"""

In [4]:
# One row per profile, in person1..person8 order, held in a single "data" column.
profiles = [person1, person2, person3, person4, person5, person6, person7, person8]
X = pd.DataFrame({"data": profiles})

# Target labels, positionally aligned with the rows of X.
# NOTE(review): person4 is labelled YES and person5 NO, which looks
# counter-intuitive given their profiles -- confirm these toy labels are intended.
y = [
    "YES",  # person1
    "NO",  # person2
    "NO",  # person3
    "YES",  # person4
    "NO",  # person5
    "NO",  # person6
    "YES",  # person7
    "NO",  # person8
]

# Policy Induction

In [5]:
# Settings handed to the weight trainer.
# NOTE(review): presumably cv_folds is the number of cross-validation folds and
# penalty="l1" selects L1 regularisation -- confirm against WeightTrainerConfig.
config = WeightTrainerConfig(penalty="l1", cv_folds=3)

# The same two models are listed for both gen_llmc and predict_llmc; each list
# gets its own freshly constructed choice objects.
pi = PolicyInduction(
    config=config,
    max_policy_length=5,
    gen_llmc=[
        GoogleChoice(model="gemini-2.0-flash-lite"),
        OpenAIChoice(model="gpt-4.1-nano"),
    ],
    predict_llmc=[
        GoogleChoice(model="gemini-2.0-flash-lite"),
        OpenAIChoice(model="gpt-4.1-nano"),
    ],
)

In [6]:
instructions_tem = await pi.set_task(
    task_description="Predict if a startup founder will be successful "
    "or fail based on their background.",
)
print(instructions_tem)

2025-11-23 19:15:53,481 INFO think_reason_learn.policy_induction._policy_induction: Generated policy generation instructions with confidence 0.6712630521888587
You are tasked with refining a set of existing policies for classifying startup founder success, using new data and insights. Your goal is to enhance the existing policies by incorporating knowledge from the provided data, clarifying any ambiguities, generalizing decision rules, and adding new policies when novel patterns emerge. Do not replace or discard the existing policies.

Here's how to approach the refinement:

1.  **Analyze New Data:** Examine the new data samples, paying close attention to the features of successful and failing founders. Look for patterns, correlations, and key differentiators.

2.  **Evaluate Existing Policies:** Assess the current policies' performance against the new data. Identify areas where the policies correctly classify founders and areas where they struggle.

3.  **Refine & Expand Policies:**
 

In [7]:
# Fit on the toy data and rebind `pi` to whatever `fit` returns. Per the log
# output recorded for this cell, fitting generates policies, scores them and
# then fits the policy weights.
# NOTE(review): the exact effect of reset=False is not visible here -- confirm
# against the PolicyInduction.fit documentation.
pi = await pi.fit(X, y, reset=False)

2025-11-23 19:15:53,505 INFO think_reason_learn.policy_induction._policy_induction: Generating Policies
2025-11-23 19:15:55,237 INFO think_reason_learn.policy_induction._policy_induction: Generated policies: 5
2025-11-23 19:15:55,240 INFO think_reason_learn.policy_induction._policy_induction: Scoring Policies
2025-11-23 19:16:02,943 INFO think_reason_learn.policy_induction._policy_induction: Setting policy weight
2025-11-23 19:16:02,951 INFO think_reason_learn.policy_induction._policy_induction: Fitting weights: 8 samples × 5 models
2025-11-23 19:16:03,397 INFO think_reason_learn.policy_induction._policy_induction: C=0.001 mean F0.5=0.44160
2025-11-23 19:16:03,819 INFO think_reason_learn.policy_induction._policy_induction: C=0.01 mean F0.5=0.44160
2025-11-23 19:16:04,244 INFO think_reason_learn.policy_induction._policy_induction: C=0.1 mean F0.5=0.44160
2025-11-23 19:16:04,651 INFO think_reason_learn.policy_induction._policy_induction: C=1 mean F0.5=0.44160
2025-11-23 19:16:05,058 INF

In [8]:
# Display what `get_memory` returns; the recorded output is a table with one
# row per policy and `policy` / `predictions` columns.
pi.get_memory()

Unnamed: 0,policy,predictions
0,Founders with strong technical backgrounds (co...,0 YES 1 NO 2 NO 3 NO 4 YES 5...
1,Founders with experience in building and scali...,0 YES 1 YES 2 NO 3 NO 4 YES 5...
2,Founders with significant domain expertise and...,0 YES 1 YES 2 NO 3 NO 4 YES 5...
3,"Founders with limited technical knowledge, a l...",0 YES 1 NO 2 NO 3 NO 4 YES 5...
4,Founders working full-time in unrelated fields...,0 NO 1 NO 2 NO 3 NO 4 NO 5 N...


In [9]:
async for sample_index, results, final_answer, token_counter in pi.predict(X):
    print(f"Sample {sample_index}, Predict: {final_answer}")

2025-11-23 19:16:05,933 INFO think_reason_learn.policy_induction._policy_induction: Predicting 8 samples with max 3 concurrent workers
Sample 0, Predict: YES
Sample 1, Predict: NO
Sample 2, Predict: YES
Sample 3, Predict: YES
Sample 4, Predict: YES
Sample 5, Predict: NO
Sample 6, Predict: YES
Sample 7, Predict: NO


# Saving

In [10]:
# Save the fitted object under the name "example_policy_induction".
# NOTE(review): what for_production=True changes about the saved artefact is
# not visible here -- confirm against the PolicyInduction.save documentation.
pi.save("example_policy_induction", for_production=True)

# Loading

In [11]:
# Load from the same name that was passed to `pi.save` in the Saving section.
loaded_pi = PolicyInduction.load("example_policy_induction")

In [12]:
X_predict = X
y_predict = y

async for sample_index, results, final_answer, token_counter in loaded_pi.predict(
    X_predict
):
    idx = int(sample_index)
    print(f"Sample {idx}, Predict: {final_answer}, Target: {y_predict[idx]}")

2025-11-23 19:16:12,899 INFO think_reason_learn.policy_induction._policy_induction: Predicting 8 samples with max 3 concurrent workers
Sample 0, Predict: YES, Target: YES
Sample 1, Predict: NO, Target: NO
Sample 2, Predict: YES, Target: NO
Sample 3, Predict: YES, Target: YES
Sample 4, Predict: YES, Target: NO
Sample 5, Predict: NO, Target: NO
Sample 6, Predict: YES, Target: YES
Sample 7, Predict: NO, Target: NO
