# Assigning Applicants to Horizontals

In [None]:
from pathlib import Path

import pandas as pd

# Read the applicant/venture table from ../data/venture_data.csv (the path is
# relative, so it is resolved against the current working directory).
venture_data_csv = Path("..", "data", "venture_data.csv")
df = pd.read_csv(venture_data_csv)

In [None]:
from enum import Enum
from typing import Tuple

from pydantic import BaseModel, Field

# Closed set of horizontals an applicant can be assigned to. Members mix in
# `str`; each member's value is the human-readable label of the horizontal.
class Horizontal(str, Enum):
    ai_ml = "Artificial Intelligence and Machine Learning"
    climate = "Climate"
    creator_economy = "Creator Economy"
    education = "Education"
    hardware = "Hardware"
    healthcare = "Healthcare"
    revenue_org = "Revenue Organisation"
    social_entrepreneurship = "Social Entrepreneurship"
    web3 = "Web3"

# One proposed horizontal for an applicant, with a confidence score and the
# reasoning behind the proposal.
class Classification(BaseModel):
    horizontal: Horizontal  # the horizontal being proposed
    confidence: float = Field(ge=0.0, le=1.0)  # validated to lie in [0.0, 1.0]
    argumentation: str  # free-text justification for this choice

# Container for exactly two classifications of a single applicant.
# NOTE(review): the prompt asks for the "top 2", so the tuple is presumably
# ordered best-first, but nothing here enforces an ordering or requires the
# two horizontals to differ.
class Top2(BaseModel):
    top: Tuple[Classification, Classification]

# Instructions that are prepended to every applicant prompt: the available
# horizontals with indicative keywords, followed by the requested output.
#
# This is a plain string on purpose: the text contains no placeholders, so an
# f-prefix would only make any future literal brace a formatting error.
# The first label is spelled exactly like the value of Horizontal.ai_ml
# ("... and ...") so the name shown to the model matches the value its answer
# is validated against.
system_prompt = """
You have to classify applicants into the following horizontals based on their application:

- Artificial Intelligence and Machine Learning: machine learning, data science, AI, models, neural networks, prompt engineering, TensorFlow, PyTorch, automation, prediction, ethical AI.
- Climate: sustainability, carbon neutrality, green tech, environmental activism, emissions, net zero, biodiversity, climate action.
- Creator Economy: content creation, YouTube, TikTok, podcasting, personal brand, monetization, online communities, creative tools, video editing.
- Education: learning, teaching, mentoring, access to education, tutoring, EdTech, educational content, pedagogy, MOOCs, learning gaps, curriculum.
- Hardware: devices, IoT, robotics, electronics, mechanical design, physical products, wearables, sensors, embedded systems, prototyping, Arduino, CAD.
- Healthcare: health, medicine, wellness, mental health, biotech, public health, clinical work, med school, diagnostics, health data, patient care.
- Revenue Organisation: sales, growth, marketing, monetization, CAC, LTV, GTM strategy, funnel optimization, CRM, growth loops, pricing.
- Social Entrepreneurship: impact, equity, inclusion, fairness, NGOs, non-profits, impact startups, volunteering.
- Web3: blockchain, crypto, decentralization, smart contracts, DAOs, NFTs, DeFi, Web3 infrastructure, ownership, transparency, or trustless systems

Output the horizontal, your confidence and reasoning for your top 2.
"""

# Columns of the applicant table that are shown to the model, in prompt order.
# Each column name doubles as the label printed in front of its value.
_PROMPT_FIELDS = (
    "Self Description",
    "Venture Description",
    "Venture Industry",
    "Venture Market",
    "Venture Technology",
    "Venture KPIs",
    "Venture Problem",
    "Venture Impact",
    "Venture Vision",
    "Venture SDGs",
)

# Text shown in place of a field that has no value.
_MISSING_VALUE_TEXT = "Not provided"


def format_prompt(row) -> str:
    """Build the user prompt describing one applicant.

    Args:
        row: Mapping-like record (for example one row of the applicant
            DataFrame) that provides every column in ``_PROMPT_FIELDS``.

    Returns:
        The prompt text: an "Applicant" header, one ``<column>: <value>`` line
        per field, and the closing classification request. Fields without a
        value are rendered as ``_MISSING_VALUE_TEXT``.

    Raises:
        KeyError: If ``row`` lacks one of the expected columns.
    """
    field_lines = []
    for column in _PROMPT_FIELDS:
        value = row[column]
        # Empty CSV cells are loaded by pandas as NaN, which would otherwise
        # be interpolated into the prompt as the literal text "nan".
        shown = _MISSING_VALUE_TEXT if pd.isna(value) else value
        field_lines.append(f"{column}: {shown}")
    details = "\n".join(field_lines)
    # Header and footer text are kept exactly as before (including the
    # trailing spaces after "Applicant") so prompts for complete rows are
    # unchanged.
    return (
        "\nApplicant    \n"
        f"{details}\n"
        "\nPlease classify this applicant into a horizontal.\n"
    )

In [None]:
# Preview the prompt that is generated for the first applicant in the table.
first_applicant = df.iloc[0]
print(format_prompt(first_applicant))

In [None]:
import ollama
import outlines

# Create an Ollama client with its default settings and wrap the "mistral"
# model through outlines; `model` is the callable used for all generations.
client = ollama.Client()
model = outlines.from_ollama(client, "mistral")

## Toy Example

In [None]:
# Smoke test: send a free-form prompt with no output type to check that the
# model responds. Left as a bare expression so the notebook displays the result.
model("Hello, what is the meaning of life?")

## Assignment

In [None]:
import logging

# Send INFO-and-above records to prediction.log, one line per record in the
# form "<timestamp> | <level> | <message>".
log_format = "%(asctime)s | %(levelname)s | %(message)s"
logging.basicConfig(filename="prediction.log", level=logging.INFO, format=log_format)

## Run Classification

In [None]:
from pydantic import ValidationError
from tqdm import tqdm

# One entry per applicant, in DataFrame row order. Every entry is a
# two-element list (first prediction, second prediction) so that each list
# column can later be split into a pair of scalar columns.
horizontals = []
confidences = []
arguments = []

for idx, row in tqdm(df.iterrows(), total=len(df)):
    prompt = format_prompt(row)
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("Self Description: %s", row["Self Description"])
    logging.info("Venture Description: %s", row["Venture Description"])
    # Instructions and applicant text are sent as a single string; Top2 is
    # passed as the requested output type.
    output = model(system_prompt + prompt, Top2)
    logging.info(output)

    try:
        top_two = Top2.model_validate_json(output).top
    except ValidationError:
        # A single unusable answer (e.g. malformed JSON or a confidence
        # outside the validated range) must not abort the whole run and throw
        # away every prediction collected so far. Log it with the traceback
        # and store an empty pair so the row count and shape stay intact.
        logging.exception(
            "Row %s: model output failed Top2 validation; storing empty predictions",
            idx,
        )
        horizontals.append([None, None])
        confidences.append([None, None])
        arguments.append([None, None])
        continue

    # Read the validated models directly; store the enum's label, not the
    # enum member, so the resulting columns hold plain strings.
    horizontals.append([choice.horizontal.value for choice in top_two])
    confidences.append([float(choice.confidence) for choice in top_two])
    arguments.append([str(choice.argumentation) for choice in top_two])

df["Predicted Horizontal"] = horizontals
df["Confidence"] = confidences
df["Argumentation"] = arguments

In [None]:
# Expand each two-element list column into a pair of scalar columns named
# "<stem> 1" and "<stem> 2".
split_targets = {
    "Predicted Horizontal": "Horizontal",
    "Confidence": "Confidence",
    "Argumentation": "Argumentation",
}
for list_column, stem in split_targets.items():
    expanded = pd.DataFrame(df[list_column].to_list(), index=df.index)
    df[[f"{stem} 1", f"{stem} 2"]] = expanded

In [None]:
# The list-valued columns have been expanded into separate columns, so remove
# them and write the final table without the index column.
intermediate_columns = ["Predicted Horizontal", "Confidence", "Argumentation"]
df = df.drop(columns=intermediate_columns)
df.to_csv("classified_applicants.csv", index=False)