In [None]:
from dotenv import load_dotenv
from pathlib import Path
import os
import sys
import openai
import csv

# Load environment variables from a .env file two directories above the
# current working directory. The path is relative, so it depends on where
# the notebook is run from.
env_path = Path('../..') / '.env'
load_dotenv(dotenv_path=env_path)

# Show the working directory that the relative paths in this notebook are
# resolved against.
print(os.getcwd())
# NOTE(review): machine-specific absolute path; anything imported via this
# entry only resolves on this machine. Consider deriving it from the repo
# location instead.
sys.path.append('/Users/davidwei/Documents/SellScale/sellscale-api/notebooks/gpt_api')

# os.environ.get returns None when OPENAI_API_KEY is unset, in which case
# openai.api_key is assigned None here.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
openai.api_key = OPENAI_API_KEY

# Chat model name passed to openai.ChatCompletion.create by
# classify_naive_helper.
CURRENT_OPENAI_CHAT_GPT_MODEL = "gpt-3.5-turbo"

In [None]:
def _parse_fit_and_reason(completion_text: str) -> tuple[str, str]:
    """Extract the fit label and the reason text from a model reply.

    The prompt asks the model for lowercase "fit:" and "reason:" markers, but
    replies may capitalise them, so the markers are matched
    case-insensitively.

    Args:
        completion_text: The raw text of the model's reply.

    Returns:
        A ``(fit, reason)`` tuple with surrounding whitespace stripped.

    Raises:
        ValueError: If the reply does not contain "fit:" followed by
            "reason:".
    """
    import re  # local import so this block does not depend on file-level imports

    # DOTALL lets the reason span several lines; \b avoids matching words
    # that merely end in "fit" (e.g. "benefit:").
    match = re.search(
        r"\bfit:(.*?)\breason:(.*)",
        completion_text,
        flags=re.IGNORECASE | re.DOTALL,
    )
    if match is None:
        raise ValueError(f"could not find fit/reason markers in: {completion_text!r}")
    return match.group(1).strip(), match.group(2).strip()


def classify_naive_helper(linkedin_bio: str, title: str) -> tuple[str, str]:
    """Ask the chat model how well a prospect fits the Ideal Customer Profile.

    Interpolates the prospect's title and bio into the ICP prompt, sends it
    to ``openai.ChatCompletion.create`` as a single user message, and parses
    the first choice of the reply.

    Args:
        linkedin_bio: The prospect's LinkedIn bio text.
        title: The prospect's job title.

    Returns:
        A ``(fit, reason)`` tuple. ``("", "")`` is returned when the API
        response is missing or has no choices, and ``("ERROR", "ERROR")``
        when the request or the parsing of the reply raises.
    """
    # Alternative ICP prompt. It is NOT sent to the model at the moment (only
    # robbie_prompt is); kept so the two can be swapped easily.
    kate_prompt = f"""I am a sales researcher. This is the Ideal Customer Profile for my target customer:

    Seniority: C-level, VP, Director

    Tiers:
    Tier 1: Project Manager, Digital, PMO, Marketing, IT, Transformation
    Tier 2: Manager, Head of XYZ, Department level people
    Tier 3: Managers of Finance, others

    Tier 1 is considered the best, while Tier 3 is considered the worst

    Here is a potential prospect:
    Title: {title}
    LinkedIn Bio: {linkedin_bio}

    Based on this information, label the person based on if they are the ideal ICP using:

    - "VERY HIGH" - They are very much the right fit
    - "HIGH" - They are likely the right fit
    - “MEDIUM” - they may be the right fit
    - "LOW" - They are unlikely to be the right fit
    - “VERY LOW” - They are most probably not the right fit.

    Include this label next to the word "fit:" based on if this person is the ideal ICP. Then add a new line and say "reason:" with 1-2 sentences describing why this label was chosen. 
        """

    # Prompt that is actually sent to the model.
    robbie_prompt = f"""I am a sales researcher. This is the Ideal Customer Profile for my target customer:

Seniority: C-level, C-level minus one, Department Head

Relevant Work: Technology leaders, Transformation, Program Management, Marketing

Tiers:
Tier 1: C-Level, C-Level minus one, Department Heads
Tier 2: VPs, Roles that manage teams of 20+ people
Tier 3: High impact roles at a large company

Tier 1 is considered the best, while Tier 2 is okay.

Here is a potential prospect:
Title: {title}
LinkedIn Bio: {linkedin_bio} 

If the individual's biography mentions budgeting or ability to manage large amounts of money, this is a major plus. 

Likewise, mentioning Excel explicitly may betray a more junior position.

Based on this information, label the person based on if they are the ideal ICP using:

- "VERY HIGH" - They are very much the right fit
- "HIGH" - They are likely the right fit
- “MEDIUM” - they may be the right fit
- "LOW" - They are unlikely to be the right fit
- “VERY LOW” - They are most probably not the right fit.

Include this label next to the word "fit:" based on if this person is the ideal ICP. Then add a new line and say "reason:" with 1-2 sentences describing why this label was chosen. 
        """

    try:
        response = openai.ChatCompletion.create(
            model=CURRENT_OPENAI_CHAT_GPT_MODEL,
            messages=[{
                "role": "user", "content": robbie_prompt
            }]
        )
        # Always hand back a 2-tuple: the caller unpacks the result into
        # (fit, reason), so a bare "" would raise ValueError there.
        if response is None or not response["choices"]:
            return "", ""

        preview = response["choices"][0]["message"]["content"].strip()
        return _parse_fit_and_reason(preview)
    except Exception as e:
        # Best-effort by design: one failed prospect must not abort a batch
        # run, but the cause is reported instead of being silently dropped.
        print(f"classify_naive_helper failed: {e!r}", file=sys.stderr)
        return "ERROR", "ERROR"
    

def classify_naive(input_file: str, output_file: str) -> bool:
    """Classify every prospect in a CSV file and write the results to a new CSV.

    Reads ``input_file``, whose header row must contain ``linkedin_bio`` and
    ``title`` columns, calls ``classify_naive_helper`` for each data row, and
    writes the row to ``output_file`` with ``fit`` and ``reason`` columns
    appended. Each result is also printed as it is produced.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to create (overwritten if present).

    Returns:
        Always ``True`` once every row has been processed.

    Raises:
        ValueError: If the header lacks a ``linkedin_bio`` or ``title`` column.
        StopIteration: If ``input_file`` is empty and has no header row.
    """
    from tqdm import tqdm

    # newline='' is what the csv module requires so that newlines embedded in
    # quoted fields (e.g. multi-line bios) and platform line endings are
    # handled correctly.
    with open(input_file, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',')
        column_names = next(reader)

        linkedin_bio_index = column_names.index('linkedin_bio')
        title_index = column_names.index('title')

        # The output is opened only after the header has been validated, so a
        # bad input file does not truncate an existing output file.
        with open(output_file, 'w', newline='') as g:
            writer = csv.writer(g)
            # The header must match the two values appended to every row.
            writer.writerow(column_names + ['fit', 'reason'])
            for row in tqdm(reader):
                if not row:
                    # csv.reader yields [] for blank lines; nothing to classify.
                    continue

                linkedin_bio = row[linkedin_bio_index]
                title = row[title_index]

                fit, reason = classify_naive_helper(linkedin_bio, title)
                print(fit, reason)

                writer.writerow(row + [fit, reason])

    return True

In [None]:
# Smoke test: classify a single hand-written prospect. This issues a live
# OpenAI chat-completion request through classify_naive_helper.
classify_naive_helper("Some bio", "VP of project management")

In [None]:
# Classify every row of the input CSV and write the results to the output
# CSV. Both paths are relative to the current working directory.
classify_naive('robbie_mondaycom_contacts.csv', 'robbie_mondaycom_contacts_output.csv')