In [None]:
!pip install google-generativeai
!pip install pandas
!pip install bs4

In [None]:
import pandas as pd

# Step 1: Load the Clay-derived contacts CSV into a DataFrame
df = pd.read_csv('clay_contacts.csv')

# Step 2: Drop the columns that are not needed downstream
# (the duplicate "Find Work Email (2..7)" lookups plus Clay enrichment columns)
df = df.drop(
    columns=[f"Find Work Email ({n})" for n in range(2, 8)]
    + [
        "Enrich Person",
        "Enrich Person & Company",
        "What's the Hook?",
        "Find People from External List",
    ]
)

In [None]:
# Step 2b: Optional step for minor data cleanups and formatting
# Strip a leading http:// or https:// scheme and, when present, the "www."
# prefix. The "www." part is optional and its dot is escaped, so values such
# as "https://example.com" are reduced to a bare host too; the scraper
# prepends its own scheme and would otherwise build "http://https://...".
df['Company Domain'] = df['Company Domain'].str.replace(
    r'^https?://(?:www\.)?', '', regex=True
)

df.head()

In [None]:
import json
import os

import requests
from bs4 import BeautifulSoup
import google.generativeai as genai
from google.generativeai.types import GenerateContentResponse
import pandas as pd

# Model provider API key. Prefer supplying it through the GEMINI_API_KEY
# environment variable so the secret never has to be written into the
# notebook; the placeholder below is only used when that variable is unset.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "FILL IN KEY")

# Gemini model used to generate the outreach messages.
MODEL_NAME = "gemini-1.5-flash"

def get_homepage_content(domain, timeout=10):
    """Scrape the readable text of a company's homepage.

    Args:
        domain: Host name such as ``"example.com"``. A value that already
            starts with ``http://`` or ``https://`` is used as the URL as-is.
        timeout: Seconds to wait for the server before giving up, so a single
            unresponsive host cannot stall the whole run.

    Returns:
        The whitespace-normalized text of the first ``<main>``/``<article>``
        element, or of all ``<p>`` and ``<h1>``-``<h3>`` elements when neither
        exists. On any failure (missing domain, network error, HTTP error
        status, parse error) a string starting with
        ``"Error fetching content"`` is returned instead of raising.
    """
    # Missing cells arrive from pandas as NaN (a float); reject those and
    # blank strings up front instead of requesting "http://nan".
    if not isinstance(domain, str) or not domain.strip():
        return "Error fetching content: empty or missing domain"

    domain = domain.strip()
    if domain.startswith(('http://', 'https://')):
        url = domain
    else:
        url = f'http://{domain}'

    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures so error pages are not
        # mistaken for homepage copy.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        main_content = soup.find(['main', 'article'])
        if main_content:
            parsed_text = main_content.get_text()
        else:
            # If no <main> or <article> tag is found, fall back to the text
            # of all paragraphs followed by all headings.
            paragraphs = soup.find_all('p')
            headings = soup.find_all(['h1', 'h2', 'h3'])
            parsed_text = ' '.join(tag.get_text() for tag in paragraphs + headings)

        # Collapse runs of whitespace (including newlines) to single spaces.
        return ' '.join(parsed_text.split())

    except Exception as e:
        # Deliberately best-effort: the caller detects failures by the
        # "Error fetching content" prefix and moves on to the next contact.
        return f"Error fetching content: {str(e)}"

def _parse_outreach_response(response_text):
    """Extract the subject and message from the model's raw reply.

    The prompt asks for a JSON object, so JSON is tried first (tolerating
    Markdown code fences or chatter around the object). If that fails, the
    ``[Subject]:`` / ``[Message]:`` marker layout is accepted as a fallback.

    Returns:
        ``{"subject": str, "message": str}``, or ``None`` when neither format
        can be recognized.
    """
    text = response_text.strip()

    # Slice from the first "{" to the last "}" so surrounding ```json fences
    # or explanatory text do not break json.loads.
    obj_start = text.find("{")
    obj_end = text.rfind("}")
    if obj_start != -1 and obj_end > obj_start:
        try:
            parsed = json.loads(text[obj_start:obj_end + 1])
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            parsed = None
        if isinstance(parsed, dict) and "subject" in parsed and "message" in parsed:
            return {
                "subject": str(parsed["subject"]).strip(),
                "message": str(parsed["message"]).strip(),
            }

    # Fallback: "[Subject]: ... [Message]: ..." layout. str.find returns -1
    # (it never raises) when a marker is absent, so check explicitly.
    subject_tag = "[Subject]:"
    message_tag = "[Message]:"
    subject_pos = text.find(subject_tag)
    message_pos = text.find(message_tag)
    if subject_pos != -1 and message_pos > subject_pos:
        return {
            "subject": text[subject_pos + len(subject_tag):message_pos].strip(),
            "message": text[message_pos + len(message_tag):].strip(),
        }

    return None


def generate_outreach_message(homepage_content):
    """Generate a personalized outreach email from scraped homepage text.

    Sends a prompt containing ``homepage_content`` to the Gemini model named
    by ``MODEL_NAME`` and parses the reply.

    Args:
        homepage_content: Text scraped from the prospect's homepage.

    Returns:
        A dict with ``"subject"`` and ``"message"`` keys, or ``None`` when the
        API call fails, the model returns no text, or the reply cannot be
        parsed. Errors are printed rather than raised.
    """
    prompt = f"""
    You are crafting a personalized outreach message for a technical engineering leader. This leader is likely interested in evaluating or testing their LLM (Large Language Model) application.
    We, as the outreach team, provide a cutting-edge LLM evaluation and testing platform. We help teams ensure their LLMs are reliable, accurate, and safe before deploying them to production. We offer tools to measure various aspects of LLM performance, including accuracy, bias, and robustness. 

    Based on the following content from their homepage: 
    
    {homepage_content}

    Generate a personalized outreach message. This message should be under 200 words, with a tone that is casual and not too salesy. The goal is to spark their interest in learning more about our platform.
    The message should have a very clear structure:

    [Subject]:  A concise, compelling subject line (under 10 words)
    [Message]:  The body of the message that includes how our platform helps and references their homepage content.
    
    Return the [Subject] and [Message] as attributes in a JSON object.
    Example output: 
    {{
        "subject": "Evaluating Your LLM App?",
        "message": "Hi there, we noticed on your homepage..."
    }}
    """

    try:
        # Initialize the Generative AI client with your API key
        genai.configure(api_key=GEMINI_API_KEY)

        # Get the specified model
        model = genai.GenerativeModel(MODEL_NAME)

        # Generate content based on the prompt
        response: GenerateContentResponse = model.generate_content(prompt)

        if not (response and response.text):
            print("Error: No text generated by the model.")
            return None

        outreach = _parse_outreach_response(response.text)
        if outreach is None:
            print("Error: Could not extract Subject and Message. Ensure the model's output adheres to the specified format.")
        return outreach

    except Exception as e:
        # Best-effort boundary: one failed generation must not abort the
        # whole batch, so report the problem and signal failure with None.
        print(f"An error occurred: {e}")
        return None


# Step 3: Iterate over the rows and apply the functions
outreach_messages = []
outreach_subjects = []
for index, row in df.iterrows():
    domain = row['Company Domain']

    # Defaults recorded when the homepage could not be scraped
    outreach_subject = "N/A"
    outreach_message = "Could not fetch content or encountered error."

    # Step 3.1: Get homepage content
    homepage_content = get_homepage_content(domain)

    # Step 3.2: Generate personalized outreach message
    # Failures are reported as a string that *starts with* the marker; a
    # prefix check avoids skipping real pages that merely contain the phrase.
    if homepage_content and not homepage_content.startswith("Error fetching content"):
        outreach_content = generate_outreach_message(homepage_content)
        print(outreach_content)
        if outreach_content:
            outreach_subject = outreach_content["subject"]
            outreach_message = outreach_content["message"]
        else:
            # generate_outreach_message returns None on API or parsing
            # errors; record that instead of crashing and losing the batch.
            outreach_message = "Could not generate outreach message."

    # Step 3.3: Append the outreach subject and message
    outreach_subjects.append(outreach_subject)
    outreach_messages.append(outreach_message)

# Step 4: Add the outreach subjects and messages as new columns
df['outreach_message'] = outreach_messages
df['outreach_subject'] = outreach_subjects

# Step 5: Save the updated DataFrame to a new CSV file
df.to_csv('enriched_clay_contacts.csv', index=False)