# LinkedIn Influencer Project Experiment

### 1. Scrape LinkedIn Posts: Scrape AI Training Data from a LinkedIn Profile for Post Personalisation

In [None]:
import os

In [None]:
%pwd

In [None]:
# Move the working directory up one level. The path is relative to the current
# directory, so running this cell again moves up one more level.
os.chdir("../")

In [None]:
%pwd

In [None]:
# !pip install

1.1 LinkedIn Scraper Tool

In [None]:
import os
import time

from dotenv import load_dotenv
from crewai_tools import tool

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup

from pydantic.v1 import BaseModel, Field

# Load environment variables from .env file
load_dotenv()

In [None]:
# class Config:
#     """
#     A configuration class that fetches environment variables.
#     """
#     OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
#     MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
#     SERPER_API_KEY = os.getenv("SERPER_API_KEY")
#     LINKEDIN_EMAIL = os.getenv("LINKEDIN_EMAIL")
#     LINKEDIN_PASSWORD = os.getenv("LINKEDIN_PASSWORD")
#     LINKEDIN_PROFILE_NAME = os.getenv("LINKEDIN_PROFILE_NAME")
    
#     ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")

# Load the project's Config class (used instead of the commented-out inline version above).
from config.configuration import Config

# NOTE(review): the cells below read settings from the Config class itself (Config.X);
# this instance does not appear to be used in the visible cells — confirm it is needed.
config = Config()


In [None]:
# Test if environment variables are loaded correctly.
# Only report whether each value is present: notebook cell output is stored
# together with the notebook, so echoing API keys or the LinkedIn password
# would leak them to anyone who can read the file.
for _setting in (
    "OPENAI_API_KEY",
    "MISTRAL_API_KEY",
    "SERPER_API_KEY",
    "LINKEDIN_EMAIL",
    "LINKEDIN_PASSWORD",
    "LINKEDIN_PROFILE_NAME",
    "ACCESS_TOKEN",
):
    # getattr with a default so an undeclared setting is reported as missing
    # instead of aborting the whole check.
    _status = "set" if getattr(Config, _setting, None) else "MISSING"
    print(f"{_setting}:", _status)

In [None]:
def parse_html_content(page_source: str):
    """
    Build a soup from a LinkedIn page's HTML and return its post containers.

    Only ``div`` elements carrying the ``feed-shared-update-v2`` class whose
    ``data-urn`` attribute contains ``activity`` are returned.
    """
    soup = BeautifulSoup(page_source.encode("utf-8"), "lxml")
    post_containers = []
    for candidate in soup.find_all("div", {"class": "feed-shared-update-v2"}):
        # Elements without a data-urn attribute fall back to '' and are dropped.
        urn = candidate.get('data-urn', '')
        if 'activity' in urn:
            post_containers.append(candidate)
    return post_containers

def get_post_content(container, selector, attributes):
    """
    Return the stripped text of the first element in ``container`` that
    matches ``selector`` and ``attributes``.

    An empty string is returned when nothing matches or when the lookup
    raises; in the latter case the error is printed rather than propagated.
    """
    text = ""
    try:
        match = container.find(selector, attributes)
        text = match.text.strip() if match else ""
    except Exception as e:
        # Best-effort extraction: report the problem and fall back to "".
        print(f"Error extracting post content: {e}")
    return text

def get_linkedin_posts(page_source: str):
    """
    Return the text of every post found in a LinkedIn page's HTML.

    The post containers come from parse_html_content; for each one the text
    of its ``update-components-text`` div is collected via get_post_content
    (which yields an empty string when that div is absent).
    """
    return [
        get_post_content(container, "div", {"class": "update-components-text"})
        for container in parse_html_content(page_source)
    ]


# from src.linkedIn_agent.utils.common import parse_html_content, get_post_content, get_linkedin_posts

In [None]:
class LinkedinToolException(Exception):
    """
    Raised when the environment variables the LinkedIn scraper needs are not set.

    Args:
        message: Optional error text. It defaults to the message about
            LINKEDIN_EMAIL and LINKEDIN_PASSWORD, so ``LinkedinToolException()``
            behaves as before.
    """
    def __init__(self, message="You need to set the LINKEDIN_EMAIL and LINKEDIN_PASSWORD env variables"):
        super().__init__(message)


def scrape_linkedin_posts_fn() -> str:
    """
    Log into LinkedIn, open a profile's recent activity and return its posts.

    Credentials and the target profile are read from the LINKEDIN_EMAIL,
    LINKEDIN_PASSWORD and LINKEDIN_PROFILE_NAME environment variables.

    Returns:
        The string representation of a list with at most five post texts.

    Raises:
        LinkedinToolException: If any of the three environment variables is
            missing or empty. The check runs before a browser is started.
    """
    linkedin_username = os.environ.get("LINKEDIN_EMAIL")
    linkedin_password = os.environ.get("LINKEDIN_PASSWORD")
    linkedin_profile_name = os.environ.get("LINKEDIN_PROFILE_NAME")

    if not (linkedin_username and linkedin_password):
        raise LinkedinToolException()
    if not linkedin_profile_name:
        # Without a profile name the activity URL below would be built as
        # ".../in/None/recent-activity/all/".
        raise LinkedinToolException("You need to set the LINKEDIN_PROFILE_NAME env variable")

    # Initialize WebDriver (make sure chromedriver is installed and in PATH)
    browser = webdriver.Chrome()
    try:
        browser.get("https://www.linkedin.com/login")

        # Perform login
        username_input = browser.find_element("id", "username")
        password_input = browser.find_element("id", "password")
        username_input.send_keys(linkedin_username)
        password_input.send_keys(linkedin_password)
        password_input.send_keys(Keys.RETURN)

        # Wait for page to load
        time.sleep(3)

        # Navigate to the profile's "Recent Activity"
        browser.get(f"https://www.linkedin.com/in/{linkedin_profile_name}/recent-activity/all/")

        # Scroll to load more posts
        for _ in range(2):
            browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

        # Extract posts
        posts = get_linkedin_posts(browser.page_source)
    finally:
        # Always close the browser, even if login, navigation or parsing raised,
        # so no Chrome/chromedriver process is left behind.
        browser.quit()
    return str(posts[:5])

# Thin wrapper that registers scrape_linkedin_posts_fn as a tool named
# "ScrapeLinkedinPosts" so an agent can call it.
# NOTE(review): the @tool decorator presumably uses the docstring below as the
# tool description shown to the agent — confirm before rewording it.
# NOTE(review): a later cell imports `scrape_linkedin_posts_tool` from
# src.linkedIn_agent.tools.linkedIn_scraper, which rebinds this name; confirm
# which of the two definitions the agent is meant to use.
@tool("ScrapeLinkedinPosts")
def scrape_linkedin_posts_tool() -> str:
    """
    A tool that can be used to scrape LinkedIn posts.
    """
    return scrape_linkedin_posts_fn()


# from src.linkedIn_agent.tools.linkedIn_scraper import scrape_linkedin_posts_fn, scrape_linkedin_posts_tool


In [None]:
# posts = scrape_linkedin_posts_fn()

In [None]:
# posts

In [None]:
import openai
from textwrap import dedent
from crewai import Task
from crewai import Agent
from crewai_tools import ScrapeWebsiteTool, SerperDevTool
from src.linkedIn_agent.tools.linkedIn_scraper import scrape_linkedin_posts_tool

# LangChain chat-model wrappers for the Mistral and OpenAI APIs:
from langchain_mistralai import ChatMistralAI
from langchain_openai import ChatOpenAI

# Get LLM API Keys from Config
# openai_key = Config.OPENAI_API_KEY
# mistral_key = Config.MISTRAL_API_KEY

# Both chat models take their API key from Config so that credentials are
# read from a single place (previously the Mistral key came from os.environ).
openai_llm = ChatOpenAI(api_key=Config.OPENAI_API_KEY, model="gpt-3.5-turbo-0125")
mistral_llm = ChatMistralAI(
    api_key=Config.MISTRAL_API_KEY,
    model="mistral-large-latest",
    streaming=False,
)

# Initialize other tools
scrape_website_tool = ScrapeWebsiteTool()
search_tool = SerperDevTool()

# Agent that fetches posts from a LinkedIn profile with the scraping tool
linkedin_scraper_agent = Agent(
    role="LinkedIn Post Scraper",
    goal="Your goal is to scrape a LinkedIn profile to get a list of posts from the given profile",
    backstory=dedent(
        """
        You are an experienced programmer who excels at web scraping.
        """
    ),
    tools=[scrape_linkedin_posts_tool],
    llm=openai_llm,
    allow_delegation=False,
    verbose=True,
)

# Task carried out by the scraper agent
scrape_linkedin_task = Task(
    description="Scrape a LinkedIn profile to get some relevant posts",
    expected_output="A list of LinkedIn posts obtained from a LinkedIn profile",
    agent=linkedin_scraper_agent,
)

# Agent that searches the web and scrapes pages about the post's topic
web_researcher_agent = Agent(
    role="Web Researcher",
    goal="Your goal is to search for relevant content about the comparison between Llama 2 and Llama 3",
    backstory=dedent(
        """
        You are proficient at searching for specific topics on the web, 
        selecting those that provide more value and information.
        """
    ),
    tools=[scrape_website_tool, search_tool],
    llm=openai_llm,
    allow_delegation=False,
    verbose=True,
)

# Task carried out by the web researcher agent
web_research_task = Task(
    description="Get valuable and high quality web information about the comparison between Llama 2 and Llama 3",
    expected_output="Your task is to gather high quality information about the comparison between Llama 2 and Llama 3",
    agent=web_researcher_agent,
)

# Define the doppelganger agent: it writes the final post and has no tools,
# relying only on the output of the tasks given to it as context.
doppelganger_agent = Agent(
    role="LinkedIn Post Creator",
    goal="You will create a LinkedIn post comparing Llama 2 and Llama 3 following the writing style observed in the LinkedIn posts scraped by the LinkedIn Post Scraper.",
    backstory=dedent(
        """
        You are an expert in writing LinkedIn posts replicating any influencer style.
        """
    ),
    verbose=True,
    allow_delegation=False,
    llm=openai_llm
)

# Define the final post creation task.
# The scraping and research tasks are passed as context at construction time
# (instead of assigning `.context` afterwards) so the Task is fully configured
# and validated when it is created.
create_linkedin_post_task = Task(
    description=(
        "Create a LinkedIn post comparing Llama 2 and Llama 3 following the writing-style expressed in the scraped LinkedIn posts."
    ),
    expected_output=(
        "A high-quality and engaging LinkedIn post comparing Llama 2 and Llama 3. "
        "The LinkedIn post must follow the same writing-style as the one expressed in the scraped LinkedIn posts."
    ),
    agent=doppelganger_agent,
    context=[scrape_linkedin_task, web_research_task],
)


In [None]:
from crewai import Crew

# Assemble the crew from the three agents and their tasks, in definition order
crew = Crew(
    agents=[linkedin_scraper_agent, web_researcher_agent, doppelganger_agent],
    tasks=[scrape_linkedin_task, web_research_task, create_linkedin_post_task],
)

# Run the crew and keep whatever kickoff() returns
result = crew.kickoff()

In [None]:
# Show the crew's output under a short header line
print("Here is the result: ", result, sep="\n")