In [None]:
import os

import magic
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Both OpenAI imports bind the same name and the later one wins, so the
# maintained langchain_openai class is deliberately imported last.
from langchain_openai import OpenAI

In [None]:
# Take the key from the OPENAI_API_KEY environment variable so the secret is
# never saved inside the notebook. The '...' placeholder is kept only as a
# fallback for when the variable is unset; it is not a usable key.
openai_api_key = os.environ.get("OPENAI_API_KEY", '...')

In [None]:
def get_company_page(company_path):
    """Load the Y Combinator page for a single company.

    Args:
        company_path: Site-relative path of the company page, for example
            "/companies/poly". Surrounding whitespace is ignored and a
            missing leading slash is added, so "companies/poly" resolves to
            the same URL.

    Returns:
        The result of ``UnstructuredURLLoader(urls=[url]).load()`` for the
        one URL that was built (the rest of the notebook treats it as a
        collection of documents).
    """
    # Normalize to exactly one leading slash; without this a path such as
    # "companies/poly" would be glued straight onto the host name.
    normalized_path = "/" + str(company_path).strip().lstrip("/")
    y_combinator_url = f"https://www.ycombinator.com{normalized_path}"

    # Echo the URL so the notebook output shows which page is being fetched.
    print(y_combinator_url)

    loader = UnstructuredURLLoader(urls=[y_combinator_url])
    return loader.load()


# Fetch the page of one example company to check that loading works
data = get_company_page("/companies/poly")

print("You have {} document(s)".format(len(data)))

In [None]:
# Chunk the loaded page so later model calls stay under the token limit.
# The chunk size of 600 is intentionally tiny, just for demonstration, and
# chunks are configured with no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=0)
docs = text_splitter.split_documents(data)
len(docs)

In [None]:
# Prompt passed as `map_prompt` to the summarize chain: it asks for a short
# summary of the {text} it is given, restricted to facts about {prospect}.
map_prompt = """Below is a section of a website about {prospect}

Write a concise summary about {prospect}. If the information is not about {prospect}, exclude it from your summary.

{text}

% CONCISE SUMMARY:"""
map_prompt_template = PromptTemplate(
    input_variables=["text", "prospect"],
    template=map_prompt,
)

# Prompt passed as `combine_prompt` to the summarize chain: it turns the
# prospect information in {text} plus {company_information} into the outbound
# email, written on behalf of {sales_rep} at {company}.
combine_prompt = """
Your goal is to write a personalized outbound email from {sales_rep}, a sales rep at {company} to {prospect}.

A good email is personalized and combines information about the two companies on how they can help each other.
Be sure to use value selling: A sales methodology that focuses on how your product or service will provide value to the customer instead of focusing on price or solution.

% INFORMATION ABOUT {company}:
{company_information}

% INFORMATION ABOUT {prospect}:
{text}

% INCLUDE THE FOLLOWING PIECES IN YOUR RESPONSE:
- Start the email with the sentence: "We love that {prospect} helps teams..." then insert what they help teams do.
- The sentence: "We can help you do XYZ by ABC" Replace XYZ with what {prospect} does and ABC with what {company} does 
- A 1-2 sentence description about {company}, be brief
- End your email with a call-to-action such as asking them to set up time to talk more

% YOUR RESPONSE:
"""
combine_prompt_template = PromptTemplate(
    input_variables=[
        "sales_rep",
        "company",
        "prospect",
        "text",
        "company_information",
    ],
    template=combine_prompt,
)

# Description of the selling company, one bullet per line. The empty first and
# last entries reproduce the leading and trailing newline of the text block
# that is substituted into the combine prompt.
company_information = "\n".join([
    "",
    "* Tablingos transforms data management for businesses seeking to enhance data integrity and usability:",
    "* Comprehensive Data Handling: Tablingos provides a sophisticated platform that empowers businesses to improve the quality and readiness of their data for various applications. It streamlines data validation and transformation through customizable and pre-set rules.",
    "* Advanced Data Management Features: The platform is equipped with dual portals designed for efficient administration and customer data handling, ensuring a seamless data management process.",
    "* Automated Data Processing Monitoring: With real-time notifications and detailed reporting, Tablingos offers a way to closely monitor the progress of data onboarding and processing, helping businesses to stay informed about their data’s status.",
    "* Collaboration and Project Management: Tablingos encourages collaborative efforts through the creation of multiple, segregated workspaces. This feature allows teams to manage projects more effectively, fostering a collaborative environment even in remote or distributed teams.",
    "* Dedication to Clean Data Initiatives: The platform's focus on promoting clean data initiatives underlines its commitment to supporting businesses in leveraging their data more effectively. This approach aims to ensure data cleanliness and reliability, which is crucial for informed decision-making and operational efficiency.",
    "",
])

In [None]:
# Model shared by every step of the chain, using the API key configured earlier.
llm = OpenAI(openai_api_key=openai_api_key, temperature=0.6)

# Summarize chain in "map_reduce" mode, wired to the two prompt templates
# defined above (one for the map step, one for the combine step).
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=map_prompt_template,
    combine_prompt=combine_prompt_template,
    verbose=False,
)

In [None]:
import pandas as pd

# The prospect list comes from the system clipboard: copy the table (header
# row included) before running this cell. Note that the result therefore
# depends on whatever is on the clipboard at run time.
df_companies = pd.read_clipboard()

# The outreach loop reads company['Name'] and company['Link']; fail here with
# a clear message instead of a KeyError part-way through the run.
missing_columns = {"Name", "Link"} - set(df_companies.columns)
if missing_columns:
    raise ValueError(
        f"Clipboard table is missing required column(s): {sorted(missing_columns)}"
    )

df_companies

In [None]:
# Write one personalized outbound email per company in df_companies.
# enumerate() provides the 1-based counter, so the numbering no longer relies
# on the DataFrame index being a 0-based integer range.
for i, (_row_label, company) in enumerate(df_companies.iterrows(), start=1):
    print(f"{i}. {company['Name']}")
    page_data = get_company_page(company['Link'])
    docs = text_splitter.split_documents(page_data)

    # No chunks means there is no prospect content to summarize; skip the
    # company rather than asking the model to write from an empty page.
    if not docs:
        print("No page content found, skipping.")
        print("\n\n")
        continue

    # invoke() is the supported entry point; calling the chain object directly
    # is deprecated. The keys must match the prompt templates' input variables.
    output = chain.invoke({
        "input_documents": docs,
        "company": "Tablingos",
        "sales_rep": "Ibrahim",
        "prospect": company['Name'],
        "company_information": company_information,
    })

    print(output['output_text'])
    print("\n\n")