In [2]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate

from dotenv import load_dotenv

# Read variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key, since OpenAI()
# is constructed further down without an explicit key - confirm.
load_dotenv()

True

In [27]:
# openai_api_key = '...'

### Load the HTML of the prospect's company page

In [3]:
def get_company_page(company_path):
    """Load a company's Y Combinator directory page as LangChain documents.

    Args:
        company_path: Path of the page relative to https://www.ycombinator.com,
            e.g. "/companies/gitlab". A missing leading slash is added, so
            "companies/gitlab" resolves to the same page.

    Returns:
        Whatever ``UnstructuredURLLoader.load()`` returns for that single URL.
    """
    # Without the separator the host and path would fuse into an invalid
    # hostname ("www.ycombinator.comcompanies/...").
    if not company_path.startswith("/"):
        company_path = f"/{company_path}"

    y_combinator_url = f"https://www.ycombinator.com{company_path}"

    # Echo the URL so the cell output shows which page was requested.
    print (y_combinator_url)

    loader = UnstructuredURLLoader(urls=[y_combinator_url])
    return loader.load()


# Fetch the YC directory page of the prospect we want to write to (GitLab here).
data = get_company_page("/companies/gitlab")

print ("You have {} document(s)".format(len(data)))

https://www.ycombinator.com/companies/gitlab
You have 1 document(s)


In [4]:
# Print the text extracted from the first loaded document to inspect what the loader captured.
print ("Preview of your data:\n\n{}".format(data[0].page_content))

Preview of your data:

Accelerator

About

Apply

FAQ

People

YC Blog

Companies

Startup Directory

Top Companies

Revenue

Valuation

Founder Directory

Launch YC

Startup Jobs

All Jobs

Engineering

Operations

Marketing

Sales

Career Coaching

Pioneer Program 2023

Startup Job Guide

YC Startup Jobs Blog

Upcoming Events

Find a Co-Founder

Library

Startup School

SAFE

Resources

Event Calendar

Newsletter

For Investors

Hacker News

Open main menu

Apply for W2024 batch.

Apply

Home

Companies

GitLab

GitLab

A complete DevOps platform delivered as a single application.

Y Combinator LogoW15

Public

Developer Tools

DevSecOps

Open Source

Company

Jobs

News

http://gitlab.com/

A complete DevOps platform delivered as a single application.

GitLab is the first single application for the entire DevOps lifecycle. Only GitLab enables Concurrent DevOps, unlocking organizations from the constraints of today’s toolchain. GitLab provides unmatched visibility, radical new levels

In [5]:
# Break the page text into chunks so a single request does not run into token limits.
# 800 is deliberately tiny, just for demonstration; chunks do not overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=0)

In [6]:
# Apply the splitter to the loaded page and report how many chunks came out.
docs = text_splitter.split_documents(data)

print ("You now have {} documents".format(len(docs)))

You now have 7 documents


### Write your custom prompt templates
These will be used for your specific use case:

1. `map_prompt` is the prompt run on each document chunk during the first (map) pass
2. `combine_prompt` is the prompt used to combine the outputs of the map pass into the final result

In [7]:
# Map-step prompt: summarizes one chunk of the page, keeping only material
# about the prospect. {text} is the chunk, {prospect} the company name.
map_prompt = """Below is a section of a website about {prospect}

Write a concise summary about {prospect}. If the information is not about {prospect}, exclude it from your summary.

{text}

% CONCISE SUMMARY:"""

map_prompt_template = PromptTemplate(
    input_variables=["text", "prospect"],
    template=map_prompt,
)

In [8]:
# Combine-step prompt: turns the per-chunk summaries ({text}) plus our own
# company details into the final outbound email.
combine_prompt = """
Your goal is to write a personalized outbound email from {sales_rep}, a sales rep at {company} to {prospect}.

A good email is personalized and combines information about the two companies on how they can help each other.
Be sure to use value selling: A sales methodology that focuses on how your product or service will provide value to the customer instead of focusing on price or solution.

% INFORMATION ABOUT {company}:
{company_information}

% INFORMATION ABOUT {prospect}:
{text}

% INCLUDE THE FOLLOWING PIECES IN YOUR RESPONSE:
- Start the email with the sentence: "We love that {prospect} helps teams..." then insert what they help teams do.
- The sentence: "We can help you do XYZ by ABC" Replace XYZ with what {prospect} does and ABC with what {company} does 
- A 1-2 sentence description about {company}, be brief
- End your email with a call-to-action such as asking them to set up time to talk more

% YOUR RESPONSE:
"""

combine_prompt_template = PromptTemplate(
    template=combine_prompt,
    input_variables=[
        "sales_rep",
        "company",
        "prospect",
        "text",
        "company_information",
    ],
)

In [9]:
# Selling points about our own company (RapidRoad). This text is passed to the
# chain as "company_information" and fills the {company_information}
# placeholder of combine_prompt.
company_information = """
* RapidRoad helps product teams build product faster
* We have a platform that allows product teams to talk more, exchange ideas, and listen to more customers
* Automated project tracking: RapidRoad could use machine learning algorithms to automatically track project progress, identify potential bottlenecks, and suggest ways to optimize workflows. This could help product teams stay on track and deliver faster results.
* Collaboration tools: RapidRoad could offer built-in collaboration tools, such as shared task lists, real-time messaging, and team calendars. This would make it easier for teams to communicate and work together, even if they are in different locations or time zones.
* Agile methodology support: RapidRoad could be specifically designed to support agile development methodologies, such as Scrum or Kanban. This could include features like sprint planning, backlog management, and burndown charts, which would help teams stay organized and focused on their goals.
"""

### LangChain Magic

In [10]:
# Completion model shared by both steps of the chain.
llm = OpenAI(temperature=0.7)

# map_reduce: map_prompt_template runs on every chunk, then
# combine_prompt_template runs over the collected map outputs.
# (verbose=True can be added to the call below while debugging.)
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=map_prompt_template,
    combine_prompt=combine_prompt_template,
)

In [11]:
# Run the chain for GitLab: the chunks go in as "input_documents"; the other
# keys fill the placeholders of the two prompt templates.
output = chain(
    {
        "input_documents": docs,  # the chunks created by the splitter earlier
        "company": "RapidRoad",
        "company_information": company_information,
        "sales_rep": "Greg",
        "prospect": "GitLab",
    }
)

In [12]:
# The generated email is stored under the 'output_text' key of the chain result.
print(output["output_text"])


Dear GitLab,

We love that GitLab helps teams collaborate on code, track and manage releases, and automate their workflow. At RapidRoad, we understand the importance of making sure product teams have the tools they need to build product faster and more effectively. We can help you do this by providing a platform that allows product teams to talk more, exchange ideas, and listen to more customers. Our platform also offers automated project tracking with machine learning algorithms, collaboration tools, and support for agile development methodologies. 

We believe that our platform could be a great addition to GitLab and help teams work more efficiently. We’d love to set up a call with you to discuss how our platform could help your teams with their DevOps workflows. 

Thank you for your time and I look forward to hearing from you soon.

Sincerely, 
Greg 
RapidRoad


### Rinse and Repeat: Loop Through Companies

In [13]:
import pandas as pd

In [14]:
# Prospects to contact, written as (display name, YC directory path) pairs so
# a name cannot drift out of step with its link.
_prospects = [
    ('Findly', '/companies/findly'),
    ('Maya Labs', '/companies/maya-labs'),
    ('Phind', '/companies/phind'),
    ('Juicebox', '/companies/juicebox'),
    ('Penguin AI', '/companies/penguin-ai'),
    ('Poly', '/companies/poly'),
    ('Olli.ai', '/companies/olli-ai'),
]

data = {
    'Name': [name for name, _link in _prospects],
    'Link': [link for _name, link in _prospects],
}

df_companies = pd.DataFrame(data)
# Alternative source: df_companies = pd.read_clipboard()

In [15]:
# Display the prospect table (Name and Link columns) before looping over it.
df_companies

Unnamed: 0,Name,Link
0,Findly,/companies/findly
1,Maya Labs,/companies/maya-labs
2,Phind,/companies/phind
3,Juicebox,/companies/juicebox
4,Penguin AI,/companies/penguin-ai
5,Poly,/companies/poly
6,Olli.ai,/companies/olli-ai


In [16]:
# Generate one outbound email per row of df_companies, reusing
# get_company_page, text_splitter, chain and company_information from the
# cells above. Each iteration overwrites `docs` and `output`.
#
# The printed numbering comes from enumerate rather than the DataFrame index
# label, so it stays 1, 2, 3... even when the frame has a non-default index
# (filtered rows, string labels, data pasted via read_clipboard).
for position, (_index, company) in enumerate(df_companies.iterrows(), start=1):
    print (f"{position}. {company['Name']}")
    page_data = get_company_page(company['Link'])
    docs = text_splitter.split_documents(page_data)

    output = chain({
        "input_documents": docs,
        "company": "RapidRoad",
        "sales_rep": "Greg",
        "prospect": company['Name'],
        "company_information": company_information,
    })

    print (output['output_text'])
    print ("\n\n")

1. Findly
https://www.ycombinator.com/companies/findly
Dear Findly, 

We love that Findly helps teams to gain insights, request reports, and generate visualizations from their data. We can help you further simplify complex data analysis, generate summaries quickly, and support scheduled chats and action-triggered automations to enhance autonomy and efficiency by providing our platform that allows product teams to talk more, exchange ideas, and listen to more customers. 

RapidRoad is a platform that helps product teams build product faster. We offer automated project tracking, collaboration tools, and agile methodology support to help teams stay organized and focused on their goals. 

We would love to discuss how our platform could benefit Findly and help you reach your key performance indicators faster and more efficiently. Let's set up a time to chat more. 

Best, 
Greg



2. Maya Labs
https://www.ycombinator.com/companies/maya-labs

Dear Maya Labs, 

We love that Maya Labs helps tea